1#! /usr/bin/python
2
3# generate the linzer-feig multiply-add idct for ia64
4# (c) 2002 Christian Schwarz <schwarz@ira.uka.de>,
5#          Haiko Gaisser <haiko@gaisser.de>,
6#          Sebastian Hack <mail@s-hack.de>
7
8
9import math
10
# Row permutations applied to one column by column_idct(): pre_shuffle
# before the butterfly stages, post_shuffle after them.
pre_shuffle = [0, 4, 2, 6, 1, 7, 3, 5]
post_shuffle = [0, 1, 6, 3, 7, 2, 5, 4]

# Number of named constants (c0 .. c15) emitted by gen_consts().
constants = 16
# FP register numbers that gen_consts() binds to those constant names.
float_scratch = list(range(32, 32 + constants))
# First FP register number above the constant registers; main() numbers the
# matrix registers upwards from here.
regbase = max(float_scratch) + 1
# First integer register number used by gen_load() and gen_store().
intregbase = 33
18
def print_matrix(matrix,s=''):
    """Dump the register matrix as assembler comments.

    matrix is read as 8 rows of 4 entries (index row*4 + column).  When s is
    non-empty, a blank line and a '// s' caption are emitted first.

    Each row is built as one string and printed with a single-argument
    print(...), which behaves the same as a Python 2 print statement and as
    the Python 3 print function.  The double space after '//' and the
    trailing space reproduce what the former comma-terminated Python 2
    prints produced.
    """
    if s != '':
        print("\n\t// %s" % s)
    for row in range(8):
        cells = ["%2d" % matrix[row * 4 + col] for col in range(4)]
        print("\t//  " + " ".join(cells) + " ")
27
def exchange_elements(list, a, b):
    """Swap, in place, the entries of the sequence at positions a and b.

    Note: the first parameter is named `list` and therefore hides the builtin
    inside this function; the name is kept because it is part of the
    signature.
    """
    held_a = list[a]
    held_b = list[b]
    list[a] = held_b
    list[b] = held_a
32
def alloc_regs(matrix, n):
    """Return the n smallest register numbers >= regbase not used by matrix.

    matrix -- sequence of register numbers currently in use
    n      -- how many free registers are wanted (n <= 0 yields [])

    The result is in ascending order and contains no duplicates.  The matrix
    itself is not modified.
    """
    # One set lookup per candidate instead of rescanning the matrix list and
    # restarting from regbase for every register handed out.
    in_use = set(matrix)
    regs = []
    candidate = regbase
    while len(regs) < n:
        if candidate not in in_use:
            regs.append(candidate)
        candidate += 1
    return regs
44
def transpose_2x2_submatrix(matrix, i, j):
    """Exchange the 2x2 block at block position (i, j) with the one at (j, i).

    The matrix is addressed here in strides of 8: entry i*8 + j and the entry
    4 places after it make up block (i, j).  Both entries are swapped with
    their counterparts at j*8 + i and j*8 + i + 4.  The matrix is modified in
    place.
    """
    for half in (0, 4):
        here = i * 8 + j + half
        there = j * 8 + i + half
        matrix[here], matrix[there] = matrix[there], matrix[here]
58
59
def transpose(matrix):
    """Emit the transpose code and rename registers in matrix accordingly.

    For every (even row, odd row) pair and every column this emits
      * an fmix.r whose result goes to one of 16 freshly allocated registers,
      * an fmix.l whose result overwrites the even row's register,
    with a stop (';;') after each group.  Afterwards the matrix is updated in
    place: each odd-row entry is replaced by the fresh register that received
    its fmix.r result, and the off-diagonal 2x2 blocks are exchanged purely by
    renaming (no code is emitted for that step).

    Output uses single-argument print(...), which is valid both as a Python 2
    print statement and as the Python 3 print function.
    """
    fresh = alloc_regs(matrix, 16)

    # Matrix indices of all odd-row entries, in row-major order; the entry
    # directly above each one (index - 4) is its even-row partner.
    odd_slots = [row * 4 + col for row in range(1, 8, 2) for col in range(4)]

    for dest, slot in zip(fresh, odd_slots):
        print('\tfmix.r  f%d = f%d, f%d' % (dest, matrix[slot - 4], matrix[slot]))

    print('\t;;')

    for slot in odd_slots:
        upper = matrix[slot - 4]
        print('\tfmix.l  f%d = f%d, f%d' % (upper, upper, matrix[slot]))

    print('\t;;')

    # first stage: the odd rows now live in the freshly allocated registers
    for dest, slot in zip(fresh, odd_slots):
        matrix[slot] = dest

    # second stage: exchange the 2x2 blocks by renaming the registers
    for i in range(0, 4):
        for j in range(i + 1, 4):
            transpose_2x2_submatrix(matrix, i, j)
95
96#    print ''
97#    print_matrix(matrix)
98#    print "transpose"
99#    print_matrix(matrix)
100
101# register renaming for 8 regs containing a column
def shuffle_column(matrix, col, permutation):
    """Permute the eight registers of column col by renaming, in place.

    After the call, row r of the column holds what row permutation[r] held
    before it.  Other columns are left untouched.
    """
    before = [matrix[col + 4 * row] for row in range(8)]
    for row in range(8):
        matrix[col + 4 * row] = before[permutation[row]]
108
def butterfly(matrix, col, i, j, c1, c2):
    """Emit one butterfly on rows i and j of column col and rename row i.

    matrix -- register matrix, indexed as row*4 + col; updated in place
    col    -- column the butterfly operates on
    i, j   -- the two rows taking part
    c1, c2 -- names of the constants used by the fpma and the fpnma

    Emitted code: a descriptive comment, an fpma whose result goes to a newly
    allocated register, an fpnma whose result overwrites row j's register,
    and a stop.  Afterwards row i of the column refers to the new register;
    row j keeps its register.

    Output uses single-argument print(...), which is valid both as a Python 2
    print statement and as the Python 3 print function.
    """
    ri = matrix[i * 4 + col]
    rj = matrix[j * 4 + col]
    dest = alloc_regs(matrix, 1)[0]

    print('\t// (f%d, f%d) = (f%d, f%d) $ (%s, %s), (line %d, %d)'
          % (dest, rj, ri, rj, c1, c2, i, j))
    print('\tfpma    f%d = f%d, %s, f%d' % (dest, rj, c1, ri))
    print('\tfpnma   f%d = f%d, %s, f%d' % (rj, rj, c2, ri))
    print('\t;;')

    # Row i's value now lives in the freshly allocated register.
    matrix[i * 4 + col] = dest
123
124
def column_idct(matrix, col):
    """Emit the butterfly network for one column of the register matrix.

    The column is first renamed with pre_shuffle, then four groups of
    butterflies are emitted with an extra stop (';;') between consecutive
    groups, and finally the column is renamed with post_shuffle.  The whole
    matrix is dumped as assembler comments before and after each shuffle.

    Output uses single-argument print(...), which is valid both as a Python 2
    print statement and as the Python 3 print function.
    """
    # Each entry is (row i, row j, constant for fpma, constant for fpnma);
    # order within and across groups is the emission order.
    stages = (
        ((0, 1, 'c0', 'c0'), (2, 3, 'c1', 'c2'),
         (4, 5, 'c3', 'c4'), (6, 7, 'c5', 'c6')),
        ((0, 3, 'c7', 'c7'), (1, 2, 'c8', 'c8'),
         (4, 6, 'c9', 'c9'), (5, 7, 'c10', 'c10')),
        ((5, 6, 'c11', 'c11'), (0, 4, 'c12', 'c12'), (3, 7, 'c14', 'c14')),
        ((1, 5, 'c13', 'c13'), (2, 6, 'c13', 'c13')),
    )

    print_matrix(matrix, "before pre shuffle")
    shuffle_column(matrix, col, pre_shuffle)
    print_matrix(matrix, "after pre shuffle")

    for number, stage in enumerate(stages):
        if number > 0:
            # separator between groups; none after the last group
            print('\t;;')
        for (i, j, c1, c2) in stage:
            butterfly(matrix, col, i, j, c1, c2)

    print_matrix(matrix, "before post shuffle")
    shuffle_column(matrix, col, post_shuffle)
    print_matrix(matrix, "after post shuffle")
151
def gen_idct(matrix):
    """Emit the complete IDCT: two identical passes over the matrix.

    Each pass emits an in-place fpma (with c0 and f0) on each of the first
    four matrix entries, runs column_idct() on columns 0..3 and then calls
    transpose().  The matrix is renamed in place by those helpers.

    Output uses single-argument print(...), which is valid both as a Python 2
    print statement and as the Python 3 print function.
    """
    for _ in range(2):
        for reg in matrix[0:4]:
            print('\tfpma    f%d = f%d, c0, f0' % (reg, reg))
        print('\t;;')
        for col in range(4):
            column_idct(matrix, col)
        print('\t;;')
        transpose(matrix)
163
def gen_consts():
    """Emit the symbolic register names and the constant data section.

    Output, in order:
      * the aliases addreg1/addreg2,
      * one 'cN = fM' alias per constant, using the register numbers in
        float_scratch,
      * a '.sdata' section holding every constant twice in a '.single'
        directive under the label '.data_cN', with '.align 16' before every
        even-numbered constant,
      * an empty line.

    float_scratch is only read (indexed), not consumed, so calling this
    function more than once emits the same text each time.

    Output uses single-argument print(...), which is valid both as a Python 2
    print statement and as the Python 3 print function.
    """
    print('addreg1 = r14')
    print('addreg2 = r15')

    for i in range(constants):
        print('c%d = f%d' % (i, float_scratch[i]))

    sqrt2 = math.sqrt(2.0)

    # tan/sin/cos of k*pi/16 for k = 0..4
    t = []
    s = []
    c = []
    for k in range(5):
        angle = k * math.pi / 16
        t.append(math.tan(angle))
        s.append(math.sin(angle))
        c.append(math.cos(angle))

    consts = [
        1.0 / (2.0 * sqrt2),    # c0
        -1 / t[2],              # c1
        -t[2],                  # c2
        t[1],                   # c3
        1 / t[1],               # c4
        t[3],                   # c5
        1 / t[3],               # c6
        0.5 * c[2],             # c7
        0.5 * s[2],             # c8
        c[3] / c[1],            # c9
        s[3] / s[1],            # c10
        c[1] / s[1],            # c11
        0.5 * c[1],             # c12
        0.5 * s[1] * c[4],      # c13
        0.5 * s[1],             # c14
        1.0,                    # c15
    ]

    print('.sdata')
    for i in range(constants):
        if i % 2 == 0:
            print('.align 16')
        print('.data_c%d:' % i)
        print('.single %.30f, %.30f' % (consts[i], consts[i]))
    print('')
205
def gen_load(matrix):
    """Emit the code that loads 64 values and packs them into 32 FP registers.

    Emitted sequence:
      1. 32 pairs of ld2 (one through addreg1, one through addreg2, each
         post-incremented by 4) into integer registers intregbase ..
         intregbase+63, with a stop after every pair,
      2. sxt2 on each of those integer registers,
      3. setf.sig into FP registers regbase .. regbase+63,
      4. fcvt.xf on each of those FP registers,
      5. fpack of FP registers regbase+2k and regbase+2k+1 into regbase+k for
         k = 0..31, with a stop after every fpack.

    matrix is not used: the destination registers are always regbase ..
    regbase+31, which is the numbering main() gives the matrix.

    Output uses single-argument print(...), which is valid both as a Python 2
    print statement and as the Python 3 print function.
    """
    for i in range(0, 64, 2):
        print('\tld2  r%d = [addreg1], 4' % (intregbase + i))
        print('\tld2  r%d = [addreg2], 4' % (intregbase + i + 1))
        print('\t;;')

    for k in range(64):
        print('\tsxt2  r%d = r%d' % (intregbase + k, intregbase + k))
    print('\t;;')

    for k in range(64):
        print('\tsetf.sig  f%d = r%d' % (regbase + k, intregbase + k))
    print('\t;;')

    for k in range(64):
        print('\tfcvt.xf  f%d = f%d' % (regbase + k, regbase + k))
    print('\t;;')

    for k in range(32):
        print('\tfpack    f%d = f%d, f%d'
              % (regbase + k, regbase + 2 * k, regbase + 2 * k + 1))
        print('\t;;')
250
def gen_store(matrix):
    """Emit the code that converts the result registers and stores them.

    matrix -- FP register numbers holding the result, in storage order

    Emitted sequence:
      1. addreg1 = in0 and addreg2 = in0 + 4,
      2. fpcvt.fx in place on every matrix register,
      3. getf.sig of matrix[k] into integer register intregbase+k,
      4. shl, pack4.sss, pshr2 and mux2, each applied in place to every one
         of those integer registers, with a stop after each kind,
      5. st4 of the integer registers in pairs (first through addreg1, second
         through addreg2, each post-incremented by 8), with a stop after
         every pair.

    The stop in step 5 belongs inside the loop: the original line was
    indented with a tab, which Python 2 expands to the loop body's column.
    It is now indented with spaces so the nesting is unambiguous and the
    block is also accepted by Python 3.

    Output uses single-argument print(...), which is valid both as a Python 2
    print statement and as the Python 3 print function.
    """
    count = len(matrix)

    print('\tmov   addreg1 = in0')
    print('\tadd   addreg2 = 4, in0')
    print('\t;;')

    for k in range(count):
        print('\tfpcvt.fx f%d = f%d' % (matrix[k], matrix[k]))
    print('\t;;')

    for k in range(count):
        print('\tgetf.sig r%d = f%d' % (intregbase + k, matrix[k]))
    print('\t;;')

    # In-place integer operations, each applied to all registers in turn.
    in_place_ops = (
        '\tshl      r%d = r%d, 7',
        '\tpack4.sss r%d = r%d, r0',
        '\tpshr2    r%d = r%d, 7',
        '\tmux2     r%d = r%d, 0xe1',
    )
    for template in in_place_ops:
        for k in range(count):
            print(template % (intregbase + k, intregbase + k))
        print('\t;;')

    for k in range(0, count, 2):
        print('\tst4   [addreg1] = r%d, 8' % (intregbase + k))
        print('\tst4   [addreg2] = r%d, 8' % (intregbase + k + 1))
        print('\t;;')
284
def main():
    """Write the complete assembler source to standard output.

    Order of the output: constant definitions, the '.text' prologue with an
    idct_ia64_init procedure that just returns, the header of idct_ia64, the
    contents of 'idct_init.s', the generated load / IDCT / store code (with a
    dump of the final register matrix as comments before the store code), the
    contents of 'idct_fini.s' and the closing '.endp'.

    'idct_init.s' and 'idct_fini.s' are read from the current directory;
    open() raises IOError if either is missing.  Both files are opened with
    a with-statement so the handles are closed even if reading or printing
    fails.

    Output uses single-argument print(...), which is valid both as a Python 2
    print statement and as the Python 3 print function.
    """
    gen_consts()

    prologue = (
        '.text',
        '.global idct_ia64',
        '.global idct_ia64_init',
        '.align 16',
        '.proc idct_ia64_init',
        'idct_ia64_init:',
        'br.ret.sptk.few b0',
        '.endp',
        '.align 16',
        '.proc idct_ia64',
        'idct_ia64:',
    )
    for line in prologue:
        print(line)

    with open('idct_init.s') as f:
        print(f.read())

    # 32 matrix registers, numbered consecutively from regbase.
    matrix = [regbase + i for i in range(32)]

    gen_load(matrix)
    gen_idct(matrix)
    print_matrix(matrix)
    gen_store(matrix)

    with open('idct_fini.s') as f:
        print(f.read())

    print('.endp')


if __name__ == "__main__":
    main()
324