xref: /qemu/target/sparc/vis_helper.c (revision ca61e750)
1 /*
2  * VIS op helpers
3  *
4  *  Copyright (c) 2003-2005 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #include "qemu/osdep.h"
21 #include "cpu.h"
22 #include "exec/helper-proto.h"
23 
/*
 * Extract bits [FROM, TO] of X, numbering bit 0 as the MOST significant
 * bit of the 64-bit value (i.e. non-native, big-endian bit order).
 */
#define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/*
 * Extract bits [FROM, TO] of X using the numbering of the SPARC
 * manuals, i.e. bit 0 is the least significant bit (2^0).
 */
#define GET_FIELD_SP(X, FROM, TO)               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))
31 
32 target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
33 {
34     return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
35         (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
36         (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
37         (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
38         (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
39         (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
40         (((pixel_addr >> 55) & 1) << 4) |
41         (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
42         GET_FIELD_SP(pixel_addr, 11, 12);
43 }
44 
/*
 * Lane accessors for the VIS64/VIS32 unions below.  Lane n always
 * refers to the lane holding bits [8n+7:8n] (bytes), [16n+15:16n]
 * (halfwords) or [32n+31:32n] (words) of the value, regardless of
 * host byte order: on big-endian hosts the array index is mirrored.
 */
#if HOST_BIG_ENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif
60 
/* A 64-bit VIS value, viewable as lanes of several widths. */
typedef union {
    uint8_t b[8];   /* eight 8-bit lanes */
    uint16_t w[4];  /* four unsigned 16-bit lanes */
    int16_t sw[4];  /* four signed 16-bit lanes */
    uint32_t l[2];  /* two 32-bit lanes */
    uint64_t ll;    /* the whole 64-bit value */
    float64 d;      /* softfloat view of the same bits */
} VIS64;
69 
/* A 32-bit VIS value, viewable as lanes of several widths. */
typedef union {
    uint8_t b[4];   /* four 8-bit lanes */
    uint16_t w[2];  /* two 16-bit lanes */
    uint32_t l;     /* the whole 32-bit value */
    float32 f;      /* softfloat view of the same bits */
} VIS32;
76 
/*
 * FPMERGE: interleave the low four bytes of the two operands.
 * Even result bytes (bits 8n..8n+7, n even) come from src2, odd
 * result bytes from src1.
 */
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    int i;

    for (i = 0; i < 4; i++) {
        ret |= ((src2 >> (i * 8)) & 0xff) << (i * 16);
        ret |= ((src1 >> (i * 8)) & 0xff) << (i * 16 + 8);
    }
    return ret;
}
96 
/*
 * FMUL8x16: multiply each unsigned byte in the low 32 bits of src1 by
 * the corresponding signed 16-bit lane of src2, round the 24-bit
 * product to nearest, and return the upper 16 bits of each product.
 */
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t pix = (src1 >> (lane * 8)) & 0xff;
        int32_t f16 = (int16_t)(src2 >> (lane * 16));
        uint32_t prod = f16 * pix;

        /* Round to nearest on the discarded low byte. */
        if ((prod & 0xff) > 0x7f) {
            prod += 0x100;
        }
        ret |= (uint64_t)(uint16_t)(prod >> 8) << (lane * 16);
    }
    return ret;
}
120 
/*
 * FMUL8x16AL: multiply each unsigned byte in the low 32 bits of src1
 * by the single signed 16-bit coefficient held in halfword 1 (bits
 * 16..31) of src2, round each 24-bit product to nearest, and return
 * the upper 16 bits of each product.
 *
 * Bug fix: the previous PMUL sequence stored each result back into
 * the operand that also held the coefficient, so computing lane 1
 * clobbered d.VIS_SW64(1) and lanes 2 and 3 were multiplied by a
 * partial result instead of the coefficient.  Latch the coefficient
 * before producing any output lane.
 */
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    int32_t mul = (int16_t)(src2 >> 16);  /* coefficient: halfword 1 of src2 */
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        uint32_t prod = mul * (int32_t)((src1 >> (lane * 8)) & 0xff);

        /* Round to nearest on the discarded low byte. */
        if ((prod & 0xff) > 0x7f) {
            prod += 0x100;
        }
        ret |= (uint64_t)(uint16_t)(prod >> 8) << (lane * 16);
    }
    return ret;
}
144 
/*
 * FMUL8x16AU: multiply each unsigned byte in the low 32 bits of src1
 * by the single signed 16-bit coefficient held in halfword 0 (bits
 * 0..15) of src2, round each 24-bit product to nearest, and return
 * the upper 16 bits of each product.
 *
 * Bug fix: the previous PMUL sequence stored lane 0's result into
 * d.VIS_W64(0) — the very halfword holding the coefficient — so lanes
 * 1..3 were multiplied by lane 0's result instead of the coefficient.
 * Latch the coefficient before producing any output lane.
 */
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    int32_t mul = (int16_t)src2;          /* coefficient: halfword 0 of src2 */
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        uint32_t prod = mul * (int32_t)((src1 >> (lane * 8)) & 0xff);

        /* Round to nearest on the discarded low byte. */
        if ((prod & 0xff) > 0x7f) {
            prod += 0x100;
        }
        ret |= (uint64_t)(uint16_t)(prod >> 8) << (lane * 16);
    }
    return ret;
}
168 
/*
 * FMUL8SUx16: multiply the SIGNED high byte of each 16-bit lane of
 * src1 by the corresponding signed 16-bit lane of src2, round the
 * product to nearest, and keep the upper 16 bits per lane.
 */
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t hi8 = (int16_t)(src1 >> (lane * 16)) >> 8;
        int32_t f16 = (int16_t)(src2 >> (lane * 16));
        uint32_t prod = f16 * hi8;

        /* Round to nearest on the discarded low byte. */
        if ((prod & 0xff) > 0x7f) {
            prod += 0x100;
        }
        ret |= (uint64_t)(uint16_t)(prod >> 8) << (lane * 16);
    }
    return ret;
}
192 
/*
 * FMUL8ULx16: multiply the UNSIGNED low byte of each 16-bit lane of
 * src1 by the corresponding signed 16-bit lane of src2, round the
 * product to nearest, and keep the upper 16 bits per lane.
 */
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 4; lane++) {
        int32_t lo8 = (src1 >> (lane * 16)) & 0xff;
        int32_t f16 = (int16_t)(src2 >> (lane * 16));
        uint32_t prod = f16 * lo8;

        /* Round to nearest on the discarded low byte. */
        if ((prod & 0xff) > 0x7f) {
            prod += 0x100;
        }
        ret |= (uint64_t)(uint16_t)(prod >> 8) << (lane * 16);
    }
    return ret;
}
216 
/*
 * FMULD8SUx16: multiply the SIGNED high byte of 16-bit lanes 0 and 1
 * of src1 by the corresponding signed 16-bit lanes of src2; each
 * rounded product is stored as a full 32-bit result lane (no >>8
 * scaling, unlike the non-"d" variants).
 */
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        int32_t hi8 = (int16_t)(src1 >> (lane * 16)) >> 8;
        int32_t f16 = (int16_t)(src2 >> (lane * 16));
        uint32_t prod = f16 * hi8;

        /* Round to nearest on the low byte, kept in the result. */
        if ((prod & 0xff) > 0x7f) {
            prod += 0x100;
        }
        ret |= (uint64_t)prod << (lane * 32);
    }
    return ret;
}
239 
/*
 * FMULD8ULx16: multiply the UNSIGNED low byte of 16-bit lanes 0 and 1
 * of src1 by the corresponding signed 16-bit lanes of src2; each
 * rounded product is stored as a full 32-bit result lane (no >>8
 * scaling, unlike the non-"d" variants).
 */
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    int lane;

    for (lane = 0; lane < 2; lane++) {
        int32_t lo8 = (src1 >> (lane * 16)) & 0xff;
        int32_t f16 = (int16_t)(src2 >> (lane * 16));
        uint32_t prod = f16 * lo8;

        /* Round to nearest on the low byte, kept in the result. */
        if ((prod & 0xff) > 0x7f) {
            prod += 0x100;
        }
        ret |= (uint64_t)prod << (lane * 32);
    }
    return ret;
}
262 
/*
 * FEXPAND: widen each unsigned byte of the low 32 bits of src1 into a
 * 16-bit lane, shifted left by 4 (4.4 fixed point -> 16 bits).  All
 * four result lanes are written, so src2 does not affect the result
 * (kept in the signature for the existing call interface).
 */
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    uint64_t ret = 0;
    int i;

    for (i = 0; i < 4; i++) {
        uint64_t pix = (src1 >> (i * 8)) & 0xff;

        ret |= (pix << 4) << (i * 16);
    }
    return ret;
}
277 
/*
 * Instantiate the four element-wise arithmetic helpers for a binary
 * operation F:
 *   name##16  - four 16-bit lanes of a 64-bit value
 *   name##16s - two 16-bit lanes of a 32-bit value
 *   name##32  - two 32-bit lanes of a 64-bit value
 *   name##32s - a single 32-bit value
 * In each case F is applied lane-by-lane as F(dst_lane, src_lane)
 * with src1 providing dst and src2 providing src.
 */
#define VIS_HELPER(name, F)                             \
    uint64_t name##16(uint64_t src1, uint64_t src2)     \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.ll = src1;                                    \
        d.ll = src2;                                    \
                                                        \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
                                                        \
        return d.ll;                                    \
    }                                                   \
                                                        \
    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
                                                        \
        return d.l;                                     \
    }                                                   \
                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)     \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.ll = src1;                                    \
        d.ll = src2;                                    \
                                                        \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
                                                        \
        return d.ll;                                    \
    }                                                   \
                                                        \
    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.l = F(d.l, s.l);                              \
                                                        \
        return d.l;                                     \
    }
331 
/* Lane-wise modular add/subtract: fpadd16..fpadd32s, fpsub16..fpsub32s. */
#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)
336 
/*
 * Instantiate the lane-compare helpers for predicate F:
 *   name##16 - compare the four 16-bit lanes of src1 against src2
 *   name##32 - compare the two 32-bit lanes of src1 against src2
 * The predicate is evaluated as F(src1_lane, src2_lane); the result is
 * a bitmask in the low bits of the return value (bit n set when the
 * predicate holds for lane n), with all other bits cleared.
 */
#define VIS_CMPHELPER(name, F)                                    \
    uint64_t name##16(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
                                                                  \
        return d.ll;                                              \
    }                                                             \
                                                                  \
    uint64_t name##32(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
        d.VIS_L64(1) = 0;                                         \
                                                                  \
        return d.ll;                                              \
    }
367 
/* Lane-compare predicates; instantiated as fcmpgt16/32 etc. below. */
#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)
377 
/*
 * PDIST: accumulate into sum the absolute differences of the eight
 * corresponding byte lanes of src1 and src2.
 */
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int a = (src1 >> shift) & 0xff;
        int b = (src2 >> shift) & 0xff;

        sum += (a >= b) ? (a - b) : (b - a);
    }
    return sum;
}
398 
/*
 * FPACK16: for each 16-bit lane of rs2, shift left by the GSR.scale
 * field, truncate to 8.something fixed point (>>7) and clamp to
 * [0, 255]; pack the four results into a 32-bit value.
 *
 * Fix: the left shift is performed on the value reinterpreted as
 * unsigned — "src << scale" on a negative src is undefined behavior
 * (C99 6.5.7); the unsigned shift gives the intended two's-complement
 * result on all compilers.
 */
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        uint32_t val;
        int16_t src = rs2 >> (byte * 16);
        int32_t scaled = (int32_t)((uint32_t)src << scale);
        int32_t from_fixed = scaled >> 7;

        val = (from_fixed < 0 ?  0 :
               from_fixed > 255 ?  255 : from_fixed);

        ret |= val << (8 * byte);
    }

    return ret;
}
419 
/*
 * FPACK32: shift rs1 left one byte (dropping into the two spare byte
 * positions), then for each 32-bit lane of rs2, scale by GSR.scale,
 * truncate (>>23) and clamp to [0, 255], merging the result byte into
 * bits 0 and 32 of the output.
 *
 * Fix: the scaling shift is performed on the value reinterpreted as
 * unsigned — "(int64_t)src << scale" on a negative src is undefined
 * behavior (C99 6.5.7).
 */
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t ret = 0;
    int word;

    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    for (word = 0; word < 2; word++) {
        uint64_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)((uint64_t)(int64_t)src << scale);
        int64_t from_fixed = scaled >> 23;

        val = (from_fixed < 0 ? 0 :
               (from_fixed > 255) ? 255 : from_fixed);

        ret |= val << (32 * word);
    }

    return ret;
}
441 
/*
 * FPACKFIX: for each 32-bit lane of rs2, scale by GSR.scale, truncate
 * (>>16) and saturate to a signed 16-bit value; pack the two results
 * into a 32-bit value.
 *
 * Fix: the scaling shift is performed on the value reinterpreted as
 * unsigned — "(int64_t)src << scale" on a negative src is undefined
 * behavior (C99 6.5.7).
 */
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        uint32_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)((uint64_t)(int64_t)src << scale);
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ?  32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}
462 
/*
 * BSHUFFLE: each 4-bit field of GSR.mask (gsr bits 32..63) selects one
 * byte of the 16-byte big-endian concatenation src1:src2 — index 0 is
 * the most significant byte of src1, index 15 the least significant
 * byte of src2.  The mask's most significant nibble selects result
 * byte lane 0 (bits 0..7), and so on upward.
 */
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    uint32_t sel = gsr >> 32;
    uint64_t ret = 0;
    int i;

    for (i = 0; i < 8; i++) {
        unsigned idx = (sel >> (28 - i * 4)) & 0xf;
        uint64_t byte;

        if (idx < 8) {
            byte = (src1 >> (56 - idx * 8)) & 0xff;
        } else {
            byte = (src2 >> (120 - idx * 8)) & 0xff;
        }
        ret |= byte << (i * 8);
    }

    return ret;
}
491