xref: /qemu/tests/tcg/hexagon/mem_noshuf.c (revision 78f314cf)
1 /*
2  *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stdio.h>
19 #include <stdint.h>
20 #include <stdbool.h>
21 
/*
 * Failure indicator for the whole test: main() prints "FAIL" and returns a
 * non-zero status when this is non-zero.
 * NOTE(review): presumably updated by the check32()/check64() helpers in
 * hex_test.h, which would explain why it is defined before that include --
 * confirm against the header.
 */
int err;
23 
24 #include "hex_test.h"
25 
26 /*
27  *  Make sure that the :mem_noshuf packet attribute is honored.
28  *  This is important when the addresses overlap.
29  *  The store instruction in slot 1 effectively executes first,
30  *  followed by the load instruction in slot 0.
31  */
32 
/*
 * MEM_NOSHUF(NAME, RET_TYPE, ST_TYPE, LD_TYPE, ST_OP, LD_OP)
 *
 * Generate a helper
 *     RET_TYPE NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x)
 * that issues a single :mem_noshuf packet which stores x to *p using the
 * ST_OP mnemonic and loads from *q using the LD_OP mnemonic.  The loaded
 * value is returned as RET_TYPE.
 *
 * The output operand does not need an early-clobber: it is only written
 * inside the packet that also consumes the inputs.  The "memory" clobber
 * keeps the compiler from caching or reordering accesses to the buffer
 * around the packet.
 */
#define MEM_NOSHUF(NAME, RET_TYPE, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
static inline RET_TYPE NAME(ST_TYPE *p, LD_TYPE *q, ST_TYPE x) \
{ \
    RET_TYPE ret; \
    asm volatile("{\n\t" \
                 "    " #ST_OP "(%[st_addr]) = %[st_val]\n\t" \
                 "    %[ret] = " #LD_OP "(%[ld_addr])\n\t" \
                 "}:mem_noshuf\n" \
                 : [ret] "=r"(ret) \
                 : [st_addr] "r"(p), [ld_addr] "r"(q), [st_val] "r"(x) \
                 : "memory"); \
    return ret; \
}

/* Helper whose load result is returned as a 32-bit value. */
#define MEM_NOSHUF32(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
    MEM_NOSHUF(NAME, uint32_t, ST_TYPE, LD_TYPE, ST_OP, LD_OP)

/* Helper whose load result is returned as a 64-bit value. */
#define MEM_NOSHUF64(NAME, ST_TYPE, LD_TYPE, ST_OP, LD_OP) \
    MEM_NOSHUF(NAME, uint64_t, ST_TYPE, LD_TYPE, ST_OP, LD_OP)
60 
/*
 * Instantiate one helper per store-size/load-size pairing.
 * Naming: mem_noshuf_s<store>_l<load>, where b/h/w/d selects the
 * memb/memh/memw/memd access and a "u" in the load part selects the
 * memub/memuh form of the load.  Only the memd loads need the 64-bit
 * variant of the generator macro.
 */

/* Store byte combinations */
MEM_NOSHUF32(mem_noshuf_sb_lb,  int8_t,       int8_t,           memb, memb)
MEM_NOSHUF32(mem_noshuf_sb_lub, int8_t,       uint8_t,          memb, memub)
MEM_NOSHUF32(mem_noshuf_sb_lh,  int8_t,       int16_t,          memb, memh)
MEM_NOSHUF32(mem_noshuf_sb_luh, int8_t,       uint16_t,         memb, memuh)
MEM_NOSHUF32(mem_noshuf_sb_lw,  int8_t,       int32_t,          memb, memw)
MEM_NOSHUF64(mem_noshuf_sb_ld,  int8_t,       int64_t,          memb, memd)

/* Store half combinations */
MEM_NOSHUF32(mem_noshuf_sh_lb,  int16_t,      int8_t,           memh, memb)
MEM_NOSHUF32(mem_noshuf_sh_lub, int16_t,      uint8_t,          memh, memub)
MEM_NOSHUF32(mem_noshuf_sh_lh,  int16_t,      int16_t,          memh, memh)
MEM_NOSHUF32(mem_noshuf_sh_luh, int16_t,      uint16_t,         memh, memuh)
MEM_NOSHUF32(mem_noshuf_sh_lw,  int16_t,      int32_t,          memh, memw)
MEM_NOSHUF64(mem_noshuf_sh_ld,  int16_t,      int64_t,          memh, memd)

/* Store word combinations */
MEM_NOSHUF32(mem_noshuf_sw_lb,  int32_t,      int8_t,           memw, memb)
MEM_NOSHUF32(mem_noshuf_sw_lub, int32_t,      uint8_t,          memw, memub)
MEM_NOSHUF32(mem_noshuf_sw_lh,  int32_t,      int16_t,          memw, memh)
MEM_NOSHUF32(mem_noshuf_sw_luh, int32_t,      uint16_t,         memw, memuh)
MEM_NOSHUF32(mem_noshuf_sw_lw,  int32_t,      int32_t,          memw, memw)
MEM_NOSHUF64(mem_noshuf_sw_ld,  int32_t,      int64_t,          memw, memd)

/* Store double combinations */
MEM_NOSHUF32(mem_noshuf_sd_lb,  int64_t,      int8_t,           memd, memb)
MEM_NOSHUF32(mem_noshuf_sd_lub, int64_t,      uint8_t,          memd, memub)
MEM_NOSHUF32(mem_noshuf_sd_lh,  int64_t,      int16_t,          memd, memh)
MEM_NOSHUF32(mem_noshuf_sd_luh, int64_t,      uint16_t,         memd, memuh)
MEM_NOSHUF32(mem_noshuf_sd_lw,  int64_t,      int32_t,          memd, memw)
MEM_NOSHUF64(mem_noshuf_sd_ld,  int64_t,      int64_t,          memd, memd)
92 
93 static inline int pred_lw_sw(bool pred, int32_t *p, int32_t *q,
94                              int32_t x, int32_t y)
95 {
96     int ret;
97     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
98                  "%0 = %3\n\t"
99                  "{\n\t"
100                  "    memw(%1) = %4\n\t"
101                  "    if (!p0) %0 = memw(%2)\n\t"
102                  "}:mem_noshuf\n"
103                  : "=&r"(ret)
104                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
105                  : "p0", "memory");
106     return ret;
107 }
108 
109 static inline int pred_lw_sw_pi(bool pred, int32_t *p, int32_t *q,
110                                 int32_t x, int32_t y)
111 {
112     int ret;
113     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
114                  "%0 = %3\n\t"
115                  "r7 = %2\n\t"
116                  "{\n\t"
117                  "    memw(%1) = %4\n\t"
118                  "    if (!p0) %0 = memw(r7++#4)\n\t"
119                  "}:mem_noshuf\n"
120                  : "=&r"(ret)
121                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
122                  : "r7", "p0", "memory");
123     return ret;
124 }
125 
126 static inline int64_t pred_ld_sd(bool pred, int64_t *p, int64_t *q,
127                                  int64_t x, int64_t y)
128 {
129     int64_t ret;
130     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
131                  "%0 = %3\n\t"
132                  "{\n\t"
133                  "    memd(%1) = %4\n\t"
134                  "    if (!p0) %0 = memd(%2)\n\t"
135                  "}:mem_noshuf\n"
136                  : "=&r"(ret)
137                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
138                  : "p0", "memory");
139     return ret;
140 }
141 
142 static inline int64_t pred_ld_sd_pi(bool pred, int64_t *p, int64_t *q,
143                                     int64_t x, int64_t y)
144 {
145     int64_t ret;
146     asm volatile("p0 = cmp.eq(%5, #0)\n\t"
147                  "%0 = %3\n\t"
148                  "r7 = %2\n\t"
149                  "{\n\t"
150                  "    memd(%1) = %4\n\t"
151                  "    if (!p0) %0 = memd(r7++#8)\n\t"
152                  "}:mem_noshuf\n"
153                  : "=&r"(ret)
154                  : "r"(p), "r"(q), "r"(x), "r"(y), "r"(pred)
155                  : "r7", "p0", "memory");
156     return ret;
157 }
158 
159 static inline int32_t cancel_sw_lb(bool pred, int32_t *p, int8_t *q, int32_t x)
160 {
161     int32_t ret;
162     asm volatile("p0 = cmp.eq(%4, #0)\n\t"
163                  "{\n\t"
164                  "    if (!p0) memw(%1) = %3\n\t"
165                  "    %0 = memb(%2)\n\t"
166                  "}:mem_noshuf\n"
167                  : "=r"(ret)
168                  : "r"(p), "r"(q), "r"(x), "r"(pred)
169                  : "p0", "memory");
170     return ret;
171 }
172 
173 static inline int64_t cancel_sw_ld(bool pred, int32_t *p, int64_t *q, int32_t x)
174 {
175     int64_t ret;
176     asm volatile("p0 = cmp.eq(%4, #0)\n\t"
177                  "{\n\t"
178                  "    if (!p0) memw(%1) = %3\n\t"
179                  "    %0 = memd(%2)\n\t"
180                  "}:mem_noshuf\n"
181                  : "=r"(ret)
182                  : "r"(p), "r"(q), "r"(x), "r"(pred)
183                  : "p0", "memory");
184     return ret;
185 }
186 
/*
 * 16-byte scratch buffer that can be viewed at every access width used by
 * the tests (signed and unsigned bytes, halves, words and double words).
 * All members overlay the same storage, so a store through one view can be
 * observed by a load through another.
 */
typedef union {
    int64_t  d[16 / sizeof(int64_t)];     /* 2 signed double words */
    uint64_t ud[16 / sizeof(uint64_t)];   /* 2 unsigned double words */
    int32_t  w[16 / sizeof(int32_t)];     /* 4 signed words */
    uint32_t uw[16 / sizeof(uint32_t)];   /* 4 unsigned words */
    int16_t  h[16 / sizeof(int16_t)];     /* 8 signed halves */
    uint16_t uh[16 / sizeof(uint16_t)];   /* 8 unsigned halves */
    int8_t   b[16 / sizeof(int8_t)];      /* 16 signed bytes */
    uint8_t  ub[16 / sizeof(uint8_t)];    /* 16 unsigned bytes */
} Memory;
197 
198 int main()
199 {
200     Memory n;
201     uint32_t res32;
202     uint64_t res64;
203 
204     /*
205      * Store byte combinations
206      */
207     n.w[0] = ~0;
208     res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[0], 0x87);
209     check32(res32, 0xffffff87);
210 
211     n.w[0] = ~0;
212     res32 = mem_noshuf_sb_lub(&n.b[0], &n.ub[0], 0x87);
213     check32(res32, 0x00000087);
214 
215     n.w[0] = ~0;
216     res32 = mem_noshuf_sb_lh(&n.b[0], &n.h[0], 0x87);
217     check32(res32, 0xffffff87);
218 
219     n.w[0] = ~0;
220     res32 = mem_noshuf_sb_luh(&n.b[0], &n.uh[0], 0x87);
221     check32(res32, 0x0000ff87);
222 
223     n.w[0] = ~0;
224     res32 = mem_noshuf_sb_lw(&n.b[0], &n.w[0], 0x87);
225     check32(res32, 0xffffff87);
226 
227     n.d[0] = ~0LL;
228     res64 = mem_noshuf_sb_ld(&n.b[0], &n.d[0], 0x87);
229     check64(res64, 0xffffffffffffff87LL);
230 
231     /*
232      * Store half combinations
233      */
234     n.w[0] = ~0;
235     res32 = mem_noshuf_sh_lb(&n.h[0], &n.b[0], 0x8787);
236     check32(res32, 0xffffff87);
237 
238     n.w[0] = ~0;
239     res32 = mem_noshuf_sh_lub(&n.h[0], &n.ub[1], 0x8f87);
240     check32(res32, 0x0000008f);
241 
242     n.w[0] = ~0;
243     res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[0], 0x8a87);
244     check32(res32, 0xffff8a87);
245 
246     n.w[0] = ~0;
247     res32 = mem_noshuf_sh_luh(&n.h[0], &n.uh[0], 0x8a87);
248     check32(res32, 0x8a87);
249 
250     n.w[0] = ~0;
251     res32 = mem_noshuf_sh_lw(&n.h[1], &n.w[0], 0x8a87);
252     check32(res32, 0x8a87ffff);
253 
254     n.w[0] = ~0;
255     res64 = mem_noshuf_sh_ld(&n.h[1], &n.d[0], 0x8a87);
256     check64(res64, 0xffffffff8a87ffffLL);
257 
258     /*
259      * Store word combinations
260      */
261     n.w[0] = ~0;
262     res32 = mem_noshuf_sw_lb(&n.w[0], &n.b[0], 0x12345687);
263     check32(res32, 0xffffff87);
264 
265     n.w[0] = ~0;
266     res32 = mem_noshuf_sw_lub(&n.w[0], &n.ub[0], 0x12345687);
267     check32(res32, 0x00000087);
268 
269     n.w[0] = ~0;
270     res32 = mem_noshuf_sw_lh(&n.w[0], &n.h[0], 0x1234f678);
271     check32(res32, 0xfffff678);
272 
273     n.w[0] = ~0;
274     res32 = mem_noshuf_sw_luh(&n.w[0], &n.uh[0], 0x12345678);
275     check32(res32, 0x00005678);
276 
277     n.w[0] = ~0;
278     res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[0], 0x12345678);
279     check32(res32, 0x12345678);
280 
281     n.d[0] = ~0LL;
282     res64 = mem_noshuf_sw_ld(&n.w[0], &n.d[0], 0x12345678);
283     check64(res64, 0xffffffff12345678LL);
284 
285     /*
286      * Store double combinations
287      */
288     n.d[0] = ~0LL;
289     res32 = mem_noshuf_sd_lb(&n.d[0], &n.b[1], 0x123456789abcdef0);
290     check32(res32, 0xffffffde);
291 
292     n.d[0] = ~0LL;
293     res32 = mem_noshuf_sd_lub(&n.d[0], &n.ub[1], 0x123456789abcdef0);
294     check32(res32, 0x000000de);
295 
296     n.d[0] = ~0LL;
297     res32 = mem_noshuf_sd_lh(&n.d[0], &n.h[1], 0x123456789abcdef0);
298     check32(res32, 0xffff9abc);
299 
300     n.d[0] = ~0LL;
301     res32 = mem_noshuf_sd_luh(&n.d[0], &n.uh[1], 0x123456789abcdef0);
302     check32(res32, 0x00009abc);
303 
304     n.d[0] = ~0LL;
305     res32 = mem_noshuf_sd_lw(&n.d[0], &n.w[1], 0x123456789abcdef0);
306     check32(res32, 0x12345678);
307 
308     n.d[0] = ~0LL;
309     res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[0], 0x123456789abcdef0);
310     check64(res64, 0x123456789abcdef0LL);
311 
312     /*
313      * Predicated word stores
314      */
315     n.w[0] = ~0;
316     res32 = cancel_sw_lb(false, &n.w[0], &n.b[0], 0x12345678);
317     check32(res32, 0xffffffff);
318 
319     n.w[0] = ~0;
320     res32 = cancel_sw_lb(true, &n.w[0], &n.b[0], 0x12345687);
321     check32(res32, 0xffffff87);
322 
323     /*
324      * Predicated double stores
325      */
326     n.d[0] = ~0LL;
327     res64 = cancel_sw_ld(false, &n.w[0], &n.d[0], 0x12345678);
328     check64(res64, 0xffffffffffffffffLL);
329 
330     n.d[0] = ~0LL;
331     res64 = cancel_sw_ld(true, &n.w[0], &n.d[0], 0x12345678);
332     check64(res64, 0xffffffff12345678LL);
333 
334     n.d[0] = ~0LL;
335     res64 = cancel_sw_ld(false, &n.w[1], &n.d[0], 0x12345678);
336     check64(res64, 0xffffffffffffffffLL);
337 
338     n.d[0] = ~0LL;
339     res64 = cancel_sw_ld(true, &n.w[1], &n.d[0], 0x12345678);
340     check64(res64, 0x12345678ffffffffLL);
341 
342     /*
343      * No overlap tests
344      */
345     n.w[0] = ~0;
346     res32 = mem_noshuf_sb_lb(&n.b[1], &n.b[0], 0x87);
347     check32(res32, 0xffffffff);
348 
349     n.w[0] = ~0;
350     res32 = mem_noshuf_sb_lb(&n.b[0], &n.b[1], 0x87);
351     check32(res32, 0xffffffff);
352 
353     n.w[0] = ~0;
354     res32 = mem_noshuf_sh_lh(&n.h[1], &n.h[0], 0x8787);
355     check32(res32, 0xffffffff);
356 
357     n.w[0] = ~0;
358     res32 = mem_noshuf_sh_lh(&n.h[0], &n.h[1], 0x8787);
359     check32(res32, 0xffffffff);
360 
361     n.d[0] = ~0LL;
362     res32 = mem_noshuf_sw_lw(&n.w[0], &n.w[1], 0x12345678);
363     check32(res32, 0xffffffff);
364 
365     n.d[0] = ~0LL;
366     res32 = mem_noshuf_sw_lw(&n.w[1], &n.w[0], 0x12345678);
367     check32(res32, 0xffffffff);
368 
369     n.d[0] = ~0LL;
370     n.d[1] = ~0LL;
371     res64 = mem_noshuf_sd_ld(&n.d[1], &n.d[0], 0x123456789abcdef0LL);
372     check64(res64, 0xffffffffffffffffLL);
373 
374     n.d[0] = ~0LL;
375     n.d[1] = ~0LL;
376     res64 = mem_noshuf_sd_ld(&n.d[0], &n.d[1], 0x123456789abcdef0LL);
377     check64(res64, 0xffffffffffffffffLL);
378 
379     n.w[0] = ~0;
380     res32 = pred_lw_sw(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
381     check32(res32, 0x12345678);
382     check32(n.w[0], 0xc0ffeeda);
383 
384     n.w[0] = ~0;
385     res32 = pred_lw_sw(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
386     check32(res32, 0xc0ffeeda);
387     check32(n.w[0], 0xc0ffeeda);
388 
389     n.w[0] = ~0;
390     res32 = pred_lw_sw_pi(false, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
391     check32(res32, 0x12345678);
392     check32(n.w[0], 0xc0ffeeda);
393 
394     n.w[0] = ~0;
395     res32 = pred_lw_sw_pi(true, &n.w[0], &n.w[0], 0x12345678, 0xc0ffeeda);
396     check32(res32, 0xc0ffeeda);
397     check32(n.w[0], 0xc0ffeeda);
398 
399     n.d[0] = ~0LL;
400     res64 = pred_ld_sd(false, &n.d[0], &n.d[0],
401                        0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
402     check64(res64, 0x1234567812345678LL);
403     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
404 
405     n.d[0] = ~0LL;
406     res64 = pred_ld_sd(true, &n.d[0], &n.d[0],
407                        0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
408     check64(res64, 0xc0ffeedac0ffeedaLL);
409     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
410 
411     n.d[0] = ~0LL;
412     res64 = pred_ld_sd_pi(false, &n.d[0], &n.d[0],
413                           0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
414     check64(res64, 0x1234567812345678LL);
415     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
416 
417     n.d[0] = ~0LL;
418     res64 = pred_ld_sd_pi(true, &n.d[0], &n.d[0],
419                           0x1234567812345678LL, 0xc0ffeedac0ffeedaLL);
420     check64(res64, 0xc0ffeedac0ffeedaLL);
421     check64(n.d[0], 0xc0ffeedac0ffeedaLL);
422 
423     puts(err ? "FAIL" : "PASS");
424     return err;
425 }
426