xref: /qemu/tests/tcg/hexagon/circ.c (revision 336d354b)
1 /*
2  *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stdio.h>
19 
/* Set DEBUG to a non-zero value to get a trace of every access */
#define DEBUG          0
/*
 * printf() that is compiled in all builds but only executed when DEBUG is
 * non-zero; the arguments are not evaluated otherwise.
 */
#define DEBUG_PRINTF(...) \
    do { \
        if (!DEBUG) { \
            break; \
        } \
        printf(__VA_ARGS__); \
    } while (0)
27 
28 
/*
 * All four buffers cover the same number of bytes; the element counts
 * below follow from the element width.
 */
#define NBYTES         (1 << 8)
#define NHALFS         (NBYTES / sizeof(short))
#define NWORDS         (NBYTES / sizeof(int))
#define NDOBLS         (NBYTES / sizeof(long long))

/* Alignment requested for every test buffer */
#define BUF_ALIGN      __attribute__((aligned(1 << 12)))

long long     dbuf[NDOBLS] BUF_ALIGN = {0};
int           wbuf[NWORDS] BUF_ALIGN = {0};
short         hbuf[NHALFS] BUF_ALIGN = {0};
unsigned char bbuf[NBYTES] BUF_ALIGN = {0};
38 
39 /*
40  * We use the C preprocessor to deal with the combinations of types
41  */
42 
43 #define INIT(BUF, N) \
44     void init_##BUF(void) \
45     { \
46         int i; \
47         for (i = 0; i < N; i++) { \
48             BUF[i] = i; \
49         } \
50     } \
51 
52 INIT(bbuf, NBYTES)
53 INIT(hbuf, NHALFS)
54 INIT(wbuf, NWORDS)
55 INIT(dbuf, NDOBLS)
56 
/*
 * Macros for performing circular load
 *     RES         result
 *     ADDR        address
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * For the immediate form, LEN is copied into M0 (by way of r4) and START
 * into CS0 before the load is issued.  INC is pasted into the instruction
 * text, so it has to be something the assembler can evaluate.
 *
 * The "memory" clobber is there because the instruction reads the buffer
 * through ADDR, which is only described to the compiler as a register
 * operand.  Without it the compiler is free to move the C stores that fill
 * the buffer past the asm.
 */
#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %2\n\t" \
        "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \
        : "=r"(RES), "+r"(ADDR) \
        : "r"(START), "r"(LEN) \
        : "r4", "m0", "cs0", "memory")
/* One wrapper per access size, so callers can pick it by token pasting */
#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC)
86 
87 /*
88  * The mreg has the following pieces
89  *     mreg[31:28]              increment[10:7]
90  *     mreg[27:24]              K value (used Hexagon v3 and earlier)
91  *     mreg[23:17]              increment[6:0]
92  *     mreg[16:0]               circular buffer length
93  */
94 static int build_mreg(int inc, int K, int len)
95 {
96     return ((inc & 0x780) << 21) |
97            ((K & 0xf) << 24) |
98            ((inc & 0x7f) << 17) |
99            (len & 0x1ffff);
100 }
101 
/*
 * Register-increment form of the circular load.
 *     RES         result
 *     ADDR        address (updated by the instruction)
 *     START       start address of buffer, written to CS1
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in elements)
 *
 * INC and LEN are packed by build_mreg() and written to M1 by way of r4;
 * the "++I" addressing mode then takes its increment from M1.
 *
 * The "memory" clobber is there because the instruction reads the buffer
 * through ADDR, which is only described to the compiler as a register
 * operand.  Without it the compiler is free to move the C stores that fill
 * the buffer past the asm.
 */
#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %2\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %3\n\t" \
        "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \
        : "=r"(RES), "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START) \
        : "r4", "m1", "cs1", "memory")
/* One wrapper per access size, so callers can pick it by token pasting */
#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC)
124 
125 /*
126  * Macros for performing circular store
127  *     VAL         value to store
128  *     ADDR        address
129  *     START       start address of buffer
130  *     LEN         length of buffer (in bytes)
131  *     INC         address increment (in bytes for IMM, elements for REG)
132  */
133 #define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \
134     __asm__( \
135         "r4 = %3\n\t" \
136         "m0 = r4\n\t" \
137         "cs0 = %1\n\t" \
138         "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \
139         : "+r"(ADDR) \
140         : "r"(START), "r"(VAL), "r"(LEN) \
141         : "r4", "m0", "cs0", "memory")
142 #define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \
143     CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC)
144 #define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \
145     CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC)
146 #define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \
147     CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC)
148 #define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \
149     CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC)
150 #define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \
151     CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC)
152 
/*
 * Immediate-increment circular store of a ".new" value.
 *     VAL         value to store
 *     ADDR        address (updated by the instruction)
 *     START       start address of buffer, written to CS0
 *     LEN         length of buffer (in bytes), written to M0 by way of r4
 *     INC         address increment (in bytes)
 *
 * VAL is moved into r5 inside the same packet as the store, and the store
 * consumes it as r5.new.
 *
 * The asm is marked volatile because its purpose is the store itself: it
 * has an output operand (ADDR), and a non-volatile asm may be discarded
 * when its outputs turn out to be unused.
 */
#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__ volatile( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %1\n\t" \
        "{\n\t" \
        "    r5 = %2\n\t" \
        "    mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \
        "}\n\t" \
        : "+r"(ADDR) \
        : "r"(START), "r"(VAL), "r"(LEN) \
        : "r4", "r5", "m0", "cs0", "memory")
/* One wrapper per access size, so callers can pick it by token pasting */
#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC)
171 
/*
 * Register-increment circular store.
 *     VAL         value to store
 *     ADDR        address (updated by the instruction)
 *     START       start address of buffer, written to CS1
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in elements)
 *
 * INC and LEN are packed by build_mreg() and written to M1 by way of r4.
 * PART is appended to the source register, which lets the "f" flavour
 * store from the ".H" half.
 *
 * The asm is marked volatile because its purpose is the store itself: it
 * has an output operand (ADDR), and a non-volatile asm may be discarded
 * when its outputs turn out to be unused.
 */
#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \
    __asm__ volatile( \
        "r4 = %1\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %2\n\t" \
        "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \
        : "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START), \
          "r"(VAL) \
        : "r4", "m1", "cs1", "memory")
/* One wrapper per store flavour, so callers can pick it by token pasting */
#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC)
193 
/*
 * Register-increment circular store of a ".new" value.
 *     VAL         value to store
 *     ADDR        address (updated by the instruction)
 *     START       start address of buffer, written to CS1
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in elements)
 *
 * INC and LEN are packed by build_mreg() and written to M1 by way of r4.
 * VAL is moved into r5 inside the same packet as the store, and the store
 * consumes it as r5.new.
 *
 * The asm is marked volatile because its purpose is the store itself: it
 * has an output operand (ADDR), and a non-volatile asm may be discarded
 * when its outputs turn out to be unused.
 */
#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__ volatile( \
        "r4 = %1\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %2\n\t" \
        "{\n\t" \
        "    r5 = %3\n\t" \
        "    mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \
        "}\n\t" \
        : "+r"(ADDR) \
        : "r"(build_mreg((INC), 0, (LEN))), \
          "r"(START), \
          "r"(VAL) \
        : "r4", "r5", "m1", "cs1", "memory")
/* One wrapper per access size, so callers can pick it by token pasting */
#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC)
214 
215 
/* Number of mismatches reported so far */
int err;

/*
 * Check the value returned by the i-th circular load.
 *     i           iteration number, starting at 0
 *     result      value that was loaded
 *     inc         element increment used by the test (+1 and -1 are tested)
 *     size        number of elements in the circular region
 *
 * The expected value is (i * inc) reduced into the range [0, size).  A
 * mismatch is printed and counted in err.
 */
void check_load(int i, long long result, int inc, int size)
{
    int expect = (i * inc) % size;

    /* The remainder takes the sign of the dividend; fold it back up */
    if (expect < 0) {
        expect += size;
    }
    if (result == expect) {
        return;
    }
    printf("ERROR(%d): %lld != %d\n", i, result, expect);
    err++;
}
233 
234 #define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \
235 void circ_test_load_imm_##SZ(void) \
236 { \
237     TYPE *p = (TYPE *)BUF; \
238     int size = 10; \
239     int i; \
240     for (i = 0; i < BUFSIZE; i++) { \
241         TYPE element; \
242         CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \
243         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
244                      i, p, element); \
245         check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \
246     } \
247     p = (TYPE *)BUF; \
248     for (i = 0; i < BUFSIZE; i++) { \
249         TYPE element; \
250         CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \
251         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
252                      i, p, element); \
253         check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \
254     } \
255 }
256 
257 TEST_LOAD_IMM(b,  char,           bbuf, NBYTES, 1, d)
258 TEST_LOAD_IMM(ub, unsigned char,  bbuf, NBYTES, 1, d)
259 TEST_LOAD_IMM(h,  short,          hbuf, NHALFS, 2, d)
260 TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d)
261 TEST_LOAD_IMM(w,  int,            wbuf, NWORDS, 4, d)
262 TEST_LOAD_IMM(d,  long long,      dbuf, NDOBLS, 8, lld)
263 
264 #define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \
265 void circ_test_load_reg_##SZ(void) \
266 { \
267     TYPE *p = (TYPE *)BUF; \
268     int size = 13; \
269     int i; \
270     for (i = 0; i < BUFSIZE; i++) { \
271         TYPE element; \
272         CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \
273         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
274                      i, p, element); \
275         check_load(i, element, 1, size); \
276     } \
277     p = (TYPE *)BUF; \
278     for (i = 0; i < BUFSIZE; i++) { \
279         TYPE element; \
280         CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \
281         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
282                      i, p, element); \
283         check_load(i, element, -1, size); \
284     } \
285 }
286 
287 TEST_LOAD_REG(b,  char,           bbuf, NBYTES, d)
288 TEST_LOAD_REG(ub, unsigned char,  bbuf, NBYTES, d)
289 TEST_LOAD_REG(h,  short,          hbuf, NHALFS, d)
290 TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d)
291 TEST_LOAD_REG(w,  int,            wbuf, NWORDS, d)
292 TEST_LOAD_REG(d,  long long,      dbuf, NDOBLS, lld)
293 
294 /* The circular stores will wrap around somewhere inside the buffer */
295 #define CIRC_VAL(SZ, TYPE, BUFSIZE) \
296 TYPE circ_val_##SZ(int i, int inc, int size) \
297 { \
298     int mod = BUFSIZE % size; \
299     int elem = i * inc; \
300     if (elem < 0) { \
301         if (-elem <= size - mod) { \
302             return (elem + BUFSIZE - mod); \
303         } else { \
304             return (elem + BUFSIZE + size - mod); \
305         } \
306     } else if (elem < mod) {\
307         return (elem + BUFSIZE - mod); \
308     } else { \
309         return (elem + BUFSIZE - size - mod); \
310     } \
311 }
312 
313 CIRC_VAL(b, unsigned char, NBYTES)
314 CIRC_VAL(h, short,         NHALFS)
315 CIRC_VAL(w, int,           NWORDS)
316 CIRC_VAL(d, long long,     NDOBLS)
317 
318 /*
319  * Circular stores should only write to the first "size" elements of the buffer
320  * the remainder of the elements should have BUF[i] == i
321  */
322 #define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \
323 void check_store_##SZ(int inc, int size) \
324 { \
325     int i; \
326     for (i = 0; i < size; i++) { \
327         DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \
328                      i, BUF[i], circ_val_##SZ(i, inc, size)); \
329         if (BUF[i] != circ_val_##SZ(i, inc, size)) { \
330             printf("ERROR(%3d): 0x%02" #FMT " != 0x%02" #FMT "\n", \
331                    i, BUF[i], circ_val_##SZ(i, inc, size)); \
332             err++; \
333         } \
334     } \
335     for (i = size; i < BUFSIZE; i++) { \
336         if (BUF[i] != i) { \
337             printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], i); \
338             err++; \
339         } \
340     } \
341 }
342 
343 CHECK_STORE(b, bbuf, NBYTES, x)
344 CHECK_STORE(h, hbuf, NHALFS, x)
345 CHECK_STORE(w, wbuf, NWORDS, x)
346 CHECK_STORE(d, dbuf, NDOBLS, llx)
347 
348 #define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \
349 void circ_test_store_imm_##SZ(void) \
350 { \
351     unsigned int size = 27; \
352     TYPE *p = BUF; \
353     TYPE val = 0; \
354     int i; \
355     init_##BUF(); \
356     for (i = 0; i < BUFSIZE; i++) { \
357         CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \
358         val++; \
359     } \
360     check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \
361     p = BUF; \
362     val = 0; \
363     init_##BUF(); \
364     for (i = 0; i < BUFSIZE; i++) { \
365         CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \
366                             -(INC)); \
367         val++; \
368     } \
369     check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \
370 }
371 
372 CIRC_TEST_STORE_IMM(b,    b, unsigned char, bbuf, NBYTES, 0,  1)
373 CIRC_TEST_STORE_IMM(h,    h, short,         hbuf, NHALFS, 0,  2)
374 CIRC_TEST_STORE_IMM(f,    h, short,         hbuf, NHALFS, 16, 2)
375 CIRC_TEST_STORE_IMM(w,    w, int,           wbuf, NWORDS, 0,  4)
376 CIRC_TEST_STORE_IMM(d,    d, long long,     dbuf, NDOBLS, 0,  8)
377 CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0,  1)
378 CIRC_TEST_STORE_IMM(hnew, h, short,         hbuf, NHALFS, 0,  2)
379 CIRC_TEST_STORE_IMM(wnew, w, int,           wbuf, NWORDS, 0,  4)
380 
381 #define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \
382 void circ_test_store_reg_##SZ(void) \
383 { \
384     TYPE *p = BUF; \
385     unsigned int size = 19; \
386     TYPE val = 0; \
387     int i; \
388     init_##BUF(); \
389     for (i = 0; i < BUFSIZE; i++) { \
390         CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \
391         val++; \
392     } \
393     check_store_##CHK(1, size); \
394     p = BUF; \
395     val = 0; \
396     init_##BUF(); \
397     for (i = 0; i < BUFSIZE; i++) { \
398         CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \
399         val++; \
400     } \
401     check_store_##CHK(-1, size); \
402 }
403 
404 CIRC_TEST_STORE_REG(b,    b, unsigned char, bbuf, NBYTES, 0)
405 CIRC_TEST_STORE_REG(h,    h, short,         hbuf, NHALFS, 0)
406 CIRC_TEST_STORE_REG(f,    h, short,         hbuf, NHALFS, 16)
407 CIRC_TEST_STORE_REG(w,    w, int,           wbuf, NWORDS, 0)
408 CIRC_TEST_STORE_REG(d,    d, long long,     dbuf, NDOBLS, 0)
409 CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0)
410 CIRC_TEST_STORE_REG(hnew, h, short,         hbuf, NHALFS, 0)
411 CIRC_TEST_STORE_REG(wnew, w, int,           wbuf, NWORDS, 0)
412 
413 /* Test the old scheme used in Hexagon V3 */
414 static void circ_test_v3(void)
415 {
416     int *p = wbuf;
417     int size = 15;
418     /* set high bit in K to test unsigned extract in fcirc */
419     int K = 8;      /* 1024 bytes */
420     int element;
421     int i;
422 
423     init_wbuf();
424 
425     for (i = 0; i < NWORDS; i++) {
426         __asm__(
427             "r4 = %2\n\t"
428             "m1 = r4\n\t"
429             "%0 = memw(%1++I:circ(M1))\n\t"
430             : "=r"(element), "+r"(p)
431             : "r"(build_mreg(1, K, size * sizeof(int)))
432             : "r4", "m1");
433         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element);
434         check_load(i, element, 1, size);
435     }
436 }
437 
438 int main()
439 {
440     init_bbuf();
441     init_hbuf();
442     init_wbuf();
443     init_dbuf();
444 
445     DEBUG_PRINTF("NBYTES = %d\n", NBYTES);
446     DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf);
447     DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf);
448     DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf);
449     DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf);
450 
451     circ_test_load_imm_b();
452     circ_test_load_imm_ub();
453     circ_test_load_imm_h();
454     circ_test_load_imm_uh();
455     circ_test_load_imm_w();
456     circ_test_load_imm_d();
457 
458     circ_test_load_reg_b();
459     circ_test_load_reg_ub();
460     circ_test_load_reg_h();
461     circ_test_load_reg_uh();
462     circ_test_load_reg_w();
463     circ_test_load_reg_d();
464 
465     circ_test_store_imm_b();
466     circ_test_store_imm_h();
467     circ_test_store_imm_f();
468     circ_test_store_imm_w();
469     circ_test_store_imm_d();
470     circ_test_store_imm_bnew();
471     circ_test_store_imm_hnew();
472     circ_test_store_imm_wnew();
473 
474     circ_test_store_reg_b();
475     circ_test_store_reg_h();
476     circ_test_store_reg_f();
477     circ_test_store_reg_w();
478     circ_test_store_reg_d();
479     circ_test_store_reg_bnew();
480     circ_test_store_reg_hnew();
481     circ_test_store_reg_wnew();
482 
483     circ_test_v3();
484 
485     puts(err ? "FAIL" : "PASS");
486     return err ? 1 : 0;
487 }
488