xref: /qemu/tests/tcg/hexagon/circ.c (revision 60f782b6)
1 /*
2  *  Copyright(c) 2019-2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
3  *
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stdio.h>
19 #include <stdint.h>
20 
/*
 * Global failure indicator: main() prints "FAIL" and returns 1 when this is
 * non-zero.  Nothing in this file writes it; presumably the check helpers
 * pulled in from hex_test.h do -- confirm against that header.
 */
int err = 0;
22 
23 #include "hex_test.h"
24 
/*
 * Debug tracing switch.  With DEBUG set to a non-zero value DEBUG_PRINTF
 * forwards its arguments to printf(); with DEBUG at 0 the call is still
 * compiled but is never executed, so its arguments are not evaluated.
 */
#define DEBUG          0
#define DEBUG_PRINTF(...) \
    do { \
        if (!DEBUG) { \
            break; \
        } \
        printf(__VA_ARGS__); \
    } while (0)
32 
33 
/*
 * Every test buffer occupies exactly NBYTES bytes.  The element counts are
 * derived from the fixed-width element types the buffers are declared with,
 * so the counts and the declarations cannot drift apart on a target where
 * short/int/long long have other widths.
 */
#define NBYTES         (1 << 8)
#define NHALFS         (NBYTES / sizeof(int16_t))
#define NWORDS         (NBYTES / sizeof(int32_t))
#define NDOBLS         (NBYTES / sizeof(int64_t))

/* One buffer per access size, each aligned to 1 << 12 (4096) bytes */
int64_t       dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0};
int32_t       wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0};
int16_t       hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0};
uint8_t       bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0};
43 
44 /*
 * We use the C preprocessor to deal with the combinations of types
46  */
47 
48 #define INIT(BUF, N) \
49     void init_##BUF(void) \
50     { \
51         for (int i = 0; i < N; i++) { \
52             BUF[i] = i; \
53         } \
54     } \
55 
56 INIT(bbuf, NBYTES)
57 INIT(hbuf, NHALFS)
58 INIT(wbuf, NWORDS)
59 INIT(dbuf, NDOBLS)
60 
/*
 * Macros for performing circular load
 *     RES         result
 *     ADDR        address
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * CIRC_LOAD_IMM copies LEN through r4 into m0 and START into cs0, then
 * issues a mem<SIZE> load with a "++#INC:circ(M0)" post-increment.  INC is
 * pasted into the instruction text, so it must be a compile-time constant.
 * ADDR is a read/write operand and holds the updated address afterwards.
 *
 * The "memory" clobber is needed because the load reads the buffer through
 * ADDR without the buffer being named as an operand; without it the
 * compiler is free to move earlier stores to the buffer past the asm.
 */
#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %[len]\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %[start]\n\t" \
        "%[res] = mem" #SIZE "(%[addr]++#" #INC ":circ(M0))\n\t" \
        : [res] "=r"(RES), [addr] "+r"(ADDR) \
        : [start] "r"(START), [len] "r"(LEN) \
        : "r4", "m0", "cs0", "memory")

/* One wrapper per load suffix: b, ub, h, uh, w, d */
#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC)
90 
/*
 * Pack the value the REG-increment tests in this file write into m1.
 *
 * The mreg has the following pieces
 *     mreg[31:28]              increment[10:7]
 *     mreg[27:24]              K value (used Hexagon v3 and earlier)
 *     mreg[23:17]              increment[6:0]
 *     mreg[16:0]               circular buffer length
 *
 *     inc         increment; only bits [10:0] are kept, so a negative
 *                 increment is encoded by its low 11 two's-complement bits
 *     K           K value; only bits [3:0] are kept
 *     len         buffer length; only bits [16:0] are kept
 *
 * Returns the packed 32-bit register image.
 *
 * The fields are assembled in unsigned arithmetic: increment[10:7] lands in
 * bits [31:28], and left-shifting a signed int into the sign bit (which
 * happens for inc == -1) is undefined behaviour.
 */
static int32_t build_mreg(int32_t inc, int32_t K, int32_t len)
{
    uint32_t inc_hi = ((uint32_t)inc & 0x780) << 21;
    uint32_t k_bits = ((uint32_t)K & 0xf) << 24;
    uint32_t inc_lo = ((uint32_t)inc & 0x7f) << 17;
    uint32_t length = (uint32_t)len & 0x1ffff;

    return (int32_t)(inc_hi | k_bits | inc_lo | length);
}
105 
/*
 * Circular load with the increment taken from the M register.
 *     RES         result
 *     ADDR        address (read/write operand, updated by the load)
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in elements)
 *
 * build_mreg(INC, 0, LEN) is copied through r4 into m1 and START into cs1,
 * then a mem<SIZE> load with a "++I:circ(M1)" post-increment is issued.
 *
 * The "memory" clobber is needed because the load reads the buffer through
 * ADDR without the buffer being named as an operand; without it the
 * compiler is free to move earlier stores to the buffer past the asm.
 */
#define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %[mreg]\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %[start]\n\t" \
        "%[res] = mem" #SIZE "(%[addr]++I:circ(M1))\n\t" \
        : [res] "=r"(RES), [addr] "+r"(ADDR) \
        : [mreg] "r"(build_mreg((INC), 0, (LEN))), \
          [start] "r"(START) \
        : "r4", "m1", "cs1", "memory")

/* One wrapper per load suffix: b, ub, h, uh, w, d */
#define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC)
128 
/*
 * Macros for performing circular store
 *     VAL         value to store
 *     ADDR        address
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * CIRC_STORE_IMM copies LEN through r4 into m0 and START into cs0, then
 * issues a mem<SIZE> store with a "++#INC:circ(M0)" post-increment.  PART is
 * a string literal appended to the source register ("" or ".H").  INC is
 * pasted into the instruction text, so it must be a compile-time constant.
 * ADDR is a read/write operand and holds the updated address afterwards.
 */
#define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %[len]\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %[start]\n\t" \
        "mem" #SIZE "(%[addr]++#" #INC ":circ(M0)) = %[val]" PART "\n\t" \
        : [addr] "+r"(ADDR) \
        : [start] "r"(START), [val] "r"(VAL), [len] "r"(LEN) \
        : "r4", "m0", "cs0", "memory")

/* Wrappers: b, h, w, d store the register as is; f stores its ".H" part */
#define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC)
156 
/*
 * Immediate-increment circular store of a ".new" value.
 *     VAL         value to store
 *     ADDR        address (read/write operand, updated by the store)
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes)
 *
 * LEN goes through r4 into m0 and START into cs0.  Inside one { } group VAL
 * is copied into r5 and the store takes its data from r5.new.
 */
#define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %[len]\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %[start]\n\t" \
        "{\n\t" \
        "    r5 = %[val]\n\t" \
        "    mem" #SIZE "(%[addr]++#" #INC ":circ(M0)) = r5.new\n\t" \
        "}\n\t" \
        : [addr] "+r"(ADDR) \
        : [start] "r"(START), [val] "r"(VAL), [len] "r"(LEN) \
        : "r4", "r5", "m0", "cs0", "memory")

/* One wrapper per store size: b, h, w */
#define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC)
175 
/*
 * Circular store with the increment taken from the M register.
 *     VAL         value to store
 *     ADDR        address (read/write operand, updated by the store)
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in elements)
 *
 * build_mreg(INC, 0, LEN) is copied through r4 into m1 and START into cs1,
 * then a mem<SIZE> store with a "++I:circ(M1)" post-increment is issued.
 * PART is a string literal appended to the source register ("" or ".H").
 */
#define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %[mreg]\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %[start]\n\t" \
        "mem" #SIZE "(%[addr]++I:circ(M1)) = %[val]" PART "\n\t" \
        : [addr] "+r"(ADDR) \
        : [mreg] "r"(build_mreg((INC), 0, (LEN))), \
          [start] "r"(START), \
          [val] "r"(VAL) \
        : "r4", "m1", "cs1", "memory")

/* Wrappers: b, h, w, d store the register as is; f stores its ".H" part */
#define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(w, "", VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC)
197 
/*
 * Register-increment circular store of a ".new" value.
 *     VAL         value to store
 *     ADDR        address (read/write operand, updated by the store)
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in elements)
 *
 * build_mreg(INC, 0, LEN) goes through r4 into m1 and START into cs1.
 * Inside one { } group VAL is copied into r5 and the store takes its data
 * from r5.new.
 */
#define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %[mreg]\n\t" \
        "m1 = r4\n\t" \
        "cs1 = %[start]\n\t" \
        "{\n\t" \
        "    r5 = %[val]\n\t" \
        "    mem" #SIZE "(%[addr]++I:circ(M1)) = r5.new\n\t" \
        "}\n\t" \
        : [addr] "+r"(ADDR) \
        : [mreg] "r"(build_mreg((INC), 0, (LEN))), \
          [start] "r"(START), \
          [val] "r"(VAL) \
        : "r4", "r5", "m1", "cs1", "memory")

/* One wrapper per store size: b, h, w */
#define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC)
#define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \
    CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC)
218 
219 
/*
 * We'll test increments +1 and -1
 *
 * Check the result of the i-th circular load.
 *     line        source line reported on a mismatch
 *     i           iteration number of the load
 *     res         value that was loaded
 *     inc         increment in elements
 *     size        number of elements in the circular window
 *
 * The expected value is i * inc folded into the range [0, size) by adding
 * or subtracting size; it is compared with res through __check32().
 */
void __check_load(int line, int32_t i, int64_t res, int32_t inc, int32_t size)
{
    int32_t wrapped = i * inc;

    for (;;) {
        if (wrapped >= size) {
            wrapped -= size;
        } else if (wrapped < 0) {
            wrapped += size;
        } else {
            break;
        }
    }
    __check32(line, res, wrapped);
}

/* Convenience wrapper that supplies the caller's line number */
#define check_load(I, RES, INC, SZ) __check_load(__LINE__, I, RES, INC, SZ)
234 
235 #define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \
236 void circ_test_load_imm_##SZ(void) \
237 { \
238     TYPE *p = (TYPE *)BUF; \
239     int32_t size = 10; \
240     for (int i = 0; i < BUFSIZE; i++) { \
241         TYPE element; \
242         CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \
243         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
244                      i, p, element); \
245         check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \
246     } \
247     p = (TYPE *)BUF; \
248     for (int i = 0; i < BUFSIZE; i++) { \
249         TYPE element; \
250         CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \
251         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
252                      i, p, element); \
253         check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \
254     } \
255 }
256 
257 TEST_LOAD_IMM(b,  int8_t,         bbuf, NBYTES, 1, d)
258 TEST_LOAD_IMM(ub, uint8_t,        bbuf, NBYTES, 1, d)
259 TEST_LOAD_IMM(h,  int16_t,        hbuf, NHALFS, 2, d)
260 TEST_LOAD_IMM(uh, uint16_t,       hbuf, NHALFS, 2, d)
261 TEST_LOAD_IMM(w,  int32_t,        wbuf, NWORDS, 4, d)
262 TEST_LOAD_IMM(d,  int64_t,        dbuf, NDOBLS, 8, lld)
263 
264 #define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \
265 void circ_test_load_reg_##SZ(void) \
266 { \
267     TYPE *p = (TYPE *)BUF; \
268     int32_t size = 13; \
269     for (int i = 0; i < BUFSIZE; i++) { \
270         TYPE element; \
271         CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \
272         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
273                      i, p, element); \
274         check_load(i, element, 1, size); \
275     } \
276     p = (TYPE *)BUF; \
277     for (int i = 0; i < BUFSIZE; i++) { \
278         TYPE element; \
279         CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \
280         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \
281                      i, p, element); \
282         check_load(i, element, -1, size); \
283     } \
284 }
285 
286 TEST_LOAD_REG(b,  int8_t,         bbuf, NBYTES, d)
287 TEST_LOAD_REG(ub, uint8_t,        bbuf, NBYTES, d)
288 TEST_LOAD_REG(h,  int16_t,        hbuf, NHALFS, d)
289 TEST_LOAD_REG(uh, uint16_t,       hbuf, NHALFS, d)
290 TEST_LOAD_REG(w,  int32_t,        wbuf, NWORDS, d)
291 TEST_LOAD_REG(d,  int64_t,        dbuf, NDOBLS, lld)
292 
293 /* The circular stores will wrap around somewhere inside the buffer */
294 #define CIRC_VAL(SZ, TYPE, BUFSIZE) \
295 TYPE circ_val_##SZ(int i, int32_t inc, int32_t size) \
296 { \
297     int mod = BUFSIZE % size; \
298     int elem = i * inc; \
299     if (elem < 0) { \
300         if (-elem <= size - mod) { \
301             return (elem + BUFSIZE - mod); \
302         } else { \
303             return (elem + BUFSIZE + size - mod); \
304         } \
305     } else if (elem < mod) {\
306         return (elem + BUFSIZE - mod); \
307     } else { \
308         return (elem + BUFSIZE - size - mod); \
309     } \
310 }
311 
312 CIRC_VAL(b, uint8_t,       NBYTES)
313 CIRC_VAL(h, int16_t,       NHALFS)
314 CIRC_VAL(w, int32_t,       NWORDS)
315 CIRC_VAL(d, int64_t,       NDOBLS)
316 
317 /*
318  * Circular stores should only write to the first "size" elements of the buffer
319  * the remainder of the elements should have BUF[i] == i
320  */
321 #define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \
322 void check_store_##SZ(int32_t inc, int32_t size) \
323 { \
324     for (int i = 0; i < size; i++) { \
325         DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \
326                      i, BUF[i], circ_val_##SZ(i, inc, size)); \
327         check64(BUF[i], circ_val_##SZ(i, inc, size)); \
328     } \
329     for (int i = size; i < BUFSIZE; i++) { \
330         check64(BUF[i], i); \
331     } \
332 }
333 
334 CHECK_STORE(b, bbuf, NBYTES, x)
335 CHECK_STORE(h, hbuf, NHALFS, x)
336 CHECK_STORE(w, wbuf, NWORDS, x)
337 CHECK_STORE(d, dbuf, NDOBLS, llx)
338 
339 #define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \
340 void circ_test_store_imm_##SZ(void) \
341 { \
342     uint32_t size = 27; \
343     TYPE *p = BUF; \
344     TYPE val = 0; \
345     init_##BUF(); \
346     for (int i = 0; i < BUFSIZE; i++) { \
347         CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \
348         val++; \
349     } \
350     check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \
351     p = BUF; \
352     val = 0; \
353     init_##BUF(); \
354     for (int i = 0; i < BUFSIZE; i++) { \
355         CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \
356                             -(INC)); \
357         val++; \
358     } \
359     check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \
360 }
361 
362 CIRC_TEST_STORE_IMM(b,    b, uint8_t,       bbuf, NBYTES, 0,  1)
363 CIRC_TEST_STORE_IMM(h,    h, int16_t,       hbuf, NHALFS, 0,  2)
364 CIRC_TEST_STORE_IMM(f,    h, int16_t,       hbuf, NHALFS, 16, 2)
365 CIRC_TEST_STORE_IMM(w,    w, int32_t,       wbuf, NWORDS, 0,  4)
366 CIRC_TEST_STORE_IMM(d,    d, int64_t,       dbuf, NDOBLS, 0,  8)
367 CIRC_TEST_STORE_IMM(bnew, b, uint8_t,       bbuf, NBYTES, 0,  1)
368 CIRC_TEST_STORE_IMM(hnew, h, int16_t,       hbuf, NHALFS, 0,  2)
369 CIRC_TEST_STORE_IMM(wnew, w, int32_t,       wbuf, NWORDS, 0,  4)
370 
371 #define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \
372 void circ_test_store_reg_##SZ(void) \
373 { \
374     TYPE *p = BUF; \
375     uint32_t size = 19; \
376     TYPE val = 0; \
377     init_##BUF(); \
378     for (int i = 0; i < BUFSIZE; i++) { \
379         CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \
380         val++; \
381     } \
382     check_store_##CHK(1, size); \
383     p = BUF; \
384     val = 0; \
385     init_##BUF(); \
386     for (int i = 0; i < BUFSIZE; i++) { \
387         CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \
388         val++; \
389     } \
390     check_store_##CHK(-1, size); \
391 }
392 
393 CIRC_TEST_STORE_REG(b,    b, uint8_t,       bbuf, NBYTES, 0)
394 CIRC_TEST_STORE_REG(h,    h, int16_t,       hbuf, NHALFS, 0)
395 CIRC_TEST_STORE_REG(f,    h, int16_t,       hbuf, NHALFS, 16)
396 CIRC_TEST_STORE_REG(w,    w, int32_t,       wbuf, NWORDS, 0)
397 CIRC_TEST_STORE_REG(d,    d, int64_t,       dbuf, NDOBLS, 0)
398 CIRC_TEST_STORE_REG(bnew, b, uint8_t,       bbuf, NBYTES, 0)
399 CIRC_TEST_STORE_REG(hnew, h, int16_t,       hbuf, NHALFS, 0)
400 CIRC_TEST_STORE_REG(wnew, w, int32_t,       wbuf, NWORDS, 0)
401 
402 /* Test the old scheme used in Hexagon V3 */
403 static void circ_test_v3(void)
404 {
405     int *p = wbuf;
406     int32_t size = 15;
407     /* set high bit in K to test unsigned extract in fcirc */
408     int32_t K = 8;      /* 1024 bytes */
409     int32_t element;
410 
411     init_wbuf();
412 
413     for (int i = 0; i < NWORDS; i++) {
414         __asm__(
415             "r4 = %2\n\t"
416             "m1 = r4\n\t"
417             "%0 = memw(%1++I:circ(M1))\n\t"
418             : "=r"(element), "+r"(p)
419             : "r"(build_mreg(1, K, size * sizeof(int)))
420             : "r4", "m1");
421         DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element);
422         check_load(i, element, 1, size);
423     }
424 }
425 
426 int main()
427 {
428     init_bbuf();
429     init_hbuf();
430     init_wbuf();
431     init_dbuf();
432 
433     DEBUG_PRINTF("NBYTES = %d\n", NBYTES);
434     DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf);
435     DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf);
436     DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf);
437     DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf);
438 
439     circ_test_load_imm_b();
440     circ_test_load_imm_ub();
441     circ_test_load_imm_h();
442     circ_test_load_imm_uh();
443     circ_test_load_imm_w();
444     circ_test_load_imm_d();
445 
446     circ_test_load_reg_b();
447     circ_test_load_reg_ub();
448     circ_test_load_reg_h();
449     circ_test_load_reg_uh();
450     circ_test_load_reg_w();
451     circ_test_load_reg_d();
452 
453     circ_test_store_imm_b();
454     circ_test_store_imm_h();
455     circ_test_store_imm_f();
456     circ_test_store_imm_w();
457     circ_test_store_imm_d();
458     circ_test_store_imm_bnew();
459     circ_test_store_imm_hnew();
460     circ_test_store_imm_wnew();
461 
462     circ_test_store_reg_b();
463     circ_test_store_reg_h();
464     circ_test_store_reg_f();
465     circ_test_store_reg_w();
466     circ_test_store_reg_d();
467     circ_test_store_reg_bnew();
468     circ_test_store_reg_hnew();
469     circ_test_store_reg_wnew();
470 
471     circ_test_v3();
472 
473     puts(err ? "FAIL" : "PASS");
474     return err ? 1 : 0;
475 }
476