/*
 *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>

/*
 * Set DEBUG to 1 to get a trace of every access.  The printf call is always
 * compiled (so its arguments stay type-checked) but only executed when
 * DEBUG is non-zero.
 */
#define DEBUG 0
#define DEBUG_PRINTF(...) \
    do { \
        if (DEBUG) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)

/* Every test buffer occupies NBYTES bytes; N<type>S is its element count */
#define NBYTES (1 << 8)
#define NHALFS (NBYTES / sizeof(short))
#define NWORDS (NBYTES / sizeof(int))
#define NDOBLS (NBYTES / sizeof(long long))

/* One buffer per access size, each aligned to 1 << 12 bytes */
long long dbuf[NDOBLS] __attribute__((aligned(1 << 12))) = {0};
int wbuf[NWORDS] __attribute__((aligned(1 << 12))) = {0};
short hbuf[NHALFS] __attribute__((aligned(1 << 12))) = {0};
unsigned char bbuf[NBYTES] __attribute__((aligned(1 << 12))) = {0};

/*
 * We use the C preprocessor to deal with the combinations of types
 */

/*
 * INIT(BUF, N) defines init_BUF(), which fills the N elements of BUF
 * with BUF[i] = i.
 */
#define INIT(BUF, N) \
    void init_##BUF(void) \
    { \
        int i; \
        for (i = 0; i < N; i++) { \
            BUF[i] = i; \
        } \
    }

INIT(bbuf, NBYTES)
INIT(hbuf, NHALFS)
INIT(wbuf, NWORDS)
INIT(dbuf, NDOBLS)

/*
 * Macros for performing circular load
 *     RES         result
 *     ADDR        address
 *     START       start address of buffer
 *     LEN         length of buffer (in bytes)
 *     INC         address increment (in bytes for IMM, elements for REG)
 *
 * LEN is written to m0 (via r4) and START to cs0 before the load is issued.
 * The "memory" clobber tells the compiler that the asm reads memory that is
 * not listed as an operand, so it cannot be moved across the stores that
 * initialize the buffers.
 */
#define CIRC_LOAD_IMM(SIZE, RES, ADDR, START, LEN, INC) \
    __asm__( \
        "r4 = %3\n\t" \
        "m0 = r4\n\t" \
        "cs0 = %2\n\t" \
        "%0 = mem" #SIZE "(%1++#" #INC ":circ(M0))\n\t" \
        : "=r"(RES), "+r"(ADDR) \
        : "r"(START), "r"(LEN) \
        : "r4", "m0", "cs0", "memory")
#define CIRC_LOAD_IMM_b(RES, ADDR, START, LEN, INC) \
    CIRC_LOAD_IMM(b, RES, ADDR, START, LEN, INC)
#define CIRC_LOAD_IMM_ub(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_IMM(ub, RES, ADDR, START, LEN, INC) #define CIRC_LOAD_IMM_h(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_IMM(h, RES, ADDR, START, LEN, INC) #define CIRC_LOAD_IMM_uh(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_IMM(uh, RES, ADDR, START, LEN, INC) #define CIRC_LOAD_IMM_w(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_IMM(w, RES, ADDR, START, LEN, INC) #define CIRC_LOAD_IMM_d(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_IMM(d, RES, ADDR, START, LEN, INC) /* * The mreg has the following pieces * mreg[31:28] increment[10:7] * mreg[27:24] K value (used Hexagon v3 and earlier) * mreg[23:17] increment[6:0] * mreg[16:0] circular buffer length */ static int build_mreg(int inc, int K, int len) { return ((inc & 0x780) << 21) | ((K & 0xf) << 24) | ((inc & 0x7f) << 17) | (len & 0x1ffff); } #define CIRC_LOAD_REG(SIZE, RES, ADDR, START, LEN, INC) \ __asm__( \ "r4 = %2\n\t" \ "m1 = r4\n\t" \ "cs1 = %3\n\t" \ "%0 = mem" #SIZE "(%1++I:circ(M1))\n\t" \ : "=r"(RES), "+r"(ADDR) \ : "r"(build_mreg((INC), 0, (LEN))), \ "r"(START) \ : "r4", "m1", "cs1") #define CIRC_LOAD_REG_b(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_REG(b, RES, ADDR, START, LEN, INC) #define CIRC_LOAD_REG_ub(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_REG(ub, RES, ADDR, START, LEN, INC) #define CIRC_LOAD_REG_h(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_REG(h, RES, ADDR, START, LEN, INC) #define CIRC_LOAD_REG_uh(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_REG(uh, RES, ADDR, START, LEN, INC) #define CIRC_LOAD_REG_w(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_REG(w, RES, ADDR, START, LEN, INC) #define CIRC_LOAD_REG_d(RES, ADDR, START, LEN, INC) \ CIRC_LOAD_REG(d, RES, ADDR, START, LEN, INC) /* * Macros for performing circular store * VAL value to store * ADDR address * START start address of buffer * LEN length of buffer (in bytes) * INC address increment (in bytes for IMM, elements for REG) */ #define CIRC_STORE_IMM(SIZE, PART, VAL, ADDR, START, LEN, INC) \ __asm__( \ "r4 = %3\n\t" \ "m0 = r4\n\t" 
\ "cs0 = %1\n\t" \ "mem" #SIZE "(%0++#" #INC ":circ(M0)) = %2" PART "\n\t" \ : "+r"(ADDR) \ : "r"(START), "r"(VAL), "r"(LEN) \ : "r4", "m0", "cs0", "memory") #define CIRC_STORE_IMM_b(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_IMM(b, "", VAL, ADDR, START, LEN, INC) #define CIRC_STORE_IMM_h(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_IMM(h, "", VAL, ADDR, START, LEN, INC) #define CIRC_STORE_IMM_f(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_IMM(h, ".H", VAL, ADDR, START, LEN, INC) #define CIRC_STORE_IMM_w(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_IMM(w, "", VAL, ADDR, START, LEN, INC) #define CIRC_STORE_IMM_d(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_IMM(d, "", VAL, ADDR, START, LEN, INC) #define CIRC_STORE_NEW_IMM(SIZE, VAL, ADDR, START, LEN, INC) \ __asm__( \ "r4 = %3\n\t" \ "m0 = r4\n\t" \ "cs0 = %1\n\t" \ "{\n\t" \ " r5 = %2\n\t" \ " mem" #SIZE "(%0++#" #INC ":circ(M0)) = r5.new\n\t" \ "}\n\t" \ : "+r"(ADDR) \ : "r"(START), "r"(VAL), "r"(LEN) \ : "r4", "r5", "m0", "cs0", "memory") #define CIRC_STORE_IMM_bnew(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_NEW_IMM(b, VAL, ADDR, START, LEN, INC) #define CIRC_STORE_IMM_hnew(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_NEW_IMM(h, VAL, ADDR, START, LEN, INC) #define CIRC_STORE_IMM_wnew(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_NEW_IMM(w, VAL, ADDR, START, LEN, INC) #define CIRC_STORE_REG(SIZE, PART, VAL, ADDR, START, LEN, INC) \ __asm__( \ "r4 = %1\n\t" \ "m1 = r4\n\t" \ "cs1 = %2\n\t" \ "mem" #SIZE "(%0++I:circ(M1)) = %3" PART "\n\t" \ : "+r"(ADDR) \ : "r"(build_mreg((INC), 0, (LEN))), \ "r"(START), \ "r"(VAL) \ : "r4", "m1", "cs1", "memory") #define CIRC_STORE_REG_b(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_REG(b, "", VAL, ADDR, START, LEN, INC) #define CIRC_STORE_REG_h(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_REG(h, "", VAL, ADDR, START, LEN, INC) #define CIRC_STORE_REG_f(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_REG(h, ".H", VAL, ADDR, START, LEN, INC) #define CIRC_STORE_REG_w(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_REG(w, "", VAL, ADDR, 
START, LEN, INC) #define CIRC_STORE_REG_d(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_REG(d, "", VAL, ADDR, START, LEN, INC) #define CIRC_STORE_NEW_REG(SIZE, VAL, ADDR, START, LEN, INC) \ __asm__( \ "r4 = %1\n\t" \ "m1 = r4\n\t" \ "cs1 = %2\n\t" \ "{\n\t" \ " r5 = %3\n\t" \ " mem" #SIZE "(%0++I:circ(M1)) = r5.new\n\t" \ "}\n\t" \ : "+r"(ADDR) \ : "r"(build_mreg((INC), 0, (LEN))), \ "r"(START), \ "r"(VAL) \ : "r4", "r5", "m1", "cs1", "memory") #define CIRC_STORE_REG_bnew(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_NEW_REG(b, VAL, ADDR, START, LEN, INC) #define CIRC_STORE_REG_hnew(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_NEW_REG(h, VAL, ADDR, START, LEN, INC) #define CIRC_STORE_REG_wnew(VAL, ADDR, START, LEN, INC) \ CIRC_STORE_NEW_REG(w, VAL, ADDR, START, LEN, INC) int err; /* We'll test increments +1 and -1 */ void check_load(int i, long long result, int inc, int size) { int expect = (i * inc); while (expect >= size) { expect -= size; } while (expect < 0) { expect += size; } if (result != expect) { printf("ERROR(%d): %lld != %d\n", i, result, expect); err++; } } #define TEST_LOAD_IMM(SZ, TYPE, BUF, BUFSIZE, INC, FMT) \ void circ_test_load_imm_##SZ(void) \ { \ TYPE *p = (TYPE *)BUF; \ int size = 10; \ int i; \ for (i = 0; i < BUFSIZE; i++) { \ TYPE element; \ CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), (INC)); \ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ i, p, element); \ check_load(i, element, ((INC) / (int)sizeof(TYPE)), size); \ } \ p = (TYPE *)BUF; \ for (i = 0; i < BUFSIZE; i++) { \ TYPE element; \ CIRC_LOAD_IMM_##SZ(element, p, BUF, size * sizeof(TYPE), -(INC)); \ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ i, p, element); \ check_load(i, element, (-(INC) / (int)sizeof(TYPE)), size); \ } \ } TEST_LOAD_IMM(b, char, bbuf, NBYTES, 1, d) TEST_LOAD_IMM(ub, unsigned char, bbuf, NBYTES, 1, d) TEST_LOAD_IMM(h, short, hbuf, NHALFS, 2, d) TEST_LOAD_IMM(uh, unsigned short, hbuf, NHALFS, 2, d) TEST_LOAD_IMM(w, int, wbuf, NWORDS, 4, 
d) TEST_LOAD_IMM(d, long long, dbuf, NDOBLS, 8, lld) #define TEST_LOAD_REG(SZ, TYPE, BUF, BUFSIZE, FMT) \ void circ_test_load_reg_##SZ(void) \ { \ TYPE *p = (TYPE *)BUF; \ int size = 13; \ int i; \ for (i = 0; i < BUFSIZE; i++) { \ TYPE element; \ CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), 1); \ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ i, p, element); \ check_load(i, element, 1, size); \ } \ p = (TYPE *)BUF; \ for (i = 0; i < BUFSIZE; i++) { \ TYPE element; \ CIRC_LOAD_REG_##SZ(element, p, BUF, size * sizeof(TYPE), -1); \ DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2" #FMT "\n", \ i, p, element); \ check_load(i, element, -1, size); \ } \ } TEST_LOAD_REG(b, char, bbuf, NBYTES, d) TEST_LOAD_REG(ub, unsigned char, bbuf, NBYTES, d) TEST_LOAD_REG(h, short, hbuf, NHALFS, d) TEST_LOAD_REG(uh, unsigned short, hbuf, NHALFS, d) TEST_LOAD_REG(w, int, wbuf, NWORDS, d) TEST_LOAD_REG(d, long long, dbuf, NDOBLS, lld) /* The circular stores will wrap around somewhere inside the buffer */ #define CIRC_VAL(SZ, TYPE, BUFSIZE) \ TYPE circ_val_##SZ(int i, int inc, int size) \ { \ int mod = BUFSIZE % size; \ int elem = i * inc; \ if (elem < 0) { \ if (-elem <= size - mod) { \ return (elem + BUFSIZE - mod); \ } else { \ return (elem + BUFSIZE + size - mod); \ } \ } else if (elem < mod) {\ return (elem + BUFSIZE - mod); \ } else { \ return (elem + BUFSIZE - size - mod); \ } \ } CIRC_VAL(b, unsigned char, NBYTES) CIRC_VAL(h, short, NHALFS) CIRC_VAL(w, int, NWORDS) CIRC_VAL(d, long long, NDOBLS) /* * Circular stores should only write to the first "size" elements of the buffer * the remainder of the elements should have BUF[i] == i */ #define CHECK_STORE(SZ, BUF, BUFSIZE, FMT) \ void check_store_##SZ(int inc, int size) \ { \ int i; \ for (i = 0; i < size; i++) { \ DEBUG_PRINTF(#BUF "[%3d] = 0x%02" #FMT ", guess = 0x%02" #FMT "\n", \ i, BUF[i], circ_val_##SZ(i, inc, size)); \ if (BUF[i] != circ_val_##SZ(i, inc, size)) { \ printf("ERROR(%3d): 0x%02" #FMT " != 
0x%02" #FMT "\n", \ i, BUF[i], circ_val_##SZ(i, inc, size)); \ err++; \ } \ } \ for (i = size; i < BUFSIZE; i++) { \ if (BUF[i] != i) { \ printf("ERROR(%3d): 0x%02" #FMT " != 0x%02x\n", i, BUF[i], i); \ err++; \ } \ } \ } CHECK_STORE(b, bbuf, NBYTES, x) CHECK_STORE(h, hbuf, NHALFS, x) CHECK_STORE(w, wbuf, NWORDS, x) CHECK_STORE(d, dbuf, NDOBLS, llx) #define CIRC_TEST_STORE_IMM(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT, INC) \ void circ_test_store_imm_##SZ(void) \ { \ unsigned int size = 27; \ TYPE *p = BUF; \ TYPE val = 0; \ int i; \ init_##BUF(); \ for (i = 0; i < BUFSIZE; i++) { \ CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), INC); \ val++; \ } \ check_store_##CHK(((INC) / (int)sizeof(TYPE)), size); \ p = BUF; \ val = 0; \ init_##BUF(); \ for (i = 0; i < BUFSIZE; i++) { \ CIRC_STORE_IMM_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), \ -(INC)); \ val++; \ } \ check_store_##CHK((-(INC) / (int)sizeof(TYPE)), size); \ } CIRC_TEST_STORE_IMM(b, b, unsigned char, bbuf, NBYTES, 0, 1) CIRC_TEST_STORE_IMM(h, h, short, hbuf, NHALFS, 0, 2) CIRC_TEST_STORE_IMM(f, h, short, hbuf, NHALFS, 16, 2) CIRC_TEST_STORE_IMM(w, w, int, wbuf, NWORDS, 0, 4) CIRC_TEST_STORE_IMM(d, d, long long, dbuf, NDOBLS, 0, 8) CIRC_TEST_STORE_IMM(bnew, b, unsigned char, bbuf, NBYTES, 0, 1) CIRC_TEST_STORE_IMM(hnew, h, short, hbuf, NHALFS, 0, 2) CIRC_TEST_STORE_IMM(wnew, w, int, wbuf, NWORDS, 0, 4) #define CIRC_TEST_STORE_REG(SZ, CHK, TYPE, BUF, BUFSIZE, SHIFT) \ void circ_test_store_reg_##SZ(void) \ { \ TYPE *p = BUF; \ unsigned int size = 19; \ TYPE val = 0; \ int i; \ init_##BUF(); \ for (i = 0; i < BUFSIZE; i++) { \ CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), 1); \ val++; \ } \ check_store_##CHK(1, size); \ p = BUF; \ val = 0; \ init_##BUF(); \ for (i = 0; i < BUFSIZE; i++) { \ CIRC_STORE_REG_##SZ(val << SHIFT, p, BUF, size * sizeof(TYPE), -1); \ val++; \ } \ check_store_##CHK(-1, size); \ } CIRC_TEST_STORE_REG(b, b, unsigned char, bbuf, NBYTES, 0) CIRC_TEST_STORE_REG(h, 
h, short, hbuf, NHALFS, 0) CIRC_TEST_STORE_REG(f, h, short, hbuf, NHALFS, 16) CIRC_TEST_STORE_REG(w, w, int, wbuf, NWORDS, 0) CIRC_TEST_STORE_REG(d, d, long long, dbuf, NDOBLS, 0) CIRC_TEST_STORE_REG(bnew, b, unsigned char, bbuf, NBYTES, 0) CIRC_TEST_STORE_REG(hnew, h, short, hbuf, NHALFS, 0) CIRC_TEST_STORE_REG(wnew, w, int, wbuf, NWORDS, 0) /* Test the old scheme used in Hexagon V3 */ static void circ_test_v3(void) { int *p = wbuf; int size = 15; int K = 4; /* 64 bytes */ int element; int i; init_wbuf(); for (i = 0; i < NWORDS; i++) { __asm__( "r4 = %2\n\t" "m1 = r4\n\t" "%0 = memw(%1++I:circ(M1))\n\t" : "=r"(element), "+r"(p) : "r"(build_mreg(1, K, size * sizeof(int))) : "r4", "m1"); DEBUG_PRINTF("i = %2d, p = 0x%p, element = %2d\n", i, p, element); check_load(i, element, 1, size); } } int main() { init_bbuf(); init_hbuf(); init_wbuf(); init_dbuf(); DEBUG_PRINTF("NBYTES = %d\n", NBYTES); DEBUG_PRINTF("Address of dbuf = 0x%p\n", dbuf); DEBUG_PRINTF("Address of wbuf = 0x%p\n", wbuf); DEBUG_PRINTF("Address of hbuf = 0x%p\n", hbuf); DEBUG_PRINTF("Address of bbuf = 0x%p\n", bbuf); circ_test_load_imm_b(); circ_test_load_imm_ub(); circ_test_load_imm_h(); circ_test_load_imm_uh(); circ_test_load_imm_w(); circ_test_load_imm_d(); circ_test_load_reg_b(); circ_test_load_reg_ub(); circ_test_load_reg_h(); circ_test_load_reg_uh(); circ_test_load_reg_w(); circ_test_load_reg_d(); circ_test_store_imm_b(); circ_test_store_imm_h(); circ_test_store_imm_f(); circ_test_store_imm_w(); circ_test_store_imm_d(); circ_test_store_imm_bnew(); circ_test_store_imm_hnew(); circ_test_store_imm_wnew(); circ_test_store_reg_b(); circ_test_store_reg_h(); circ_test_store_reg_f(); circ_test_store_reg_w(); circ_test_store_reg_d(); circ_test_store_reg_bnew(); circ_test_store_reg_hnew(); circ_test_store_reg_wnew(); circ_test_v3(); puts(err ? "FAIL" : "PASS"); return err ? 1 : 0; }