1 /* { dg-do compile } */ 2 /* { dg-options "-O2 -ftree-vectorize -fno-trapping-math -ffinite-math-only" } */ 3 4 #include <stdint.h> 5 6 #define add(A, B) ((A) + (B)) 7 #define sub(A, B) ((A) - (B)) 8 #define mul(A, B) ((A) * (B)) 9 #define div(A, B) ((A) / (B)) 10 #define max(A, B) ((A) > (B) ? (A) : (B)) 11 #define min(A, B) ((A) < (B) ? (A) : (B)) 12 #define and(A, B) ((A) & (B)) 13 #define ior(A, B) ((A) | (B)) 14 #define xor(A, B) ((A) ^ (B)) 15 16 #define DEF_LOOP(TYPE, CMPTYPE, OP) \ 17 void __attribute__((noipa)) \ 18 f_##OP##_##TYPE (TYPE *restrict dest, CMPTYPE *restrict cond, \ 19 CMPTYPE limit, TYPE *restrict src1, \ 20 TYPE *restrict src2, unsigned int n) \ 21 { \ 22 for (unsigned int i = 0; i < n; ++i) \ 23 { \ 24 TYPE truev = OP (src1[i], src2[i]); \ 25 dest[i] = cond[i] < limit ? truev : src2[i]; \ 26 } \ 27 } 28 29 #define FOR_EACH_INT_TYPE(T, TYPE) \ 30 T (TYPE, TYPE, add) \ 31 T (TYPE, TYPE, sub) \ 32 T (TYPE, TYPE, mul) \ 33 T (TYPE, TYPE, max) \ 34 T (TYPE, TYPE, min) \ 35 T (TYPE, TYPE, and) \ 36 T (TYPE, TYPE, ior) \ 37 T (TYPE, TYPE, xor) 38 39 #define FOR_EACH_FP_TYPE(T, TYPE, CMPTYPE, SUFFIX) \ 40 T (TYPE, CMPTYPE, add) \ 41 T (TYPE, CMPTYPE, sub) \ 42 T (TYPE, CMPTYPE, mul) \ 43 T (TYPE, CMPTYPE, div) \ 44 T (TYPE, CMPTYPE, __builtin_fmax##SUFFIX) \ 45 T (TYPE, CMPTYPE, __builtin_fmin##SUFFIX) 46 47 #define FOR_EACH_LOOP(T) \ 48 FOR_EACH_INT_TYPE (T, int8_t) \ 49 FOR_EACH_INT_TYPE (T, int16_t) \ 50 FOR_EACH_INT_TYPE (T, int32_t) \ 51 FOR_EACH_INT_TYPE (T, int64_t) \ 52 FOR_EACH_INT_TYPE (T, uint8_t) \ 53 FOR_EACH_INT_TYPE (T, uint16_t) \ 54 FOR_EACH_INT_TYPE (T, uint32_t) \ 55 FOR_EACH_INT_TYPE (T, uint64_t) \ 56 FOR_EACH_FP_TYPE (T, _Float16, uint16_t, f16) \ 57 FOR_EACH_FP_TYPE (T, float, float, f32) \ 58 FOR_EACH_FP_TYPE (T, double, double, f64) 59 60 FOR_EACH_LOOP (DEF_LOOP) 61 62 /* { dg-final { scan-assembler-not {\tsel\t} } } */ 63 /* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.., z[0-9]+} } } */ 64 65 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ 66 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ 67 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ 68 /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ 69 70 /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ 71 /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ 72 /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ 73 /* { dg-final { scan-assembler-times {\tsubr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ 74 75 /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ 76 /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ 77 /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ 78 /* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ 79 80 /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ 81 /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 82 /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 83 /* { dg-final { scan-assembler-times {\tsmax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 84 85 /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ 86 /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 87 /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 88 /* { dg-final { scan-assembler-times {\tsmin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 89 90 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ 91 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 92 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 93 /* { dg-final { scan-assembler-times {\tumax\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 94 95 /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.b, p[0-7]/m,} 1 } } */ 96 /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 97 /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 98 /* { dg-final { scan-assembler-times {\tumin\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 99 100 /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ 101 /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ 102 /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ 103 /* { dg-final { scan-assembler-times {\tand\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ 104 105 /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ 106 /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ 107 /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ 108 /* { dg-final { scan-assembler-times {\torr\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ 109 110 /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.b, p[0-7]/m,} 2 } } */ 111 /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.h, p[0-7]/m,} 2 } } */ 112 /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m,} 2 } } */ 113 /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m,} 2 } } */ 114 115 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 116 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 117 /* { dg-final { scan-assembler-times {\tfadd\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 118 119 /* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 120 /* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 121 /* { dg-final { scan-assembler-times {\tfsubr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 122 123 /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 124 /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 125 /* { dg-final { scan-assembler-times {\tfmul\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 126 127 /* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 128 /* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 129 /* { dg-final { scan-assembler-times {\tfdivr\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 130 131 /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 132 /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 133 /* { dg-final { scan-assembler-times {\tfmaxnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 134 135 /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.h, p[0-7]/m,} 1 } } */ 136 /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ 137 /* { dg-final { scan-assembler-times {\tfminnm\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ 138