/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
3 
#include <stdint.h>

/* Element type (TYPE) and loop-counter/length type (ITYPE) used by the
   functions in this file.  The #ifndef guard lets a definition of TYPE
   made before this point take precedence; otherwise both default to
   8-bit types (unsigned elements, signed counter).

   ITYPE is only defaulted together with TYPE, so anything that
   predefines TYPE must define ITYPE as well.  */
#ifndef TYPE
#define TYPE uint8_t
#define ITYPE int8_t
#endif
10 
11 void __attribute__ ((noinline, noclone))
f2(TYPE * __restrict a,TYPE * __restrict b,TYPE * __restrict c,ITYPE n)12 f2 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, ITYPE n)
13 {
14   for (ITYPE i = 0; i < n; ++i)
15     {
16       a[i] = c[i * 2];
17       b[i] = c[i * 2 + 1];
18     }
19 }
20 
21 void __attribute__ ((noinline, noclone))
f3(TYPE * __restrict a,TYPE * __restrict b,TYPE * __restrict c,TYPE * __restrict d,ITYPE n)22 f3 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
23     TYPE *__restrict d, ITYPE n)
24 {
25   for (ITYPE i = 0; i < n; ++i)
26     {
27       a[i] = d[i * 3];
28       b[i] = d[i * 3 + 1];
29       c[i] = d[i * 3 + 2];
30     }
31 }
32 
33 void __attribute__ ((noinline, noclone))
f4(TYPE * __restrict a,TYPE * __restrict b,TYPE * __restrict c,TYPE * __restrict d,TYPE * __restrict e,ITYPE n)34 f4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
35     TYPE *__restrict d, TYPE *__restrict e, ITYPE n)
36 {
37   for (ITYPE i = 0; i < n; ++i)
38     {
39       a[i] = e[i * 4];
40       b[i] = e[i * 4 + 1];
41       c[i] = e[i * 4 + 2];
42       d[i] = e[i * 4 + 3];
43     }
44 }
45 
46 void __attribute__ ((noinline, noclone))
g2(TYPE * __restrict a,TYPE * __restrict b,TYPE * __restrict c,ITYPE n)47 g2 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c, ITYPE n)
48 {
49   for (ITYPE i = 0; i < n; ++i)
50     {
51       c[i * 2] = a[i];
52       c[i * 2 + 1] = b[i];
53     }
54 }
55 
56 void __attribute__ ((noinline, noclone))
g3(TYPE * __restrict a,TYPE * __restrict b,TYPE * __restrict c,TYPE * __restrict d,ITYPE n)57 g3 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
58     TYPE *__restrict d, ITYPE n)
59 {
60   for (ITYPE i = 0; i < n; ++i)
61     {
62       d[i * 3] = a[i];
63       d[i * 3 + 1] = b[i];
64       d[i * 3 + 2] = c[i];
65     }
66 }
67 
68 void __attribute__ ((noinline, noclone))
g4(TYPE * __restrict a,TYPE * __restrict b,TYPE * __restrict c,TYPE * __restrict d,TYPE * __restrict e,ITYPE n)69 g4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
70     TYPE *__restrict d, TYPE *__restrict e, ITYPE n)
71 {
72   for (ITYPE i = 0; i < n; ++i)
73     {
74       e[i * 4] = a[i];
75       e[i * 4 + 1] = b[i];
76       e[i * 4 + 2] = c[i];
77       e[i * 4 + 3] = d[i];
78     }
79 }
80 
/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
87