/* { dg-do compile { target lp64 } } */
/* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */
/* { dg-final { check-function-bodies "**" "" } } */
4
5 #pragma GCC aarch64 "arm_sve.h"
6
/*
** callee1:
** ptrue p3\.b, all
** ...
** ld1w (z[0-9]+\.s), p3/z, \[x1, #3, mul vl\]
** ...
** st4w {z[0-9]+\.s - \1\}, p0, \[x0\]
** st2w {z3\.s - z4\.s}, p1, \[x0\]
** st3w {z5\.s - z7\.s}, p2, \[x0\]
** ret
*/
18 void __attribute__((noipa))
callee1(void * x0,svint32x3_t z0,svint32x2_t z3,svint32x3_t z5,svint32x4_t stack1,svint32_t stack2,svbool_t p0,svbool_t p1,svbool_t p2)19 callee1 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5,
20 svint32x4_t stack1, svint32_t stack2, svbool_t p0,
21 svbool_t p1, svbool_t p2)
22 {
23 svst4_s32 (p0, x0, stack1);
24 svst2_s32 (p1, x0, z3);
25 svst3_s32 (p2, x0, z5);
26 }
27
/*
** callee2:
** ptrue p3\.b, all
** ld1w (z[0-9]+\.s), p3/z, \[x2\]
** st1w \1, p0, \[x0\]
** st2w {z3\.s - z4\.s}, p1, \[x0\]
** st3w {z0\.s - z2\.s}, p2, \[x0\]
** ret
*/
37 void __attribute__((noipa))
callee2(void * x0,svint32x3_t z0,svint32x2_t z3,svint32x3_t z5,svint32x4_t stack1,svint32_t stack2,svbool_t p0,svbool_t p1,svbool_t p2)38 callee2 (void *x0, svint32x3_t z0, svint32x2_t z3, svint32x3_t z5,
39 svint32x4_t stack1, svint32_t stack2, svbool_t p0,
40 svbool_t p1, svbool_t p2)
41 {
42 svst1_s32 (p0, x0, stack2);
43 svst2_s32 (p1, x0, z3);
44 svst3_s32 (p2, x0, z0);
45 }
46
47 void __attribute__((noipa))
caller(void * x0)48 caller (void *x0)
49 {
50 svbool_t pg;
51 pg = svptrue_b8 ();
52 callee1 (x0,
53 svld3_vnum_s32 (pg, x0, -9),
54 svld2_vnum_s32 (pg, x0, -2),
55 svld3_vnum_s32 (pg, x0, 0),
56 svld4_vnum_s32 (pg, x0, 8),
57 svld1_vnum_s32 (pg, x0, 5),
58 svptrue_pat_b8 (SV_VL1),
59 svptrue_pat_b16 (SV_VL2),
60 svptrue_pat_b32 (SV_VL3));
61 }
62
/* { dg-final { scan-assembler {\tld3w\t{z0\.s - z2\.s}, p[0-7]/z, \[x0, #-9, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld2w\t{z3\.s - z4\.s}, p[0-7]/z, \[x0, #-2, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld3w\t{z5\.s - z7\.s}, p[0-7]/z, \[x0\]\n} } } */
/* { dg-final { scan-assembler {\tld4w\t{(z[0-9]+\.s) - z[0-9]+\.s}.*\tst1w\t\1, p[0-7], \[x1\]\n} } } */
/* { dg-final { scan-assembler {\tld4w\t{z[0-9]+\.s - (z[0-9]+\.s)}.*\tst1w\t\1, p[0-7], \[x1, #3, mul vl\]\n} } } */
/* { dg-final { scan-assembler {\tld1w\t(z[0-9]+\.s), p[0-7]/z, \[x0, #5, mul vl\]\n.*\tst1w\t\1, p[0-7], \[x2\]\n} } } */
/* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */
/* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */
/* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */
72