1 /* { dg-do compile { target lp64 } } */
2 /* { dg-options "-O -mbig-endian -fno-stack-clash-protection -g" } */
3 /* { dg-final { check-function-bodies "**" "" } } */
4 
5 #pragma GCC aarch64 "arm_sve.h"
6 
7 /*
8 ** callee:
9 **	addvl	sp, sp, #-1
10 **	str	p4, \[sp\]
11 **	ptrue	p4\.b, all
12 ** (
13 **	ld1h	(z[0-9]+\.h), p4/z, \[x1, #1, mul vl\]
14 **	ld1h	(z[0-9]+\.h), p4/z, \[x1\]
15 **	st2h	{\2 - \1}, p0, \[x0\]
16 ** |
17 **	ld1h	(z[0-9]+\.h), p4/z, \[x1\]
18 **	ld1h	(z[0-9]+\.h), p4/z, \[x1, #1, mul vl\]
19 **	st2h	{\3 - \4}, p0, \[x0\]
20 ** )
21 **	st4h	{z0\.h - z3\.h}, p1, \[x0\]
22 **	st3h	{z4\.h - z6\.h}, p2, \[x0\]
23 **	st1h	z7\.h, p3, \[x0\]
24 **	ldr	p4, \[sp\]
25 **	addvl	sp, sp, #1
26 **	ret
27 */
28 void __attribute__((noipa))
callee(void * x0,svfloat16x4_t z0,svfloat16x3_t z4,svfloat16x2_t stack,svfloat16_t z7,svbool_t p0,svbool_t p1,svbool_t p2,svbool_t p3)29 callee (void *x0, svfloat16x4_t z0, svfloat16x3_t z4, svfloat16x2_t stack,
30 	svfloat16_t z7, svbool_t p0, svbool_t p1, svbool_t p2, svbool_t p3)
31 {
32   svst2 (p0, x0, stack);
33   svst4 (p1, x0, z0);
34   svst3 (p2, x0, z4);
35   svst1_f16 (p3, x0, z7);
36 }
37 
38 void __attribute__((noipa))
caller(void * x0)39 caller (void *x0)
40 {
41   svbool_t pg;
42   pg = svptrue_b8 ();
43   callee (x0,
44 	  svld4_vnum_f16 (pg, x0, -8),
45 	  svld3_vnum_f16 (pg, x0, -3),
46 	  svld2_vnum_f16 (pg, x0, 0),
47 	  svld1_vnum_f16 (pg, x0, 2),
48 	  svptrue_pat_b8 (SV_VL1),
49 	  svptrue_pat_b16 (SV_VL2),
50 	  svptrue_pat_b32 (SV_VL3),
51 	  svptrue_pat_b64 (SV_VL4));
52 }
53 
54 /* { dg-final { scan-assembler {\tld4h\t{z0\.h - z3\.h}, p[0-7]/z, \[x0, #-8, mul vl\]\n} } } */
55 /* { dg-final { scan-assembler {\tld3h\t{z4\.h - z6\.h}, p[0-7]/z, \[x0, #-3, mul vl\]\n} } } */
56 /* { dg-final { scan-assembler {\tld1h\tz7\.h, p[0-7]/z, \[x0, #2, mul vl\]\n} } } */
57 /* { dg-final { scan-assembler {\tmov\tx1, sp\n} } } */
58 /* { dg-final { scan-assembler {\tld2h\t{(z[0-9]+\.h) - z[0-9]+\.h}.*\tst1h\t\1, p[0-7], \[x1\]\n} } } */
59 /* { dg-final { scan-assembler {\tld2h\t{z[0-9]+\.h - (z[0-9]+\.h)}.*\tst1h\t\1, p[0-7], \[x1, #1, mul vl\]\n} } } */
60 /* { dg-final { scan-assembler {\tptrue\tp0\.b, vl1\n} } } */
61 /* { dg-final { scan-assembler {\tptrue\tp1\.h, vl2\n} } } */
62 /* { dg-final { scan-assembler {\tptrue\tp2\.s, vl3\n} } } */
63 /* { dg-final { scan-assembler {\tptrue\tp3\.d, vl4\n} } } */
64