/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -mavx512vl -masm=att" } */
3 
/* GCC vector-extension types used by the functions below.
   V1/V2 are float vectors of 16 and 32 bytes; V4/V5 are int vectors of
   the same sizes and serve as the index masks passed to
   __builtin_shuffle for V1 and V2 respectively.  */
typedef float V1 __attribute__((vector_size (16)));
typedef float V2 __attribute__((vector_size (32)));
typedef int V4 __attribute__((vector_size (16)));
typedef int V5 __attribute__((vector_size (32)));
8 
9 void
f1(V1 x)10 f1 (V1 x)
11 {
12   register V1 a __asm ("xmm16");
13   a = x;
14   asm volatile ("" : "+v" (a));
15   a = __builtin_shuffle (a, (V4) { 0, 0, 0, 0 });
16   asm volatile ("" : "+v" (a));
17 }
18 
19 void
f2(V1 x)20 f2 (V1 x)
21 {
22   register V1 a __asm ("xmm16");
23   a = x;
24   asm volatile ("" : "+v" (a));
25   a = __builtin_shuffle (a, (V4) { 1, 1, 1, 1 });
26   asm volatile ("" : "+v" (a));
27 }
28 
29 void
f3(V1 x)30 f3 (V1 x)
31 {
32   register V1 a __asm ("xmm16");
33   a = x;
34   asm volatile ("" : "+v" (a));
35   a = __builtin_shuffle (a, (V4) { 2, 2, 2, 2 });
36   asm volatile ("" : "+v" (a));
37 }
38 
39 void
f4(V1 x)40 f4 (V1 x)
41 {
42   register V1 a __asm ("xmm16");
43   a = x;
44   asm volatile ("" : "+v" (a));
45   a = __builtin_shuffle (a, (V4) { 3, 3, 3, 3 });
46   asm volatile ("" : "+v" (a));
47 }
48 
49 void
f5(V1 * x)50 f5 (V1 *x)
51 {
52   register V1 a __asm ("xmm16");
53   a = __builtin_shuffle (*x, (V4) { 0, 0, 0, 0 });
54   asm volatile ("" : "+v" (a));
55 }
56 
57 void
f6(V1 * x)58 f6 (V1 *x)
59 {
60   register V1 a __asm ("xmm16");
61   a = __builtin_shuffle (*x, (V4) { 1, 1, 1, 1 });
62   asm volatile ("" : "+v" (a));
63 }
64 
65 void
f7(V1 * x)66 f7 (V1 *x)
67 {
68   register V1 a __asm ("xmm16");
69   a = __builtin_shuffle (*x, (V4) { 2, 2, 2, 2 });
70   asm volatile ("" : "+v" (a));
71 }
72 
73 void
f8(V1 * x)74 f8 (V1 *x)
75 {
76   register V1 a __asm ("xmm16");
77   a = __builtin_shuffle (*x, (V4) { 3, 3, 3, 3 });
78   asm volatile ("" : "+v" (a));
79 }
80 
81 void
f9(V2 x)82 f9 (V2 x)
83 {
84   register V2 a __asm ("xmm16");
85   a = x;
86   asm volatile ("" : "+v" (a));
87   a = __builtin_shuffle (a, (V5) { 0, 0, 0, 0, 0, 0, 0, 0 });
88   asm volatile ("" : "+v" (a));
89 }
90 
91 void
f10(V2 x)92 f10 (V2 x)
93 {
94   register V2 a __asm ("xmm16");
95   a = x;
96   asm volatile ("" : "+v" (a));
97   a = __builtin_shuffle (a, (V5) { 1, 1, 1, 1, 1, 1, 1, 1 });
98   asm volatile ("" : "+v" (a));
99 }
100 
101 void
f11(V2 x)102 f11 (V2 x)
103 {
104   register V2 a __asm ("xmm16");
105   a = x;
106   asm volatile ("" : "+v" (a));
107   a = __builtin_shuffle (a, (V5) { 4, 4, 4, 4, 4, 4, 4, 4 });
108   asm volatile ("" : "+v" (a));
109 }
110 
111 void
f12(V2 x)112 f12 (V2 x)
113 {
114   register V2 a __asm ("xmm16");
115   a = x;
116   asm volatile ("" : "+v" (a));
117   a = __builtin_shuffle (a, (V5) { 5, 5, 5, 5, 5, 5, 5, 5 });
118   asm volatile ("" : "+v" (a));
119 }
120 
121 void
f13(V2 * x)122 f13 (V2 *x)
123 {
124   register V2 a __asm ("xmm16");
125   a = __builtin_shuffle (*x, (V5) { 0, 0, 0, 0, 0, 0, 0, 0 });
126   asm volatile ("" : "+v" (a));
127 }
128 
129 void
f14(V2 * x)130 f14 (V2 *x)
131 {
132   register V2 a __asm ("xmm16");
133   a = __builtin_shuffle (*x, (V5) { 1, 1, 1, 1, 1, 1, 1, 1 });
134   asm volatile ("" : "+v" (a));
135 }
136 
137 void
f15(V2 * x)138 f15 (V2 *x)
139 {
140   register V2 a __asm ("xmm16");
141   a = __builtin_shuffle (*x, (V5) { 4, 4, 4, 4, 4, 4, 4, 4 });
142   asm volatile ("" : "+v" (a));
143 }
144 
145 void
f16(V2 * x)146 f16 (V2 *x)
147 {
148   register V2 a __asm ("xmm16");
149   a = __builtin_shuffle (*x, (V5) { 5, 5, 5, 5, 5, 5, 5, 5 });
150   asm volatile ("" : "+v" (a));
151 }
152 
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%xmm16" 4 } } */
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%xmm16\[^\n\r]*%ymm16" 3 } } */
/* { dg-final { scan-assembler-times "vbroadcastss\[^\n\r]*%\[re\]di\[^\n\r]*%ymm16" 3 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$170\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$255\[^\n\r]*%xmm16\[^\n\r]*%xmm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$0\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 1 } } */
/* { dg-final { scan-assembler-times "vpermilps\[^\n\r]*\\\$85\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
/* { dg-final { scan-assembler-times "vshuff32x4\[^\n\r]*\\\$3\[^\n\r]*%ymm16\[^\n\r]*%ymm16\[^\n\r]*%ymm16" 2 } } */
163