1 /* { dg-do compile { target { lp64 } } }  */
2 /* { dg-options "-mavx -O2 -mabi=ms -mno-avx512f -masm=att" } */
3 /* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*%xmm[0-9]+, [0-9]*\(%rsp\)} 10 } } */
4 /* { dg-final { scan-assembler-times {(?n)(?:vmovdqa[1-9]*|vmovap[sd])[\t ]*[0-9]*\(%rsp\), %xmm[0-9]+} 10 } } */
5 
6 #include <immintrin.h>
7 
test(char * dest)8 void test(char *dest)
9 {
10   __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7, ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15;
11   asm volatile ("vmovdqa\t%%ymm0, %0\n\t"
12 		"vmovdqa\t%%ymm0, %1\n\t"
13 		"vmovdqa\t%%ymm0, %2\n\t"
14 		"vmovdqa\t%%ymm0, %3\n\t"
15 		"vmovdqa\t%%ymm0, %4\n\t"
16 		"vmovdqa\t%%ymm0, %5\n\t"
17 		"vmovdqa\t%%ymm0, %6\n\t"
18 		"vmovdqa\t%%ymm0, %7\n\t"
19 		"vmovdqa\t%%ymm0, %8\n\t"
20 		"vmovdqa\t%%ymm0, %9\n\t"
21 		"vmovdqa\t%%ymm0, %10\n\t"
22 		"vmovdqa\t%%ymm0, %11\n\t"
23 		"vmovdqa\t%%ymm0, %12\n\t"
24 		"vmovdqa\t%%ymm0, %13\n\t"
25 		"vmovdqa\t%%ymm0, %14\n\t"
26 		"vmovdqa\t%%ymm0, %15\n\t"
27 		: "=v" (ymm1), "=v" (ymm2), "=v"(ymm3), "=v" (ymm4), "=v" (ymm5),
28 		  "=v" (ymm6), "=v" (ymm7), "=v"(ymm8), "=v" (ymm9), "=v" (ymm10),
29 		  "=v" (ymm11), "=v" (ymm12), "=v"(ymm13), "=v" (ymm14), "=v" (ymm15),
30 		  "=v"(ymm0)
31 		::);
32   _mm256_zeroupper();
33   _mm256_storeu_si256((__m256i *)dest, ymm1);
34   _mm256_storeu_si256((__m256i *)(dest + 32), ymm2);
35   _mm256_storeu_si256((__m256i *)(dest + 32 * 2), ymm3);
36   _mm256_storeu_si256((__m256i *)(dest + 32 * 3), ymm4);
37   _mm256_storeu_si256((__m256i *)(dest + 32 * 4), ymm5);
38   _mm256_storeu_si256((__m256i *)(dest + 32 * 5), ymm6);
39   _mm256_storeu_si256((__m256i *)(dest + 32 * 6), ymm7);
40   _mm256_storeu_si256((__m256i *)(dest + 32 * 7), ymm8);
41   _mm256_storeu_si256((__m256i *)(dest + 32 * 8), ymm9);
42   _mm256_storeu_si256((__m256i *)(dest + 32 * 9), ymm10);
43   _mm256_storeu_si256((__m256i *)(dest + 32 * 10), ymm11);
44   _mm256_storeu_si256((__m256i *)(dest + 32 * 11), ymm12);
45   _mm256_storeu_si256((__m256i *)(dest + 32 * 12), ymm13);
46   _mm256_storeu_si256((__m256i *)(dest + 32 * 13), ymm14);
47   _mm256_storeu_si256((__m256i *)(dest + 32 * 14), ymm15);
48 }
49