1 /* PR target/99321 */
2 /* { dg-do assemble { target lp64 } } */
3 /* { dg-require-effective-target avx512vl } */
4 /* { dg-require-effective-target assembler_march_noavx512bw } */
5 /* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */
6 
7 #include <x86intrin.h>
8 
9 typedef unsigned char V1 __attribute__((vector_size (16)));
10 typedef unsigned char V2 __attribute__((vector_size (32)));
11 typedef unsigned short V3 __attribute__((vector_size (16)));
12 typedef unsigned short V4 __attribute__((vector_size (32)));
13 
f1(void)14 void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_abs_epi8 ((__m128i) b); __asm ("" : : "v" (a)); }
f2(void)15 void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_abs_epi8 ((__m256i) b); __asm ("" : : "v" (a)); }
f3(void)16 void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_abs_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
f4(void)17 void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_abs_epi16 ((__m256i) b); __asm ("" : : "v" (a)); }
f5(void)18 void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_adds_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f6(void)19 void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_adds_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f7(void)20 void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_adds_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f8(void)21 void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f9(void)22 void f9 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_subs_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f10(void)23 void f10 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_subs_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f11(void)24 void f11 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_subs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f12(void)25 void f12 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_subs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f13(void)26 void f13 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_adds_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f14(void)27 void f14 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_adds_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f15(void)28 void f15 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_adds_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f16(void)29 void f16 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f17(void)30 void f17 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_subs_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f18(void)31 void f18 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_subs_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f19(void)32 void f19 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_subs_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f20(void)33 void f20 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_subs_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f21(void)34 void f21 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_alignr_epi8 ((__m128i) a, (__m128i) b, 5); __asm ("" : : "v" (a)); }
f22(void)35 void f22 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_alignr_epi8 ((__m256i) a, (__m256i) b, 5); __asm ("" : : "v" (a)); }
f23(void)36 void f23 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_adds_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f24(void)37 void f24 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_avg_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f25(void)38 void f25 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_avg_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f26(void)39 void f26 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_avg_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f27(void)40 void f27 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_avg_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f28(void)41 void f28 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_broadcastb_epi8 ((__m128i) b); __asm ("" : : "v" (a)); }
f29(void)42 void f29 (void) { register V2 a __asm ("%xmm16"); register V1 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_broadcastb_epi8 ((__m128i) b); __asm ("" : : "v" (a)); }
f30(void)43 void f30 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_broadcastw_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
f31(void)44 void f31 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_broadcastw_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
f32(void)45 int f32 (void) { register V1 a __asm ("%xmm16"); __asm ("" : "=v" (a)); return _mm_extract_epi8 ((__m128i) a, 3); }
f33(void)46 int f33 (void) { register V3 a __asm ("%xmm16"); __asm ("" : "=v" (a)); return _mm_extract_epi16 ((__m128i) a, 3); }
f34(int c)47 void f34 (int c) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_insert_epi8 ((__m128i) b, c, 5); __asm ("" : : "v" (a)); }
f35(int c)48 void f35 (int c) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_insert_epi16 ((__m128i) b, c, 5); __asm ("" : : "v" (a)); }
f36(void)49 void f36 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_maddubs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f37(void)50 void f37 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_maddubs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f38(void)51 void f38 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_madd_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f39(void)52 void f39 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_madd_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f40(void)53 void f40 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_cvtepi8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
f41(void)54 void f41 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_cvtepi8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
f42(void)55 void f42 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_cvtepu8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
f43(void)56 void f43 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_cvtepu8_epi16 ((__m128i) b); __asm ("" : : "v" (a)); }
f44(void)57 void f44 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhrs_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f45(void)58 void f45 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhrs_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f46(void)59 void f46 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhi_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f47(void)60 void f47 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhi_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f48(void)61 void f48 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_mulhi_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f49(void)62 void f49 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_mulhi_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f50(void)63 void f50 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_sad_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f51(void)64 void f51 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_sad_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f52(void)65 void f52 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_shuffle_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f53(void)66 void f53 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_shuffle_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f54(void)67 void f54 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_shufflehi_epi16 ((__m128i) b, 0x5b); __asm ("" : : "v" (a)); }
f55(void)68 void f55 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_shufflehi_epi16 ((__m256i) b, 0x5b); __asm ("" : : "v" (a)); }
f56(void)69 void f56 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_shufflelo_epi16 ((__m128i) b, 0x5b); __asm ("" : : "v" (a)); }
f57(void)70 void f57 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_shufflelo_epi16 ((__m256i) b, 0x5b); __asm ("" : : "v" (a)); }
f58(void)71 void f58 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_slli_si128 ((__m128i) b, 3); __asm ("" : : "v" (a)); }
f59(void)72 void f59 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_slli_si256 ((__m256i) b, 3); __asm ("" : : "v" (a)); }
f60(void)73 void f60 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_srli_si128 ((__m128i) b, 3); __asm ("" : : "v" (a)); }
f61(void)74 void f61 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_srli_si256 ((__m256i) b, 3); __asm ("" : : "v" (a)); }
f62(void)75 void f62 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_sll_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f63(void)76 void f63 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_sll_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f64(void)77 void f64 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_slli_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); }
f65(void)78 void f65 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_slli_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); }
f66(void)79 void f66 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srl_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f67(void)80 void f67 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srl_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f68(void)81 void f68 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srli_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); }
f69(void)82 void f69 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srli_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); }
f70(void)83 void f70 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_sra_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f71(void)84 void f71 (void) { register V4 a __asm ("%xmm16"); register V3 b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_sra_epi16 ((__m256i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f72(void)85 void f72 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_srai_epi16 ((__m128i) b, 7); __asm ("" : : "v" (a)); }
f73(void)86 void f73 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_srai_epi16 ((__m256i) b, 7); __asm ("" : : "v" (a)); }
f74(void)87 void f74 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_unpackhi_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f75(void)88 void f75 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_unpackhi_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f76(void)89 void f76 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_unpackhi_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f77(void)90 void f77 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_unpackhi_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f78(void)91 void f78 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_unpacklo_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f79(void)92 void f79 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_unpacklo_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
f80(void)93 void f80 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_unpacklo_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
f81(void)94 void f81 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_unpacklo_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
95