1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // REQUIRES: powerpc-registered-target
3 
4 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
5 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-BE
6 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -DNO_WARN_X86_INTRINSICS %s \
7 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P8,CHECK,CHECK-LE
8 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
9 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-BE
10 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr9 -DNO_WARN_X86_INTRINSICS %s \
11 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n| FileCheck %s --check-prefixes=CHECK-P9,CHECK,CHECK-LE
12 
13 #include <mmintrin.h>
14 
15 unsigned long long int ull1, ull2;
16 int i1, i2;
17 short s[4];
18 signed char c[8];
19 long long int ll1;
20 __m64 m1, m2, res;
21 
22 void __attribute__((noinline))
test_add()23 test_add() {
24   res = _mm_add_pi32(m1, m2);
25   res = _mm_add_pi16(m1, m2);
26   res = _mm_add_pi8(m1, m2);
27   res = _mm_adds_pu16(m1, m2);
28   res = _mm_adds_pu8(m1, m2);
29   res = _mm_adds_pi16(m1, m2);
30   res = _mm_adds_pi8(m1, m2);
31 }
32 
33 // CHECK-LABEL: @test_add
34 
35 // CHECK: define available_externally i64 @_mm_add_pi32
36 
37 // CHECK-P9: [[REG1:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
38 // CHECK-P9-NEXT: [[REG2:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG1]] to <4 x i32>
39 // CHECK-P9-NEXT: store <4 x i32> [[REG2]], <4 x i32>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
40 // CHECK-P9-NEXT: [[REG4:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
41 // CHECK-P9-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
42 // CHECK-P9-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG5]] to <4 x i32>
43 // CHECK-P9-NEXT: store <4 x i32> [[REG6]], <4 x i32>* [[REG7:[0-9a-zA-Z_%.]+]], align 16
44 // CHECK-P9-NEXT: [[REG8:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG3]], align 16
45 // CHECK-P9-NEXT: [[REG9:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG7]], align 16
46 // CHECK-P9-NEXT: [[REG10:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_add(int vector[4], int vector[4])(<4 x i32> [[REG8]], <4 x i32> [[REG9]])
47 // CHECK-P9-NEXT: store <4 x i32> [[REG10]], <4 x i32>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
48 // CHECK-P9-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG11]], align 16
49 // CHECK-P9-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> %6 to <2 x i64>
50 // CHECK-P9-NEXT: [[REG14:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG13]], i32 0
51 // CHECK-P9-NEXT: ret i64 [[REG14]]
52 
53 // CHECK-P8: [[REG15:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
54 // CHECK-P8-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG15]]
55 // CHECK-P8: [[REG17:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
56 // CHECK-P8-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG17]]
57 // CHECK-P8-NEXT: add nsw i32 [[REG16]], [[REG18]]
58 // CHECK-P8: [[REG19:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
59 // CHECK-P8-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG19]]
60 // CHECK-P8: [[REG21:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
61 // CHECK-P8-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG21]]
62 // CHECK-P8-NEXT: add nsw i32 [[REG20]], [[REG22]]
63 
64 // CHECK: define available_externally i64 @_mm_add_pi16
65 // CHECK: [[REG23:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
66 // CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG23]] to <8 x i16>
67 // CHECK-NEXT: store <8 x i16> [[REG24]], <8 x i16>* [[REG25:[0-9a-zA-Z_%.]+]]
68 // CHECK: [[REG26:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
69 // CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16>
70 // CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* [[REG28:[0-9a-zA-Z_%.]+]]
71 // CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG25]], align 16
72 // CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG28]], align 16
73 // CHECK-NEXT: call <8 x i16> @vec_add(short vector[8], short vector[8])(<8 x i16> [[REG29]], <8 x i16> [[REG30]])
74 
75 // CHECK: define available_externally i64 @_mm_add_pi8
76 // CHECK: [[REG31:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
77 // CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG31]] to <16 x i8>
78 // CHECK-NEXT: store <16 x i8> [[REG32]], <16 x i8>* [[REG33:[0-9a-zA-Z_%.]+]]
79 // CHECK: [[REG34:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
80 // CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG34]] to <16 x i8>
81 // CHECK-NEXT: store <16 x i8> [[REG35]], <16 x i8>* [[REG36:[0-9a-zA-Z_%.]+]]
82 // CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG33]], align 16
83 // CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG36]], align 16
84 // CHECK-NEXT: call <16 x i8> @vec_add(signed char vector[16], signed char vector[16])(<16 x i8> [[REG37]], <16 x i8> [[REG38]])
85 
86 // CHECK: define available_externally i64 @_mm_adds_pu16
87 // CHECK: [[REG39:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
88 // CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG39]] to <8 x i16>
89 // CHECK-NEXT: store <8 x i16> [[REG40]], <8 x i16>* [[REG41:[0-9a-zA-Z_%.]+]]
90 // CHECK: [[REG42:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
91 // CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG42]] to <8 x i16>
92 // CHECK-NEXT: store <8 x i16> [[REG43]], <8 x i16>* [[REG44:[0-9a-zA-Z_%.]+]]
93 // CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG41]], align 16
94 // CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG44]], align 16
95 // CHECK-NEXT: call <8 x i16> @vec_adds(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG45]], <8 x i16> [[REG46]])
96 
97 // CHECK: define available_externally i64 @_mm_adds_pu8
98 // CHECK: [[REG47:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
99 // CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG47]] to <16 x i8>
100 // CHECK-NEXT: store <16 x i8> [[REG48]], <16 x i8>* [[REG49:[0-9a-zA-Z_%.]+]]
101 // CHECK: [[REG50:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
102 // CHECK-NEXT: [[REG51:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG50]] to <16 x i8>
103 // CHECK-NEXT: store <16 x i8> [[REG51]], <16 x i8>* [[REG52:[0-9a-zA-Z_%.]+]]
104 // CHECK-NEXT: [[REG53:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG49]], align 16
105 // CHECK-NEXT: [[REG54:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG52]], align 16
106 // CHECK-NEXT: call <16 x i8> @vec_adds(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG53]], <16 x i8> [[REG54]])
107 
108 // CHECK: define available_externally i64 @_mm_adds_pi16
109 // CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
110 // CHECK-NEXT: [[REG56:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG55]] to <8 x i16>
111 // CHECK-NEXT: store <8 x i16> [[REG56]], <8 x i16>* [[REG57:[0-9a-zA-Z_%.]+]]
112 // CHECK: [[REG58:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
113 // CHECK-NEXT: [[REG59:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG58]] to <8 x i16>
114 // CHECK-NEXT: store <8 x i16> [[REG59]], <8 x i16>* [[REG60:[0-9a-zA-Z_%.]+]]
115 // CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG57]], align 16
116 // CHECK-NEXT: [[REG62:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG60]], align 16
117 // CHECK-NEXT: call <8 x i16> @vec_adds(short vector[8], short vector[8])(<8 x i16> [[REG61]], <8 x i16> [[REG62]])
118 
119 // CHECK: define available_externally i64 @_mm_adds_pi8
120 // CHECK: [[REG63:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
121 // CHECK-NEXT: [[REG64:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG63]] to <16 x i8>
122 // CHECK-NEXT: store <16 x i8> [[REG64]], <16 x i8>* [[REG65:[0-9a-zA-Z_%.]+]]
123 // CHECK: [[REG66:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats
124 // CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG66]] to <16 x i8>
125 // CHECK-NEXT: store <16 x i8> [[REG67]], <16 x i8>* [[REG68:[0-9a-zA-Z_%.]+]]
126 // CHECK-NEXT: [[REG69:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG65]], align 16
127 // CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG68]], align 16
128 // CHECK-NEXT: call <16 x i8> @vec_adds(signed char vector[16], signed char vector[16])(<16 x i8> [[REG69]], <16 x i8> [[REG70]])
129 
130 void __attribute__((noinline))
test_alt_name_add()131 test_alt_name_add() {
132   res = _m_paddb(m1, m2);
133   res = _m_paddd(m1, m2);
134   res = _m_paddsb(m1, m2);
135   res = _m_paddsw(m1, m2);
136   res = _m_paddusb(m1, m2);
137   res = _m_paddusw(m1, m2);
138   res = _m_paddw(m1, m2);
139 }
140 
141 // CHECK-LABEL: @test_alt_name_add
142 
143 // CHECK: define available_externally i64 @_m_paddb
144 // CHECK: [[REG71:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi8
145 // CHECK-NEXT: ret i64 [[REG71]]
146 
147 // CHECK: define available_externally i64 @_m_paddd
148 // CHECK: [[REG72:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi32
149 // CHECK-NEXT: ret i64 [[REG72]]
150 
151 // CHECK: define available_externally i64 @_m_paddsb
152 // CHECK: [[REG73:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pi8
153 // CHECK-NEXT: ret i64 [[REG73]]
154 
155 // CHECK: define available_externally i64 @_m_paddsw
156 // CHECK: [[REG74:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pi16
157 // CHECK-NEXT: ret i64 [[REG74]]
158 
159 // CHECK: define available_externally i64 @_m_paddusb
160 // CHECK: [[REG75:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pu8
161 // CHECK-NEXT: ret i64 [[REG75]]
162 
163 // CHECK: define available_externally i64 @_m_paddusw
164 // CHECK: [[REG76:[0-9a-zA-Z_%.]+]] = call i64 @_mm_adds_pu16
165 // CHECK-NEXT: ret i64 [[REG76]]
166 
167 // CHECK: define available_externally i64 @_m_paddw
168 // CHECK: [[REG77:[0-9a-zA-Z_%.]+]] = call i64 @_mm_add_pi16
169 // CHECK-NEXT: ret i64 [[REG77]]
170 
171 void __attribute__((noinline))
test_cmp()172 test_cmp() {
173   res = _mm_cmpeq_pi32(m1, m2);
174   res = _mm_cmpeq_pi16(m1, m2);
175   res = _mm_cmpeq_pi8(m1, m2);
176   res = _mm_cmpgt_pi32(m1, m2);
177   res = _mm_cmpgt_pi16(m1, m2);
178   res = _mm_cmpgt_pi8(m1, m2);
179 }
180 
181 // CHECK-LABEL: @test_cmp
182 
183 // CHECK: define available_externally i64 @_mm_cmpeq_pi32
184 
185 // CHECK-P9: [[REG78:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
186 // CHECK-P9-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG78]] to <4 x i32>
187 // CHECK-P9-NEXT: store <4 x i32> [[REG79]], <4 x i32>* [[REG80:[0-9a-zA-Z_%.]+]]
188 // CHECK-P9: [[REG81:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
189 // CHECK-P9-NEXT: [[REG82:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG81]] to <4 x i32>
190 // CHECK-P9-NEXT: store <4 x i32> [[REG82]], <4 x i32>* [[REG83:[0-9a-zA-Z_%.]+]]
191 // CHECK-P9-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG80]]
192 // CHECK-P9-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG83]]
193 // CHECK-P9-NEXT: call <4 x i32> @vec_cmpeq(int vector[4], int vector[4])(<4 x i32> [[REG84]], <4 x i32> [[REG85]])
194 
195 // CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
196 // CHECK-P8: [[REG86:[0-9a-zA-Z_%.]+]] = icmp eq i32 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
197 // CHECK-P8: [[REG87:[0-9a-zA-Z_%.]+]] = select i1 [[REG86]], i32 -1, i32 0
198 // CHECK-P8: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
199 // CHECK-P8-NEXT: store i32 [[REG87]], i32* {{[0-9a-zA-Z_%.]+}}
200 // CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
201 // CHECK-P8: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}
202 
203 // CHECK: define available_externally i64 @_mm_cmpeq_pi16
204 // CHECK: [[REG88:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
205 // CHECK-NEXT: [[REG89:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG88]] to <8 x i16>
206 // CHECK-NEXT: store <8 x i16> [[REG89]], <8 x i16>* [[REG90:[0-9a-zA-Z_%.]+]]
207 // CHECK: [[REG91:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
208 // CHECK-NEXT: [[REG92:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG91]] to <8 x i16>
209 // CHECK-NEXT: store <8 x i16> [[REG92]], <8 x i16>* [[REG93:[0-9a-zA-Z_%.]+]]
210 // CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG90]], align 16
211 // CHECK-NEXT: [[REG95:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG93]], align 16
212 // CHECK-NEXT: call <8 x i16> @vec_cmpeq(short vector[8], short vector[8])(<8 x i16> [[REG94]], <8 x i16> [[REG95]])
213 
214 // CHECK: define available_externally i64 @_mm_cmpeq_pi8
215 // CHECK: call i64 asm "cmpb $0,$1,$2;\0A", "=r,r,r"
216 // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG96:[0-9a-zA-Z_%.]+]], align 8
217 // CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG96]], align 8
218 // CHECK-NEXT: ret i64 [[REG97]]
219 
220 // CHECK: define available_externally i64 @_mm_cmpgt_pi32
221 
222 // CHECK-P9: [[REG98:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
223 // CHECK-P9-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG98]] to <4 x i32>
224 // CHECK-P9-NEXT: store <4 x i32> [[REG99]], <4 x i32>* [[REG100:[0-9a-zA-Z_%.]+]]
225 // CHECK-P9: [[REG101:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
226 // CHECK-P9-NEXT: [[REG102:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG101]] to <4 x i32>
227 // CHECK-P9-NEXT: store <4 x i32> [[REG102]], <4 x i32>* [[REG103:[0-9a-zA-Z_%.]+]]
228 // CHECK-P9-NEXT: [[REG104:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG100]]
229 // CHECK-P9-NEXT: [[REG105:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG103]]
230 // CHECK-P9-NEXT: call <4 x i32> @vec_cmpgt(int vector[4], int vector[4])(<4 x i32> [[REG104]], <4 x i32> [[REG85]])
231 
232 // CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
233 // CHECK-P8: [[REG106:[0-9a-zA-Z_%.]+]] = icmp sgt i32 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
234 // CHECK-P8: [[REG107:[0-9a-zA-Z_%.]+]] = select i1 [[REG106]], i32 -1, i32 0
235 // CHECK-P8: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
236 // CHECK-P8-NEXT: store i32 [[REG107]], i32* {{[0-9a-zA-Z_%.]+}}
237 // CHECK-P8-COUNT-2: {{[0-9a-zA-Z_%.]+}} = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}
238 // CHECK-P8: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}
239 
240 // CHECK: define available_externally i64 @_mm_cmpgt_pi16
241 // CHECK: [[REG108:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
242 // CHECK-NEXT: [[REG109:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG108]] to <8 x i16>
243 // CHECK-NEXT: store <8 x i16> [[REG109]], <8 x i16>* [[REG110:[0-9a-zA-Z_%.]+]]
244 // CHECK: [[REG111:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
245 // CHECK-NEXT: [[REG112:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG111]] to <8 x i16>
246 // CHECK-NEXT: store <8 x i16> [[REG112]], <8 x i16>* [[REG113:[0-9a-zA-Z_%.]+]]
247 // CHECK-NEXT: [[REG114:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG110]]
248 // CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG113]]
249 // CHECK-NEXT: call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> [[REG114]], <8 x i16> [[REG115]])
250 
251 // CHECK: define available_externally i64 @_mm_cmpgt_pi8
252 // CHECK: [[REG116:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
253 // CHECK-NEXT: [[REG117:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG116]] to <16 x i8>
254 // CHECK-NEXT: store <16 x i8> [[REG117]], <16 x i8>* [[REG118:[0-9a-zA-Z_%.]+]]
255 // CHECK: [[REG119:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
256 // CHECK-NEXT: [[REG120:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG119]] to <16 x i8>
257 // CHECK-NEXT: store <16 x i8> [[REG120]], <16 x i8>* [[REG121:[0-9a-zA-Z_%.]+]]
258 // CHECK-NEXT: [[REG122:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG118]]
259 // CHECK-NEXT: [[REG123:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG121]]
260 // CHECK-NEXT: call <16 x i8> @vec_cmpgt(signed char vector[16], signed char vector[16])(<16 x i8> [[REG122]], <16 x i8> [[REG123]])
261 
262 void __attribute__((noinline))
test_alt_name_cmp()263 test_alt_name_cmp() {
264   res = _m_pcmpeqb(m1, m2);
265   res = _m_pcmpeqd(m1, m2);
266   res = _m_pcmpeqw(m1, m2);
267   res = _m_pcmpgtb(m1, m2);
268   res = _m_pcmpgtd(m1, m2);
269   res = _m_pcmpgtw(m1, m2);
270 }
271 
272 // CHECK-LABEL: @test_alt_name_cmp
273 
274 // CHECK: define available_externally i64 @_m_pcmpeqb
275 // CHECK: [[REG124:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi8
276 // CHECK-NEXT: ret i64 [[REG124]]
277 
278 // CHECK: define available_externally i64 @_m_pcmpeqd
279 // CHECK: [[REG125:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi32
280 // CHECK-NEXT: ret i64 [[REG125]]
281 
282 // CHECK: define available_externally i64 @_m_pcmpeqw
283 // CHECK: [[REG126:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi16
284 // CHECK-NEXT: ret i64 [[REG126]]
285 
286 // CHECK: define available_externally i64 @_m_pcmpgtb
287 // CHECK: [[REG127:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi8
288 // CHECK-NEXT: ret i64 [[REG127]]
289 
290 // CHECK: define available_externally i64 @_m_pcmpgtd
291 // CHECK: [[REG128:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi32
292 // CHECK-NEXT: ret i64 [[REG128]]
293 
294 // CHECK: define available_externally i64 @_m_pcmpgtw
295 // CHECK: [[REG129:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpgt_pi16
296 // CHECK-NEXT: ret i64 [[REG129]]
297 
298 void __attribute__((noinline))
test_convert()299 test_convert() {
300   ll1 = _mm_cvtm64_si64(m1);
301   m1 = _mm_cvtsi32_si64(i1);
302   m1 = _mm_cvtsi64_m64(ll1);
303   i1 = _mm_cvtsi64_si32(m1);
304 }
305 
306 // CHECK-LABEL: @test_convert
307 
308 // CHECK: define available_externally i64 @_mm_cvtm64_si64
309 // CHECK: [[REG130:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
310 // CHECK-NEXT: ret i64 [[REG130]]
311 
312 // CHECK: define available_externally i64 @_mm_cvtsi32_si64
313 // CHECK: [[REG131:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}
314 // CHECK-NEXT: [[REG132:[0-9a-zA-Z_%.]+]] = zext i32 [[REG131]] to i64
315 // CHECK-NEXT: ret i64 [[REG132]]
316 
317 // CHECK: define available_externally i64 @_mm_cvtsi64_m64
318 // CHECK: [[REG133:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
319 // CHECK-NEXT: ret i64 [[REG133]]
320 
321 // CHECK: define available_externally signext i32 @_mm_cvtsi64_si32
322 // CHECK: [[REG134:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
323 // CHECK-NEXT: [[REG135:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG134]] to i32
324 // CHECK-NEXT: ret i32 [[REG135]]
325 
326 void __attribute__((noinline))
test_alt_name_convert()327 test_alt_name_convert() {
328   m1 = _m_from_int(i1);
329   m1 = _m_from_int64(ll1);
330   i1 = _m_to_int(m1);
331   ll1 = _m_to_int64(m1);
332 }
333 
334 // CHECK-LABEL: @test_alt_name_convert
335 
336 // CHECK: define available_externally i64 @_m_from_int
337 // CHECK: [[REG136:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtsi32_si64
338 // CHECK-NEXT: ret i64 [[REG136]]
339 
340 // CHECK: define available_externally i64 @_m_from_int64
341 // CHECK: [[REG137:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}
342 // CHECK-NEXT: ret i64 [[REG137]]
343 
344 // CHECK: define available_externally signext i32 @_m_to_int
345 // CHECK: [[REG138:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvtsi64_si32
346 // CHECK-NEXT: ret i32 [[REG138]]
347 
348 // CHECK: define available_externally i64 @_m_to_int64
349 // CHECK: [[REG139:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}
350 // CHECK-NEXT: ret i64 [[REG139]]
351 
352 void __attribute__((noinline))
test_empty()353 test_empty() {
354   _mm_empty();
355   _m_empty();
356 }
357 
358 // CHECK-LABEL: @test_empty
359 
360 // CHECK: define available_externally void @_mm_empty
361 // CHECK-NEXT: entry
362 // CHECK-NEXT: ret void
363 
364 // CHECK: define available_externally void @_m_empty
365 // CHECK-NEXT: entry
366 // CHECK-NEXT: ret void
367 
368 void __attribute__((noinline))
test_logic()369 test_logic() {
370   res = _mm_and_si64(m1, m2);
371   res = _mm_andnot_si64(m1, m2);
372   res = _mm_or_si64(m1, m2);
373   res = _mm_xor_si64(m1, m2);
374 }
375 
376 // CHECK-LABEL: @test_logic
377 
378 // CHECK: define available_externally i64 @_mm_and_si64
379 // CHECK: [[REG140:[0-9a-zA-Z_%.]+]] = and i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
380 // CHECK-NEXT: ret i64 [[REG140]]
381 
382 // CHECK: define available_externally i64 @_mm_andnot_si64
383 // CHECK: [[REG141:[0-9a-zA-Z_%.]+]] = xor i64 {{[0-9a-zA-Z_%.]+}}, -1
384 // CHECK: [[REG142:[0-9a-zA-Z_%.]+]] = and i64 [[REG141]], {{[0-9a-zA-Z_%.]+}}
385 // CHECK-NEXT: ret i64 [[REG142]]
386 
387 // CHECK: define available_externally i64 @_mm_or_si64
388 // CHECK: [[REG143:[0-9a-zA-Z_%.]+]] = or i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
389 // CHECK-NEXT: ret i64 [[REG143]]
390 
391 // CHECK: define available_externally i64 @_mm_xor_si64
392 // CHECK: [[REG144:[0-9a-zA-Z_%.]+]] = xor i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
393 // CHECK-NEXT: ret i64 [[REG144]]
394 
395 void __attribute__((noinline))
test_alt_name_logic()396 test_alt_name_logic() {
397   res = _m_pand(m1, m2);
398   res = _m_pandn(m1, m2);
399   res = _m_por(m1, m2);
400   res = _m_pxor(m1, m2);
401 }
402 
403 // CHECK-LABEL: @test_alt_name_logic
404 
405 // CHECK: define available_externally i64 @_m_pand
406 // CHECK: [[REG145:[0-9a-zA-Z_%.]+]] = call i64 @_mm_and_si64
407 // CHECK-NEXT: ret i64 [[REG145]]
408 
409 // CHECK: define available_externally i64 @_m_pandn
410 // CHECK: [[REG146:[0-9a-zA-Z_%.]+]] = call i64 @_mm_andnot_si64
411 // CHECK-NEXT: ret i64 [[REG146]]
412 
413 // CHECK: define available_externally i64 @_m_por
414 // CHECK: [[REG147:[0-9a-zA-Z_%.]+]] = call i64 @_mm_or_si64
415 // CHECK-NEXT: ret i64 [[REG147]]
416 
417 // CHECK: define available_externally i64 @_m_pxor
418 // CHECK: [[REG148:[0-9a-zA-Z_%.]+]] = call i64 @_mm_xor_si64
419 // CHECK-NEXT: ret i64 [[REG148]]
420 
421 void __attribute__((noinline))
test_madd()422 test_madd() {
423   res = _mm_madd_pi16(m1, m2);
424   res = _m_pmaddwd(m1, m2);
425 }
426 
427 // CHECK-LABEL: @test_madd
428 
429 // CHECK: define available_externally i64 @_mm_madd_pi16
430 // CHECK: store <4 x i32> zeroinitializer, <4 x i32>* [[REG149:[0-9a-zA-Z_%.]+]], align 16
431 // CHECK: [[REG150:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
432 // CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG150]] to <8 x i16>
433 // CHECK-NEXT: store <8 x i16> [[REG151]], <8 x i16>* [[REG152:[0-9a-zA-Z_%.]+]], align 16
434 // CHECK: [[REG153:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
435 // CHECK-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG153]] to <8 x i16>
436 // CHECK-NEXT: store <8 x i16> [[REG154]], <8 x i16>* [[REG155:[0-9a-zA-Z_%.]+]], align 16
437 // CHECK-NEXT: [[REG156:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG152]], align 16
438 // CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG155]], align 16
439 // CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG149]], align 16
440 // CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmsumshm(<8 x i16> [[REG156]], <8 x i16> [[REG157]], <4 x i32> [[REG158]])
441 // CHECK-NEXT: store <4 x i32> [[REG159]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
442 
443 // CHECK: define available_externally i64 @_m_pmaddwd
444 // CHECK: [[REG160:[0-9a-zA-Z_%.]+]] = call i64 @_mm_madd_pi16
445 // CHECK-NEXT: ret i64 [[REG160]]
446 
447 void __attribute__((noinline))
test_mul()448 test_mul() {
449   res = _mm_mulhi_pi16(m1, m2);
450   res = _mm_mullo_pi16(m1, m2);
451 }
452 
453 // CHECK-LABEL: @test_mul
454 
455 // CHECK: define available_externally i64 @_mm_mulhi_pi16
456 // CHECK-BE: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
457 // CHECK-LE: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
458 // CHECK: [[REG161:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
459 // CHECK: store <8 x i16> {{[0-9a-zA-Z_%.]+}}, <8 x i16>* [[REG162:[0-9a-zA-Z_%.]+]], align 16
460 // CHECK-NEXT: [[REG163:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
461 // CHECK-NEXT: [[REG164:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
462 // CHECK-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG164]] to <8 x i16>
463 // CHECK-NEXT: store <8 x i16> [[REG165]], <8 x i16>* [[REG166:[0-9a-zA-Z_%.]+]], align 16
464 // CHECK-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG162]], align 16
465 // CHECK-NEXT: [[REG168:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG166]], align 16
466 // CHECK-NEXT: [[REG169:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulesh(<8 x i16> [[REG167]], <8 x i16> [[REG168]])
467 // CHECK-NEXT: store <4 x i32> [[REG169]], <4 x i32>* [[REG170:[0-9a-zA-Z_%.]+]], align 16
468 // CHECK-NEXT: [[REG171:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG162]], align 16
469 // CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG166]], align 16
470 // CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulosh(<8 x i16> [[REG171]], <8 x i16> [[REG172]])
471 // CHECK-NEXT: store <4 x i32> [[REG173]], <4 x i32>* [[REG174:[0-9a-zA-Z_%.]+]], align 16
472 // CHECK-NEXT: [[REG175:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG170]], align 16
473 // CHECK-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG174]], align 16
474 // CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
475 // CHECK-NEXT: call <4 x i32> @vec_perm(int vector[4], int vector[4], unsigned char vector[16])(<4 x i32> [[REG175]], <4 x i32> [[REG176]], <16 x i8> [[REG177]])
476 
477 // CHECK: define available_externally i64 @_mm_mullo_pi16
478 // CHECK: [[REG178:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
479 // CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG178]] to <8 x i16>
480 // CHECK-NEXT: store <8 x i16> [[REG179]], <8 x i16>* [[REG180:[0-9a-zA-Z_%.]+]], align 16
481 // CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
482 // CHECK-NEXT: [[REG182:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
483 // CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG182]] to <8 x i16>
484 // CHECK-NEXT: store <8 x i16> [[REG183]], <8 x i16>* [[REG184:[0-9a-zA-Z_%.]+]], align 16
485 // CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG180]], align 16
486 // CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG184]], align 16
487 // CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = mul <8 x i16> [[REG185]], [[REG186]]
488 // CHECK-NEXT: store <8 x i16> [[REG187]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
489 
490 void __attribute__((noinline))
test_alt_name_mul()491 test_alt_name_mul() {
492   res = _m_pmulhw(m1, m2);
493   res = _m_pmullw(m1, m2);
494 }
495 
496 // CHECK-LABEL: @test_alt_name_mul
497 
498 // CHECK: define available_externally i64 @_m_pmulhw
499 // CHECK: [[REG188:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mulhi_pi16
500 // CHECK-NEXT: ret i64 [[REG188]]
501 
502 // CHECK: define available_externally i64 @_m_pmullw
503 // CHECK: [[REG189:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mullo_pi16
504 // CHECK-NEXT: ret i64 [[REG189]]
505 
506 void __attribute__((noinline))
test_packs()507 test_packs() {
508   res = _mm_packs_pu16((__m64)ull1, (__m64)ull2);
509   res = _mm_packs_pi16((__m64)ull1, (__m64)ull2);
510   res = _mm_packs_pi32((__m64)ull1, (__m64)ull2);
511 }
512 
513 // CHECK-LABEL: @test_packs
514 
515 // CHECK: define available_externally i64 @_mm_packs_pu16(i64 [[REG190:[0-9a-zA-Z_%.]+]], i64 [[REG191:[0-9a-zA-Z_%.]+]])
516 // CHECK: store i64 [[REG190]], i64* [[REG192:[0-9a-zA-Z_%.]+]], align 8
517 // CHECK-NEXT: store i64 [[REG191]], i64* [[REG193:[0-9a-zA-Z_%.]+]], align 8
518 // CHECK-LE: load i64, i64* [[REG192]], align 8
519 // CHECK: load i64, i64* [[REG193]], align 8
520 // CHECK-BE: load i64, i64* [[REG192]], align 8
521 // CHECK: [[REG194:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt
522 // CHECK-NEXT: store <8 x i16> [[REG194]], <8 x i16>* [[REG195:[0-9a-zA-Z_%.]+]], align 16
523 // CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG197:[0-9a-zA-Z_%.]+]], align 16
524 // CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG197]], align 16
525 // CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_packs(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG196]], <8 x i16> [[REG198]])
526 // CHECK-NEXT: store <16 x i8> [[REG199]], <16 x i8>* [[REG200:[0-9a-zA-Z_%.]+]], align 16
527 // CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG195]], align 16
528 // CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG195]], align 16
529 // CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(bool vector[8], bool vector[8])(<8 x i16> [[REG201]], <8 x i16> [[REG202]])
530 // CHECK-NEXT: store <16 x i8> [[REG203]], <16 x i8>* [[REG204:[0-9a-zA-Z_%.]+]], align 16
531 // CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG200]], align 16
532 // CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG204]], align 16
533 // CHECK-NEXT: call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG205]], <16 x i8> zeroinitializer, <16 x i8> [[REG206]])
534 
535 // CHECK: define available_externally i64 @_mm_packs_pi16(i64 [[REG207:[0-9a-zA-Z_%.]+]], i64 [[REG208:[0-9a-zA-Z_%.]+]])
536 // CHECK: store i64 [[REG207]], i64* [[REG209:[0-9a-zA-Z_%.]+]], align 8
537 // CHECK-NEXT: store i64 [[REG208]], i64* [[REG210:[0-9a-zA-Z_%.]+]], align 8
538 // CHECK-LE: load i64, i64* [[REG209]], align 8
539 // CHECK: load i64, i64* [[REG210]], align 8
540 // CHECK-BE: load i64, i64* [[REG209]], align 8
541 // CHECK: [[REG211:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG212:[0-9a-zA-Z_%.]+]], align 16
542 // CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG212]], align 16
543 // CHECK-NEXT: call <16 x i8> @vec_packs(short vector[8], short vector[8])(<8 x i16> [[REG211]], <8 x i16> [[REG213]])
544 
545 // CHECK: define available_externally i64 @_mm_packs_pi32(i64 [[REG214:[0-9a-zA-Z_%.]+]], i64 [[REG215:[0-9a-zA-Z_%.]+]])
546 // CHECK: store i64 [[REG214]], i64* [[REG216:[0-9a-zA-Z_%.]+]], align 8
547 // CHECK-NEXT: store i64 [[REG215]], i64* [[REG217:[0-9a-zA-Z_%.]+]], align 8
548 // CHECK-LE: load i64, i64* [[REG216]], align 8
549 // CHECK: load i64, i64* [[REG217]], align 8
550 // CHECK-BE: load i64, i64* [[REG216]], align 8
551 // CHECK: [[REG218:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG219:[0-9a-zA-Z_%.]+]], align 16
552 // CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG219]], align 16
553 // CHECK-NEXT: call <8 x i16> @vec_packs(int vector[4], int vector[4])(<4 x i32> [[REG218]], <4 x i32> [[REG220]])
554 
555 void __attribute__((noinline))
test_alt_name_packs()556 test_alt_name_packs() {
557   res = _m_packssdw(m1, m2);
558   res = _m_packsswb(m1, m2);
559   res = _m_packuswb(m1, m2);
560 }
561 
562 // CHECK-LABEL: @test_alt_name_packs
563 
564 // CHECK: define available_externally i64 @_m_packssdw
565 // CHECK: [[REG221:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pi32
566 // CHECK-NEXT: ret i64 [[REG221]]
567 
568 // CHECK: define available_externally i64 @_m_packsswb
569 // CHECK: [[REG222:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pi16
570 // CHECK-NEXT: ret i64 [[REG222]]
571 
572 // CHECK: define available_externally i64 @_m_packuswb
573 // CHECK: [[REG223:[0-9a-zA-Z_%.]+]] = call i64 @_mm_packs_pu16
574 // CHECK-NEXT: ret i64 [[REG223]]
575 
576 void __attribute__((noinline))
test_set()577 test_set() {
578   m1 = _mm_set_pi32(2134, -128);
579   m1 = _mm_set_pi16(2134, -128, 1234, 6354);
580   m1 = _mm_set_pi8(-128, 10, 0, 123, -1, -5, 127, 5);
581 }
582 
583 // CHECK-LABEL: @test_set
584 
585 // CHECK: define available_externally i64 @_mm_set_pi32
586 // CHECK: [[REG224:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
587 // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG224]]
588 // CHECK: [[REG225:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
589 // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG225]]
590 
591 // CHECK: define available_externally i64 @_mm_set_pi16
592 // CHECK: [[REG226:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
593 // CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG226]]
594 // CHECK: [[REG227:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
595 // CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG227]]
596 // CHECK: [[REG228:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
597 // CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG228]]
598 // CHECK: [[REG229:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
599 // CHECK-NEXT: store i16 {{[0-9a-zA-Z_%.]+}}, i16* [[REG229]]
600 
601 // CHECK: define available_externally i64 @_mm_set_pi8
602 // CHECK: [[REG230:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
603 // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG230]]
604 // CHECK: [[REG231:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
605 // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG231]]
606 // CHECK: [[REG232:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
607 // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG232]]
608 // CHECK: [[REG233:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
609 // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG233]]
610 // CHECK: [[REG234:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 4
611 // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG234]]
612 // CHECK: [[REG235:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 5
613 // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG235]]
614 // CHECK: [[REG236:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 6
615 // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG236]]
616 // CHECK: [[REG237:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [8 x i8], [8 x i8]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 7
617 // CHECK-NEXT: store i8 {{[0-9a-zA-Z_%.]+}}, i8* [[REG237]]
618 
619 void __attribute__((noinline))
test_set1()620 test_set1() {
621   res = _mm_set1_pi32(i1);
622   res = _mm_set1_pi16(s[0]);
623   res = _mm_set1_pi8(c[0]);
624 }
625 
626 // CHECK-LABEL: @test_set1
627 
628 // CHECK: define available_externally i64 @_mm_set1_pi32
629 // CHECK: [[REG244:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
630 // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG244]], align 8
631 // CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
632 // CHECK: [[REG246:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
633 // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* [[REG246]], align 4
634 
635 // CHECK: define available_externally i64 @_mm_set1_pi16
636 
637 // CHECK-P9: [[REG247:[0-9a-zA-Z_%.]+]] = load i16, i16* {{[0-9a-zA-Z_%.]+}}, align 2
638 // CHECK-P9-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(short)(i16 signext [[REG247]])
639 // CHECK-P9-NEXT: store <8 x i16> %call, <8 x i16>* [[REG249:[0-9a-zA-Z_%.]+]], align 16
640 // CHECK-P9-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG249:[0-9a-zA-Z_%.]+]], align 16
641 // CHECK-P9-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG250]] to <2 x i64>
642 // CHECK-P9-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG251]], i32 0
643 // CHECK-P9-NEXT: ret i64 [[REG252]]
644 
645 // CHECK-P8: [[REG253:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254:[0-9a-zA-Z_%.]+]], align 2
646 // CHECK-P8: [[REG255:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
647 // CHECK-P8-NEXT: store i16 [[REG253]], i16* [[REG255]], align 8
648 // CHECK-P8-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2
649 // CHECK-P8: [[REG256:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
650 // CHECK-P8-NEXT: store i16 [[REG250]], i16* [[REG256]], align 2
651 // CHECK-P8-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2
652 // CHECK-P8: [[REG257:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
653 // CHECK-P8-NEXT: store i16 [[REG251]], i16* [[REG257]], align 4
654 // CHECK-P8-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG254]], align 2
655 // CHECK-P8: [[REG259:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
656 // CHECK-P8-NEXT: store i16 [[REG258]], i16* [[REG259]], align 2
657 
658 // CHECK: define available_externally i64 @_mm_set1_pi8
659 // CHECK: [[REG260:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_splats(signed char)(i8 signext {{[0-9a-zA-Z_%.]+}})
660 // CHECK-NEXT: store <16 x i8> [[REG260]], <16 x i8>* [[REG261:[0-9a-zA-Z_%.]+]], align 16
661 // CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG261:[0-9a-zA-Z_%.]+]], align 16
662 // CHECK-NEXT: [[REG263:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> %1 to <2 x i64>
663 // CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG263]], i32 0
664 // CHECK-NEXT: ret i64 [[REG264]]
665 
666 void __attribute__((noinline))
test_setr()667 test_setr() {
668   res = _mm_setr_pi32(i1, i2);
669   res = _mm_setr_pi16(s[0], s[1], s[2], s[3]);
670   res = _mm_setr_pi8(c[0], c[1], c[2], c[3], c[4], c[5], c[6], c[7]);
671 }
672 
673 // CHECK-LABEL: @test_setr
674 
675 // CHECK: define available_externally i64 @_mm_setr_pi32
676 // CHECK: [[REG265:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
677 // CHECK: [[REG266:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
678 // CHECK-NEXT: store i32 [[REG265:[0-9a-zA-Z_%.]+]], i32* [[REG266]], align 8
679 // CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
680 // CHECK: [[REG268:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
681 // CHECK-NEXT: store i32 [[REG267]], i32* [[REG268]], align 4
682 
683 // CHECK: define available_externally i64 @_mm_setr_pi16
684 // CHECK: [[REG269:[0-9a-zA-Z_%.]+]] = call i64 @_mm_set_pi16
685 // CHECK-NEXT: ret i64 [[REG269]]
686 
687 // CHECK: define available_externally i64 @_mm_setr_pi8
688 // CHECK: [[REG270:[0-9a-zA-Z_%.]+]] = call i64 @_mm_set_pi8
689 // CHECK-NEXT: ret i64 [[REG270]]
690 
691 void __attribute__((noinline))
test_setzero()692 test_setzero() {
693   res = _mm_setzero_si64();
694 }
695 
696 // CHECK-LABEL: @test_setzero
697 
698 // CHECK: define available_externally i64 @_mm_setzero_si64
699 // CHECK: entry
700 // CHECK-NEXT: ret i64 0
701 
702 void __attribute__((noinline))
test_sll()703 test_sll() {
704   res = _mm_sll_pi16(m1, m2);
705   res = _mm_sll_pi32(m1, m2);
706   res = _mm_sll_si64(m1, m2);
707   res = _mm_slli_pi16(m1, i1);
708   res = _mm_slli_pi32(m1, i1);
709   res = _mm_slli_si64(m1, i1);
710 }
711 
712 // CHECK-LABEL: @test_sll
713 
714 // CHECK: define available_externally i64 @_mm_sll_pi16
715 // CHECK: [[REG271:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15
716 // CHECK-NEXT: br i1 [[REG271]], label %[[REG272:[0-9a-zA-Z_.]+]], label %[[REG273:[0-9a-zA-Z_.]+]]
717 // CHECK: [[REG272]]
718 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
719 // CHECK: trunc i64 {{[0-9a-zA-Z_%.]+}} to i16
720 // CHECK-NEXT: call <8 x i16> @vec_splats(unsigned short)
721 // CHECK: call <8 x i16> @vec_sl(short vector[8], unsigned short vector[8])
722 // CHECK: store i64 [[REG274:[0-9a-zA-Z_%.]+]], i64* [[REG275:[0-9a-zA-Z_%.]+]], align 8
723 // CHECK-NEXT: br label %[[REG276:[0-9a-zA-Z_%.]+]]
724 // CHECK: [[REG273]]
725 // CHECK-NEXT: store i64 0, i64* [[REG275]], align 8
726 // CHECK-NEXT: br label %[[REG276]]
727 // CHECK: [[REG276]]
728 // CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG275]], align 8
729 // CHECK-NEXT: ret i64 [[REG277]]
730 
731 // CHECK: define available_externally i64 @_mm_sll_pi32
732 // CHECK: [[REG278:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
733 // CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG278]]
734 // CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}
735 // CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG280]] to i32
736 // CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = shl i32 [[REG279]], [[REG281]]
737 // CHECK: [[REG283:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
738 // CHECK-NEXT: store i32 [[REG282]], i32* [[REG283]]
739 // CHECK: [[REG284:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
740 // CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG284]], align 4
741 // CHECK: trunc i64 {{[0-9a-zA-Z_%.]+}} to i32
742 // CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = shl i32 [[REG285]], {{[0-9a-zA-Z_%.]+}}
743 // CHECK: [[REG287:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
744 // CHECK-NEXT: store i32 [[REG286]], i32* [[REG287]], align 4
745 
746 // CHECK: define available_externally i64 @_mm_sll_si64
747 // CHECK: [[REG288:[0-9a-zA-Z_%.]+]] = shl i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
748 // CHECK-NEXT: ret i64 [[REG288]]
749 
750 // CHECK: define available_externally i64 @_mm_slli_pi16
751 // CHECK: [[REG289:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
752 // CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi16(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG289]])
753 // CHECK-NEXT: ret i64 [[REG290]]
754 
755 // CHECK: define available_externally i64 @_mm_slli_pi32
756 // CHECK: [[REG291:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
757 // CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi32(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG291]])
758 // CHECK-NEXT: ret i64 [[REG292]]
759 
760 // CHECK: define available_externally i64 @_mm_slli_si64
761 // CHECK: [[REG293:[0-9a-zA-Z_%.]+]] = zext i32 {{[0-9a-zA-Z_%.]+}} to i64
762 // CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = shl i64 {{[0-9a-zA-Z_%.]+}}, [[REG293]]
763 // CHECK-NEXT: ret i64 [[REG294]]
764 
765 void __attribute__((noinline))
test_alt_name_sll()766 test_alt_name_sll() {
767   res = _m_pslld(m1, m2);
768   res = _m_pslldi(m1, i1);
769   res = _m_psllq(m1, m2);
770   res = _m_psllqi(m1, i1);
771   res = _m_psllw(m1, m2);
772   res = _m_psllwi(m1, i1);
773 }
774 
775 // CHECK-LABEL: @test_alt_name_sll
776 
777 // CHECK: define available_externally i64 @_m_pslld
778 // CHECK: [[REG295:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi32
779 // CHECK-NEXT: ret i64 [[REG295]]
780 
781 // CHECK: define available_externally i64 @_m_pslldi
782 // CHECK: [[REG296:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_pi32
783 // CHECK-NEXT: ret i64 [[REG296]]
784 
785 // CHECK: define available_externally i64 @_m_psllq
786 // CHECK: [[REG297:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_si64
787 // CHECK-NEXT: ret i64 [[REG297]]
788 
789 // CHECK: define available_externally i64 @_m_psllqi
790 // CHECK: [[REG298:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_si64
791 // CHECK-NEXT: ret i64 [[REG298]]
792 
793 // CHECK: define available_externally i64 @_m_psllw
794 // CHECK: [[REG299:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sll_pi16
795 // CHECK-NEXT: ret i64 [[REG299]]
796 
797 // CHECK: define available_externally i64 @_m_psllwi
798 // CHECK: [[REG300:[0-9a-zA-Z_%.]+]] = call i64 @_mm_slli_pi16
799 // CHECK-NEXT: ret i64 [[REG300]]
800 
801 void __attribute__((noinline))
test_sra()802 test_sra() {
803   res = _mm_sra_pi32(m1, m2);
804   res = _mm_sra_pi16(m1, m2);
805   res = _mm_srai_pi32(m1, i1);
806   res = _mm_srai_pi16(m1, i1);
807 }
808 
809 // CHECK-LABEL: @test_sra
810 
811 // CHECK: define available_externally i64 @_mm_sra_pi32
812 // CHECK: [[REG301:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
813 // CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG301]], align 8
814 // CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
815 // CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG303]] to i32
816 // CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG302]], [[REG304]]
817 // CHECK: [[REG306:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
818 // CHECK-NEXT: store i32 [[REG305]], i32* [[REG306]], align 8
819 // CHECK: [[REG307:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
820 // CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG307]], align 4
821 // CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
822 // CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG309]] to i32
823 // CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG308]], [[REG310]]
824 // CHECK: [[REG312:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
825 // CHECK-NEXT: store i32 [[REG311]], i32* [[REG312]], align 4
826 
827 // CHECK: define available_externally i64 @_mm_sra_pi16
828 // CHECK: [[REG313:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15
829 // CHECK-NEXT: br i1 [[REG313]], label %[[REG314:[0-9a-zA-Z_%.]+]], label %[[REG315:[0-9a-zA-Z_%.]+]]
830 // CHECK: [[REG314]]
831 // CHECK: [[REG316:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
832 // CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG316]] to <8 x i16>
833 // CHECK-NEXT: store <8 x i16> [[REG317]], <8 x i16>* [[REG318:[0-9a-zA-Z_%.]+]], align 16
834 // CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
835 // CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG319]] to i16
836 // CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 zeroext [[REG320]])
837 // CHECK-NEXT: store <8 x i16> [[REG321]], <8 x i16>* [[REG322:[0-9a-zA-Z_%.]+]], align 16
838 // CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG318]], align 16
839 // CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG322]], align 16
840 // CHECK-NEXT: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])(<8 x i16> [[REG323]], <8 x i16> [[REG324]])
841 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG325:[0-9a-zA-Z_%.]+]], align 8
842 // CHECK-NEXT: br label %[[REG326:[0-9a-zA-Z_%.]+]]
843 // CHECK: [[REG315]]
844 // CHECK-NEXT: store i64 0, i64* [[REG325]], align 8
845 // CHECK-NEXT: br label %[[REG326]]
846 // CHECK: [[REG326]]
847 // CHECK-NEXT: [[REG327:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG325]], align 8
848 // CHECK-NEXT: ret i64 [[REG327]]
849 
850 // CHECK: define available_externally i64 @_mm_srai_pi32
851 // CHECK: [[REG328:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
852 // CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi32(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG328]])
853 // CHECK-NEXT: ret i64 [[REG329]]
854 
855 // CHECK: define available_externally i64 @_mm_srai_pi16
856 // CHECK: [[REG330:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
857 // CHECK-NEXT: [[REG331:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi16(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG330]])
858 // CHECK-NEXT: ret i64 [[REG331]]
859 
860 void __attribute__((noinline))
test_alt_name_sra()861 test_alt_name_sra() {
862   res = _m_psrad(m1, m2);
863   res = _m_psraw(m1, m2);
864   res = _m_psradi(m1, i1);
865   res = _m_psrawi(m1, i1);
866 }
867 
868 // CHECK-LABEL: @test_alt_name_sra
869 
870 // CHECK: define available_externally i64 @_m_psrad
871 // CHECK: [[REG332:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi32
872 // CHECK-NEXT: ret i64 [[REG332]]
873 
874 // CHECK: define available_externally i64 @_m_psraw
875 // CHECK: [[REG333:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sra_pi16
876 // CHECK-NEXT: ret i64 [[REG333]]
877 
878 // CHECK: define available_externally i64 @_m_psradi
879 // CHECK: [[REG334:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srai_pi32
880 // CHECK-NEXT: ret i64 [[REG334]]
881 
882 // CHECK: define available_externally i64 @_m_psrawi
883 // CHECK: [[REG335:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srai_pi16
884 // CHECK-NEXT: ret i64 [[REG335]]
885 
886 void __attribute__((noinline))
test_srl()887 test_srl() {
888   res = _mm_srl_si64(m1, m2);
889   res = _mm_srl_pi32(m1, m2);
890   res = _mm_srl_pi16(m1, m2);
891   res = _mm_srli_si64(m1, i1);
892   res = _mm_srli_pi32(m1, i1);
893   res = _mm_srli_pi16(m1, i1);
894 }
895 
896 // CHECK-LABEL: @test_srl
897 
898 // CHECK: define available_externally i64 @_mm_srl_si64
899 // CHECK: [[REG336:[0-9a-zA-Z_%.]+]] = lshr i64 {{[0-9a-zA-Z_%.]+}}, {{[0-9a-zA-Z_%.]+}}
900 // CHECK-NEXT: ret i64 [[REG336]]
901 
902 // CHECK: define available_externally i64 @_mm_srl_pi32
903 // CHECK: [[REG337:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
904 // CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG337]], align 8
905 // CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
906 // CHECK-NEXT: [[REG340:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG339]] to i32
907 // CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = lshr i32 [[REG338]], [[REG340]]
908 // CHECK: [[REG342:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
909 // CHECK-NEXT: store i32 [[REG341]], i32* [[REG342]], align 8
910 // CHECK: [[REG343:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
911 // CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG343]], align 4
912 // CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
913 // CHECK-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG345]] to i32
914 // CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = lshr i32 [[REG344]], [[REG346]]
915 // CHECK: [[REG348:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
916 // CHECK-NEXT: store i32 [[REG347]], i32* [[REG348]], align 4
917 
918 // CHECK: define available_externally i64 @_mm_srl_pi16
919 // CHECK: [[REG349:[0-9a-zA-Z_%.]+]] = icmp ule i64 {{[0-9a-zA-Z_%.]+}}, 15
920 // CHECK-NEXT: br i1 [[REG349]], label %[[REG350:[0-9a-zA-Z_%.]+]], label %[[REG351:[0-9a-zA-Z_%.]+]]
921 // CHECK: [[REG350]]
922 // CHECK: [[REG352:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
923 // CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG352]] to <8 x i16>
924 // CHECK-NEXT: store <8 x i16> [[REG353]], <8 x i16>* [[REG354:[0-9a-zA-Z_%.]+]], align 16
925 // CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
926 // CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG355]] to i16
927 // CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_splats(unsigned short)(i16 zeroext [[REG356]])
928 // CHECK-NEXT: store <8 x i16> [[REG357]], <8 x i16>* [[REG358:[0-9a-zA-Z_%.]+]], align 16
929 // CHECK-NEXT: [[REG359:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG354]], align 16
930 // CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG358]], align 16
931 // CHECK-NEXT: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG359]], <8 x i16> [[REG360]])
932 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* [[REG361:[0-9a-zA-Z_%.]+]], align 8
933 // CHECK-NEXT: br label %[[REG362:[0-9a-zA-Z_%.]+]]
934 // CHECK: [[REG351]]
935 // CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
936 // CHECK-NEXT: br label %[[REG362]]
937 // CHECK: [[REG362]]
938 // CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG361]], align 8
939 // CHECK-NEXT: ret i64 [[REG363]]
940 
941 // CHECK: define available_externally i64 @_mm_srli_si64
942 // CHECK: [[REG364:[0-9a-zA-Z_%.]+]] = zext i32 {{[0-9a-zA-Z_%.]+}} to i64
943 // CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = lshr i64 {{[0-9a-zA-Z_%.]+}}, [[REG364]]
944 // CHECK-NEXT: ret i64 [[REG365]]
945 
946 // CHECK: define available_externally i64 @_mm_srli_pi32
947 // CHECK: [[REG366:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
948 // CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi32(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG366]])
949 // CHECK-NEXT: ret i64 [[REG367]]
950 
951 // CHECK: define available_externally i64 @_mm_srli_pi16
952 // CHECK: [[REG366:[0-9a-zA-Z_%.]+]] = sext i32 {{[0-9a-zA-Z_%.]+}} to i64
953 // CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi16(i64 {{[0-9a-zA-Z_%.]+}}, i64 [[REG366]])
954 // CHECK-NEXT: ret i64 [[REG368]]
955 
956 void __attribute__((noinline))
test_alt_name_srl()957 test_alt_name_srl() {
958   res = _m_psrlq(m1, m2);
959   res = _m_psrld(m1, m2);
960   res = _m_psrlw(m1, m2);
961   res = _m_psrlqi(m1, i1);
962   res = _m_psrldi(m1, i1);
963   res = _m_psrlwi(m1, i1);
964 }
965 
966 // CHECK-LABEL: @test_alt_name_srl
967 
968 // CHECK: define available_externally i64 @_m_psrlq
969 // CHECK: [[REG369:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_si64
970 // CHECK-NEXT: ret i64 [[REG369]]
971 
972 // CHECK: define available_externally i64 @_m_psrld
973 // CHECK: [[REG370:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi32
974 // CHECK-NEXT: ret i64 [[REG370]]
975 
976 // CHECK: define available_externally i64 @_m_psrlw
977 // CHECK: [[REG371:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srl_pi16
978 // CHECK-NEXT: ret i64 [[REG371]]
979 
980 // CHECK: define available_externally i64 @_m_psrlqi
981 // CHECK: [[REG372:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_si64
982 // CHECK-NEXT: ret i64 [[REG372]]
983 
984 // CHECK: define available_externally i64 @_m_psrldi
985 // CHECK: [[REG373:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_pi32
986 // CHECK-NEXT: ret i64 [[REG373]]
987 
988 // CHECK: define available_externally i64 @_m_psrlwi
989 // CHECK: [[REG374:[0-9a-zA-Z_%.]+]] = call i64 @_mm_srli_pi16
990 // CHECK-NEXT: ret i64 [[REG374]]
991 
992 void __attribute__((noinline))
test_sub()993 test_sub() {
994   res = _mm_sub_pi32(m1, m2);
995   res = _mm_sub_pi16(m1, m2);
996   res = _mm_sub_pi8(m1, m2);
997   res = _mm_subs_pi16(m1, m2);
998   res = _mm_subs_pi8(m1, m2);
999   res = _mm_subs_pu16(m1, m2);
1000   res = _mm_subs_pu8(m1, m2);
1001 }
1002 
1003 // CHECK-LABEL: @test_sub
1004 
1005 // CHECK: define available_externally i64 @_mm_sub_pi32
1006 
1007 // CHECK-P8: [[REG375:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
1008 // CHECK-P8-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG375]], align 8
1009 // CHECK-P8: [[REG377:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
1010 // CHECK-P8-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG377]], align 8
1011 // CHECK-P8-NEXT: sub nsw i32 [[REG376]], [[REG378]]
1012 // CHECK-P8: [[REG379:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
1013 // CHECK-P8-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG379]], align 4
1014 // CHECK-P8: [[REG381:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
1015 // CHECK-P8-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG381]], align 4
1016 // CHECK-P8-NEXT: sub nsw i32 [[REG380]], [[REG382]]
1017 
1018 // CHECK-P9: [[REG383:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1019 // CHECK-P9-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG383]] to <4 x i32>
1020 // CHECK-P9-NEXT: store <4 x i32> [[REG384]], <4 x i32>* [[REG385:[0-9a-zA-Z_%.]+]], align 16
1021 // CHECK-P9: [[REG386:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1022 // CHECK-P9-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG386]] to <4 x i32>
1023 // CHECK-P9-NEXT: store <4 x i32> [[REG387]], <4 x i32>* [[REG388:[0-9a-zA-Z_%.]+]], align 16
1024 // CHECK-P9-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG385]], align 16
1025 // CHECK-P9-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG388]], align 16
1026 // CHECK-P9-NEXT: call <4 x i32> @vec_sub(int vector[4], int vector[4])(<4 x i32> [[REG389]], <4 x i32> [[REG390]])
1027 
1028 // CHECK: define available_externally i64 @_mm_sub_pi16
1029 // CHECK: [[REG391:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1030 // CHECK-NEXT: [[REG392:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG391]] to <8 x i16>
1031 // CHECK-NEXT: store <8 x i16> [[REG392]], <8 x i16>* [[REG393:[0-9a-zA-Z_%.]+]], align 16
1032 // CHECK: [[REG394:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1033 // CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG394]] to <8 x i16>
1034 // CHECK-NEXT: store <8 x i16> [[REG395]], <8 x i16>* [[REG396:[0-9a-zA-Z_%.]+]], align 16
1035 // CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG393]], align 16
1036 // CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG396]], align 16
1037 // CHECK-NEXT: call <8 x i16> @vec_sub(short vector[8], short vector[8])(<8 x i16> [[REG397]], <8 x i16> [[REG398]])
1038 
1039 // CHECK: define available_externally i64 @_mm_sub_pi8
1040 // CHECK: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1041 // CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <16 x i8>
1042 // CHECK-NEXT: store <16 x i8> [[REG400]], <16 x i8>* [[REG401:[0-9a-zA-Z_%.]+]], align 16
1043 // CHECK: [[REG402:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1044 // CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG402]] to <16 x i8>
1045 // CHECK-NEXT: store <16 x i8> [[REG403]], <16 x i8>* [[REG404:[0-9a-zA-Z_%.]+]], align 16
1046 // CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG401]], align 16
1047 // CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG404]], align 16
1048 // CHECK-NEXT: call <16 x i8> @vec_sub(signed char vector[16], signed char vector[16])(<16 x i8> [[REG405]], <16 x i8> [[REG406]])
1049 
1050 // CHECK: define available_externally i64 @_mm_subs_pi16
1051 // CHECK: [[REG407:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1052 // CHECK-NEXT: [[REG408:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG407]] to <8 x i16>
1053 // CHECK-NEXT: store <8 x i16> [[REG408]], <8 x i16>* [[REG409:[0-9a-zA-Z_%.]+]], align 16
1054 // CHECK: [[REG410:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1055 // CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG410]] to <8 x i16>
1056 // CHECK-NEXT: store <8 x i16> [[REG411]], <8 x i16>* [[REG412:[0-9a-zA-Z_%.]+]], align 16
1057 // CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG409]], align 16
1058 // CHECK-NEXT: [[REG414:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG412]], align 16
1059 // CHECK-NEXT: call <8 x i16> @vec_subs(short vector[8], short vector[8])(<8 x i16> [[REG413]], <8 x i16> [[REG414]])
1060 
1061 // CHECK: define available_externally i64 @_mm_subs_pi8
1062 // CHECK: [[REG415:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1063 // CHECK-NEXT: [[REG416:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG415]] to <16 x i8>
1064 // CHECK-NEXT: store <16 x i8> [[REG416]], <16 x i8>* [[REG417:[0-9a-zA-Z_%.]+]], align 16
1065 // CHECK: [[REG418:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1066 // CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG418]] to <16 x i8>
1067 // CHECK-NEXT: store <16 x i8> [[REG419]], <16 x i8>* [[REG420:[0-9a-zA-Z_%.]+]], align 16
1068 // CHECK-NEXT: [[REG421:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG417]], align 16
1069 // CHECK-NEXT: [[REG422:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG420]], align 16
1070 // CHECK-NEXT: call <16 x i8> @vec_subs(signed char vector[16], signed char vector[16])(<16 x i8> [[REG421]], <16 x i8> [[REG422]])
1071 
1072 // CHECK: define available_externally i64 @_mm_subs_pu16
1073 // CHECK: [[REG423:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1074 // CHECK-NEXT: [[REG424:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG423]] to <8 x i16>
1075 // CHECK-NEXT: store <8 x i16> [[REG424]], <8 x i16>* [[REG425:[0-9a-zA-Z_%.]+]], align 16
1076 // CHECK: [[REG426:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1077 // CHECK-NEXT: [[REG427:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG426]] to <8 x i16>
1078 // CHECK-NEXT: store <8 x i16> [[REG427]], <8 x i16>* [[REG428:[0-9a-zA-Z_%.]+]], align 16
1079 // CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG425]], align 16
1080 // CHECK-NEXT: [[REG430:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* [[REG428]], align 16
1081 // CHECK-NEXT: call <8 x i16> @vec_subs(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG429]], <8 x i16> [[REG430]])
1082 
1083 // CHECK: define available_externally i64 @_mm_subs_pu8
1084 // CHECK: [[REG431:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1085 // CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG431]] to <16 x i8>
1086 // CHECK-NEXT: store <16 x i8> [[REG432]], <16 x i8>* [[REG433:[0-9a-zA-Z_%.]+]], align 16
1087 // CHECK: [[REG434:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 {{[0-9a-zA-Z_%.]+}})
1088 // CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG434]] to <16 x i8>
1089 // CHECK-NEXT: store <16 x i8> [[REG435]], <16 x i8>* [[REG436:[0-9a-zA-Z_%.]+]], align 16
1090 // CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG433]], align 16
1091 // CHECK-NEXT: [[REG438:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG436]], align 16
1092 // CHECK-NEXT: call <16 x i8> @vec_subs(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG437]], <16 x i8> [[REG438]])
1093 
1094 void __attribute__((noinline))
test_alt_name_sub()1095 test_alt_name_sub() {
1096   res = _m_psubd(m1, m2);
1097   res = _m_psubw(m1, m2);
1098   res = _m_psubb(m1, m2);
1099   res = _m_psubsw(m1, m2);
1100   res = _m_psubsb(m1, m2);
1101   res = _m_psubusw(m1, m2);
1102   res = _m_psubusb(m1, m2);
1103 }
1104 
1105 // CHECK-LABEL: @test_alt_name_sub
1106 
1107 // CHECK: define available_externally i64 @_m_psubd
1108 // CHECK: [[REG439:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi32
1109 // CHECK-NEXT: ret i64 [[REG439]]
1110 
1111 // CHECK: define available_externally i64 @_m_psubw
1112 // CHECK: [[REG440:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi16
1113 // CHECK-NEXT: ret i64 [[REG440]]
1114 
1115 // CHECK: define available_externally i64 @_m_psubb
1116 // CHECK: [[REG441:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sub_pi8
1117 // CHECK-NEXT: ret i64 [[REG441]]
1118 
1119 // CHECK: define available_externally i64 @_m_psubsw
1120 // CHECK: [[REG442:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pi16
1121 // CHECK-NEXT: ret i64 [[REG442]]
1122 
1123 // CHECK: define available_externally i64 @_m_psubsb
1124 // CHECK: [[REG443:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pi8
1125 // CHECK-NEXT: ret i64 [[REG443]]
1126 
1127 // CHECK: define available_externally i64 @_m_psubusw
1128 // CHECK: [[REG444:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pu16
1129 // CHECK-NEXT: ret i64 [[REG444]]
1130 
1131 // CHECK: define available_externally i64 @_m_psubusb
1132 // CHECK: [[REG445:[0-9a-zA-Z_%.]+]] = call i64 @_mm_subs_pu8
1133 // CHECK-NEXT: ret i64 [[REG445]]
1134 
1135 void __attribute__((noinline))
test_unpack()1136 test_unpack() {
1137   res = _mm_unpackhi_pi32(m1, m2);
1138   res = _mm_unpackhi_pi16(m1, m2);
1139   res = _mm_unpackhi_pi8(m1, m2);
1140   res = _mm_unpacklo_pi32(m1, m2);
1141   res = _mm_unpacklo_pi16(m1, m2);
1142   res = _mm_unpacklo_pi8(m1, m2);
1143 }
1144 
1145 // CHECK-LABEL: @test_unpack
1146 
1147 // CHECK: define available_externally i64 @_mm_unpackhi_pi32
1148 // CHECK: [[REG446:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
1149 // CHECK-NEXT: [[REG447:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG446]], align 4
1150 // CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* [[REG449:[0-9a-zA-Z_%.]+]] to [2 x i32]*
1151 // CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[REG448]], i64 0, i64 0
1152 // CHECK-NEXT: store i32 [[REG447]], i32* [[REG450]], align 8
1153 // CHECK: [[REG451:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
1154 // CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG451]], align 4
1155 // CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* [[REG449]] to [2 x i32]*
1156 // CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[REG453]], i64 0, i64 1
1157 // CHECK-NEXT: store i32 [[REG452]], i32* [[REG454]], align 4
1158 
1159 // CHECK: define available_externally i64 @_mm_unpackhi_pi16
1160 // CHECK: [[REG455:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
1161 // CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG455]], align 4
1162 // CHECK: [[REG457:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
1163 // CHECK-NEXT: store i16 [[REG456]], i16* [[REG457]], align 8
1164 // CHECK: [[REG458:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
1165 // CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG458]], align 4
1166 // CHECK: [[REG460:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
1167 // CHECK-NEXT: store i16 [[REG459]], i16* [[REG460]], align 2
1168 // CHECK: [[REG461:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
1169 // CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG461]], align 2
1170 // CHECK: [[REG463:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
1171 // CHECK-NEXT: store i16 [[REG462]], i16* [[REG463]], align 4
1172 // CHECK: [[REG464:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
1173 // CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG464]], align 2
1174 // CHECK: [[REG466:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
1175 // CHECK-NEXT: store i16 [[REG465]], i16* [[REG466]], align 2
1176 
1177 // CHECK: define available_externally i64 @_mm_unpackhi_pi8
1178 // CHECK: [[REG467:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
1179 // CHECK-NEXT: [[REG468:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG467]] to <16 x i8>
1180 // CHECK-NEXT: store <16 x i8> [[REG468]], <16 x i8>* [[REG469:[0-9a-zA-Z_%.]+]], align 16
1181 // CHECK-NEXT: [[REG470:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1182 // CHECK-NEXT: [[REG471:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
1183 // CHECK-NEXT: [[REG472:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG471]] to <16 x i8>
1184 // CHECK-NEXT: store <16 x i8> [[REG472]], <16 x i8>* [[REG473:[0-9a-zA-Z_%.]+]], align 16
1185 // CHECK-NEXT: [[REG474:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG469]], align 16
1186 // CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG473]], align 16
1187 // CHECK-NEXT: call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG474]], <16 x i8> [[REG475]])
1188 
1189 // CHECK: define available_externally i64 @_mm_unpacklo_pi32
1190 // CHECK: [[REG476:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
1191 // CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG476]], align 8
1192 // CHECK: [[REG478:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
1193 // CHECK-NEXT: store i32 [[REG477]], i32* [[REG478]], align 8
1194 // CHECK: [[REG479:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
1195 // CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG479]], align 8
1196 // CHECK: [[REG481:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
1197 // CHECK-NEXT: store i32 [[REG480]], i32* [[REG481]], align 4
1198 
1199 // CHECK: define available_externally i64 @_mm_unpacklo_pi16
1200 // CHECK: [[REG482:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
1201 // CHECK-NEXT: [[REG483:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG482]], align 8
1202 // CHECK: [[REG484:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
1203 // CHECK-NEXT: store i16 [[REG483]], i16* [[REG484]], align 8
1204 // CHECK: [[REG485:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 0
1205 // CHECK-NEXT: [[REG486:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG485]], align 8
1206 // CHECK: [[REG487:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
1207 // CHECK-NEXT: store i16 [[REG486]], i16* [[REG487]], align 2
1208 // CHECK: [[REG488:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
1209 // CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG488]], align 2
1210 // CHECK: [[REG490:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 2
1211 // CHECK-NEXT: store i16 [[REG489]], i16* [[REG490]], align 4
1212 // CHECK: [[REG491:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 1
1213 // CHECK-NEXT: [[REG492:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG491]], align 2
1214 // CHECK: [[REG493:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* {{[0-9a-zA-Z_%.]+}}, i64 0, i64 3
1215 // CHECK-NEXT: store i16 [[REG492]], i16* [[REG493]], align 2
1216 
1217 // CHECK: define available_externally i64 @_mm_unpacklo_pi8
1218 // CHECK: [[REG494:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
1219 // CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG494]] to <16 x i8>
1220 // CHECK-NEXT: store <16 x i8> [[REG495]], <16 x i8>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
1221 // CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1222 // CHECK-NEXT: [[REG498:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
1223 // CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG498]] to <16 x i8>
1224 // CHECK-NEXT: store <16 x i8> [[REG499]], <16 x i8>* [[REG500:[0-9a-zA-Z_%.]+]], align 16
1225 // CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG496]], align 16
1226 // CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* [[REG500]], align 16
1227 // CHECK-NEXT: [[REG503:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])
1228 
1229 void __attribute__((noinline))
test_alt_name_unpack()1230 test_alt_name_unpack() {
1231   res = _m_punpckhdq(m1, m2);
1232   res = _m_punpckhwd(m1, m2);
1233   res = _m_punpckhbw(m1, m2);
1234   res = _m_punpckldq(m1, m2);
1235   res = _m_punpcklwd(m1, m2);
1236   res = _m_punpcklbw(m1, m2);
1237 }
1238 
1239 // CHECK-LABEL: @test_alt_name_unpack
1240 
1241 // CHECK: define available_externally i64 @_m_punpckhdq
1242 // CHECK: [[REG238:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi32
1243 // CHECK-NEXT: ret i64 [[REG238]]
1244 
1245 // CHECK: define available_externally i64 @_m_punpckhwd
1246 // CHECK: [[REG239:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi16
1247 // CHECK-NEXT: ret i64 [[REG239]]
1248 
1249 // CHECK: define available_externally i64 @_m_punpckhbw
1250 // CHECK: [[REG240:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpackhi_pi8
1251 // CHECK-NEXT: ret i64 [[REG240]]
1252 
1253 // CHECK: define available_externally i64 @_m_punpckldq
1254 // CHECK: [[REG241:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi32
1255 // CHECK-NEXT: ret i64 [[REG241]]
1256 
1257 // CHECK: define available_externally i64 @_m_punpcklwd
1258 // CHECK: [[REG242:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi16
1259 // CHECK-NEXT: ret i64 [[REG242]]
1260 
1261 // CHECK: define available_externally i64 @_m_punpcklbw
1262 // CHECK: [[REG243:[0-9a-zA-Z_%.]+]] = call i64 @_mm_unpacklo_pi8
1263 // CHECK-NEXT: ret i64 [[REG243]]
1264