1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // REQUIRES: powerpc-registered-target
3 
4 // RUN: %clang -S -emit-llvm -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
5 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-BE
6 // RUN: %clang -x c++ -fsyntax-only -target powerpc64-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
7 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns
8 // RUN: %clang -S -emit-llvm -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
9 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns -o - | llvm-cxxfilt -n | FileCheck %s --check-prefixes=CHECK,CHECK-LE
10 // RUN: %clang -x c++ -fsyntax-only -target powerpc64le-unknown-linux-gnu -mcpu=pwr8 -ffreestanding -DNO_WARN_X86_INTRINSICS %s \
11 // RUN:   -fno-discard-value-names -mllvm -disable-llvm-optzns
12 
13 #include <xmmintrin.h>
14 
15 __m128 res, m1, m2;
16 __m64 res64, ms[2];
17 float fs[4];
18 int i, i2;
19 long long i64;
20 
21 // CHECK-LE-DAG: @_mm_shuffle_pi16.permute_selectors = internal constant [4 x i16] [i16 2312, i16 2826, i16 3340, i16 3854], align 2
22 // CHECK-BE-DAG: @_mm_shuffle_pi16.permute_selectors = internal constant [4 x i16] [i16 1543, i16 1029, i16 515, i16 1], align 2
23 
24 // CHECK-LE-DAG: @_mm_shuffle_ps.permute_selectors = internal constant [4 x i32] [i32 50462976, i32 117835012, i32 185207048, i32 252579084], align 4
25 // CHECK-BE-DAG: @_mm_shuffle_ps.permute_selectors = internal constant [4 x i32] [i32 66051, i32 67438087, i32 134810123, i32 202182159], align 4
26 
27 void __attribute__((noinline))
test_add()28 test_add() {
29   res = _mm_add_ps(m1, m2);
30   res = _mm_add_ss(m1, m2);
31 }
32 
33 // CHECK-LABEL: @test_add
34 
35 // CHECK: define available_externally <4 x float> @_mm_add_ps(<4 x float> [[REG1:[0-9a-zA-Z_%.]+]], <4 x float> [[REG2:[0-9a-zA-Z_%.]+]])
36 // CHECK: store <4 x float> [[REG1]], <4 x float>* [[REG3:[0-9a-zA-Z_%.]+]], align 16
37 // CHECK-NEXT: store <4 x float> [[REG2]], <4 x float>* [[REG4:[0-9a-zA-Z_%.]+]], align 16
38 // CHECK-NEXT: [[REG5:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG3]], align 16
39 // CHECK-NEXT: [[REG6:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG4]], align 16
40 // CHECK-NEXT: [[REG7:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG5]], [[REG6]]
41 // CHECK-NEXT: ret <4 x float> [[REG7]]
42 
43 // CHECK: define available_externally <4 x float> @_mm_add_ss(<4 x float> [[REG8:[0-9a-zA-Z_%.]+]], <4 x float> [[REG9:[0-9a-zA-Z_%.]+]])
44 // CHECK: store <4 x float> [[REG8]], <4 x float>* [[REG10:[0-9a-zA-Z_%.]+]], align 16
45 // CHECK-NEXT: store <4 x float> [[REG9]], <4 x float>* [[REG11:[0-9a-zA-Z_%.]+]], align 16
46 // CHECK-NEXT: [[REG12:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
47 // CHECK-NEXT: [[REG13:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG12]], i32 zeroext 0)
48 // CHECK-NEXT: store <4 x float> [[REG13]], <4 x float>* [[REG14:[0-9a-zA-Z_%.]+]], align 16
49 // CHECK-NEXT: [[REG15:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG11]], align 16
50 // CHECK-NEXT: [[REG16:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG15]], i32 zeroext 0)
51 // CHECK-NEXT: store <4 x float> [[REG16]], <4 x float>* [[REG17:[0-9a-zA-Z_%.]+]], align 16
52 // CHECK-NEXT: [[REG18:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG14]], align 16
53 // CHECK-NEXT: [[REG19:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG17]], align 16
54 // CHECK-NEXT: [[REG20:[0-9a-zA-Z_%.]+]] = fadd <4 x float> [[REG18]], [[REG19]]
55 // CHECK-NEXT: store <4 x float> [[REG20]], <4 x float>* [[REG21:[0-9a-zA-Z_%.]+]], align 16
56 // CHECK-NEXT: [[REG22:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG10]], align 16
57 // CHECK-NEXT: [[REG23:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG21]], align 16
58 // CHECK-NEXT: [[REG24:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG22]], <4 x float> [[REG23]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
59 // CHECK-NEXT: ret <4 x float> [[REG24]]
60 
61 void __attribute__((noinline))
test_avg()62 test_avg() {
63   res64 = _mm_avg_pu16(ms[0], ms[1]);
64   res64 = _mm_avg_pu8(ms[0], ms[1]);
65 }
66 
67 // CHECK-LABEL: @test_avg
68 
69 // CHECK: define available_externally i64 @_mm_avg_pu16
70 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
71 // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
72 // CHECK-NEXT: [[REG25:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
73 // CHECK-NEXT: [[REG26:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG25]])
74 // CHECK-NEXT: [[REG27:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG26]] to <8 x i16>
75 // CHECK-NEXT: store <8 x i16> [[REG27]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
76 // CHECK-NEXT: [[REG28:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
77 // CHECK-NEXT: [[REG29:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG28]])
78 // CHECK-NEXT: [[REG30:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG29]] to <8 x i16>
79 // CHECK-NEXT: store <8 x i16> [[REG30]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
80 // CHECK-NEXT: [[REG31:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
81 // CHECK-NEXT: [[REG32:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
82 // CHECK-NEXT: [[REG33:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_avg(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG31]], <8 x i16> [[REG32]])
83 // CHECK-NEXT: store <8 x i16> [[REG33]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
84 // CHECK-NEXT: [[REG34:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
85 // CHECK-NEXT: [[REG35:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG34]] to <2 x i64>
86 // CHECK-NEXT: [[REG36:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG35]], i32 0
87 // CHECK-NEXT: ret i64 [[REG36]]
88 
89 // CHECK: define available_externally i64 @_mm_avg_pu8
90 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
91 // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
92 // CHECK-NEXT: [[REG37:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
93 // CHECK-NEXT: [[REG38:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG37]])
94 // CHECK-NEXT: [[REG39:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG38]] to <16 x i8>
95 // CHECK-NEXT: store <16 x i8> [[REG39]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
96 // CHECK-NEXT: [[REG40:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
97 // CHECK-NEXT: [[REG41:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG40]])
98 // CHECK-NEXT: [[REG42:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG41]] to <16 x i8>
99 // CHECK-NEXT: store <16 x i8> [[REG42]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
100 // CHECK-NEXT: [[REG43:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
101 // CHECK-NEXT: [[REG44:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
102 // CHECK-NEXT: [[REG45:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_avg(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG43]], <16 x i8> [[REG44]])
103 // CHECK-NEXT: store <16 x i8> [[REG45]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
104 // CHECK-NEXT: [[REG46:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
105 // CHECK-NEXT: [[REG47:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG46]] to <2 x i64>
106 // CHECK-NEXT: [[REG48:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG47]], i32 0
107 // CHECK-NEXT: ret i64 [[REG48]]
108 
109 void __attribute__((noinline))
test_alt_name_avg()110 test_alt_name_avg() {
111   res64 = _m_pavgw(ms[0], ms[1]);
112   res64 = _m_pavgb(ms[0], ms[1]);
113 }
114 
115 // CHECK-LABEL: @test_alt_name_avg
116 
117 // CHECK: define available_externally i64 @_m_pavgw
118 // CHECK: [[REG49:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu16
119 // CHECK-NEXT: ret i64 [[REG49]]
120 
121 // CHECK: define available_externally i64 @_m_pavgb
122 // CHECK: [[REG50:[0-9a-zA-Z_%.]+]] = call i64 @_mm_avg_pu8
123 // CHECK-NEXT: ret i64 [[REG50]]
124 
125 void __attribute__((noinline))
test_cmp()126 test_cmp() {
127   res = _mm_cmpeq_ps(m1, m2);
128   res = _mm_cmpeq_ss(m1, m2);
129   res = _mm_cmpge_ps(m1, m2);
130   res = _mm_cmpge_ss(m1, m2);
131   res = _mm_cmpgt_ps(m1, m2);
132   res = _mm_cmpgt_ss(m1, m2);
133   res = _mm_cmple_ps(m1, m2);
134   res = _mm_cmple_ss(m1, m2);
135   res = _mm_cmplt_ps(m1, m2);
136   res = _mm_cmplt_ss(m1, m2);
137   res = _mm_cmpneq_ps(m1, m2);
138   res = _mm_cmpneq_ss(m1, m2);
139   res = _mm_cmpnge_ps(m1, m2);
140   res = _mm_cmpnge_ss(m1, m2);
141   res = _mm_cmpngt_ps(m1, m2);
142   res = _mm_cmpngt_ss(m1, m2);
143   res = _mm_cmpnle_ps(m1, m2);
144   res = _mm_cmpnle_ss(m1, m2);
145   res = _mm_cmpnlt_ps(m1, m2);
146   res = _mm_cmpnlt_ss(m1, m2);
147   res = _mm_cmpord_ps(m1, m2);
148   res = _mm_cmpord_ss(m1, m2);
149   res = _mm_cmpunord_ps(m1, m2);
150   res = _mm_cmpunord_ss(m1, m2);
151 }
152 
153 // CHECK-LABEL: @test_cmp
154 
155 // CHECK: define available_externally <4 x float> @_mm_cmpeq_ps
156 // CHECK: [[REG51:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
157 // CHECK-NEXT: [[REG52:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG51]] to <4 x float>
158 // CHECK-NEXT: ret <4 x float> [[REG52]]
159 
160 // CHECK: define available_externally <4 x float> @_mm_cmpeq_ss
161 // CHECK: [[REG53:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
162 // CHECK-NEXT: store <4 x float> [[REG53]], <4 x float>* [[REG54:[0-9a-zA-Z_%.]+]], align 16
163 // CHECK: [[REG55:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
164 // CHECK-NEXT: store <4 x float> [[REG55]], <4 x float>* [[REG56:[0-9a-zA-Z_%.]+]], align 16
165 // CHECK-NEXT: [[REG57:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG54]], align 16
166 // CHECK-NEXT: [[REG58:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG56]], align 16
167 // CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> [[REG57]], <4 x float> [[REG58]])
168 // CHECK: [[REG59:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
169 // CHECK-NEXT: ret <4 x float> [[REG59]]
170 
171 // CHECK: define available_externally <4 x float> @_mm_cmpge_ps
172 // CHECK: [[REG60:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
173 // CHECK-NEXT: [[REG61:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG60]] to <4 x float>
174 // CHECK-NEXT: ret <4 x float> [[REG61]]
175 
176 // CHECK: define available_externally <4 x float> @_mm_cmpge_ss
177 // CHECK: [[REG62:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
178 // CHECK-NEXT: store <4 x float> [[REG62]], <4 x float>* [[REG63:[0-9a-zA-Z_%.]+]], align 16
179 // CHECK: [[REG64:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
180 // CHECK-NEXT: store <4 x float> [[REG64]], <4 x float>* [[REG65:[0-9a-zA-Z_%.]+]], align 16
181 // CHECK-NEXT: [[REG66:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG63]], align 16
182 // CHECK-NEXT: [[REG67:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG65]], align 16
183 // CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> [[REG66]], <4 x float> [[REG67]])
184 // CHECK: [[REG68:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
185 // CHECK-NEXT: ret <4 x float> [[REG68]]
186 
187 // CHECK: define available_externally <4 x float> @_mm_cmpgt_ps
188 // CHECK: [[REG69:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
189 // CHECK-NEXT: [[REG70:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG69]] to <4 x float>
190 // CHECK-NEXT: ret <4 x float> [[REG70]]
191 
192 // CHECK: define available_externally <4 x float> @_mm_cmpgt_ss
193 // CHECK: [[REG71:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
194 // CHECK-NEXT: store <4 x float> [[REG71]], <4 x float>* [[REG72:[0-9a-zA-Z_%.]+]], align 16
195 // CHECK: [[REG73:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
196 // CHECK-NEXT: store <4 x float> [[REG73]], <4 x float>* [[REG74:[0-9a-zA-Z_%.]+]], align 16
197 // CHECK-NEXT: [[REG75:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG72]], align 16
198 // CHECK-NEXT: [[REG76:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG74]], align 16
199 // CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG75]], <4 x float> [[REG76]])
200 // CHECK: [[REG77:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
201 // CHECK-NEXT: ret <4 x float> [[REG77]]
202 
203 // CHECK: define available_externally <4 x float> @_mm_cmple_ps
204 // CHECK: [[REG78:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
205 // CHECK-NEXT: [[REG79:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG78]] to <4 x float>
206 // CHECK-NEXT: ret <4 x float> [[REG79]]
207 
208 // CHECK: define available_externally <4 x float> @_mm_cmple_ss
209 // CHECK: [[REG80:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
210 // CHECK-NEXT: store <4 x float> [[REG80]], <4 x float>* [[REG81:[0-9a-zA-Z_%.]+]], align 16
211 // CHECK: [[REG82:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
212 // CHECK-NEXT: store <4 x float> [[REG82]], <4 x float>* [[REG83:[0-9a-zA-Z_%.]+]], align 16
213 // CHECK-NEXT: [[REG84:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG81]], align 16
214 // CHECK-NEXT: [[REG85:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG83]], align 16
215 // CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> [[REG84]], <4 x float> [[REG85]])
216 // CHECK: [[REG86:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
217 // CHECK-NEXT: ret <4 x float> [[REG86]]
218 
219 // CHECK: define available_externally <4 x float> @_mm_cmplt_ps
220 // CHECK: [[REG87:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
221 // CHECK-NEXT: [[REG88:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG87]] to <4 x float>
222 // CHECK-NEXT: ret <4 x float> [[REG88]]
223 
224 // CHECK: @_mm_cmplt_ss
225 // CHECK: [[REG89:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
226 // CHECK-NEXT: store <4 x float> [[REG89]], <4 x float>* [[REG90:[0-9a-zA-Z_%.]+]], align 16
227 // CHECK: [[REG91:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
228 // CHECK-NEXT: store <4 x float> [[REG91]], <4 x float>* [[REG92:[0-9a-zA-Z_%.]+]], align 16
229 // CHECK-NEXT: [[REG93:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG90]], align 16
230 // CHECK-NEXT: [[REG94:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG92]], align 16
231 // CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> [[REG93]], <4 x float> [[REG94]])
232 // CHECK: [[REG95:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
233 // CHECK-NEXT: ret <4 x float> [[REG95]]
234 
235 // CHECK: define available_externally <4 x float> @_mm_cmpneq_ps
236 // CHECK: [[REG96:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])
237 // CHECK-NEXT: [[REG97:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG96]] to <4 x float>
238 // CHECK-NEXT: store <4 x float> [[REG97]], <4 x float>* [[REG98:[0-9a-zA-Z_%.]+]], align 16
239 // CHECK-NEXT: [[REG99:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
240 // CHECK-NEXT: [[REG100:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG98]], align 16
241 // CHECK-NEXT: [[REG101:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_nor(float vector[4], float vector[4])(<4 x float> [[REG99]], <4 x float> [[REG100]])
242 // CHECK-NEXT: ret <4 x float> [[REG101]]
243 
244 // CHECK: define available_externally <4 x float> @_mm_cmpneq_ss
245 // CHECK: [[REG102:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
246 // CHECK-NEXT: store <4 x float> [[REG102]], <4 x float>* [[REG103:[0-9a-zA-Z_%.]+]], align 16
247 // CHECK: [[REG104:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
248 // CHECK-NEXT: store <4 x float> [[REG104]], <4 x float>* [[REG105:[0-9a-zA-Z_%.]+]], align 16
249 // CHECK-NEXT: [[REG106:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG103]], align 16
250 // CHECK-NEXT: [[REG107:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG105]], align 16
251 // CHECK-NEXT: call <4 x i32> @vec_cmpeq(float vector[4], float vector[4])(<4 x float> [[REG106]], <4 x float> [[REG107]])
252 // CHECK: call <4 x float> @vec_nor(float vector[4], float vector[4])
253 // CHECK: [[REG108:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
254 // CHECK-NEXT: ret <4 x float> [[REG108]]
255 
256 // CHECK: define available_externally <4 x float> @_mm_cmpnge_ps
257 // CHECK: [[REG109:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmplt(float vector[4], float vector[4])
258 // CHECK-NEXT: [[REG110:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG109]] to <4 x float>
259 // CHECK-NEXT: ret <4 x float> [[REG110]]
260 
261 // CHECK: define available_externally <4 x float> @_mm_cmpnge_ss
262 // CHECK: [[REG111:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
263 // CHECK-NEXT: store <4 x float> [[REG111]], <4 x float>* [[REG112:[0-9a-zA-Z_%.]+]], align 16
264 // CHECK: [[REG113:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
265 // CHECK-NEXT: store <4 x float> [[REG113]], <4 x float>* [[REG114:[0-9a-zA-Z_%.]+]], align 16
266 // CHECK-NEXT: [[REG115:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG112]], align 16
267 // CHECK-NEXT: [[REG116:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG114]], align 16
268 // CHECK-NEXT: call <4 x i32> @vec_cmplt(float vector[4], float vector[4])(<4 x float> [[REG115]], <4 x float> [[REG116]])
269 // CHECK: [[REG117:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
270 // CHECK-NEXT: ret <4 x float> [[REG117]]
271 
272 // CHECK: define available_externally <4 x float> @_mm_cmpngt_ps
273 // CHECK: [[REG118:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmple(float vector[4], float vector[4])
274 // CHECK-NEXT: [[REG119:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG118]] to <4 x float>
275 // CHECK-NEXT: ret <4 x float> [[REG119]]
276 
277 // CHECK: define available_externally <4 x float> @_mm_cmpngt_ss
278 // CHECK: [[REG120:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
279 // CHECK-NEXT: store <4 x float> [[REG120]], <4 x float>* [[REG121:[0-9a-zA-Z_%.]+]], align 16
280 // CHECK: [[REG122:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
281 // CHECK-NEXT: store <4 x float> [[REG122]], <4 x float>* [[REG123:[0-9a-zA-Z_%.]+]], align 16
282 // CHECK-NEXT: [[REG124:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG121]], align 16
283 // CHECK-NEXT: [[REG125:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG123]], align 16
284 // CHECK-NEXT: call <4 x i32> @vec_cmple(float vector[4], float vector[4])(<4 x float> [[REG124]], <4 x float> [[REG125]])
285 // CHECK: [[REG126:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
286 // CHECK-NEXT: ret <4 x float> [[REG126]]
287 
288 // CHECK: define available_externally <4 x float> @_mm_cmpnle_ps
289 // CHECK: [[REG127:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])
290 // CHECK-NEXT: [[REG128:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG127]] to <4 x float>
291 // CHECK-NEXT: ret <4 x float> [[REG128]]
292 
293 // CHECK: define available_externally <4 x float> @_mm_cmpnle_ss
294 // CHECK: [[REG129:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
295 // CHECK-NEXT: store <4 x float> [[REG129]], <4 x float>* [[REG130:[0-9a-zA-Z_%.]+]], align 16
296 // CHECK: [[REG131:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
297 // CHECK-NEXT: store <4 x float> [[REG131]], <4 x float>* [[REG132:[0-9a-zA-Z_%.]+]], align 16
298 // CHECK-NEXT: [[REG133:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG130]], align 16
299 // CHECK-NEXT: [[REG134:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG132]], align 16
300 // CHECK-NEXT: call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG133]], <4 x float> [[REG134]])
301 // CHECK: [[REG135:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
302 // CHECK-NEXT: ret <4 x float> [[REG135]]
303 
304 // CHECK: define available_externally <4 x float> @_mm_cmpnlt_ps
305 // CHECK: [[REG136:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpge(float vector[4], float vector[4])
306 // CHECK-NEXT: [[REG137:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG136]] to <4 x float>
307 // CHECK-NEXT: ret <4 x float> [[REG137]]
308 
309 // CHECK: define available_externally <4 x float> @_mm_cmpnlt_ss
310 // CHECK: [[REG138:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
311 // CHECK-NEXT: store <4 x float> [[REG138]], <4 x float>* [[REG139:[0-9a-zA-Z_%.]+]], align 16
312 // CHECK: [[REG140:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
313 // CHECK-NEXT: store <4 x float> [[REG140]], <4 x float>* [[REG141:[0-9a-zA-Z_%.]+]], align 16
314 // CHECK-NEXT: [[REG142:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG139]], align 16
315 // CHECK-NEXT: [[REG143:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG141]], align 16
316 // CHECK-NEXT: call <4 x i32> @vec_cmpge(float vector[4], float vector[4])(<4 x float> [[REG142]], <4 x float> [[REG143]])
317 // CHECK: [[REG144:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
318 // CHECK-NEXT: ret <4 x float> [[REG144]]
319 
320 // CHECK: define available_externally <4 x float> @_mm_cmpord_ps
321 // CHECK: [[REG145:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
322 // CHECK-NEXT: [[REG146:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG145]] to <4 x i32>
323 // CHECK-NEXT: store <4 x i32> [[REG146]], <4 x i32>* [[REG147:[0-9a-zA-Z_%.]+]], align 16
324 // CHECK: [[REG148:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
325 // CHECK-NEXT: [[REG149:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG148]] to <4 x i32>
326 // CHECK-NEXT: store <4 x i32> [[REG149]], <4 x i32>* [[REG150:[0-9a-zA-Z_%.]+]], align 16
327 // CHECK-NEXT: [[REG151:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG147]], align 16
328 // CHECK-NEXT: [[REG152:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG151]])
329 // CHECK-NEXT: store <4 x i32> [[REG152]], <4 x i32>* [[REG153:[0-9a-zA-Z_%.]+]], align 16
330 // CHECK-NEXT: [[REG154:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG150]], align 16
331 // CHECK-NEXT: [[REG155:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG154]])
332 // CHECK-NEXT: store <4 x i32> [[REG155]], <4 x i32>* [[REG156:[0-9a-zA-Z_%.]+]], align 16
333 // CHECK-NEXT: [[REG157:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG153]], align 16
334 // CHECK-NEXT: [[REG158:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG156]], align 16
335 // CHECK-NEXT: [[REG159:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> {{[0-9a-zA-Z_%.]+}}, <4 x i32> {{[0-9a-zA-Z_%.]+}})
336 // CHECK-NEXT: [[REG160:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG159]] to <4 x float>
337 // CHECK-NEXT: ret <4 x float> [[REG160]]
338 
339 // CHECK: define available_externally <4 x float> @_mm_cmpord_ss
340 // CHECK: [[REG161:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
341 // CHECK-NEXT: [[REG162:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG161]] to <4 x i32>
342 // CHECK-NEXT: store <4 x i32> [[REG162]], <4 x i32>* [[REG163:[0-9a-zA-Z_%.]+]], align 16
343 // CHECK: [[REG164:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])
344 // CHECK-NEXT: [[REG165:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG164]] to <4 x i32>
345 // CHECK-NEXT: store <4 x i32> [[REG165]], <4 x i32>* [[REG166:[0-9a-zA-Z_%.]+]], align 16
346 // CHECK-NEXT: [[REG167:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG163]], align 16
347 // CHECK-NEXT: [[REG168:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG167]])
348 // CHECK-NEXT: store <4 x i32> [[REG168]], <4 x i32>* [[REG161:[0-9a-zA-Z_%.]+]], align 16
349 // CHECK-NEXT: [[REG169:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG166]], align 16
350 // CHECK-NEXT: [[REG170:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> [[REG169]])
351 // CHECK-NEXT: store <4 x i32> [[REG170]], <4 x i32>* [[REG171:[0-9a-zA-Z_%.]+]], align 16
352 // CHECK-NEXT: [[REG172:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
353 // CHECK-NEXT: [[REG173:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG171]], align 16
354 // CHECK-NEXT: [[REG174:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG172]], <4 x i32> [[REG173]])
355 // CHECK-NEXT: store <4 x i32> [[REG174]], <4 x i32>* [[REG161]], align 16
356 // CHECK: [[REG175:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG161]], align 16
357 // CHECK-NEXT: [[REG176:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG175]] to <4 x float>
358 // CHECK-NEXT: [[REG177:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> [[REG176]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
359 // CHECK-NEXT: ret <4 x float> [[REG177]]
360 
361 // CHECK: define available_externally <4 x float> @_mm_cmpunord_ps
362 // CHECK: [[REG178:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
363 // CHECK-NEXT: [[REG179:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG178]])
364 // CHECK-NEXT: [[REG180:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG179]] to <4 x i32>
365 // CHECK-NEXT: store <4 x i32> [[REG180]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
366 // CHECK-NEXT: [[REG181:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
367 // CHECK-NEXT: [[REG182:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG181]])
368 // CHECK-NEXT: [[REG183:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG182]] to <4 x i32>
369 // CHECK-NEXT: store <4 x i32> [[REG183]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
370 // CHECK-NEXT: [[REG184:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
371 // CHECK-NEXT: [[REG185:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG184]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
372 // CHECK-NEXT: store <4 x i32> [[REG185]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
373 // CHECK-NEXT: [[REG186:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
374 // CHECK-NEXT: [[REG187:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG186]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
375 // CHECK-NEXT: store <4 x i32> [[REG187]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
376 // CHECK-NEXT: [[REG188:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
377 // CHECK-NEXT: [[REG189:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
378 // CHECK-NEXT: [[REG190:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG188]], <4 x i32> [[REG189]])
379 // CHECK-NEXT: [[REG191:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG190]] to <4 x float>
380 // CHECK-NEXT: ret <4 x float> [[REG191]]
381 
382 // CHECK: define available_externally <4 x float> @_mm_cmpunord_ss
383 // CHECK: [[REG192:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
384 // CHECK-NEXT: [[REG193:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG192]])
385 // CHECK-NEXT: [[REG194:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG193]] to <4 x i32>
386 // CHECK-NEXT: store <4 x i32> [[REG194]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
387 // CHECK-NEXT: [[REG195:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
388 // CHECK-NEXT: [[REG196:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_abs(float vector[4])(<4 x float> [[REG195]])
389 // CHECK-NEXT: [[REG197:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG196]] to <4 x i32>
390 // CHECK-NEXT: store <4 x i32> [[REG197]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
391 // CHECK-NEXT: [[REG198:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
392 // CHECK-NEXT: [[REG199:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG198]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
393 // CHECK-NEXT: store <4 x i32> [[REG199]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
394 // CHECK-NEXT: [[REG200:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
395 // CHECK-NEXT: [[REG201:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG200]], <4 x i32> <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
396 // CHECK-NEXT: store <4 x i32> [[REG201]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
397 // CHECK-NEXT: [[REG202:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
398 // CHECK-NEXT: [[REG203:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
399 // CHECK-NEXT: [[REG204:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])(<4 x i32> [[REG202]], <4 x i32> [[REG203]])
400 // CHECK-NEXT: store <4 x i32> [[REG204]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
401 // CHECK-NEXT: [[REG205:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
402 // CHECK-NEXT: [[REG206:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
403 // CHECK-NEXT: [[REG207:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG206]] to <4 x float>
404 // CHECK-NEXT: [[REG208:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG205]], <4 x float> [[REG207]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
405 // CHECK-NEXT: ret <4 x float> [[REG208]]
406 
407 void __attribute__((noinline))
test_comi()408 test_comi() {
409   i = _mm_comieq_ss(m1, m2);
410   i = _mm_comige_ss(m1, m2);
411   i = _mm_comigt_ss(m1, m2);
412   i = _mm_comile_ss(m1, m2);
413   i = _mm_comilt_ss(m1, m2);
414   i = _mm_comineq_ss(m1, m2);
415 }
416 
417 // CHECK-LABEL: @test_comi
418 
419 // CHECK: define available_externally signext i32 @_mm_comieq_ss
420 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
421 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
422 // CHECK-NEXT: [[REG209:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
423 // CHECK-NEXT: [[REG210:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG209]], i32 0
424 // CHECK-NEXT: [[REG211:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
425 // CHECK-NEXT: [[REG212:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG211]], i32 0
426 // CHECK-NEXT: [[REG213:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG210]], [[REG212]]
427 // CHECK-NEXT: [[REG214:[0-9a-zA-Z_%.]+]] = zext i1 [[REG213]] to i32
428 // CHECK-NEXT: ret i32 [[REG214]]
429 
430 // CHECK: define available_externally signext i32 @_mm_comige_ss
431 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
432 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
433 // CHECK-NEXT: [[REG215:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
434 // CHECK-NEXT: [[REG216:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG215]], i32 0
435 // CHECK-NEXT: [[REG217:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
436 // CHECK-NEXT: [[REG218:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG217]], i32 0
437 // CHECK-NEXT: [[REG219:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG216]], [[REG218]]
438 // CHECK-NEXT: [[REG220:[0-9a-zA-Z_%.]+]] = zext i1 [[REG219]] to i32
439 // CHECK-NEXT: ret i32 [[REG220]]
440 
441 // CHECK: define available_externally signext i32 @_mm_comigt_ss
442 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
443 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
444 // CHECK-NEXT: [[REG221:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
445 // CHECK-NEXT: [[REG222:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG221]], i32 0
446 // CHECK-NEXT: [[REG223:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
447 // CHECK-NEXT: [[REG224:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG223]], i32 0
448 // CHECK-NEXT: [[REG225:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG222]], [[REG224]]
449 // CHECK-NEXT: [[REG226:[0-9a-zA-Z_%.]+]] = zext i1 [[REG225]] to i32
450 // CHECK-NEXT: ret i32 [[REG226]]
451 
452 // CHECK: define available_externally signext i32 @_mm_comile_ss
453 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
454 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
455 // CHECK-NEXT: [[REG227:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
456 // CHECK-NEXT: [[REG228:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG227]], i32 0
457 // CHECK-NEXT: [[REG229:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
458 // CHECK-NEXT: [[REG230:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG229]], i32 0
459 // CHECK-NEXT: [[REG231:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG228]], [[REG230]]
460 // CHECK-NEXT: [[REG232:[0-9a-zA-Z_%.]+]] = zext i1 [[REG231]] to i32
461 // CHECK-NEXT: ret i32 [[REG232]]
462 
463 // CHECK: define available_externally signext i32 @_mm_comilt_ss
464 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
465 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
466 // CHECK-NEXT: [[REG233:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
467 // CHECK-NEXT: [[REG234:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG233]], i32 0
468 // CHECK-NEXT: [[REG235:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
469 // CHECK-NEXT: [[REG236:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG235]], i32 0
470 // CHECK-NEXT: [[REG237:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG234]], [[REG236]]
471 // CHECK-NEXT: [[REG238:[0-9a-zA-Z_%.]+]] = zext i1 [[REG237]] to i32
472 // CHECK-NEXT: ret i32 [[REG238]]
473 
474 // CHECK: define available_externally signext i32 @_mm_comineq_ss
475 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
476 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
477 // CHECK-NEXT: [[REG239:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
478 // CHECK-NEXT: [[REG240:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG239]], i32 0
479 // CHECK-NEXT: [[REG241:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
480 // CHECK-NEXT: [[REG242:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG241]], i32 0
481 // CHECK-NEXT: [[REG243:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG240]], [[REG242]]
482 // CHECK-NEXT: [[REG244:[0-9a-zA-Z_%.]+]] = zext i1 [[REG243]] to i32
483 // CHECK-NEXT: ret i32 [[REG244]]
484 
485 void __attribute__((noinline))
test_convert()486 test_convert() {
487   res = _mm_cvt_pi2ps(m1, ms[1]);
488   res64 = _mm_cvt_ps2pi(m1);
489   res = _mm_cvt_si2ss(m1, i);
490   i = _mm_cvt_ss2si(m1);
491   res = _mm_cvtpi16_ps(ms[0]);
492   res = _mm_cvtpi32_ps(m1, ms[1]);
493   res = _mm_cvtpi32x2_ps(ms[0], ms[1]);
494   res = _mm_cvtpi8_ps(ms[0]);
495   res64 = _mm_cvtps_pi16(m1);
496   res64 = _mm_cvtps_pi32(m1);
497   res64 = _mm_cvtps_pi8(m1);
498   res = _mm_cvtpu16_ps(ms[0]);
499   res = _mm_cvtpu8_ps(ms[0]);
500   res = _mm_cvtsi32_ss(m1, i);
501   res = _mm_cvtsi64_ss(m1, i64);
502   fs[0] = _mm_cvtss_f32(m1);
503   i = _mm_cvtss_si32(m1);
504   i64 = _mm_cvtss_si64(m1);
505   res64 = _mm_cvtt_ps2pi(m1);
506   i = _mm_cvtt_ss2si(m1);
507   res64 = _mm_cvttps_pi32(m1);
508   i = _mm_cvttss_si32(m1);
509   i64 = _mm_cvttss_si64(m1);
510 }
511 
512 // CHECK-LABEL: @test_convert
513 
514 // CHECK: define available_externally <4 x float> @_mm_cvt_pi2ps
515 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
516 // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
517 // CHECK-NEXT: [[REG245:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
518 // CHECK-NEXT: [[REG246:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
519 // CHECK-NEXT: [[REG247:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtpi32_ps(<4 x float> [[REG245]], i64 [[REG246]])
520 // CHECK-NEXT: ret <4 x float> [[REG247]]
521 
522 // CHECK: define available_externally i64 @_mm_cvt_ps2pi
523 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
524 // CHECK-NEXT: [[REG248:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
525 // CHECK-NEXT: [[REG249:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvtps_pi32(<4 x float> [[REG248]])
526 // CHECK-NEXT: ret i64 [[REG249]]
527 
528 // CHECK: define available_externally <4 x float> @_mm_cvt_si2ss
529 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
530 // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
531 // CHECK-NEXT: [[REG250:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
532 // CHECK-NEXT: [[REG251:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
533 // CHECK-NEXT: [[REG252:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_cvtsi32_ss(<4 x float> [[REG250]], i32 signext [[REG251]])
534 // CHECK-NEXT: ret <4 x float> [[REG252]]
535 
536 // CHECK: define available_externally signext i32 @_mm_cvt_ss2si
537 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
538 // CHECK-NEXT: [[REG253:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
539 // CHECK-NEXT: [[REG254:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvtss_si32(<4 x float> [[REG253]])
540 // CHECK-NEXT: ret i32 [[REG254]]
541 
542 // CHECK: define available_externally <4 x float> @_mm_cvtpi16_ps
543 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
544 // CHECK-NEXT: [[REG255:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
545 // CHECK-NEXT: [[REG256:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG255]], i32 0
546 // CHECK-NEXT: [[REG257:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
547 // CHECK-NEXT: [[REG258:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG256]], i64 [[REG257]], i32 1
548 // CHECK-NEXT: store <2 x i64> [[REG258]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
549 // CHECK-NEXT: [[REG259:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
550 // CHECK-NEXT: [[REG260:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG259]] to <8 x i16>
551 // CHECK-NEXT: store <8 x i16> [[REG260]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
552 // CHECK-NEXT: [[REG261:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
553 // CHECK-NEXT: [[REG262:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupklsh(short vector[8])(<8 x i16> [[REG261]])
554 // CHECK-NEXT: store <4 x i32> [[REG262]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
555 // CHECK-NEXT: [[REG263:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
556 // CHECK-NEXT: [[REG264:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG263]], i32 0)
557 // CHECK-NEXT: store <4 x float> [[REG264]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
558 // CHECK-NEXT: [[REG265:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
559 // CHECK-NEXT: ret <4 x float> [[REG265]]
560 
561 // CHECK: define available_externally <4 x float> @_mm_cvtpi32_ps
562 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
563 // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
564 // CHECK-NEXT: [[REG266:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
565 // CHECK-NEXT: [[REG267:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG266]], i32 0
566 // CHECK-NEXT: [[REG268:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
567 // CHECK-NEXT: [[REG269:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG267]], i64 [[REG268]], i32 1
568 // CHECK-NEXT: store <2 x i64> [[REG269]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
569 // CHECK-NEXT: [[REG270:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
570 // CHECK-NEXT: [[REG271:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG270]] to <4 x i32>
571 // CHECK-NEXT: store <4 x i32> [[REG271]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
572 // CHECK-NEXT: [[REG272:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
573 // CHECK-NEXT: [[REG273:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG272]], i32 0)
574 // CHECK-NEXT: store <4 x float> [[REG273]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
575 // CHECK-NEXT: [[REG274:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
576 // CHECK-NEXT: [[REG275:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG274]] to <2 x i64>
577 // CHECK-NEXT: [[REG276:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG275]], i32 0
578 // CHECK-NEXT: [[REG277:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG276]], i32 0
579 // CHECK-NEXT: [[REG278:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
580 // CHECK-NEXT: [[REG279:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG278]] to <2 x i64>
581 // CHECK-NEXT: [[REG280:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG279]], i32 1
582 // CHECK-NEXT: [[REG281:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG277]], i64 [[REG280]], i32 1
583 // CHECK-NEXT: store <2 x i64> [[REG281]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
584 // CHECK-NEXT: [[REG282:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
585 // CHECK-NEXT: [[REG283:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG282]] to <4 x float>
586 // CHECK-NEXT: ret <4 x float> [[REG283]]
587 
588 // CHECK: define available_externally <4 x float> @_mm_cvtpi32x2_ps
589 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
590 // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
591 // CHECK-NEXT: [[REG284:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
592 // CHECK-NEXT: [[REG285:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG284]], i32 0
593 // CHECK-NEXT: [[REG286:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
594 // CHECK-NEXT: [[REG287:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG285]], i64 [[REG286]], i32 1
595 // CHECK-NEXT: store <2 x i64> [[REG287]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
596 // CHECK-NEXT: [[REG288:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
597 // CHECK-NEXT: [[REG289:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG288]] to <4 x i32>
598 // CHECK-NEXT: store <4 x i32> [[REG289]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
599 // CHECK-NEXT: [[REG290:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
600 // CHECK-NEXT: [[REG291:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG290]], i32 0)
601 // CHECK-NEXT: store <4 x float> [[REG291]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
602 // CHECK-NEXT: [[REG292:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
603 // CHECK-NEXT: ret <4 x float> [[REG292]]
604 
605 // CHECK: define available_externally <4 x float> @_mm_cvtpi8_ps
606 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
607 // CHECK-NEXT: [[REG293:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
608 // CHECK-NEXT: [[REG294:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG293]], i32 0
609 // CHECK-NEXT: [[REG295:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
610 // CHECK-NEXT: [[REG296:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG294]], i64 [[REG295]], i32 1
611 // CHECK-NEXT: store <2 x i64> [[REG296]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
612 // CHECK-NEXT: [[REG297:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
613 // CHECK-NEXT: [[REG298:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG297]] to <16 x i8>
614 // CHECK-NEXT: store <16 x i8> [[REG298]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
615 // CHECK-NEXT: [[REG299:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
616 // CHECK-NEXT: [[REG300:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_vupkhsb(signed char vector[16])(<16 x i8> [[REG299]])
617 // CHECK-NEXT: store <8 x i16> [[REG300]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
618 // CHECK-NEXT: [[REG301:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
619 // CHECK-NEXT: [[REG302:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vupkhsh(short vector[8])(<8 x i16> [[REG301]])
620 // CHECK-NEXT: store <4 x i32> [[REG302]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
621 // CHECK-NEXT: [[REG303:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
622 // CHECK-NEXT: [[REG304:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> [[REG303]], i32 0)
623 // CHECK-NEXT: store <4 x float> [[REG304]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
624 // CHECK-NEXT: [[REG305:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
625 // CHECK-NEXT: ret <4 x float> [[REG305]]
626 
627 // CHECK: define available_externally i64 @_mm_cvtps_pi16
628 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
629 // CHECK-NEXT: [[REG306:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
630 // CHECK-NEXT: [[REG307:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG306]])
631 // CHECK-NEXT: store <4 x float> [[REG307]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
632 // CHECK-NEXT: [[REG308:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
633 // CHECK-NEXT: [[REG309:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG308]], i32 0)
634 // CHECK-NEXT: store <4 x i32> [[REG309]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
635 // CHECK-NEXT: [[REG310:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
636 // CHECK-NEXT: [[REG311:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
637 // CHECK-NEXT: [[REG312:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> [[REG310]], <4 x i32> [[REG311]])
638 // CHECK-NEXT: [[REG313:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG312]] to <2 x i64>
639 // CHECK-NEXT: store <2 x i64> [[REG313]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
640 // CHECK-NEXT: [[REG314:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
641 // CHECK-NEXT: [[REG315:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG314]], i32 0
642 // CHECK-NEXT: ret i64 [[REG315]]
643 
644 // CHECK: define available_externally i64 @_mm_cvtps_pi32
645 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
646 // CHECK-NEXT: [[REG316:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
647 // CHECK-NEXT: [[REG317:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG316]] to <2 x i64>
648 // CHECK-NEXT: [[REG318:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> [[REG317]], i32 zeroext 0)
649 // CHECK-NEXT: [[REG319:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG318]] to <4 x float>
650 // CHECK-NEXT: store <4 x float> [[REG319]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
651 // CHECK-NEXT: [[REG320:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
652 // CHECK-NEXT: [[REG321:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG320]])
653 // CHECK-NEXT: store <4 x float> [[REG321]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
654 // CHECK-NEXT: [[REG322:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
655 // CHECK-NEXT: [[REG323:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG322]], i32 0)
656 // CHECK-NEXT: [[REG324:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG323]] to <2 x i64>
657 // CHECK-NEXT: store <2 x i64> [[REG324]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
658 // CHECK-NEXT: [[REG325:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
659 // CHECK-NEXT: [[REG326:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG325]], i32 0
660 // CHECK-NEXT: ret i64 [[REG326]]
661 
662 // CHECK: define available_externally i64 @_mm_cvtps_pi8
663 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
664 // CHECK-NEXT: [[REG327:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
665 // CHECK-NEXT: [[REG328:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rint(float vector[4])(<4 x float> [[REG327]])
666 // CHECK-NEXT: store <4 x float> [[REG328]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
667 // CHECK-NEXT: [[REG329:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
668 // CHECK-NEXT: [[REG330:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG329]], i32 0)
669 // CHECK-NEXT: store <4 x i32> [[REG330]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
670 // CHECK-NEXT: [[REG331:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
671 // CHECK-NEXT: [[REG332:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_pack(int vector[4], int vector[4])(<4 x i32> [[REG331]], <4 x i32> zeroinitializer)
672 // CHECK-NEXT: store <8 x i16> [[REG332]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
673 // CHECK-NEXT: [[REG333:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
674 // CHECK-NEXT: [[REG334:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
675 // CHECK-NEXT: [[REG335:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_pack(short vector[8], short vector[8])(<8 x i16> [[REG333]], <8 x i16> [[REG334]])
676 // CHECK-NEXT: store <16 x i8> [[REG335]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
677 // CHECK-NEXT: [[REG336:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
678 // CHECK-NEXT: [[REG337:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG336]] to <2 x i64>
679 // CHECK-NEXT: [[REG338:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG337]], i32 0
680 // CHECK-NEXT: ret i64 [[REG338]]
681 
682 // CHECK: define available_externally <4 x float> @_mm_cvtpu16_ps
683 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
684 // CHECK-NEXT: store <8 x i16> zeroinitializer, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
685 // CHECK-NEXT: [[REG339:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
686 // CHECK-NEXT: [[REG340:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG339]], i32 0
687 // CHECK-NEXT: [[REG341:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
688 // CHECK-NEXT: [[REG342:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG340]], i64 [[REG341]], i32 1
689 // CHECK-NEXT: store <2 x i64> [[REG342]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
690 // CHECK-NEXT: [[REG343:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
691 // CHECK-NEXT: [[REG344:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG343]] to <8 x i16>
692 // CHECK-NEXT: store <8 x i16> [[REG344]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
693 // CHECK-NEXT: [[REG345:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
694 // CHECK-LE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG345]], <8 x i16> zeroinitializer)
695 // CHECK-BE-NEXT: [[REG346:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergel(unsigned short vector[8], unsigned short vector[8])(<8 x i16> zeroinitializer, <8 x i16> [[REG345]])
696 // CHECK-NEXT: [[REG347:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG346]] to <4 x i32>
697 // CHECK-NEXT: store <4 x i32> [[REG347]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
698 // CHECK-NEXT: [[REG348:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
699 // CHECK-NEXT: [[REG349:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG348]], i32 0)
700 // CHECK-NEXT: store <4 x float> [[REG349]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
701 // CHECK-NEXT: [[REG350:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
702 // CHECK-NEXT: ret <4 x float> [[REG350]]
703 
704 // CHECK: define available_externally <4 x float> @_mm_cvtpu8_ps
705 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
706 // CHECK-NEXT: store <16 x i8> zeroinitializer, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
707 // CHECK-NEXT: [[REG351:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
708 // CHECK-NEXT: [[REG352:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> undef, i64 [[REG351]], i32 0
709 // CHECK-NEXT: [[REG353:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
710 // CHECK-NEXT: [[REG354:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG352]], i64 [[REG353]], i32 1
711 // CHECK-NEXT: store <2 x i64> [[REG354]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
712 // CHECK-NEXT: [[REG355:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
713 // CHECK-NEXT: [[REG356:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG355]] to <16 x i8>
714 // CHECK-NEXT: store <16 x i8> [[REG356]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
715 // CHECK-NEXT: [[REG357:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
716 // CHECK-LE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG357]], <16 x i8> zeroinitializer)
717 // CHECK-BE-NEXT: [[REG358:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_mergel(unsigned char vector[16], unsigned char vector[16])(<16 x i8> zeroinitializer, <16 x i8> [[REG357]])
718 // CHECK-NEXT: [[REG359:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG358]] to <8 x i16>
719 // CHECK-NEXT: store <8 x i16> [[REG359]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
720 // CHECK-NEXT: [[REG360:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
721 // CHECK-LE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> [[REG360]], <8 x i16> zeroinitializer)
722 // CHECK-BE-NEXT: [[REG361:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_mergeh(unsigned short vector[8], unsigned short vector[8])(<8 x i16> zeroinitializer, <8 x i16> [[REG360]])
723 // CHECK-NEXT: [[REG362:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG361]] to <4 x i32>
724 // CHECK-NEXT: store <4 x i32> [[REG362]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
725 // CHECK-NEXT: [[REG363:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
726 // CHECK-NEXT: [[REG364:[0-9a-zA-Z_%.]+]] = call <4 x float> @llvm.ppc.altivec.vcfux(<4 x i32> [[REG363]], i32 0)
727 // CHECK-NEXT: store <4 x float> [[REG364]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
728 // CHECK-NEXT: [[REG365:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
729 // CHECK-NEXT: ret <4 x float> [[REG365]]
730 
731 // CHECK: define available_externally <4 x float> @_mm_cvtsi32_ss
732 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
733 // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
734 // CHECK-NEXT: [[REG366:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
735 // CHECK-NEXT: [[REG367:[0-9a-zA-Z_%.]+]] = sitofp i32 [[REG366]] to float
736 // CHECK-NEXT: store float [[REG367]], float* {{[0-9a-zA-Z_%.]+}}, align 4
737 // CHECK-NEXT: [[REG368:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
738 // CHECK-NEXT: [[REG369:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
739 // CHECK-NEXT: [[REG370:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG369]], float [[REG368]], i32 0
740 // CHECK-NEXT: store <4 x float> [[REG370]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
741 // CHECK-NEXT: [[REG371:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
742 // CHECK-NEXT: ret <4 x float> [[REG371]]
743 
744 // CHECK: define available_externally <4 x float> @_mm_cvtsi64_ss
745 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
746 // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
747 // CHECK-NEXT: [[REG372:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
748 // CHECK-NEXT: [[REG373:[0-9a-zA-Z_%.]+]] = sitofp i64 [[REG372]] to float
749 // CHECK-NEXT: store float [[REG373]], float* {{[0-9a-zA-Z_%.]+}}, align 4
750 // CHECK-NEXT: [[REG374:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
751 // CHECK-NEXT: [[REG375:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
752 // CHECK-NEXT: [[REG376:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG375]], float [[REG374]], i32 0
753 // CHECK-NEXT: store <4 x float> [[REG376]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
754 // CHECK-NEXT: [[REG377:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
755 // CHECK-NEXT: ret <4 x float> [[REG377]]
756 
757 // CHECK: define available_externally float @_mm_cvtss_f32
758 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
759 // CHECK-NEXT: [[REG378:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
760 // CHECK-NEXT: [[REG379:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG378]], i32 0
761 // CHECK-NEXT: ret float [[REG379]]
762 
763 // CHECK: define available_externally signext i32 @_mm_cvtss_si32
764 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
765 // CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
766 // CHECK-NEXT: [[REG380:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
767 // CHECK-LE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
768 // CHECK-BE-NEXT: [[REG381:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctiw  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG380]])
769 // CHECK-NEXT: [[REG382:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 0
770 // CHECK-NEXT: [[REG383:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 1
771 // CHECK-NEXT: [[REG384:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG381]], 2
772 // CHECK-NEXT: store <4 x float> [[REG382]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
773 // CHECK-NEXT: store i64 [[REG383]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
774 // CHECK-NEXT: store double [[REG384]], double* {{[0-9a-zA-Z_%.]+}}, align 8
775 // CHECK-NEXT: [[REG385:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
776 // CHECK-NEXT: [[REG386:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG385]] to i32
777 // CHECK-NEXT: ret i32 [[REG386]]
778 
779 // CHECK: define available_externally i64 @_mm_cvtss_si64
780 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
781 // CHECK-NEXT: store i64 0, i64* {{[0-9a-zA-Z_%.]+}}, align 8
782 // CHECK-NEXT: [[REG387:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
783 // CHECK-LE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xxsldwi ${0:x},${0:x},${0:x},3;\0Axscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
784 // CHECK-BE-NEXT: [[REG388:[0-9a-zA-Z_%.]+]] = call { <4 x float>, i64, double } asm "xscvspdp ${2:x},${0:x};\0Afctid  $2,$2;\0Amfvsrd  $1,${2:x};\0A", "=^wa,=r,=f,0"(<4 x float> [[REG387]])
785 // CHECK-NEXT: [[REG389:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 0
786 // CHECK-NEXT: [[REG390:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 1
787 // CHECK-NEXT: [[REG391:[0-9a-zA-Z_%.]+]] = extractvalue { <4 x float>, i64, double } [[REG388]], 2
788 // CHECK-NEXT: store <4 x float> [[REG389]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
789 // CHECK-NEXT: store i64 [[REG390]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
790 // CHECK-NEXT: store double [[REG391]], double* {{[0-9a-zA-Z_%.]+}}, align 8
791 // CHECK-NEXT: [[REG392:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
792 // CHECK-NEXT: ret i64 [[REG392]]
793 
794 // CHECK: define available_externally i64 @_mm_cvtt_ps2pi
795 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
796 // CHECK: [[REG393:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
797 // CHECK-NEXT: [[REG394:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cvttps_pi32(<4 x float> [[REG393]])
798 // CHECK-NEXT: ret i64 [[REG394]]
799 
800 // CHECK: define available_externally signext i32 @_mm_cvtt_ss2si
801 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
802 // CHECK-NEXT: [[REG395:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
803 // CHECK-NEXT: [[REG396:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_cvttss_si32(<4 x float> [[REG395]])
804 // CHECK-NEXT: ret i32 [[REG396]]
805 
806 // CHECK: define available_externally i64 @_mm_cvttps_pi32
807 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
808 // CHECK-NEXT: [[REG397:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
809 // CHECK-NEXT: [[REG398:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG397]] to <2 x i64>
810 // CHECK-NEXT: [[REG399:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splat(long long vector[2], unsigned int)(<2 x i64> [[REG398]], i32 zeroext 0)
811 // CHECK-NEXT: [[REG400:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG399]] to <4 x float>
812 // CHECK-NEXT: store <4 x float> [[REG400]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
813 // CHECK-NEXT: [[REG401:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
814 // CHECK-NEXT: [[REG402:[0-9a-zA-Z_%.]+]] = call <4 x i32> @llvm.ppc.altivec.vctsxs(<4 x float> [[REG401]], i32 0)
815 // CHECK-NEXT: [[REG403:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG402]] to <2 x i64>
816 // CHECK-NEXT: store <2 x i64> [[REG403]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
817 // CHECK-NEXT: [[REG404:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
818 // CHECK-NEXT: [[REG405:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG404]], i32 0
819 // CHECK-NEXT: ret i64 [[REG405]]
820 
821 // CHECK: define available_externally signext i32 @_mm_cvttss_si32
822 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
823 // CHECK-NEXT: [[REG406:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
824 // CHECK-NEXT: [[REG407:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG406]], i32 0
825 // CHECK-NEXT: store float [[REG407]], float* {{[0-9a-zA-Z_%.]+}}, align 4
826 // CHECK-NEXT: [[REG408:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
827 // CHECK-NEXT: [[REG409:[0-9a-zA-Z_%.]+]] = fptosi float [[REG408]] to i32
828 // CHECK-NEXT: ret i32 [[REG409]]
829 
830 // CHECK: define available_externally i64 @_mm_cvttss_si64
831 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
832 // CHECK-NEXT: [[REG410:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
833 // CHECK-NEXT: [[REG411:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG410]], i32 0
834 // CHECK-NEXT: store float [[REG411]], float* {{[0-9a-zA-Z_%.]+}}, align 4
835 // CHECK-NEXT: [[REG412:[0-9a-zA-Z_%.]+]] = load float, float* {{[0-9a-zA-Z_%.]+}}, align 4
836 // CHECK-NEXT: [[REG413:[0-9a-zA-Z_%.]+]] = fptosi float [[REG412]] to i64
837 // CHECK-NEXT: ret i64 [[REG413]]
838 
839 void __attribute__((noinline))
test_div()840 test_div() {
841   res = _mm_div_ps(m1, m2);
842   res = _mm_div_ss(m1, m2);
843 }
844 
845 // CHECK-LABEL: @test_div
846 
847 // CHECK: define available_externally <4 x float> @_mm_div_ps(<4 x float> [[REG414:[0-9a-zA-Z_%.]+]], <4 x float> [[REG415:[0-9a-zA-Z_%.]+]])
848 // CHECK: store <4 x float> [[REG414]], <4 x float>* [[REG416:[0-9a-zA-Z_%.]+]], align 16
849 // CHECK-NEXT: store <4 x float> [[REG415]], <4 x float>* [[REG417:[0-9a-zA-Z_%.]+]], align 16
850 // CHECK-NEXT: [[REG418:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG416]], align 16
851 // CHECK-NEXT: [[REG419:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG417]], align 16
852 // CHECK-NEXT: [[REG420:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG418]], [[REG419]]
853 // CHECK-NEXT: ret <4 x float> [[REG420]]
854 
855 // CHECK: define available_externally <4 x float> @_mm_div_ss(<4 x float> [[REG421:[0-9a-zA-Z_%.]+]], <4 x float> [[REG422:[0-9a-zA-Z_%.]+]])
856 // CHECK: store <4 x float> [[REG421]], <4 x float>* [[REG423:[0-9a-zA-Z_%.]+]], align 16
857 // CHECK-NEXT: store <4 x float> [[REG422]], <4 x float>* [[REG424:[0-9a-zA-Z_%.]+]], align 16
858 // CHECK-NEXT: [[REG425:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
859 // CHECK-NEXT: [[REG426:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG425]], i32 zeroext 0)
860 // CHECK-NEXT: store <4 x float> [[REG426]], <4 x float>* [[REG427:[0-9a-zA-Z_%.]+]], align 16
861 // CHECK-NEXT: [[REG428:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG424]], align 16
862 // CHECK-NEXT: [[REG429:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG428]], i32 zeroext 0)
863 // CHECK-NEXT: store <4 x float> [[REG429]], <4 x float>* [[REG430:[0-9a-zA-Z_%.]+]], align 16
864 // CHECK-NEXT: [[REG431:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG427]], align 16
865 // CHECK-NEXT: [[REG432:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG430]], align 16
866 // CHECK-NEXT: [[REG433:[0-9a-zA-Z_%.]+]] = fdiv <4 x float> [[REG431]], [[REG432]]
867 // CHECK-NEXT: store <4 x float> [[REG433]], <4 x float>* [[REG434:[0-9a-zA-Z_%.]+]], align 16
868 // CHECK-NEXT: [[REG435:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG423]], align 16
869 // CHECK-NEXT: [[REG436:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG434]], align 16
870 // CHECK-NEXT: [[REG437:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG435]], <4 x float> [[REG436]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
871 // CHECK-NEXT: ret <4 x float> [[REG437]]
872 
873 void __attribute__((noinline))
test_extract()874 test_extract() {
875   i = _mm_extract_pi16(ms[0], i2);
876   i = _m_pextrw(ms[0], i2);
877 }
878 
879 // CHECK-LABEL: @test_extract
880 
881 // CHECK: define available_externally signext i32 @_mm_extract_pi16
882 // CHECK: [[REG438:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
883 // CHECK-NEXT: [[REG439:[0-9a-zA-Z_%.]+]] = and i32 [[REG438]], 3
884 // CHECK-NEXT: store i32 [[REG439]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
885 // CHECK-BE: sub i32 3, {{[0-9a-zA-Z_%.]+}}
886 // CHECK: [[REG440:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
887 // CHECK: [[REG441:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
888 // CHECK: [[REG442:[0-9a-zA-Z_%.]+]] = mul i32 [[REG441]], 16
889 // CHECK: [[REG443:[0-9a-zA-Z_%.]+]] = zext i32 [[REG442]] to i64
890 // CHECK-NEXT: [[REG444:[0-9a-zA-Z_%.]+]] = lshr i64 [[REG440]], [[REG443]]
891 // CHECK-NEXT: [[REG445:[0-9a-zA-Z_%.]+]] = and i64 [[REG444]], 65535
892 // CHECK-NEXT: [[REG446:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG445]] to i32
893 // CHECK-NEXT: ret i32 [[REG446]]
894 
895 // CHECK: define available_externally signext i32 @_m_pextrw
896 // CHECK: [[REG447:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_extract_pi16
897 // CHECK-NEXT: ret i32 [[REG447]]
898 
899 void __attribute__((noinline))
test_insert()900 test_insert() {
901   res64 = _mm_insert_pi16(ms[0], i, i2);
902   res64 = _m_pinsrw(ms[0], i, i2);
903 }
904 
905 // CHECK-LABEL: @test_insert
906 
907 // CHECK: define available_externally i64 @_mm_insert_pi16
908 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
909 // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
910 // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
911 // CHECK-NEXT: [[REG448:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
912 // CHECK-NEXT: [[REG449:[0-9a-zA-Z_%.]+]] = and i32 [[REG448]], 3
913 // CHECK-NEXT: [[REG450:[0-9a-zA-Z_%.]+]] = mul nsw i32 [[REG449]], 16
914 // CHECK-NEXT: store i32 [[REG450]], i32* {{[0-9a-zA-Z_%.]+}}, align 4
915 // CHECK-NEXT: [[REG451:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
916 // CHECK-NEXT: [[REG452:[0-9a-zA-Z_%.]+]] = sext i32 [[REG451]] to i64
917 // CHECK-NEXT: [[REG453:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
918 // CHECK-NEXT: [[REG454:[0-9a-zA-Z_%.]+]] = zext i32 [[REG453]] to i64
919 // CHECK-NEXT: [[REG455:[0-9a-zA-Z_%.]+]] = shl i64 [[REG452]], [[REG454]]
920 // CHECK-NEXT: store i64 [[REG455]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
921 // CHECK-NEXT: [[REG456:[0-9a-zA-Z_%.]+]] = load i32, i32* {{[0-9a-zA-Z_%.]+}}, align 4
922 // CHECK-NEXT: [[REG457:[0-9a-zA-Z_%.]+]] = zext i32 [[REG456]] to i64
923 // CHECK-NEXT: [[REG458:[0-9a-zA-Z_%.]+]] = shl i64 65535, [[REG457]]
924 // CHECK-NEXT: store i64 [[REG458]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
925 // CHECK-NEXT: [[REG459:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
926 // CHECK-NEXT: [[REG460:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
927 // CHECK-NEXT: [[REG461:[0-9a-zA-Z_%.]+]] = xor i64 [[REG460]], -1
928 // CHECK-NEXT: [[REG462:[0-9a-zA-Z_%.]+]] = and i64 [[REG459]], [[REG461]]
929 // CHECK-NEXT: [[REG463:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
930 // CHECK-NEXT: [[REG464:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
931 // CHECK-NEXT: [[REG465:[0-9a-zA-Z_%.]+]] = and i64 [[REG463]], [[REG464]]
932 // CHECK-NEXT: [[REG466:[0-9a-zA-Z_%.]+]] = or i64 [[REG462]], [[REG465]]
933 // CHECK-NEXT: store i64 [[REG466]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
934 // CHECK-NEXT: [[REG467:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
935 // CHECK-NEXT: ret i64 [[REG467]]
936 
937 // CHECK: define available_externally i64 @_m_pinsrw
938 // CHECK: [[REG468:[0-9a-zA-Z_%.]+]] = call i64 @_mm_insert_pi16
939 // CHECK-NEXT: ret i64 [[REG468]]
940 
941 void __attribute__((noinline))
test_load()942 test_load() {
943   res = _mm_load_ps(fs);
944   res = _mm_load_ps1(fs);
945   res = _mm_load_ss(fs);
946   res = _mm_load1_ps(fs);
947   res = _mm_loadh_pi(m1, &ms[0]);
948   res = _mm_loadl_pi(m1, &ms[0]);
949   res = _mm_loadr_ps(fs);
950   res = _mm_loadu_ps(fs);
951 }
952 
953 // CHECK-LABEL: @test_load
954 
955 // CHECK: define available_externally <4 x float> @_mm_load_ps
956 // CHECK: [[REG469:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(long, float vector[4] const*)
957 // CHECK-NEXT: ret <4 x float> [[REG469]]
958 
959 // CHECK: define available_externally <4 x float> @_mm_load_ps1
960 // CHECK: [[REG470:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_load1_ps
961 // CHECK-NEXT: ret <4 x float> [[REG470]]
962 
963 // CHECK: define available_externally <4 x float> @_mm_load_ss
964 // CHECK: [[REG471:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set_ss
965 // CHECK-NEXT: ret <4 x float> [[REG471]]
966 
967 // CHECK: define available_externally <4 x float> @_mm_load1_ps
968 // CHECK: [[REG472:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps
969 // CHECK-NEXT: ret <4 x float> [[REG472]]
970 
971 // CHECK: define available_externally <4 x float> @_mm_loadh_pi
972 // CHECK: [[REG473:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
973 // CHECK-NEXT: store <2 x i64> [[REG473]], <2 x i64>* [[REG474:[0-9a-zA-Z_%.]+]], align 16
974 // CHECK-NEXT: [[REG475:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG474]], align 16
975 // CHECK-NEXT: [[REG476:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG475]], i32 1
976 // CHECK-NEXT: [[REG477:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478:[0-9a-zA-Z_%.]+]], align 16
977 // CHECK-NEXT: [[REG479:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG477]], i64 [[REG476]], i32 1
978 // CHECK-NEXT: store <2 x i64> [[REG479]], <2 x i64>* [[REG478]], align 16
979 // CHECK-NEXT: [[REG480:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG478]], align 16
980 // CHECK-NEXT: [[REG481:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG480]] to <4 x float>
981 // CHECK-NEXT: ret <4 x float> [[REG481]]
982 
983 // CHECK: define available_externally <4 x float> @_mm_loadl_pi
984 // CHECK: [[REG482:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)
985 // CHECK-NEXT: store <2 x i64> [[REG482]], <2 x i64>* [[REG483:[0-9a-zA-Z_%.]+]], align 16
986 // CHECK-NEXT: [[REG484:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG483]], align 16
987 // CHECK-NEXT: [[REG485:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG484]], i32 0
988 // CHECK-NEXT: [[REG486:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487:[0-9a-zA-Z_%.]+]], align 16
989 // CHECK-NEXT: [[REG488:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> [[REG486]], i64 [[REG485]], i32 0
990 // CHECK-NEXT: store <2 x i64> [[REG488]], <2 x i64>* [[REG487]], align 16
991 // CHECK-NEXT: [[REG489:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* [[REG487]], align 16
992 // CHECK-NEXT: [[REG490:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG489]] to <4 x float>
993 // CHECK-NEXT: ret <4 x float> [[REG490]]
994 
995 // CHECK: define available_externally <4 x float> @_mm_loadr_ps
996 // CHECK: [[REG491:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_ld(long, float vector[4] const*)
997 // CHECK-NEXT: store <4 x float> [[REG491]], <4 x float>* [[REG492:[0-9a-zA-Z_%.]+]], align 16
998 // CHECK-NEXT: [[REG493:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
999 // CHECK-NEXT: [[REG494:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG492]], align 16
1000 // CHECK-NEXT: [[REG495:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG493]], <4 x float> [[REG494]], <16 x i8> <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
1001 // CHECK-NEXT: store <4 x float> [[REG495]], <4 x float>* [[REG496:[0-9a-zA-Z_%.]+]], align 16
1002 // CHECK-NEXT: [[REG497:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG496]], align 16
1003 // CHECK-NEXT: ret <4 x float> [[REG497]]
1004 
1005 // CHECK: define available_externally <4 x float> @_mm_loadu_ps
1006 // CHECK: [[REG498:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vsx_ld(int, float const*)
1007 // CHECK-NEXT: ret <4 x float> [[REG498]]
1008 
1009 void __attribute__((noinline))
test_logic()1010 test_logic() {
1011   res = _mm_or_ps(m1, m2);
1012   res = _mm_and_ps(m1, m2);
1013   res = _mm_andnot_ps(m1, m2);
1014   res = _mm_xor_ps(m1, m2);
1015 }
1016 
1017 // CHECK-LABEL: @test_logic
1018 
1019 // CHECK: define available_externally <4 x float> @_mm_or_ps
1020 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1021 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1022 // CHECK-NEXT: [[REG499:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1023 // CHECK-NEXT: [[REG500:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1024 // CHECK-NEXT: [[REG501:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_or(float vector[4], float vector[4])(<4 x float> [[REG499]], <4 x float> [[REG500]])
1025 // CHECK-NEXT: ret <4 x float> [[REG501]]
1026 
1027 // CHECK: define available_externally <4 x float> @_mm_and_ps
1028 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1029 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1030 // CHECK-NEXT: [[REG502:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1031 // CHECK-NEXT: [[REG503:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1032 // CHECK-NEXT: [[REG504:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_and(float vector[4], float vector[4])(<4 x float> [[REG502]], <4 x float> [[REG503]])
1033 // CHECK-NEXT: ret <4 x float> [[REG504]]
1034 
1035 // CHECK: define available_externally <4 x float> @_mm_andnot_ps
1036 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1037 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1038 // CHECK-NEXT: [[REG505:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1039 // CHECK-NEXT: [[REG506:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1040 // CHECK-NEXT: [[REG507:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_andc(float vector[4], float vector[4])(<4 x float> [[REG505]], <4 x float> [[REG506]])
1041 // CHECK-NEXT: ret <4 x float> [[REG507]]
1042 
1043 // CHECK: define available_externally <4 x float> @_mm_xor_ps
1044 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1045 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1046 // CHECK-NEXT: [[REG508:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1047 // CHECK-NEXT: [[REG509:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1048 // CHECK-NEXT: [[REG510:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_xor(float vector[4], float vector[4])(<4 x float> [[REG508]], <4 x float> [[REG509]])
1049 // CHECK-NEXT: ret <4 x float> [[REG510]]
1050 
1051 void __attribute__((noinline))
test_max()1052 test_max() {
1053   res = _mm_max_ps(m1, m2);
1054   res = _mm_max_ss(m1, m2);
1055   res64 = _mm_max_pi16(ms[0], ms[1]);
1056   res64 = _mm_max_pu8(ms[0], ms[1]);
1057 }
1058 
1059 // CHECK-LABEL: @test_max
1060 
1061 // CHECK: define available_externally <4 x float> @_mm_max_ps(<4 x float> [[REG511:[0-9a-zA-Z_%.]+]], <4 x float> [[REG512:[0-9a-zA-Z_%.]+]])
1062 // CHECK: store <4 x float> [[REG511]], <4 x float>* [[REG513:[0-9a-zA-Z_%.]+]], align 16
1063 // CHECK-NEXT: store <4 x float> [[REG512]], <4 x float>* [[REG514:[0-9a-zA-Z_%.]+]], align 16
1064 // CHECK-NEXT: [[REG515:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
1065 // CHECK-NEXT: [[REG516:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
1066 // CHECK-NEXT: [[REG517:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG515]], <4 x float> [[REG516]])
1067 // CHECK-NEXT: store <4 x i32> [[REG517]], <4 x i32>* [[REG518:[0-9a-zA-Z_%.]+]], align 16
1068 // CHECK-NEXT: [[REG519:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG514]], align 16
1069 // CHECK-NEXT: [[REG520:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG513]], align 16
1070 // CHECK-NEXT: [[REG521:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG518]], align 16
1071 // CHECK-NEXT: [[REG522:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> [[REG519]], <4 x float> [[REG520]], <4 x i32> [[REG521]])
1072 // CHECK-NEXT: ret <4 x float> [[REG522]]
1073 
1074 // CHECK: define available_externally <4 x float> @_mm_max_ss
1075 // CHECK: [[REG523:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
1076 // CHECK-NEXT: store <4 x float> [[REG523]], <4 x float>* [[REG524:[0-9a-zA-Z_%.]+]], align 16
1077 // CHECK: [[REG525:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
1078 // CHECK-NEXT: store <4 x float> [[REG525]], <4 x float>* [[REG526:[0-9a-zA-Z_%.]+]], align 16
1079 // CHECK-NEXT: [[REG527:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG524]], align 16
1080 // CHECK-NEXT: [[REG528:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG526]], align 16
1081 // CHECK-NEXT: call <4 x float> @vec_max(float vector[4], float vector[4])(<4 x float> [[REG527]], <4 x float> [[REG528]])
1082 // CHECK: [[REG529:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
1083 // CHECK-NEXT: ret <4 x float> [[REG529]]
1084 
1085 // CHECK: define available_externally i64 @_mm_max_pi16
1086 // CHECK: [[REG530:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1087 // CHECK-NEXT: [[REG531:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG530]])
1088 // CHECK-NEXT: [[REG532:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG531]] to <8 x i16>
1089 // CHECK-NEXT: store <8 x i16> [[REG532]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1090 // CHECK-NEXT: [[REG533:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1091 // CHECK-NEXT: [[REG534:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG533]])
1092 // CHECK-NEXT: [[REG535:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG534]] to <8 x i16>
1093 // CHECK-NEXT: store <8 x i16> [[REG535]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1094 // CHECK-NEXT: [[REG536:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1095 // CHECK-NEXT: [[REG537:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1096 // CHECK-NEXT: [[REG538:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmpgt(short vector[8], short vector[8])(<8 x i16> [[REG536]], <8 x i16> [[REG537]])
1097 // CHECK-NEXT: store <8 x i16> [[REG538]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1098 // CHECK-NEXT: [[REG539:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1099 // CHECK-NEXT: [[REG540:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1100 // CHECK-NEXT: [[REG541:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1101 // CHECK-NEXT: [[REG542:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> [[REG539]], <8 x i16> [[REG540]], <8 x i16> [[REG541]])
1102 // CHECK-NEXT: store <8 x i16> [[REG542]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1103 // CHECK-NEXT: [[REG543:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1104 // CHECK-NEXT: [[REG544:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG543]] to <2 x i64>
1105 // CHECK-NEXT: [[REG545:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG544]], i32 0
1106 // CHECK-NEXT: ret i64 [[REG545]]
1107 
1108 // CHECK: define available_externally i64 @_mm_max_pu8
1109 // CHECK: [[REG546:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1110 // CHECK-NEXT: [[REG547:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG546]])
1111 // CHECK-NEXT: [[REG548:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG547]] to <16 x i8>
1112 // CHECK-NEXT: store <16 x i8> [[REG548]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1113 // CHECK-NEXT: [[REG549:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1114 // CHECK-NEXT: [[REG550:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG549]])
1115 // CHECK-NEXT: [[REG551:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG550]] to <16 x i8>
1116 // CHECK-NEXT: store <16 x i8> [[REG551]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1117 // CHECK-NEXT: [[REG552:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1118 // CHECK-NEXT: [[REG553:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1119 // CHECK-NEXT: [[REG554:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmpgt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG552]], <16 x i8> [[REG553]])
1120 // CHECK-NEXT: store <16 x i8> [[REG554]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1121 // CHECK-NEXT: [[REG555:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1122 // CHECK-NEXT: [[REG556:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1123 // CHECK-NEXT: [[REG557:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1124 // CHECK-NEXT: [[REG558:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG555]], <16 x i8> [[REG556]], <16 x i8> [[REG557]])
1125 // CHECK-NEXT: store <16 x i8> [[REG558]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1126 // CHECK-NEXT: [[REG559:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1127 // CHECK-NEXT: [[REG560:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG559]] to <2 x i64>
1128 // CHECK-NEXT: [[REG561:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG560]], i32 0
1129 // CHECK-NEXT: ret i64 [[REG561]]
1130 
1131 void __attribute__((noinline))
test_alt_name_max()1132 test_alt_name_max() {
1133   res64 = _m_pmaxsw(ms[0], ms[1]);
1134   res64 = _m_pmaxub(ms[0], ms[1]);
1135 }
1136 
1137 // CHECK-LABEL: @test_alt_name_max
1138 
1139 // CHECK: define available_externally i64 @_m_pmaxsw
1140 // CHECK: [[REG562:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pi16
1141 // CHECK-NEXT: ret i64 [[REG562]]
1142 
1143 // CHECK: define available_externally i64 @_m_pmaxub
1144 // CHECK: [[REG563:[0-9a-zA-Z_%.]+]] = call i64 @_mm_max_pu8
1145 // CHECK-NEXT: ret i64 [[REG563]]
1146 
1147 void __attribute__((noinline))
test_min()1148 test_min() {
1149   res = _mm_min_ps(m1, m2);
1150   res = _mm_min_ss(m1, m2);
1151   res64 = _mm_min_pi16(ms[0], ms[1]);
1152   res64 = _mm_min_pu8(ms[0], ms[1]);
1153 }
1154 
1155 // CHECK-LABEL: @test_min
1156 
1157 // CHECK: define available_externally <4 x float> @_mm_min_ps(<4 x float> [[REG517:[0-9a-zA-Z_%.]+]], <4 x float> [[REG518:[0-9a-zA-Z_%.]+]])
1158 // CHECK: store <4 x float> [[REG517]], <4 x float>* [[REG564:[0-9a-zA-Z_%.]+]], align 16
1159 // CHECK-NEXT: store <4 x float> [[REG518]], <4 x float>* [[REG565:[0-9a-zA-Z_%.]+]], align 16
1160 // CHECK-NEXT: [[REG566:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
1161 // CHECK-NEXT: [[REG567:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
1162 // CHECK-NEXT: [[REG568:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_cmpgt(float vector[4], float vector[4])(<4 x float> [[REG566]], <4 x float> [[REG567]])
1163 // CHECK-NEXT: store <4 x i32> [[REG568]], <4 x i32>* [[REG569:[0-9a-zA-Z_%.]+]], align 16
1164 // CHECK-NEXT: [[REG570:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG565]], align 16
1165 // CHECK-NEXT: [[REG571:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG564]], align 16
1166 // CHECK-NEXT: [[REG572:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* [[REG569]], align 16
1167 // CHECK-NEXT: [[REG573:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], bool vector[4])(<4 x float> [[REG570]], <4 x float> [[REG571]], <4 x i32> [[REG572]])
1168 // CHECK-NEXT: ret <4 x float> [[REG573]]
1169 
1170 // CHECK: define available_externally <4 x float> @_mm_min_ss
1171 // CHECK: [[REG574:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
1172 // CHECK-NEXT: store <4 x float> [[REG574]], <4 x float>* [[REG575:[0-9a-zA-Z_%.]+]], align 16
1173 // CHECK: [[REG576:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, {{i32|i32 zeroext}} 0)
1174 // CHECK-NEXT: store <4 x float> [[REG576]], <4 x float>* [[REG577:[0-9a-zA-Z_%.]+]], align 16
1175 // CHECK-NEXT: [[REG578:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG575]], align 16
1176 // CHECK-NEXT: [[REG579:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG577]], align 16
1177 // CHECK-NEXT: call <4 x float> @vec_min(float vector[4], float vector[4])(<4 x float> [[REG578]], <4 x float> [[REG579]])
1178 // CHECK: [[REG580:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
1179 // CHECK-NEXT: ret <4 x float> [[REG580]]
1180 
1181 // CHECK: define available_externally i64 @_mm_min_pi16
1182 // CHECK: [[REG581:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1183 // CHECK-NEXT: [[REG582:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG581]])
1184 // CHECK-NEXT: [[REG583:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG582]] to <8 x i16>
1185 // CHECK-NEXT: store <8 x i16> [[REG583]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1186 // CHECK-NEXT: [[REG584:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1187 // CHECK-NEXT: [[REG585:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG584]])
1188 // CHECK-NEXT: [[REG586:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG585]] to <8 x i16>
1189 // CHECK-NEXT: store <8 x i16> [[REG586]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1190 // CHECK-NEXT: [[REG587:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1191 // CHECK-NEXT: [[REG588:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1192 // CHECK-NEXT: [[REG589:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_cmplt(short vector[8], short vector[8])(<8 x i16> [[REG587]], <8 x i16> [[REG588]])
1193 // CHECK-NEXT: store <8 x i16> [[REG589]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1194 // CHECK-NEXT: [[REG590:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1195 // CHECK-NEXT: [[REG591:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1196 // CHECK-NEXT: [[REG592:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1197 // CHECK-NEXT: [[REG593:[0-9a-zA-Z_%.]+]] = call <8 x i16> @vec_sel(short vector[8], short vector[8], bool vector[8])(<8 x i16> [[REG590]], <8 x i16> [[REG591]], <8 x i16> [[REG592]])
1198 // CHECK-NEXT: store <8 x i16> [[REG593]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1199 // CHECK-NEXT: [[REG594:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1200 // CHECK-NEXT: [[REG595:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG594]] to <2 x i64>
1201 // CHECK-NEXT: [[REG596:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG595]], i32 0
1202 // CHECK-NEXT: ret i64 [[REG596]]
1203 
1204 // CHECK: define available_externally i64 @_mm_min_pu8
1205 // CHECK: [[REG597:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1206 // CHECK-NEXT: [[REG598:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG597]])
1207 // CHECK-NEXT: [[REG599:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG598]] to <16 x i8>
1208 // CHECK-NEXT: store <16 x i8> [[REG599]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1209 // CHECK-NEXT: [[REG600:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1210 // CHECK-NEXT: [[REG601:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG600]])
1211 // CHECK-NEXT: [[REG602:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG601]] to <16 x i8>
1212 // CHECK-NEXT: store <16 x i8> [[REG602]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1213 // CHECK-NEXT: [[REG603:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1214 // CHECK-NEXT: [[REG604:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1215 // CHECK-NEXT: [[REG605:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_cmplt(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG603]], <16 x i8> [[REG604]])
1216 // CHECK-NEXT: store <16 x i8> [[REG605]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1217 // CHECK-NEXT: [[REG606:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1218 // CHECK-NEXT: [[REG607:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1219 // CHECK-NEXT: [[REG608:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1220 // CHECK-NEXT: [[REG609:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sel(unsigned char vector[16], unsigned char vector[16], bool vector[16])(<16 x i8> [[REG606]], <16 x i8> [[REG607]], <16 x i8> [[REG608]])
1221 // CHECK-NEXT: store <16 x i8> [[REG609]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1222 // CHECK-NEXT: [[REG610:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1223 // CHECK-NEXT: [[REG611:[0-9a-zA-Z_%.]+]] = bitcast <16 x i8> [[REG610]] to <2 x i64>
1224 // CHECK-NEXT: [[REG612:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG611]], i32 0
1225 // CHECK-NEXT: ret i64 [[REG612]]
1226 
1227 void __attribute__((noinline))
test_alt_name_min()1228 test_alt_name_min() {
1229   res64 = _m_pminsw(ms[0], ms[1]);
1230   res64 = _m_pminub(ms[0], ms[1]);
1231 }
1232 
1233 // CHECK-LABEL: @test_alt_name_min
1234 
1235 // CHECK: define available_externally i64 @_m_pminsw
1236 // CHECK: [[REG613:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pi16
1237 // CHECK-NEXT: ret i64 [[REG613]]
1238 
1239 // CHECK: define available_externally i64 @_m_pminub
1240 // CHECK: [[REG614:[0-9a-zA-Z_%.]+]] = call i64 @_mm_min_pu8
1241 // CHECK-NEXT: ret i64 [[REG614]]
1242 
1243 void __attribute__((noinline))
test_move()1244 test_move() {
1245   _mm_maskmove_si64(ms[0], ms[1], (char *)&res64);
1246   res = _mm_move_ss(m1, m2);
1247   res = _mm_movehl_ps(m1, m2);
1248   res = _mm_movelh_ps(m1, m2);
1249   i = _mm_movemask_pi8(ms[0]);
1250   i = _mm_movemask_ps(m1);
1251 }
1252 
1253 // CHECK-LABEL: @test_move
1254 
1255 // CHECK: define available_externally void @_mm_maskmove_si64
1256 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1257 // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1258 // CHECK-NEXT: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
1259 // CHECK-NEXT: store i64 -9187201950435737472, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1260 // CHECK-NEXT: [[REG615:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
1261 // CHECK-NEXT: [[REG616:[0-9a-zA-Z_%.]+]] = bitcast i8* [[REG615]] to i64*
1262 // CHECK-NEXT: store i64* [[REG616]], i64** {{[0-9a-zA-Z_%.]+}}, align 8
1263 // CHECK-NEXT: [[REG617:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
1264 // CHECK-NEXT: [[REG618:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG617]], align 8
1265 // CHECK-NEXT: store i64 [[REG618]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1266 // CHECK-NEXT: [[REG619:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1267 // CHECK-NEXT: [[REG620:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1268 // CHECK-NEXT: [[REG621:[0-9a-zA-Z_%.]+]] = and i64 [[REG619]], [[REG620]]
1269 // CHECK-NEXT: [[REG622:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1270 // CHECK-NEXT: [[REG623:[0-9a-zA-Z_%.]+]] = call i64 @_mm_cmpeq_pi8(i64 [[REG621]], i64 [[REG622]])
1271 // CHECK-NEXT: store i64 [[REG623]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1272 // CHECK-NEXT: [[REG624:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1273 // CHECK-NEXT: [[REG625:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1274 // CHECK-NEXT: [[REG626:[0-9a-zA-Z_%.]+]] = xor i64 [[REG625]], -1
1275 // CHECK-NEXT: [[REG627:[0-9a-zA-Z_%.]+]] = and i64 [[REG624]], [[REG626]]
1276 // CHECK-NEXT: [[REG628:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1277 // CHECK-NEXT: [[REG629:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1278 // CHECK-NEXT: [[REG630:[0-9a-zA-Z_%.]+]] = and i64 [[REG628]], [[REG629]]
1279 // CHECK-NEXT: [[REG631:[0-9a-zA-Z_%.]+]] = or i64 [[REG627]], [[REG630]]
1280 // CHECK-NEXT: store i64 [[REG631]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1281 // CHECK-NEXT: [[REG632:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1282 // CHECK-NEXT: [[REG633:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
1283 // CHECK-NEXT: store i64 [[REG632]], i64* [[REG633]], align 8
1284 // CHECK-NEXT: ret void
1285 
1286 // CHECK: define available_externally <4 x float> @_mm_move_ss
1287 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1288 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1289 // CHECK-NEXT: [[REG634:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1290 // CHECK-NEXT: [[REG635:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1291 // CHECK-NEXT: [[REG636:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG634]], <4 x float> [[REG635]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
1292 // CHECK-NEXT: ret <4 x float> [[REG636]]
1293 
1294 // CHECK: define available_externally <4 x float> @_mm_movehl_ps
1295 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1296 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1297 // CHECK-NEXT: [[REG637:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1298 // CHECK-NEXT: [[REG638:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG637]] to <2 x i64>
1299 // CHECK-NEXT: [[REG639:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1300 // CHECK-NEXT: [[REG640:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG639]] to <2 x i64>
1301 // CHECK-NEXT: [[REG641:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG638]], <2 x i64> [[REG640]])
1302 // CHECK-NEXT: [[REG642:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG641]] to <4 x float>
1303 // CHECK-NEXT: ret <4 x float> [[REG642]]
1304 
1305 // CHECK: define available_externally <4 x float> @_mm_movelh_ps
1306 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1307 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1308 // CHECK-NEXT: [[REG643:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1309 // CHECK-NEXT: [[REG644:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG643]] to <2 x i64>
1310 // CHECK-NEXT: [[REG645:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1311 // CHECK-NEXT: [[REG646:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG645]] to <2 x i64>
1312 // CHECK-NEXT: [[REG647:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> [[REG644]], <2 x i64> [[REG646]])
1313 // CHECK-NEXT: [[REG648:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG647]] to <4 x float>
1314 // CHECK-NEXT: ret <4 x float> [[REG648]]
1315 
1316 // CHECK: define available_externally signext i32 @_mm_movemask_pi8
1317 // CHECK: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1318 // CHECK-LE-NEXT: store i64 2269495618449464, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1319 // CHECK-BE-NEXT: store i64 4048780183313844224, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1320 // CHECK-NEXT: [[REG649:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1321 // CHECK-NEXT: [[REG650:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1322 // CHECK-NEXT: [[REG651:[0-9a-zA-Z_%.]+]] = call i64 @llvm.ppc.bpermd(i64 [[REG649]], i64 [[REG650]])
1323 // CHECK-NEXT: [[REG652:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG651]] to i32
1324 // CHECK-NEXT: ret i32 [[REG652]]
1325 
1326 // CHECK: define available_externally signext i32 @_mm_movemask_ps
1327 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1328 // CHECK-NEXT: [[REG653:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1329 // CHECK-NEXT: [[REG654:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG653]] to <16 x i8>
1330 // CHECK-LE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG654]], <16 x i8> bitcast (<4 x i32> <i32 2113632, i32 -2139062144, i32 -2139062144, i32 -2139062144> to <16 x i8>))
1331 // CHECK-BE-NEXT: [[REG655:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_vbpermq(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG654]], <16 x i8> bitcast (<4 x i32> <i32 -2139062144, i32 -2139062144, i32 -2139062144, i32 2113632> to <16 x i8>))
1332 // CHECK-NEXT: store <2 x i64> [[REG655]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1333 // CHECK-NEXT: [[REG656:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1334 // CHECK-LE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 1
1335 // CHECK-BE-NEXT: [[REG657:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG656]], i32 0
1336 // CHECK-NEXT: [[REG658:[0-9a-zA-Z_%.]+]] = trunc i64 [[REG657]] to i32
1337 // CHECK-NEXT: ret i32 [[REG658]]
1338 
1339 void __attribute__((noinline))
test_alt_name_move()1340 test_alt_name_move() {
1341   i = _m_pmovmskb(ms[0]);
1342   _m_maskmovq(ms[0], ms[1], (char *)&res64);
1343 }
1344 
1345 // CHECK-LABEL: @test_alt_name_move
1346 
1347 // CHECK: define available_externally signext i32 @_m_pmovmskb
1348 // CHECK: [[REG659:[0-9a-zA-Z_%.]+]] = call signext i32 @_mm_movemask_pi8
1349 // CHECK-NEXT: ret i32 [[REG659]]
1350 
1351 // CHECK: define available_externally void @_m_maskmovq
1352 // CHECK: call void @_mm_maskmove_si64
1353 // CHECK-NEXT: ret void
1354 
1355 void __attribute__((noinline))
test_mul()1356 test_mul() {
1357   res = _mm_mul_ps(m1, m2);
1358   res = _mm_mul_ss(m1, m2);
1359   res64 = _mm_mulhi_pu16(ms[0], ms[1]);
1360   res64 = _m_pmulhuw(ms[0], ms[1]);
1361 }
1362 
1363 // CHECK-LABEL: @test_mul
1364 
1365 // CHECK: define available_externally <4 x float> @_mm_mul_ps(<4 x float> [[REG660:[0-9a-zA-Z_%.]+]], <4 x float> [[REG661:[0-9a-zA-Z_%.]+]])
1366 // CHECK: store <4 x float> [[REG660]], <4 x float>* [[REG662:[0-9a-zA-Z_%.]+]], align 16
1367 // CHECK-NEXT: store <4 x float> [[REG661]], <4 x float>* [[REG663:[0-9a-zA-Z_%.]+]], align 16
1368 // CHECK-NEXT: [[REG664:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG662]], align 16
1369 // CHECK-NEXT: [[REG665:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG663]], align 16
1370 // CHECK-NEXT: [[REG666:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG664]], [[REG665]]
1371 // CHECK-NEXT: ret <4 x float> [[REG666]]
1372 
1373 // CHECK: define available_externally <4 x float> @_mm_mul_ss(<4 x float> [[REG667:[0-9a-zA-Z_%.]+]], <4 x float> [[REG668:[0-9a-zA-Z_%.]+]])
1374 // CHECK: store <4 x float> [[REG667]], <4 x float>* [[REG669:[0-9a-zA-Z_%.]+]], align 16
1375 // CHECK-NEXT: store <4 x float> [[REG668]], <4 x float>* [[REG670:[0-9a-zA-Z_%.]+]], align 16
1376 // CHECK-NEXT: [[REG671:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
1377 // CHECK-NEXT: [[REG672:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG671]], i32 zeroext 0)
1378 // CHECK-NEXT: store <4 x float> [[REG672]], <4 x float>* [[REG673:[0-9a-zA-Z_%.]+]], align 16
1379 // CHECK-NEXT: [[REG674:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG670]], align 16
1380 // CHECK-NEXT: [[REG675:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG674]], i32 zeroext 0)
1381 // CHECK-NEXT: store <4 x float> [[REG675]], <4 x float>* [[REG676:[0-9a-zA-Z_%.]+]], align 16
1382 // CHECK-NEXT: [[REG677:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG673]], align 16
1383 // CHECK-NEXT: [[REG678:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG676]], align 16
1384 // CHECK-NEXT: [[REG679:[0-9a-zA-Z_%.]+]] = fmul <4 x float> [[REG677]], [[REG678]]
1385 // CHECK-NEXT: store <4 x float> [[REG679]], <4 x float>* [[REG680:[0-9a-zA-Z_%.]+]], align 16
1386 // CHECK-NEXT: [[REG681:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG669]], align 16
1387 // CHECK-NEXT: [[REG682:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG680]], align 16
1388 // CHECK-NEXT: [[REG683:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG681]], <4 x float> [[REG682]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
1389 // CHECK-NEXT: ret <4 x float> [[REG683]]
1390 
1391 // CHECK: define available_externally i64 @_mm_mulhi_pu16(i64 [[REG684:[0-9a-zA-Z_%.]+]], i64 [[REG685:[0-9a-zA-Z_%.]+]])
1392 // CHECK: store i64 [[REG684]], i64* [[REG686:[0-9a-zA-Z_%.]+]], align 8
1393 // CHECK-NEXT: store i64 [[REG685]], i64* [[REG687:[0-9a-zA-Z_%.]+]], align 8
1394 // CHECK-LE-NEXT: store <16 x i8> <i8 2, i8 3, i8 18, i8 19, i8 6, i8 7, i8 22, i8 23, i8 10, i8 11, i8 26, i8 27, i8 14, i8 15, i8 30, i8 31>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1395 // CHECK-BE-NEXT: store <16 x i8> <i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21, i8 0, i8 1, i8 16, i8 17, i8 4, i8 5, i8 20, i8 21>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1396 // CHECK-NEXT: [[REG688:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG686]], align 8
1397 // CHECK-NEXT: [[REG689:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG688]])
1398 // CHECK-NEXT: [[REG690:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG689]] to <8 x i16>
1399 // CHECK-NEXT: store <8 x i16> [[REG690]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1400 // CHECK-NEXT: [[REG691:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG687]], align 8
1401 // CHECK-NEXT: [[REG692:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG691]])
1402 // CHECK-NEXT: [[REG693:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG692]] to <8 x i16>
1403 // CHECK-NEXT: store <8 x i16> [[REG693]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1404 // CHECK-NEXT: [[REG694:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1405 // CHECK-NEXT: [[REG695:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1406 // CHECK-NEXT: [[REG696:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmuleuh(<8 x i16> [[REG694]], <8 x i16> [[REG695]])
1407 // CHECK-NEXT: store <4 x i32> [[REG696]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1408 // CHECK-NEXT: [[REG697:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1409 // CHECK-NEXT: [[REG698:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1410 // CHECK-NEXT: [[REG699:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_vmulouh(<8 x i16> [[REG697]], <8 x i16> [[REG698]])
1411 // CHECK-NEXT: store <4 x i32> [[REG699]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1412 // CHECK-NEXT: [[REG700:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1413 // CHECK-NEXT: [[REG701:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1414 // CHECK-NEXT: [[REG702:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1415 // CHECK-NEXT: [[REG703:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_perm(unsigned int vector[4], unsigned int vector[4], unsigned char vector[16])(<4 x i32> [[REG700]], <4 x i32> [[REG701]], <16 x i8> [[REG702]])
1416 // CHECK-NEXT: [[REG704:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG703]] to <8 x i16>
1417 // CHECK-NEXT: store <8 x i16> [[REG704]], <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1418 // CHECK-NEXT: [[REG705:[0-9a-zA-Z_%.]+]] = load <8 x i16>, <8 x i16>* {{[0-9a-zA-Z_%.]+}}, align 16
1419 // CHECK-NEXT: [[REG706:[0-9a-zA-Z_%.]+]] = bitcast <8 x i16> [[REG705]] to <2 x i64>
1420 // CHECK-NEXT: [[REG707:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG706]], i32 0
1421 // CHECK-NEXT: ret i64 [[REG707]]
1422 
1423 // CHECK: define available_externally i64 @_m_pmulhuw(i64 [[REG708:[0-9a-zA-Z_%.]+]], i64 [[REG709:[0-9a-zA-Z_%.]+]])
1424 // CHECK: store i64 [[REG708]], i64* [[REG710:[0-9a-zA-Z_%.]+]], align 8
1425 // CHECK-NEXT: store i64 [[REG709]], i64* [[REG711:[0-9a-zA-Z_%.]+]], align 8
1426 // CHECK-NEXT: [[REG712:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG710]], align 8
1427 // CHECK-NEXT: [[REG713:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG711]], align 8
1428 // CHECK-NEXT: [[REG714:[0-9a-zA-Z_%.]+]] = call i64 @_mm_mulhi_pu16(i64 [[REG712]], i64 [[REG713]])
1429 // CHECK-NEXT: ret i64 [[REG714]]
1430 
1431 void __attribute__((noinline))
test_prefetch()1432 test_prefetch() {
1433   _mm_prefetch(ms, _MM_HINT_NTA);
1434 }
1435 
1436 // CHECK-LABEL: @test_prefetch
1437 
1438 // CHECK: define available_externally void @_mm_prefetch
1439 // CHECK: store i8* {{[0-9a-zA-Z_%.]+}}, i8** {{[0-9a-zA-Z_%.]+}}, align 8
1440 // CHECK-NEXT: store i32 {{[0-9a-zA-Z_%.]+}}, i32* {{[0-9a-zA-Z_%.]+}}, align 4
1441 // CHECK-NEXT: [[REG715:[0-9a-zA-Z_%.]+]] = load i8*, i8** {{[0-9a-zA-Z_%.]+}}, align 8
1442 // CHECK-NEXT: call void @llvm.prefetch.p0i8(i8* [[REG715]], i32 0, i32 3, i32 1)
1443 // CHECK-NEXT: ret void
1444 
1445 void __attribute__((noinline))
test_rcp()1446 test_rcp() {
1447   res = _mm_rcp_ps(m1);
1448   res = _mm_rcp_ss(m1);
1449 }
1450 
1451 // CHECK-LABEL: @test_rcp
1452 
1453 // CHECK: define available_externally <4 x float> @_mm_rcp_ps
1454 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1455 // CHECK-NEXT: [[REG716:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1456 // CHECK-NEXT: [[REG717:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_re(float vector[4])(<4 x float> [[REG716]])
1457 // CHECK-NEXT: ret <4 x float> [[REG717]]
1458 
1459 // CHECK: define available_externally <4 x float> @_mm_rcp_ss
1460 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1461 // CHECK-NEXT: [[REG718:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1462 // CHECK-NEXT: [[REG719:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG718]], i32 zeroext 0)
1463 // CHECK-NEXT: store <4 x float> [[REG719]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1464 // CHECK-NEXT: [[REG720:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1465 // CHECK-NEXT: [[REG721:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_rcp_ps(<4 x float> [[REG720]])
1466 // CHECK-NEXT: store <4 x float> [[REG721]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1467 // CHECK-NEXT: [[REG722:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1468 // CHECK-NEXT: [[REG723:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1469 // CHECK-NEXT: [[REG724:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG722]], <4 x float> [[REG723]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
1470 // CHECK-NEXT: ret <4 x float> [[REG724]]
1471 
1472 void __attribute__((noinline))
test_rsqrt()1473 test_rsqrt() {
1474   res = _mm_rsqrt_ps(m1);
1475   res = _mm_rsqrt_ss(m1);
1476 }
1477 
1478 // CHECK-LABEL: @test_rsqrt
1479 
1480 // CHECK: define available_externally <4 x float> @_mm_rsqrt_ps
1481 // CHECK: [[REG725:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
1482 // CHECK-NEXT: ret <4 x float> [[REG725]]
1483 
1484 // CHECK: define available_externally <4 x float> @_mm_rsqrt_ss
1485 // CHECK: [[REG726:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> {{[0-9a-zA-Z_%.]+}}, i32 zeroext 0)
1486 // CHECK-NEXT: store <4 x float> [[REG726]], <4 x float>* [[REG727:[0-9a-zA-Z_%.]+]], align 16
1487 // CHECK-NEXT: [[REG728:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG727]], align 16
1488 // CHECK-NEXT: [[REG729:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_rsqrte(float vector[4])(<4 x float> [[REG728]])
1489 // CHECK-NEXT: store <4 x float> [[REG729]], <4 x float>* [[REG730:[0-9a-zA_Z_%.]+]], align 16
1490 // CHECK-NEXT: [[REG731:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1491 // CHECK-NEXT: [[REG732:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG730]], align 16
1492 // CHECK-NEXT: [[REG733:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG731]], <4 x float> [[REG732]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
1493 // CHECK-NEXT: ret <4 x float> [[REG733]]
1494 
1495 void __attribute__((noinline))
test_sad()1496 test_sad() {
1497   res64 = _mm_sad_pu8(ms[0], ms[1]);
1498   res64 = _m_psadbw(ms[0], ms[1]);
1499 }
1500 
1501 // CHECK-LABEL: @test_sad
1502 
1503 // CHECK: define available_externally i64 @_mm_sad_pu8(i64 [[REG734:[0-9a-zA-Z_%.]+]], i64 [[REG735:[0-9a-zA-Z_%.]+]])
1504 // CHECK: store i64 [[REG734]], i64* [[REG736:[0-9a-zA-Z_%.]+]], align 8
1505 // CHECK-NEXT: store i64 [[REG735]], i64* [[REG737:[0-9a-zA-Z_%.]+]], align 8
1506 // CHECK-NEXT: store <4 x i32> zeroinitializer, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1507 // CHECK-NEXT: [[REG738:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i8*
1508 // CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[REG738]], i8 0, i64 8, i1 false)
1509 // CHECK-NEXT: [[REG739:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG736]], align 8
1510 // CHECK-NEXT: [[REG740:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG739]], i32 1
1511 // CHECK-NEXT: store <2 x i64> [[REG740]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1512 // CHECK-NEXT: [[REG741:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1513 // CHECK-NEXT: [[REG742:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG741]] to <16 x i8>
1514 // CHECK-NEXT: store <16 x i8> [[REG742]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1515 // CHECK-NEXT: [[REG743:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG737]], align 8
1516 // CHECK-NEXT: [[REG744:[0-9a-zA-Z_%.]+]] = insertelement <2 x i64> <i64 0, i64 undef>, i64 [[REG743]], i32 1
1517 // CHECK-NEXT: store <2 x i64> [[REG744]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1518 // CHECK-NEXT: [[REG745:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1519 // CHECK-NEXT: [[REG746:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG745]] to <16 x i8>
1520 // CHECK-NEXT: store <16 x i8> [[REG746]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1521 // CHECK-NEXT: [[REG747:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1522 // CHECK-NEXT: [[REG748:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1523 // CHECK-NEXT: [[REG749:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_min(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG747]], <16 x i8> [[REG748]])
1524 // CHECK-NEXT: store <16 x i8> [[REG749]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1525 // CHECK-NEXT: [[REG750:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1526 // CHECK-NEXT: [[REG751:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1527 // CHECK-NEXT: [[REG752:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_max(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG750]], <16 x i8> [[REG751]])
1528 // CHECK-NEXT: store <16 x i8> [[REG752]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1529 // CHECK-NEXT: [[REG753:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1530 // CHECK-NEXT: [[REG754:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1531 // CHECK-NEXT: [[REG755:[0-9a-zA-Z_%.]+]] = call <16 x i8> @vec_sub(unsigned char vector[16], unsigned char vector[16])(<16 x i8> [[REG753]], <16 x i8> [[REG754]])
1532 // CHECK-NEXT: store <16 x i8> [[REG755]], <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1533 // CHECK-NEXT: [[REG756:[0-9a-zA-Z_%.]+]] = load <16 x i8>, <16 x i8>* {{[0-9a-zA-Z_%.]+}}, align 16
1534 // CHECK-NEXT: [[REG757:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sum4s(unsigned char vector[16], unsigned int vector[4])(<16 x i8> [[REG756]], <4 x i32> zeroinitializer)
1535 // CHECK-NEXT: store <4 x i32> [[REG757]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1536 // CHECK-NEXT: [[REG758:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1537 // CHECK-NEXT: [[REG759:[0-9a-zA-Z_%.]+]] = call <4 x i32> @vec_sums(<4 x i32> [[REG758]], <4 x i32> zeroinitializer)
1538 // CHECK-NEXT: store <4 x i32> [[REG759]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1539 // CHECK-NEXT: [[REG760:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1540 // CHECK-NEXT: [[REG761:[0-9a-zA-Z_%.]+]] = extractelement <4 x i32> [[REG760]], i32 3
1541 // CHECK-NEXT: [[REG762:[0-9a-zA-Z_%.]+]] = trunc i32 [[REG761]] to i16
1542 // CHECK-NEXT: [[REG763:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
1543 // CHECK-NEXT: [[REG764:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG763]], i64 0, i64 0
1544 // CHECK-NEXT: store i16 [[REG762]], i16* [[REG764]], align 8
1545 // CHECK-NEXT: [[REG765:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
1546 // CHECK-NEXT: [[REG766:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG765]], align 8
1547 // CHECK-NEXT: ret i64 [[REG766]]
1548 
1549 // CHECK: define available_externally i64 @_m_psadbw
1550 // CHECK: [[REG767:[0-9a-zA-Z_%.]+]] = call i64 @_mm_sad_pu8
1551 // CHECK-NEXT: ret i64 [[REG767]]
1552 
1553 void __attribute__((noinline))
test_set()1554 test_set() {
1555   res = _mm_set_ps(fs[0], fs[1], fs[2], fs[3]);
1556   res = _mm_set_ps1(fs[0]);
1557   res = _mm_set_ss(fs[0]);
1558   res = _mm_set1_ps(fs[0]);
1559   res = _mm_setr_ps(fs[0], fs[1], fs[2], fs[3]);
1560 }
1561 
1562 // CHECK-LABEL: @test_set
1563 
1564 // CHECK: define available_externally <4 x float> @_mm_set_ps(float [[REG768:[0-9a-zA-Z_%.]+]], float [[REG769:[0-9a-zA-Z_%.]+]], float [[REG770:[0-9a-zA-Z_%.]+]], float [[REG771:[0-9a-zA-Z_%.]+]])
1565 // CHECK: store float [[REG768]], float* [[REG772:[0-9a-zA-Z_%.]+]], align 4
1566 // CHECK-NEXT: store float [[REG769]], float* [[REG773:[0-9a-zA-Z_%.]+]], align 4
1567 // CHECK-NEXT: store float [[REG770]], float* [[REG774:[0-9a-zA-Z_%.]+]], align 4
1568 // CHECK-NEXT: store float [[REG771]], float* [[REG775:[0-9a-zA-Z_%.]+]], align 4
1569 // CHECK-NEXT: [[REG776:[0-9a-zA-Z_%.]+]] = load float, float* [[REG775]], align 4
1570 // CHECK-NEXT: [[REG777:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG776]], i32 0
1571 // CHECK-NEXT: [[REG778:[0-9a-zA-Z_%.]+]] = load float, float* [[REG774]], align 4
1572 // CHECK-NEXT: [[REG779:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG777]], float [[REG778]], i32 1
1573 // CHECK-NEXT: [[REG780:[0-9a-zA-Z_%.]+]] = load float, float* [[REG773]], align 4
1574 // CHECK-NEXT: [[REG781:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG779]], float [[REG780]], i32 2
1575 // CHECK-NEXT: [[REG782:[0-9a-zA-Z_%.]+]] = load float, float* [[REG772]], align 4
1576 // CHECK-NEXT: [[REG783:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG781]], float [[REG782]], i32 3
1577 // CHECK-NEXT: store <4 x float> [[REG783]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1578 // CHECK-NEXT: [[REG784:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1579 // CHECK-NEXT: ret <4 x float> [[REG784]]
1580 
1581 // CHECK: define available_externally <4 x float> @_mm_set_ps1(float [[REG785:[0-9a-zA-Z_%.]+]])
1582 // CHECK: store float [[REG785]], float* [[REG786:[0-9a-zA-Z_%.]+]], align 4
1583 // CHECK-NEXT: [[REG787:[0-9a-zA-Z_%.]+]] = load float, float* [[REG786]], align 4
1584 // CHECK-NEXT: [[REG788:[0-9a-zA-Z_%.]+]] = call <4 x float> @_mm_set1_ps(float [[REG787]])
1585 // CHECK-NEXT: ret <4 x float> [[REG788]]
1586 
1587 // CHECK: define available_externally <4 x float> @_mm_set_ss(float [[REG789:[0-9a-zA-Z_%.]+]])
1588 // CHECK: store float [[REG789:[0-9a-zA-Z_%.]+]], float* [[REG790:[0-9a-zA-Z_%.]+]], align 4
1589 // CHECK-NEXT: [[REG791:[0-9a-zA-Z_%.]+]] = load float, float* [[REG790]], align 4
1590 // CHECK-NEXT: [[REG792:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG791]], i32 0
1591 // CHECK-NEXT: [[REG793:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG792]], float 0.000000e+00, i32 1
1592 // CHECK-NEXT: [[REG794:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG793]], float 0.000000e+00, i32 2
1593 // CHECK-NEXT: [[REG795:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG794]], float 0.000000e+00, i32 3
1594 // CHECK-NEXT: store <4 x float> [[REG795]], <4 x float>* [[REG796:[0-9a-zA-Z_%.]+]], align 16
1595 // CHECK-NEXT: [[REG797:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG796]], align 16
1596 // CHECK-NEXT: ret <4 x float> [[REG797]]
1597 
1598 // CHECK: define available_externally <4 x float> @_mm_set1_ps(float [[REG798:[0-9a-zA-Z_%.]+]])
1599 // CHECK: store float [[REG798]], float* [[REG799:[0-9a-zA-Z_%.]+]], align 4
1600 // CHECK-NEXT: [[REG800:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
1601 // CHECK-NEXT: [[REG801:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG800]], i32 0
1602 // CHECK-NEXT: [[REG802:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
1603 // CHECK-NEXT: [[REG803:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG801]], float [[REG802]], i32 1
1604 // CHECK-NEXT: [[REG804:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
1605 // CHECK-NEXT: [[REG805:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG803]], float [[REG804]], i32 2
1606 // CHECK-NEXT: [[REG806:[0-9a-zA-Z_%.]+]] = load float, float* [[REG799]], align 4
1607 // CHECK-NEXT: [[REG807:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG805]], float [[REG806]], i32 3
1608 // CHECK-NEXT: store <4 x float> [[REG807]], <4 x float>* [[REG808:[0-9a-zA-Z_%.]+]], align 16
1609 // CHECK-NEXT: [[REG809:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG808]], align 16
1610 // CHECK-NEXT: ret <4 x float> [[REG809]]
1611 
1612 // CHECK: define available_externally <4 x float> @_mm_setr_ps(float [[REG810:[0-9a-zA-Z_%.]+]], float [[REG811:[0-9a-zA-Z_%.]+]], float [[REG812:[0-9a-zA-Z_%.]+]], float [[REG813:[0-9a-zA-Z_%.]+]])
1613 // CHECK: store float [[REG810]], float* [[REG814:[0-9a-zA-Z_%.]+]], align 4
1614 // CHECK-NEXT: store float [[REG811]], float* [[REG815:[0-9a-zA-Z_%.]+]], align 4
1615 // CHECK-NEXT: store float [[REG812]], float* [[REG816:[0-9a-zA-Z_%.]+]], align 4
1616 // CHECK-NEXT: store float [[REG813]], float* [[REG817:[0-9a-zA-Z_%.]+]], align 4
1617 // CHECK-NEXT: [[REG818:[0-9a-zA-Z_%.]+]] = load float, float* [[REG814]], align 4
1618 // CHECK-NEXT: [[REG819:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> undef, float [[REG818]], i32 0
1619 // CHECK-NEXT: [[REG820:[0-9a-zA-Z_%.]+]] = load float, float* [[REG815]], align 4
1620 // CHECK-NEXT: [[REG821:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG819]], float [[REG820]], i32 1
1621 // CHECK-NEXT: [[REG822:[0-9a-zA-Z_%.]+]] = load float, float* [[REG816]], align 4
1622 // CHECK-NEXT: [[REG823:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG821]], float [[REG822]], i32 2
1623 // CHECK-NEXT: [[REG824:[0-9a-zA-Z_%.]+]] = load float, float* [[REG817]], align 4
1624 // CHECK-NEXT: [[REG825:[0-9a-zA-Z_%.]+]] = insertelement <4 x float> [[REG823]], float [[REG824]], i32 3
1625 // CHECK-NEXT: store <4 x float> [[REG825]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1626 // CHECK-NEXT: [[REG826:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1627 // CHECK-NEXT: ret <4 x float> [[REG826]]
1628 
1629 void __attribute__((noinline))
test_setzero()1630 test_setzero() {
1631   res = _mm_setzero_ps();
1632 }
1633 
1634 // CHECK-LABEL: @test_setzero
1635 
1636 // CHECK: define available_externally <4 x float> @_mm_setzero_ps
1637 // CHECK: store <4 x float> zeroinitializer, <4 x float>* [[REG827:[0-9a-zA-Z_%.]+]], align 16
1638 // CHECK-NEXT: [[REG828:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG827]], align 16
1639 // CHECK-NEXT: ret <4 x float> [[REG828]]
1640 
1641 void __attribute__((noinline))
test_sfence()1642 test_sfence() {
1643   _mm_sfence();
1644 }
1645 
1646 // CHECK-LABEL: @test_sfence
1647 
1648 // CHECK: define available_externally void @_mm_sfence
1649 // CHECK: fence release
1650 // CHECK-NEXT: ret void
1651 
1652 void __attribute__((noinline))
test_shuffle()1653 test_shuffle() {
1654   res64 = _mm_shuffle_pi16(ms[0], i);
1655   res = _mm_shuffle_ps(m1, m2, i);
1656   res64 = _m_pshufw(ms[0], i);
1657 }
1658 
1659 // CHECK-LABEL: @test_shuffle
1660 
1661 // CHECK: define available_externally i64 @_mm_shuffle_pi16(i64 [[REG829:[0-9a-zA-Z_%.]+]], i32 signext [[REG830:[0-9a-zA-Z_%.]+]])
1662 // CHECK: store i64 [[REG829]], i64* [[REG831:[0-9a-zA-Z_%.]+]], align 8
1663 // CHECK-NEXT: store i32 [[REG830]], i32* [[REG832:[0-9a-zA-Z_%.]+]], align 4
1664 // CHECK-NEXT: [[REG833:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
1665 // CHECK-NEXT: [[REG834:[0-9a-zA-Z_%.]+]] = and i32 [[REG833]], 3
1666 // CHECK-NEXT: [[REG835:[0-9a-zA-Z_%.]+]] = sext i32 [[REG834]] to i64
1667 // CHECK-NEXT: store i64 [[REG835]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1668 // CHECK-NEXT: [[REG836:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
1669 // CHECK-NEXT: [[REG837:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG836]], 2
1670 // CHECK-NEXT: [[REG838:[0-9a-zA-Z_%.]+]] = and i32 [[REG837]], 3
1671 // CHECK-NEXT: [[REG839:[0-9a-zA-Z_%.]+]] = sext i32 [[REG838]] to i64
1672 // CHECK-NEXT: store i64 [[REG839]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1673 // CHECK-NEXT: [[REG840:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
1674 // CHECK-NEXT: [[REG841:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG840]], 4
1675 // CHECK-NEXT: [[REG842:[0-9a-zA-Z_%.]+]] = and i32 [[REG841]], 3
1676 // CHECK-NEXT: [[REG843:[0-9a-zA-Z_%.]+]] = sext i32 [[REG842]] to i64
1677 // CHECK-NEXT: store i64 [[REG843]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1678 // CHECK-NEXT: [[REG844:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG832]], align 4
1679 // CHECK-NEXT: [[REG845:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG844]], 6
1680 // CHECK-NEXT: [[REG846:[0-9a-zA-Z_%.]+]] = and i32 [[REG845]], 3
1681 // CHECK-NEXT: [[REG847:[0-9a-zA-Z_%.]+]] = sext i32 [[REG846]] to i64
1682 // CHECK-NEXT: store i64 [[REG847]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1683 // CHECK-NEXT: [[REG848:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1684 // CHECK-NEXT: [[REG849:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG848]]
1685 // CHECK-NEXT: [[REG850:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG849]], align 2
1686 // CHECK-NEXT: [[REG851:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
1687 // CHECK-LE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 0
1688 // CHECK-BE-NEXT: [[REG852:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG851]], i64 0, i64 3
1689 // CHECK-NEXT: store i16 [[REG850]], i16* [[REG852]]
1690 // CHECK-NEXT: [[REG853:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1691 // CHECK-NEXT: [[REG854:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG853]]
1692 // CHECK-NEXT: [[REG855:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG854]], align 2
1693 // CHECK-NEXT: [[REG856:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
1694 // CHECK-LE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 1
1695 // CHECK-BE-NEXT: [[REG857:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG856]], i64 0, i64 2
1696 // CHECK-NEXT: store i16 [[REG855]], i16* [[REG857]]
1697 // CHECK-NEXT: [[REG858:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1698 // CHECK-NEXT: [[REG859:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG858]]
1699 // CHECK-NEXT: [[REG860:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG859]], align 2
1700 // CHECK-NEXT: [[REG861:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
1701 // CHECK-LE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 2
1702 // CHECK-BE-NEXT: [[REG862:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG861]], i64 0, i64 1
1703 // CHECK-NEXT: store i16 [[REG860]], i16* [[REG862]]
1704 // CHECK-NEXT: [[REG863:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1705 // CHECK-NEXT: [[REG864:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* @_mm_shuffle_pi16.permute_selectors, i64 0, i64 [[REG863]]
1706 // CHECK-NEXT: [[REG865:[0-9a-zA-Z_%.]+]] = load i16, i16* [[REG864]], align 2
1707 // CHECK-NEXT: [[REG866:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to [4 x i16]*
1708 // CHECK-LE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 3
1709 // CHECK-BE-NEXT: [[REG867:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i16], [4 x i16]* [[REG866]], i64 0, i64 0
1710 // CHECK-NEXT: store i16 [[REG865]], i16* [[REG867]]
1711 // CHECK-NEXT: [[REG868:[0-9a-zA-Z_%.]+]] = bitcast {{[0-9a-zA-Z_%.]+}}* {{[0-9a-zA-Z_%.]+}} to i64*
1712 // CHECK-NEXT: [[REG869:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG868]], align 8
1713 // CHECK-NEXT: [[REG870:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG869]])
1714 // CHECK-NEXT: store <2 x i64> [[REG870]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1715 // CHECK-NEXT: [[REG871:[0-9a-zA-Z_%.]+]] = load i64, i64* [[REG831]], align 8
1716 // CHECK-NEXT: [[REG872:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_splats(unsigned long long)(i64 [[REG871]])
1717 // CHECK-NEXT: store <2 x i64> [[REG872]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1718 // CHECK-NEXT: [[REG873:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1719 // CHECK-NEXT: [[REG874:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1720 // CHECK-NEXT: [[REG875:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1721 // CHECK-NEXT: [[REG876:[0-9a-zA-Z_%.]+]] = bitcast <2 x i64> [[REG875]] to <16 x i8>
1722 // CHECK-NEXT: [[REG877:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_perm(unsigned long long vector[2], unsigned long long vector[2], unsigned char vector[16])(<2 x i64> [[REG873]], <2 x i64> [[REG874]], <16 x i8> [[REG876]])
1723 // CHECK-NEXT: store <2 x i64> [[REG877]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1724 // CHECK-NEXT: [[REG878:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1725 // CHECK-NEXT: [[REG879:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG878]], i32 0
1726 // CHECK-NEXT: ret i64 [[REG879]]
1727 
1728 // CHECK: define available_externally <4 x float> @_mm_shuffle_ps(<4 x float> [[REG880:[0-9a-zA-Z_%.]+]], <4 x float> [[REG881:[0-9a-zA-Z_%.]+]], i32 signext [[REG882:[0-9a-zA-Z_%.]+]])
1729 // CHECK: store <4 x float> [[REG880]], <4 x float>* [[REG883:[0-9a-zA-Z_%.]+]], align 16
1730 // CHECK-NEXT: store <4 x float> [[REG881]], <4 x float>* [[REG884:[0-9a-zA-Z_%.]+]], align 16
1731 // CHECK-NEXT: store i32 [[REG882]], i32* [[REG885:[0-9a-zA-Z_%.]+]], align 4
1732 // CHECK-NEXT: [[REG886:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
1733 // CHECK-NEXT: [[REG887:[0-9a-zA-Z_%.]+]] = and i32 [[REG886]], 3
1734 // CHECK-NEXT: [[REG888:[0-9a-zA-Z_%.]+]] = sext i32 [[REG887]] to i64
1735 // CHECK-NEXT: store i64 [[REG888]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1736 // CHECK-NEXT: [[REG889:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
1737 // CHECK-NEXT: [[REG890:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG889]], 2
1738 // CHECK-NEXT: [[REG891:[0-9a-zA-Z_%.]+]] = and i32 [[REG890]], 3
1739 // CHECK-NEXT: [[REG892:[0-9a-zA-Z_%.]+]] = sext i32 [[REG891]] to i64
1740 // CHECK-NEXT: store i64 [[REG892]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1741 // CHECK-NEXT: [[REG893:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
1742 // CHECK-NEXT: [[REG894:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG893]], 4
1743 // CHECK-NEXT: [[REG895:[0-9a-zA-Z_%.]+]] = and i32 [[REG894]], 3
1744 // CHECK-NEXT: [[REG896:[0-9a-zA-Z_%.]+]] = sext i32 [[REG895]] to i64
1745 // CHECK-NEXT: store i64 [[REG896]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1746 // CHECK-NEXT: [[REG897:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG885]], align 4
1747 // CHECK-NEXT: [[REG898:[0-9a-zA-Z_%.]+]] = ashr i32 [[REG897]], 6
1748 // CHECK-NEXT: [[REG899:[0-9a-zA-Z_%.]+]] = and i32 [[REG898]], 3
1749 // CHECK-NEXT: [[REG900:[0-9a-zA-Z_%.]+]] = sext i32 [[REG899]] to i64
1750 // CHECK-NEXT: store i64 [[REG900]], i64* {{[0-9a-zA-Z_%.]+}}, align 8
1751 // CHECK-NEXT: [[REG901:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1752 // CHECK-NEXT: [[REG902:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG901]]
1753 // CHECK-NEXT: [[REG903:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG902]], align 4
1754 // CHECK-NEXT: [[REG904:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1755 // CHECK-NEXT: [[REG905:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG904]], i32 [[REG903]], i32 0
1756 // CHECK-NEXT: store <4 x i32> [[REG905]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1757 // CHECK-NEXT: [[REG906:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1758 // CHECK-NEXT: [[REG907:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG906]]
1759 // CHECK-NEXT: [[REG908:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG907]], align 4
1760 // CHECK-NEXT: [[REG909:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1761 // CHECK-NEXT: [[REG910:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG909]], i32 [[REG908]], i32 1
1762 // CHECK-NEXT: store <4 x i32> [[REG910]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1763 // CHECK-NEXT: [[REG911:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1764 // CHECK-NEXT: [[REG912:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG911]]
1765 // CHECK-NEXT: [[REG913:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG912]], align 4
1766 // CHECK-NEXT: [[REG914:[0-9a-zA-Z_%.]+]] = add i32 [[REG913]], 269488144
1767 // CHECK-NEXT: [[REG915:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1768 // CHECK-NEXT: [[REG916:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG915]], i32 [[REG914]], i32 2
1769 // CHECK-NEXT: store <4 x i32> [[REG916]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1770 // CHECK-NEXT: [[REG917:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1771 // CHECK-NEXT: [[REG918:[0-9a-zA-Z_%.]+]] = getelementptr inbounds [4 x i32], [4 x i32]* @_mm_shuffle_ps.permute_selectors, i64 0, i64 [[REG917]]
1772 // CHECK-NEXT: [[REG919:[0-9a-zA-Z_%.]+]] = load i32, i32* [[REG918]], align 4
1773 // CHECK-NEXT: [[REG920:[0-9a-zA-Z_%.]+]] = add i32 [[REG919]], 269488144
1774 // CHECK-NEXT: [[REG921:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1775 // CHECK-NEXT: [[REG922:[0-9a-zA-Z_%.]+]] = insertelement <4 x i32> [[REG921]], i32 [[REG920]], i32 3
1776 // CHECK-NEXT: store <4 x i32> [[REG922]], <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1777 // CHECK-NEXT: [[REG923:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1778 // CHECK-NEXT: [[REG924:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1779 // CHECK-NEXT: [[REG925:[0-9a-zA-Z_%.]+]] = load <4 x i32>, <4 x i32>* {{[0-9a-zA-Z_%.]+}}, align 16
1780 // CHECK-NEXT: [[REG926:[0-9a-zA-Z_%.]+]] = bitcast <4 x i32> [[REG925]] to <16 x i8>
1781 // CHECK-NEXT: [[REG927:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG923]], <4 x float> [[REG924]], <16 x i8> [[REG926]])
1782 // CHECK-NEXT: ret <4 x float> [[REG927]]
1783 
1784 // CHECK: define available_externally i64 @_m_pshufw
1785 // CHECK: [[REG928:[0-9a-zA-Z_%.]+]] = call i64 @_mm_shuffle_pi16
1786 // CHECK-NEXT: ret i64 [[REG928]]
1787 
1788 void __attribute__((noinline))
test_sqrt()1789 test_sqrt() {
1790   res = _mm_sqrt_ps(m1);
1791   res = _mm_sqrt_ss(m1);
1792 }
1793 
1794 // CHECK-LABEL: @test_sqrt
1795 
1796 // CHECK: define available_externally <4 x float> @_mm_sqrt_ps
1797 // CHECK: [[REG929:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> {{[0-9a-zA-Z_%.]+}})
1798 // CHECK-NEXT: ret <4 x float> [[REG929]]
1799 
1800 // CHECK: define available_externally <4 x float> @_mm_sqrt_ss
1801 // CHECK: [[REG930:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1802 // CHECK-NEXT: [[REG931:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG930]], i32 zeroext 0)
1803 // CHECK-NEXT: store <4 x float> [[REG931]], <4 x float>* [[REG932:[0-9a-zA-Z_%.]+]], align 16
1804 // CHECK-NEXT: [[REG933:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG932]], align 16
1805 // CHECK-NEXT: [[REG934:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sqrt(float vector[4])(<4 x float> [[REG933]])
1806 // CHECK-NEXT: store <4 x float> [[REG934]], <4 x float>* [[REG935:[0-9a-zA-Z_%.]+]], align 16
1807 // CHECK-NEXT: [[REG936:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1808 // CHECK-NEXT: [[REG937:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG935]], align 16
1809 // CHECK-NEXT: [[REG938:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG936]], <4 x float> [[REG937]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
1810 // CHECK-NEXT: ret <4 x float> [[REG938]]
1811 
1812 void __attribute__((noinline))
test_store()1813 test_store() {
1814   _mm_store_ps(fs, m1);
1815   _mm_store_ps1(fs, m1);
1816   _mm_store_ss(fs, m1);
1817   _mm_store1_ps(fs, m1);
1818   _mm_storeh_pi(ms, m1);
1819   _mm_storel_pi(ms, m1);
1820   _mm_storer_ps(fs, m1);
1821 }
1822 
1823 // CHECK-LABEL: @test_store
1824 
1825 // CHECK: define available_externally void @_mm_store_ps
1826 // CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
1827 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1828 // CHECK-NEXT: [[REG939:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1829 // CHECK-NEXT: [[REG940:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
1830 // CHECK-NEXT: [[REG941:[0-9a-zA-Z_%.]+]] = bitcast float* [[REG940]] to <4 x float>*
1831 // CHECK-NEXT: call void @vec_st(float vector[4], long, float vector[4]*)(<4 x float> [[REG939]], i64 0, <4 x float>* [[REG941]])
1832 // CHECK-NEXT: ret void
1833 
1834 // CHECK: define available_externally void @_mm_store_ps1
1835 // CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
1836 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1837 // CHECK-NEXT: [[REG942:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
1838 // CHECK-NEXT: [[REG943:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1839 // CHECK-NEXT: call void @_mm_store1_ps(float* [[REG942]], <4 x float> [[REG943]])
1840 // CHECK-NEXT: ret void
1841 
1842 // CHECK: define available_externally void @_mm_store_ss
1843 // CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
1844 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1845 // CHECK-NEXT: [[REG944:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1846 // CHECK-NEXT: [[REG945:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG944]], i32 0
1847 // CHECK-NEXT: [[REG946:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
1848 // CHECK-NEXT: store float [[REG945]], float* [[REG946]], align 4
1849 // CHECK-NEXT: ret void
1850 
1851 // CHECK: define available_externally void @_mm_store1_ps
1852 // CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
1853 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1854 // CHECK-NEXT: [[REG947:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1855 // CHECK-NEXT: [[REG948:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG947]], i32 zeroext 0)
1856 // CHECK-NEXT: store <4 x float> [[REG948]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1857 // CHECK-NEXT: [[REG949:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
1858 // CHECK-NEXT: [[REG950:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1859 // CHECK-NEXT: call void @_mm_store_ps(float* [[REG949]], <4 x float> [[REG950]])
1860 // CHECK-NEXT: ret void
1861 
1862 // CHECK: define available_externally void @_mm_storeh_pi
1863 // CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
1864 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1865 // CHECK-NEXT: [[REG951:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1866 // CHECK-NEXT: [[REG952:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG951]] to <2 x i64>
1867 // CHECK-NEXT: store <2 x i64> [[REG952]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1868 // CHECK-NEXT: [[REG953:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1869 // CHECK-NEXT: [[REG954:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG953]], i32 1
1870 // CHECK-NEXT: [[REG955:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
1871 // CHECK-NEXT: store i64 [[REG954]], i64* [[REG955]], align 8
1872 // CHECK-NEXT: ret void
1873 
1874 // CHECK: define available_externally void @_mm_storel_pi
1875 // CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
1876 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1877 // CHECK-NEXT: [[REG956:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1878 // CHECK-NEXT: [[REG957:[0-9a-zA-Z_%.]+]] = bitcast <4 x float> [[REG956]] to <2 x i64>
1879 // CHECK-NEXT: store <2 x i64> [[REG957]], <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1880 // CHECK-NEXT: [[REG958:[0-9a-zA-Z_%.]+]] = load <2 x i64>, <2 x i64>* {{[0-9a-zA-Z_%.]+}}, align 16
1881 // CHECK-NEXT: [[REG959:[0-9a-zA-Z_%.]+]] = extractelement <2 x i64> [[REG958]], i32 0
1882 // CHECK-NEXT: [[REG960:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
1883 // CHECK-NEXT: store i64 [[REG959]], i64* [[REG960]], align 8
1884 // CHECK-NEXT: ret void
1885 
1886 // CHECK: define available_externally void @_mm_storer_ps
1887 // CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
1888 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1889 // CHECK-NEXT: [[REG961:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1890 // CHECK-NEXT: [[REG962:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1891 // CHECK-NEXT: [[REG963:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_perm(float vector[4], float vector[4], unsigned char vector[16])(<4 x float> [[REG961]], <4 x float> [[REG962]], <16 x i8> <i8 28, i8 29, i8 30, i8 31, i8 24, i8 25, i8 26, i8 27, i8 20, i8 21, i8 22, i8 23, i8 16, i8 17, i8 18, i8 19>)
1892 // CHECK-NEXT: store <4 x float> [[REG963]], <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1893 // CHECK-NEXT: [[REG964:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
1894 // CHECK-NEXT: [[REG965:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1895 // CHECK-NEXT: call void @_mm_store_ps(float* [[REG964]], <4 x float> [[REG965]])
1896 // CHECK-NEXT: ret void
1897 
1898 void __attribute__((noinline))
test_stream()1899 test_stream() {
1900   _mm_stream_pi(&res64, ms[0]);
1901   _mm_stream_ps(&fs[0], m1);
1902 }
1903 
1904 // CHECK-LABEL: @test_stream
1905 
1906 /// CHECK: define available_externally void @_mm_stream_pi
1907 // CHECK: store i64* {{[0-9a-zA-Z_%.]+}}, i64** {{[0-9a-zA-Z_%.]+}}, align 8
1908 // CHECK-NEXT: store i64 {{[0-9a-zA-Z_%.]+}}, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1909 // CHECK-NEXT: [[REG966:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
1910 // CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(i64* [[REG966]])
1911 // CHECK-NEXT: [[REG967:[0-9a-zA-Z_%.]+]] = load i64, i64* {{[0-9a-zA-Z_%.]+}}, align 8
1912 // CHECK-NEXT: [[REG968:[0-9a-zA-Z_%.]+]] = load i64*, i64** {{[0-9a-zA-Z_%.]+}}, align 8
1913 // CHECK-NEXT: store i64 [[REG967]], i64* [[REG968]], align 8
1914 // CHECK-NEXT: ret void
1915 
1916 // CHECK: define available_externally void @_mm_stream_ps
1917 // CHECK: store float* {{[0-9a-zA-Z_%.]+}}, float** {{[0-9a-zA-Z_%.]+}}, align 8
1918 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1919 // CHECK-NEXT: [[REG969:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
1920 // CHECK-NEXT: call void asm sideeffect "\09dcbtstt\090,$0", "b,~{memory}"(float* [[REG969]])
1921 // CHECK-NEXT: [[REG970:[0-9a-zA-Z_%.]+]] = load float*, float** {{[0-9a-zA-Z_%.]+}}, align 8
1922 // CHECK-NEXT: [[REG971:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1923 // CHECK-NEXT: call void @_mm_store_ps(float* [[REG970]], <4 x float> [[REG971]])
1924 // CHECK-NEXT: ret void
1925 
1926 void __attribute__((noinline))
test_sub()1927 test_sub() {
1928   res = _mm_sub_ps(m1, m2);
1929   res = _mm_sub_ss(m1, m2);
1930 }
1931 
1932 // CHECK-LABEL: @test_sub
1933 
1934 // CHECK: define available_externally <4 x float> @_mm_sub_ps(<4 x float> [[REG972:[0-9a-zA-Z_%.]+]], <4 x float> [[REG973:[0-9a-zA-Z_%.]+]])
1935 // CHECK: store <4 x float> [[REG972]], <4 x float>* [[REG974:[0-9a-zA-Z_%.]+]], align 16
1936 // CHECK-NEXT: store <4 x float> [[REG973]], <4 x float>* [[REG975:[0-9a-zA-Z_%.]+]], align 16
1937 // CHECK-NEXT: [[REG976:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG974]], align 16
1938 // CHECK-NEXT: [[REG977:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG975]], align 16
1939 // CHECK-NEXT: [[REG978:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG976]], [[REG977]]
1940 // CHECK-NEXT: ret <4 x float> [[REG978]]
1941 
1942 // CHECK: define available_externally <4 x float> @_mm_sub_ss(<4 x float> [[REG979:[0-9a-zA-Z_%.]+]], <4 x float> [[REG980:[0-9a-zA-Z_%.]+]])
1943 // CHECK: store <4 x float> [[REG979]], <4 x float>* [[REG981:[0-9a-zA-Z_%.]+]], align 16
1944 // CHECK-NEXT: store <4 x float> [[REG980]], <4 x float>* [[REG982:[0-9a-zA-Z_%.]+]], align 16
1945 // CHECK-NEXT: [[REG983:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
1946 // CHECK-NEXT: [[REG984:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG983]], i32 zeroext 0)
1947 // CHECK-NEXT: store <4 x float> [[REG984]], <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
1948 // CHECK-NEXT: [[REG986:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG982]], align 16
1949 // CHECK-NEXT: [[REG987:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_splat(float vector[4], unsigned int)(<4 x float> [[REG986]], i32 zeroext 0)
1950 // CHECK-NEXT: store <4 x float> [[REG987]], <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
1951 // CHECK-NEXT: [[REG989:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG985:[0-9a-zA-Z_%.]+]], align 16
1952 // CHECK-NEXT: [[REG990:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG988:[0-9a-zA-Z_%.]+]], align 16
1953 // CHECK-NEXT: [[REG991:[0-9a-zA-Z_%.]+]] = fsub <4 x float> [[REG989]], [[REG990]]
1954 // CHECK-NEXT: store <4 x float> [[REG991]], <4 x float>* [[REG992:[0-9a-zA-Z_%.]+]], align 16
1955 // CHECK-NEXT: [[REG993:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG981]], align 16
1956 // CHECK-NEXT: [[REG994:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG992]], align 16
1957 // CHECK-NEXT: [[REG995:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> [[REG993]], <4 x float> [[REG994]], <4 x i32> <i32 -1, i32 0, i32 0, i32 0>)
1958 // CHECK-NEXT: ret <4 x float> [[REG995]]
1959 
1960 void __attribute__((noinline))
test_transpose()1961 test_transpose() {
1962   __m128 m3, m4;
1963   _MM_TRANSPOSE4_PS(m1, m2, m3, m4);
1964 }
1965 
1966 // CHECK-LABEL: @test_transpose
1967 
1968 // CHECK: br label %[[REG996:[0-9a-zA-Z_%.]+]]
1969 // CHECK: [[REG996]]:
1970 // CHECK: [[REG997:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
1971 // CHECK: [[REG998:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])
1972 // CHECK: [[REG999:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
1973 // CHECK: [[REG1000:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])
1974 // CHECK: [[REG1001:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
1975 // CHECK: [[REG1002:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
1976 // CHECK: [[REG1003:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergeh(long long vector[2], long long vector[2])
1977 // CHECK: [[REG1004:[0-9a-zA-Z_%.]+]] = call <2 x i64> @vec_mergel(long long vector[2], long long vector[2])
1978 // CHECK: ret void
1979 
1980 void __attribute__((noinline))
test_ucomi()1981 test_ucomi() {
1982   i = _mm_ucomieq_ss(m1, m2);
1983   i = _mm_ucomige_ss(m1, m2);
1984   i = _mm_ucomigt_ss(m1, m2);
1985   i = _mm_ucomile_ss(m1, m2);
1986   i = _mm_ucomilt_ss(m1, m2);
1987   i = _mm_ucomineq_ss(m1, m2);
1988 }
1989 
1990 // CHECK-LABEL: @test_ucomi
1991 
1992 // CHECK: define available_externally signext i32 @_mm_ucomieq_ss
1993 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1994 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1995 // CHECK-NEXT: [[REG1005:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1996 // CHECK-NEXT: [[REG1006:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1005]], i32 0
1997 // CHECK-NEXT: [[REG1007:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
1998 // CHECK-NEXT: [[REG1008:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1007]], i32 0
1999 // CHECK-NEXT: [[REG1009:[0-9a-zA-Z_%.]+]] = fcmp oeq float [[REG1006]], [[REG1008]]
2000 // CHECK-NEXT: [[REG1010:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1009]] to i32
2001 // CHECK-NEXT: ret i32 [[REG1010]]
2002 
2003 // CHECK: define available_externally signext i32 @_mm_ucomige_ss
2004 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2005 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2006 // CHECK-NEXT: [[REG1011:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2007 // CHECK-NEXT: [[REG1012:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1011]], i32 0
2008 // CHECK-NEXT: [[REG1013:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2009 // CHECK-NEXT: [[REG1014:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1013]], i32 0
2010 // CHECK-NEXT: [[REG1015:[0-9a-zA-Z_%.]+]] = fcmp oge float [[REG1012]], [[REG1014]]
2011 // CHECK-NEXT: [[REG1016:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1015]] to i32
2012 // CHECK-NEXT: ret i32 [[REG1016]]
2013 
2014 // CHECK: define available_externally signext i32 @_mm_ucomigt_ss
2015 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2016 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2017 // CHECK-NEXT: [[REG1017:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2018 // CHECK-NEXT: [[REG1018:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1017]], i32 0
2019 // CHECK-NEXT: [[REG1019:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2020 // CHECK-NEXT: [[REG1020:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1019]], i32 0
2021 // CHECK-NEXT: [[REG1021:[0-9a-zA-Z_%.]+]] = fcmp ogt float [[REG1018]], [[REG1020]]
2022 // CHECK-NEXT: [[REG1022:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1021]] to i32
2023 // CHECK-NEXT: ret i32 [[REG1022]]
2024 
2025 // CHECK: define available_externally signext i32 @_mm_ucomile_ss
2026 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2027 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2028 // CHECK-NEXT: [[REG1023:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2029 // CHECK-NEXT: [[REG1024:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1023]], i32 0
2030 // CHECK-NEXT: [[REG1025:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2031 // CHECK-NEXT: [[REG1026:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1025]], i32 0
2032 // CHECK-NEXT: [[REG1027:[0-9a-zA-Z_%.]+]] = fcmp ole float [[REG1024]], [[REG1026]]
2033 // CHECK-NEXT: [[REG1028:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1027]] to i32
2034 // CHECK-NEXT: ret i32 [[REG1028]]
2035 
2036 // CHECK: define available_externally signext i32 @_mm_ucomilt_ss
2037 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2038 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2039 // CHECK-NEXT: [[REG1029:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2040 // CHECK-NEXT: [[REG1030:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1029]], i32 0
2041 // CHECK-NEXT: [[REG1031:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2042 // CHECK-NEXT: [[REG1032:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1031]], i32 0
2043 // CHECK-NEXT: [[REG1033:[0-9a-zA-Z_%.]+]] = fcmp olt float [[REG1030]], [[REG1032]]
2044 // CHECK-NEXT: [[REG1034:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1033]] to i32
2045 // CHECK-NEXT: ret i32 [[REG1034]]
2046 
2047 // CHECK: define available_externally signext i32 @_mm_ucomineq_ss
2048 // CHECK: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2049 // CHECK-NEXT: store <4 x float> {{[0-9a-zA-Z_%.]+}}, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2050 // CHECK-NEXT: [[REG1035:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2051 // CHECK-NEXT: [[REG1036:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1035]], i32 0
2052 // CHECK-NEXT: [[REG1037:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* {{[0-9a-zA-Z_%.]+}}, align 16
2053 // CHECK-NEXT: [[REG1038:[0-9a-zA-Z_%.]+]] = extractelement <4 x float> [[REG1037]], i32 0
2054 // CHECK-NEXT: [[REG1039:[0-9a-zA-Z_%.]+]] = fcmp une float [[REG1036]], [[REG1038]]
2055 // CHECK-NEXT: [[REG1040:[0-9a-zA-Z_%.]+]] = zext i1 [[REG1039]] to i32
2056 // CHECK-NEXT: ret i32 [[REG1040]]
2057 
2058 void __attribute__((noinline))
test_undefined()2059 test_undefined() {
2060   res = _mm_undefined_ps();
2061 }
2062 
2063 // CHECK-LABEL: @test_undefined
2064 
2065 // CHECK: define available_externally <4 x float> @_mm_undefined_ps
2066 // CHECK: [[REG1041:[0-9a-zA-Z_%.]+]] = alloca <4 x float>, align 16
2067 // CHECK-NEXT: [[REG1042:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
2068 // CHECK-NEXT: store <4 x float> [[REG1042]], <4 x float>* [[REG1041]], align 16
2069 // CHECK-NEXT: [[REG1043:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1041]], align 16
2070 // CHECK-NEXT: ret <4 x float> [[REG1043]]
2071 
2072 void __attribute__((noinline))
test_unpack()2073 test_unpack() {
2074   res = _mm_unpackhi_ps(m1, m2);
2075   res = _mm_unpacklo_ps(m1, m2);
2076 }
2077 
2078 // CHECK-LABEL: @test_unpack
2079 
2080 // CHECK: define available_externally <4 x float> @_mm_unpackhi_ps(<4 x float> [[REG1044:[0-9a-zA-Z_%.]+]], <4 x float> [[REG1045:[0-9a-zA-Z_%.]+]])
2081 // CHECK: store <4 x float> [[REG1044]], <4 x float>* [[REG1046:[0-9a-zA-Z_%.]+]], align 16
2082 // CHECK-NEXT: store <4 x float> [[REG1045]], <4 x float>* [[REG1047:[0-9a-zA-Z_%.]+]], align 16
2083 // CHECK-NEXT: [[REG1048:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1046]], align 16
2084 // CHECK-NEXT: [[REG1049:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1047]], align 16
2085 // CHECK-NEXT: [[REG1050:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrglw(float vector[4], float vector[4])(<4 x float> [[REG1048]], <4 x float> [[REG1049]])
2086 // CHECK-NEXT: ret <4 x float> [[REG1050]]
2087 
2088 // CHECK: define available_externally <4 x float> @_mm_unpacklo_ps(<4 x float> [[REG1051:[0-9a-zA-Z_%.]+]], <4 x float> [[REG1052:[0-9a-zA-Z_%.]+]])
2089 // CHECK: store <4 x float> [[REG1051]], <4 x float>* [[REG1053:[0-9a-zA-Z_%.]+]], align 16
2090 // CHECK-NEXT: store <4 x float> [[REG1052]], <4 x float>* [[REG1054:[0-9a-zA-Z_%.]+]], align 16
2091 // CHECK-NEXT: [[REG1055:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1053]], align 16
2092 // CHECK-NEXT: [[REG1056:[0-9a-zA-Z_%.]+]] = load <4 x float>, <4 x float>* [[REG1054]], align 16
2093 // CHECK-NEXT: [[REG1057:[0-9a-zA-Z_%.]+]] = call <4 x float> @vec_vmrghw(float vector[4], float vector[4])(<4 x float> [[REG1055]], <4 x float> [[REG1056]])
2094 // CHECK-NEXT: ret <4 x float> [[REG1057]]
2095