; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-64
; RUN: llc -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq < %s | FileCheck %s --check-prefix=X86-32

; An i1 produced by fcmp in either predecessor and merged by a phi feeds a
; scalar select: the compare result should stay in a mask register
; (vcmpeqss ... %k1) across the join and be used directly as a vmovss merge mask.
define void @test_fcmp_storefloat(i1 %cond, float* %fptr, float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
; X86-64-LABEL: test_fcmp_storefloat:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB0_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k1
; X86-64-NEXT:    jmp .LBB0_3
; X86-64-NEXT:  .LBB0_2: # %else
; X86-64-NEXT:    vcmpeqss %xmm5, %xmm4, %k1
; X86-64-NEXT:  .LBB0_3: # %exit
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_fcmp_storefloat:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB0_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
; X86-32-NEXT:    jmp .LBB0_3
; X86-32-NEXT:  .LBB0_2: # %else
; X86-32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm2, %k1
; X86-32-NEXT:  .LBB0_3: # %exit
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %cmp1 = fcmp oeq float %f3, %f4
  br label %exit

else:
  %cmp2 = fcmp oeq float %f5, %f6
  br label %exit

exit:
  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, float* %fptr
  ret void
}

; An i1 fcmp result stored straight to memory: each path compares into %k0 and
; writes the mask byte with kmovb, with no GPR round-trip.
define void @test_fcmp_storei1(i1 %cond, float* %fptr, i1* %iptr, float %f1, float %f2, float %f3, float %f4) {
; X86-64-LABEL: test_fcmp_storei1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB1_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    vcmpeqss %xmm1, %xmm0, %k0
; X86-64-NEXT:    kmovb %k0, (%rdx)
; X86-64-NEXT:    retq
; X86-64-NEXT:  .LBB1_2: # %else
; X86-64-NEXT:    vcmpeqss %xmm3, %xmm2, %k0
; X86-64-NEXT:    kmovb %k0, (%rdx)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_fcmp_storei1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB1_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
; X86-32-NEXT:    kmovb %k0, (%eax)
; X86-32-NEXT:    retl
; X86-32-NEXT:  .LBB1_2: # %else
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vcmpeqss {{[0-9]+}}(%esp), %xmm0, %k0
; X86-32-NEXT:    kmovb %k0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %cmp1 = fcmp oeq float %f1, %f2
  br label %exit

else:
  %cmp2 = fcmp oeq float %f3, %f4
  br label %exit

exit:
  %val = phi i1 [%cmp1, %if], [%cmp2, %else]
  store i1 %val, i1* %iptr
  ret void
}

; i1 loads plus an i1 add: the arithmetic is done in GPRs (movb/addb) and only
; the final merged value is transferred into a mask register (kmovd) for the
; masked select.
define void @test_load_add(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2)  {
; X86-64-LABEL: test_load_add:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB2_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movb (%rdx), %al
; X86-64-NEXT:    addb (%rcx), %al
; X86-64-NEXT:    jmp .LBB2_3
; X86-64-NEXT:  .LBB2_2: # %else
; X86-64-NEXT:    movb (%rcx), %al
; X86-64-NEXT:  .LBB2_3: # %exit
; X86-64-NEXT:    kmovd %eax, %k1
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_load_add:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB2_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-32-NEXT:    movb (%edx), %dl
; X86-32-NEXT:    addb (%ecx), %dl
; X86-32-NEXT:    jmp .LBB2_3
; X86-32-NEXT:  .LBB2_2: # %else
; X86-32-NEXT:    movb (%ecx), %dl
; X86-32-NEXT:  .LBB2_3: # %exit
; X86-32-NEXT:    kmovd %edx, %k1
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, i1* %iptr1
  %loaded2if = load i1, i1* %iptr2
  %added = add i1 %loaded1, %loaded2if
  br label %exit

else:
  %loaded2else = load i1, i1* %iptr2
  br label %exit

exit:
  %val = phi i1 [%added, %if], [%loaded2else, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, float* %fptr
  ret void
}

; Plain i1 loads merged by a phi and used as a select mask: on x86-64 each load
; goes directly into %k1 via kmovb; on i386 the pointer is merged first and a
; single kmovb is done at the join.
define void @test_load_i1(i1 %cond, float* %fptr, i1* %iptr1, i1* %iptr2, float %f1, float %f2)  {
; X86-64-LABEL: test_load_i1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB3_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:    jmp .LBB3_3
; X86-64-NEXT:  .LBB3_2: # %else
; X86-64-NEXT:    kmovb (%rcx), %k1
; X86-64-NEXT:  .LBB3_3: # %exit
; X86-64-NEXT:    vmovss %xmm0, %xmm1, %xmm1 {%k1}
; X86-64-NEXT:    vmovss %xmm1, (%rsi)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_load_i1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-32-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB3_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    jmp .LBB3_3
; X86-32-NEXT:  .LBB3_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:  .LBB3_3: # %exit
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
; X86-32-NEXT:    vmovss %xmm0, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, i1* %iptr1
  br label %exit

else:
  %loaded2 = load i1, i1* %iptr2
  br label %exit

exit:
  %val = phi i1 [%loaded1, %if], [%loaded2, %else]
  %selected = select i1 %val, float %f1, float %f2
  store float %selected, float* %fptr
  ret void
}

; i1 load forwarded to an i1 store with no mask-register use at all: the value
; stays in GPRs and is normalized with 'andb $1' before the store.
define void @test_loadi1_storei1(i1 %cond, i1* %iptr1, i1* %iptr2, i1* %iptr3)  {
; X86-64-LABEL: test_loadi1_storei1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB4_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movb (%rsi), %al
; X86-64-NEXT:    jmp .LBB4_3
; X86-64-NEXT:  .LBB4_2: # %else
; X86-64-NEXT:    movb (%rdx), %al
; X86-64-NEXT:  .LBB4_3: # %exit
; X86-64-NEXT:    andb $1, %al
; X86-64-NEXT:    movb %al, (%rcx)
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_loadi1_storei1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB4_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    jmp .LBB4_3
; X86-32-NEXT:  .LBB4_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:  .LBB4_3: # %exit
; X86-32-NEXT:    movb (%ecx), %cl
; X86-32-NEXT:    andb $1, %cl
; X86-32-NEXT:    movb %cl, (%eax)
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i1, i1* %iptr1
  br label %exit

else:
  %loaded2 = load i1, i1* %iptr2
  br label %exit

exit:
  %val = phi i1 [%loaded1, %if], [%loaded2, %else]
  store i1 %val, i1* %iptr3
  ret void
}

; i8 mask shifted left by 1 before use as <8 x i1>: the shift is performed in
; mask registers as 'kaddb %k0, %k0' (x + x == x << 1), avoiding a GPR detour.
define void @test_shl1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_shl1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB5_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kaddb %k0, %k0, %k1
; X86-64-NEXT:    jmp .LBB5_3
; X86-64-NEXT:  .LBB5_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB5_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shl1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB5_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kaddb %k0, %k0, %k1
; X86-32-NEXT:    jmp .LBB5_3
; X86-32-NEXT:  .LBB5_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB5_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, i8* %ptr1
  %shifted = shl i8 %loaded1, 1
  br label %exit

else:
  %loaded2 = load i8, i8* %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}

; i8 mask logically shifted right by 1: unlike the shl-by-1 case, this lowers
; via GPRs (movb/shrb) and a kmovd transfer rather than a mask-register shift.
define void @test_shr1(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_shr1:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB6_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    movb (%rsi), %al
; X86-64-NEXT:    shrb %al
; X86-64-NEXT:    jmp .LBB6_3
; X86-64-NEXT:  .LBB6_2: # %else
; X86-64-NEXT:    movb (%rdx), %al
; X86-64-NEXT:  .LBB6_3: # %exit
; X86-64-NEXT:    kmovd %eax, %k1
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shr1:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB6_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movb (%ecx), %cl
; X86-32-NEXT:    shrb %cl
; X86-32-NEXT:    jmp .LBB6_3
; X86-32-NEXT:  .LBB6_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movb (%ecx), %cl
; X86-32-NEXT:  .LBB6_3: # %exit
; X86-32-NEXT:    kmovd %ecx, %k1
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, i8* %ptr1
  %shifted = lshr i8 %loaded1, 1
  br label %exit

else:
  %loaded2 = load i8, i8* %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}

; i8 mask logically shifted right by 2: this lowers entirely in mask registers
; (kmovb load, kshiftrb $2), with no GPR round-trip.
define void @test_shr2(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_shr2:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB7_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftrb $2, %k0, %k1
; X86-64-NEXT:    jmp .LBB7_3
; X86-64-NEXT:  .LBB7_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB7_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shr2:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB7_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftrb $2, %k0, %k1
; X86-32-NEXT:    jmp .LBB7_3
; X86-32-NEXT:  .LBB7_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB7_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, i8* %ptr1
  %shifted = lshr i8 %loaded1, 2
  br label %exit

else:
  %loaded2 = load i8, i8* %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}

; i8 mask shifted left by 6: lowers in mask registers via kshiftlb $6 on the
; kmovb-loaded value.
define void @test_shl(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_shl:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB8_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kshiftlb $6, %k0, %k1
; X86-64-NEXT:    jmp .LBB8_3
; X86-64-NEXT:  .LBB8_2: # %else
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:  .LBB8_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_shl:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB8_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k0
; X86-32-NEXT:    kshiftlb $6, %k0, %k1
; X86-32-NEXT:    jmp .LBB8_3
; X86-32-NEXT:  .LBB8_2: # %else
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:  .LBB8_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  br i1 %cond, label %if, label %else

if:
  %loaded1 = load i8, i8* %ptr1
  %shifted = shl i8 %loaded1, 6
  br label %exit

else:
  %loaded2 = load i8, i8* %ptr2
  br label %exit

exit:
  %val = phi i8 [%shifted, %if], [%loaded2, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}

; Both i8 loads are hoisted into mask registers in the entry block; the per-path
; 'and' and 'add' are then performed as kandb/kaddb directly on k-registers.
define void @test_add(i1 %cond, i8* %ptr1, i8* %ptr2, <8 x float> %fvec1, <8 x float> %fvec2, <8 x float>* %fptrvec) {
; X86-64-LABEL: test_add:
; X86-64:       # %bb.0: # %entry
; X86-64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-64-NEXT:    kmovb (%rsi), %k0
; X86-64-NEXT:    kmovb (%rdx), %k1
; X86-64-NEXT:    testb $1, %dil
; X86-64-NEXT:    je .LBB9_2
; X86-64-NEXT:  # %bb.1: # %if
; X86-64-NEXT:    kandb %k1, %k0, %k1
; X86-64-NEXT:    jmp .LBB9_3
; X86-64-NEXT:  .LBB9_2: # %else
; X86-64-NEXT:    kaddb %k1, %k0, %k1
; X86-64-NEXT:  .LBB9_3: # %exit
; X86-64-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-64-NEXT:    vmovaps %ymm1, (%rcx)
; X86-64-NEXT:    vzeroupper
; X86-64-NEXT:    retq
;
; X86-32-LABEL: test_add:
; X86-32:       # %bb.0: # %entry
; X86-32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; X86-32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-32-NEXT:    kmovb (%edx), %k0
; X86-32-NEXT:    kmovb (%ecx), %k1
; X86-32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; X86-32-NEXT:    je .LBB9_2
; X86-32-NEXT:  # %bb.1: # %if
; X86-32-NEXT:    kandb %k1, %k0, %k1
; X86-32-NEXT:    jmp .LBB9_3
; X86-32-NEXT:  .LBB9_2: # %else
; X86-32-NEXT:    kaddb %k1, %k0, %k1
; X86-32-NEXT:  .LBB9_3: # %exit
; X86-32-NEXT:    vmovaps %zmm0, %zmm1 {%k1}
; X86-32-NEXT:    vmovaps %ymm1, (%eax)
; X86-32-NEXT:    vzeroupper
; X86-32-NEXT:    retl
entry:
  %loaded1 = load i8, i8* %ptr1
  %loaded2 = load i8, i8* %ptr2
  br i1 %cond, label %if, label %else

if:
  %and = and i8 %loaded1, %loaded2
  br label %exit

else:
  %add = add i8 %loaded1, %loaded2
  br label %exit

exit:
  %val = phi i8 [%and, %if], [%add, %else]
  %mask = bitcast i8 %val to <8 x i1>
  %selected = select <8 x i1> %mask, <8 x float> %fvec1, <8 x float> %fvec2
  store <8 x float> %selected, <8 x float>* %fptrvec
  ret void
}
