; RUN: opt < %s  -cost-model -analyze -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
; Make sure that the ARM backend with NEON handles vselect, and that the cost
; model reports the expected costs for the compare/select pairs below.

; Signed max on <4 x i32>: an icmp sgt feeding a select of the same two
; operands is expected to be emitted as one vmax.s32 on Q registers.
;   %m     - address the result vector is stored to
;   %a, %b - the two vectors being compared and selected between
define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: vmax_v4i32:
; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
  %cmpres = icmp sgt <4 x i32> %a, %b
  %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %maxres, <4 x i32>* %m
  ret void
}

; Signed min on <16 x i16>: icmp slt + select of the same operands.
; The llc checks expect two vmin.s16 instructions; the cost-model checks expect
; the icmp to cost 0 and the select to cost 4.
;   %loadaddr, %loadaddr2 - addresses of the two input vectors
;   %blend                - not referenced by the body
;   %storeaddr            - address the selected vector is stored to
%T0_10 = type <16 x i16>
%T1_10 = type <16 x i1>
; CHECK-LABEL: func_blend10:
define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
                           %T1_10* %blend, %T0_10* %storeaddr) {
  %v0 = load %T0_10, %T0_10* %loadaddr
  %v1 = load %T0_10, %T0_10* %loadaddr2
  %c = icmp slt %T0_10 %v0, %v1
; CHECK: vmin.s16
; CHECK: vmin.s16
; COST: func_blend10
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
  store %T0_10 %r, %T0_10* %storeaddr
  ret void
}
; Signed min on <8 x i32>: icmp slt + select of the same operands.
; The llc checks expect two vmin.s32 instructions; the cost-model checks expect
; the icmp to cost 0 and the select to cost 4.
;   %loadaddr, %loadaddr2 - addresses of the two input vectors
;   %blend                - not referenced by the body
;   %storeaddr            - address the selected vector is stored to
%T0_14 = type <8 x i32>
%T1_14 = type <8 x i1>
; CHECK-LABEL: func_blend14:
define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
                           %T1_14* %blend, %T0_14* %storeaddr) {
  %v0 = load %T0_14, %T0_14* %loadaddr
  %v1 = load %T0_14, %T0_14* %loadaddr2
  %c = icmp slt %T0_14 %v0, %v1
; CHECK: vmin.s32
; CHECK: vmin.s32
; COST: func_blend14
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
  store %T0_14 %r, %T0_14* %storeaddr
  ret void
}
; Signed min on <16 x i32>: icmp slt + select of the same operands.
; The llc checks require at least two vmin.s32 instructions; the cost-model
; checks expect the icmp to cost 0 and the select to cost 8.
;   %loadaddr, %loadaddr2 - addresses of the two input vectors
;   %blend                - not referenced by the body
;   %storeaddr            - address the selected vector is stored to
%T0_15 = type <16 x i32>
%T1_15 = type <16 x i1>
; CHECK-LABEL: func_blend15:
define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
                           %T1_15* %blend, %T0_15* %storeaddr) {
; CHECK: vmin.s32
; CHECK: vmin.s32
  %v0 = load %T0_15, %T0_15* %loadaddr
  %v1 = load %T0_15, %T0_15* %loadaddr2
  %c = icmp slt %T0_15 %v0, %v1
; COST: func_blend15
; COST: cost of 0 {{.*}} icmp
; COST: cost of 8 {{.*}} select
  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
  store %T0_15 %r, %T0_15* %storeaddr
  ret void
}

; The cost model was adjusted for the following selects to reflect how they
; are currently lowered. Whenever the code lowering improves, the expected
; costs below must be adjusted as well.
; icmp slt + select on <4 x i64>. No single min instruction is expected here:
; the checked sequence compares each 64-bit lane with subs/sbcs in core
; registers, builds the lane masks with vdup.32 and blends with vbit.
; The cost-model checks expect the icmp to cost 0 and the select to cost 21.
; NOTE(review): the exact-sequence checks look tool-generated; presumably they
; should be regenerated rather than edited by hand - confirm.
;   %loadaddr, %loadaddr2 - addresses of the two input vectors
;   %blend                - not referenced by the body
;   %storeaddr            - address the selected vector is stored to
%T0_18 = type <4 x i64>
%T1_18 = type <4 x i1>
define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
                           %T1_18* %blend, %T0_18* %storeaddr) {
; CHECK-LABEL: func_blend18:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT:    vld1.64 {d22, d23}, [r0:128]!
; CHECK-NEXT:    vmov r4, r6, d16
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT:    vmov lr, r12, d18
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    vmov r2, r1, d20
; CHECK-NEXT:    subs r2, r2, lr
; CHECK-NEXT:    vmov r7, lr, d17
; CHECK-NEXT:    vmov r2, r5, d22
; CHECK-NEXT:    sbcs r1, r1, r12
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    movlt r1, #1
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    mvnne r1, #0
; CHECK-NEXT:    subs r2, r2, r4
; CHECK-NEXT:    sbcs r6, r5, r6
; CHECK-NEXT:    vmov r2, r12, d19
; CHECK-NEXT:    vmov r5, r4, d21
; CHECK-NEXT:    mov r6, #0
; CHECK-NEXT:    movlt r6, #1
; CHECK-NEXT:    cmp r6, #0
; CHECK-NEXT:    mvnne r6, #0
; CHECK-NEXT:    subs r2, r5, r2
; CHECK-NEXT:    sbcs r4, r4, r12
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    vmov r4, r5, d23
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    subs r7, r4, r7
; CHECK-NEXT:    sbcs r7, r5, lr
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    vdup.32 d25, r0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d24, r6
; CHECK-NEXT:    vdup.32 d27, r2
; CHECK-NEXT:    vbit q8, q11, q12
; CHECK-NEXT:    vdup.32 d26, r1
; CHECK-NEXT:    vbit q9, q10, q13
; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %v0 = load %T0_18, %T0_18* %loadaddr
  %v1 = load %T0_18, %T0_18* %loadaddr2
  %c = icmp slt %T0_18 %v0, %v1
; COST: func_blend18
; COST: cost of 0 {{.*}} icmp
; COST: cost of 21 {{.*}} select
  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
  store %T0_18 %r, %T0_18* %storeaddr
  ret void
}
; icmp slt + select on <8 x i64>. As with the <4 x i64> case, the checked
; sequence compares each 64-bit lane with subs/sbcs in core registers, builds
; lane masks with vdup.32 and blends with vbit/vbsl.
; The cost-model checks expect the icmp to cost 0 and the select to cost 54.
; NOTE(review): the exact-sequence checks look tool-generated; presumably they
; should be regenerated rather than edited by hand - confirm.
;   %loadaddr, %loadaddr2 - addresses of the two input vectors
;   %blend                - not referenced by the body
;   %storeaddr            - address the selected vector is stored to
%T0_19 = type <8 x i64>
%T1_19 = type <8 x i1>
define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
                           %T1_19* %blend, %T0_19* %storeaddr) {
; CHECK-LABEL: func_blend19:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT:    add r2, r1, #48
; CHECK-NEXT:    mov r8, #0
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2:128]
; CHECK-NEXT:    add r2, r0, #48
; CHECK-NEXT:    mov lr, #0
; CHECK-NEXT:    vld1.64 {d18, d19}, [r2:128]
; CHECK-NEXT:    vmov r2, r12, d16
; CHECK-NEXT:    vmov r6, r7, d17
; CHECK-NEXT:    vmov r4, r5, d18
; CHECK-NEXT:    subs r2, r4, r2
; CHECK-NEXT:    sbcs r2, r5, r12
; CHECK-NEXT:    mov r12, #0
; CHECK-NEXT:    vmov r2, r4, d19
; CHECK-NEXT:    movlt r12, #1
; CHECK-NEXT:    cmp r12, #0
; CHECK-NEXT:    mov r5, r1
; CHECK-NEXT:    mvnne r12, #0
; CHECK-NEXT:    vld1.64 {d24, d25}, [r5:128]!
; CHECK-NEXT:    vld1.64 {d20, d21}, [r5:128]
; CHECK-NEXT:    subs r2, r2, r6
; CHECK-NEXT:    mov r2, r0
; CHECK-NEXT:    add r0, r0, #32
; CHECK-NEXT:    vld1.64 {d26, d27}, [r2:128]!
; CHECK-NEXT:    vld1.64 {d22, d23}, [r2:128]
; CHECK-NEXT:    sbcs r2, r4, r7
; CHECK-NEXT:    vmov r4, r5, d21
; CHECK-NEXT:    movlt r8, #1
; CHECK-NEXT:    vmov r6, r7, d23
; CHECK-NEXT:    cmp r8, #0
; CHECK-NEXT:    mvnne r8, #0
; CHECK-NEXT:    vld1.64 {d28, d29}, [r0:128]
; CHECK-NEXT:    add r0, r1, #32
; CHECK-NEXT:    vld1.64 {d30, d31}, [r0:128]
; CHECK-NEXT:    vmov r0, r1, d20
; CHECK-NEXT:    vdup.32 d7, r8
; CHECK-NEXT:    vdup.32 d6, r12
; CHECK-NEXT:    subs r4, r6, r4
; CHECK-NEXT:    sbcs r4, r7, r5
; CHECK-NEXT:    vmov r5, r6, d24
; CHECK-NEXT:    vmov r7, r2, d26
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    vdup.32 d5, r4
; CHECK-NEXT:    subs r5, r7, r5
; CHECK-NEXT:    sbcs r2, r2, r6
; CHECK-NEXT:    vmov r7, r6, d27
; CHECK-NEXT:    vmov r2, r9, d25
; CHECK-NEXT:    mov r5, #0
; CHECK-NEXT:    movlt r5, #1
; CHECK-NEXT:    cmp r5, #0
; CHECK-NEXT:    mvnne r5, #0
; CHECK-NEXT:    subs r2, r7, r2
; CHECK-NEXT:    sbcs r2, r6, r9
; CHECK-NEXT:    vmov r6, r7, d22
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d1, r2
; CHECK-NEXT:    vdup.32 d0, r5
; CHECK-NEXT:    vbit q12, q13, q0
; CHECK-NEXT:    subs r0, r6, r0
; CHECK-NEXT:    vmov r2, r6, d28
; CHECK-NEXT:    sbcs r0, r7, r1
; CHECK-NEXT:    mov r7, #0
; CHECK-NEXT:    vmov r0, r1, d30
; CHECK-NEXT:    movlt r7, #1
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    vmov r2, r5, d29
; CHECK-NEXT:    sbcs r0, r6, r1
; CHECK-NEXT:    mov r6, #0
; CHECK-NEXT:    vmov r0, r1, d31
; CHECK-NEXT:    movlt r6, #1
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs r0, r5, r1
; CHECK-NEXT:    movlt lr, #1
; CHECK-NEXT:    cmp lr, #0
; CHECK-NEXT:    mvnne lr, #0
; CHECK-NEXT:    cmp r6, #0
; CHECK-NEXT:    mvnne r6, #0
; CHECK-NEXT:    vdup.32 d3, lr
; CHECK-NEXT:    vdup.32 d2, r6
; CHECK-NEXT:    cmp r7, #0
; CHECK-NEXT:    vorr q13, q1, q1
; CHECK-NEXT:    mvnne r7, #0
; CHECK-NEXT:    vdup.32 d4, r7
; CHECK-NEXT:    add r0, r3, #32
; CHECK-NEXT:    vbsl q13, q14, q15
; CHECK-NEXT:    vbit q10, q11, q2
; CHECK-NEXT:    vbit q8, q9, q3
; CHECK-NEXT:    vst1.64 {d26, d27}, [r0:128]
; CHECK-NEXT:    add r0, r3, #48
; CHECK-NEXT:    vst1.64 {d24, d25}, [r3:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    vst1.64 {d20, d21}, [r3:128]
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %v0 = load %T0_19, %T0_19* %loadaddr
  %v1 = load %T0_19, %T0_19* %loadaddr2
  %c = icmp slt %T0_19 %v0, %v1
; COST: func_blend19
; COST: cost of 0 {{.*}} icmp
; COST: cost of 54 {{.*}} select
  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
  store %T0_19 %r, %T0_19* %storeaddr
  ret void
}
; icmp slt + select on <16 x i64>. The checked sequence compares each 64-bit
; lane with subs/sbcs in core registers, builds lane masks with vdup.32 and
; blends with vbit/vbif/vbsl; it also saves d8-d11 and spills r3 to the stack.
; The cost-model checks expect the icmp to cost 0 and the select to cost 108.
; NOTE(review): the exact-sequence checks look tool-generated; presumably they
; should be regenerated rather than edited by hand - confirm.
;   %loadaddr, %loadaddr2 - addresses of the two input vectors
;   %blend                - not referenced by the body
;   %storeaddr            - address the selected vector is stored to
%T0_20 = type <16 x i64>
%T1_20 = type <16 x i1>
define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
                           %T1_20* %blend, %T0_20* %storeaddr) {
; CHECK-LABEL: func_blend20:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    .pad #4
; CHECK-NEXT:    sub sp, sp, #4
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, sp, #8
; CHECK-NEXT:    add r9, r1, #64
; CHECK-NEXT:    mov r2, #32
; CHECK-NEXT:    add r8, r0, #64
; CHECK-NEXT:    vld1.64 {d16, d17}, [r9:128], r2
; CHECK-NEXT:    mov r10, r1
; CHECK-NEXT:    mov r11, r0
; CHECK-NEXT:    vld1.64 {d18, d19}, [r8:128], r2
; CHECK-NEXT:    vmov r7, r5, d17
; CHECK-NEXT:    vmov r6, r2, d19
; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
; CHECK-NEXT:    vld1.64 {d22, d23}, [r10:128]!
; CHECK-NEXT:    subs r7, r6, r7
; CHECK-NEXT:    sbcs r2, r2, r5
; CHECK-NEXT:    vmov r5, r6, d16
; CHECK-NEXT:    vmov r7, r4, d18
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d21, r2
; CHECK-NEXT:    subs r5, r7, r5
; CHECK-NEXT:    sbcs r4, r4, r6
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    vdup.32 d20, r4
; CHECK-NEXT:    vmov r2, r4, d23
; CHECK-NEXT:    vbit q8, q9, q10
; CHECK-NEXT:    vld1.64 {d18, d19}, [r11:128]!
; CHECK-NEXT:    vmov r7, r5, d19
; CHECK-NEXT:    subs r2, r7, r2
; CHECK-NEXT:    sbcs r2, r5, r4
; CHECK-NEXT:    vmov r5, r7, d18
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d21, r2
; CHECK-NEXT:    vmov r2, r4, d22
; CHECK-NEXT:    subs r2, r5, r2
; CHECK-NEXT:    sbcs r2, r7, r4
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d20, r2
; CHECK-NEXT:    add r2, r0, #48
; CHECK-NEXT:    vbif q9, q11, q10
; CHECK-NEXT:    vld1.64 {d30, d31}, [r2:128]
; CHECK-NEXT:    add r2, r1, #48
; CHECK-NEXT:    vld1.64 {d2, d3}, [r2:128]
; CHECK-NEXT:    vmov r5, r7, d30
; CHECK-NEXT:    vmov r2, r4, d2
; CHECK-NEXT:    vld1.64 {d26, d27}, [r11:128]
; CHECK-NEXT:    vld1.64 {d0, d1}, [r10:128]
; CHECK-NEXT:    vld1.64 {d24, d25}, [r9:128]!
; CHECK-NEXT:    vld1.64 {d22, d23}, [r9:128]
; CHECK-NEXT:    vld1.64 {d20, d21}, [r8:128]!
; CHECK-NEXT:    vmov r11, r10, d21
; CHECK-NEXT:    subs r2, r5, r2
; CHECK-NEXT:    sbcs r2, r7, r4
; CHECK-NEXT:    vmov r7, r6, d31
; CHECK-NEXT:    vmov r2, r5, d3
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    subs r2, r7, r2
; CHECK-NEXT:    mov r7, #0
; CHECK-NEXT:    sbcs r2, r6, r5
; CHECK-NEXT:    vmov r6, r5, d27
; CHECK-NEXT:    vmov r2, r9, d1
; CHECK-NEXT:    movlt r7, #1
; CHECK-NEXT:    cmp r7, #0
; CHECK-NEXT:    mvnne r7, #0
; CHECK-NEXT:    vdup.32 d7, r7
; CHECK-NEXT:    vdup.32 d6, r4
; CHECK-NEXT:    subs r2, r6, r2
; CHECK-NEXT:    sbcs r2, r5, r9
; CHECK-NEXT:    vmov r6, r5, d26
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d5, r2
; CHECK-NEXT:    vmov r2, r9, d0
; CHECK-NEXT:    subs r2, r6, r2
; CHECK-NEXT:    sbcs r2, r5, r9
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    vdup.32 d4, r2
; CHECK-NEXT:    add r2, r1, #32
; CHECK-NEXT:    vld1.64 {d28, d29}, [r2:128]
; CHECK-NEXT:    add r2, r0, #32
; CHECK-NEXT:    vbif q13, q0, q2
; CHECK-NEXT:    add r1, r1, #80
; CHECK-NEXT:    vld1.64 {d0, d1}, [r2:128]
; CHECK-NEXT:    vmov r4, r5, d28
; CHECK-NEXT:    vbif q15, q1, q3
; CHECK-NEXT:    add r0, r0, #80
; CHECK-NEXT:    vmov r2, r6, d0
; CHECK-NEXT:    vld1.64 {d2, d3}, [r8:128]
; CHECK-NEXT:    vmov r9, r8, d25
; CHECK-NEXT:    vld1.64 {d8, d9}, [r0:128]
; CHECK-NEXT:    vld1.64 {d6, d7}, [r1:128]
; CHECK-NEXT:    vmov r3, r12, d8
; CHECK-NEXT:    subs r2, r2, r4
; CHECK-NEXT:    sbcs r2, r6, r5
; CHECK-NEXT:    vmov r4, r5, d29
; CHECK-NEXT:    vmov r6, r7, d1
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    subs r4, r6, r4
; CHECK-NEXT:    sbcs r4, r7, r5
; CHECK-NEXT:    vmov r5, r6, d2
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    vdup.32 d5, r4
; CHECK-NEXT:    vdup.32 d4, r2
; CHECK-NEXT:    vmov r2, r4, d22
; CHECK-NEXT:    vbit q14, q0, q2
; CHECK-NEXT:    subs r2, r5, r2
; CHECK-NEXT:    sbcs r2, r6, r4
; CHECK-NEXT:    vmov r4, r5, d24
; CHECK-NEXT:    vmov r6, r7, d20
; CHECK-NEXT:    mov r2, #0
; CHECK-NEXT:    movlt r2, #1
; CHECK-NEXT:    cmp r2, #0
; CHECK-NEXT:    mvnne r2, #0
; CHECK-NEXT:    subs r1, r6, r4
; CHECK-NEXT:    vmov r0, r6, d9
; CHECK-NEXT:    sbcs r1, r7, r5
; CHECK-NEXT:    vmov r4, r5, d7
; CHECK-NEXT:    mov r1, #0
; CHECK-NEXT:    movlt r1, #1
; CHECK-NEXT:    cmp r1, #0
; CHECK-NEXT:    mvnne r1, #0
; CHECK-NEXT:    subs r0, r0, r4
; CHECK-NEXT:    vmov r7, r4, d23
; CHECK-NEXT:    sbcs r0, r6, r5
; CHECK-NEXT:    vmov r5, lr, d6
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vdup.32 d11, r0
; CHECK-NEXT:    vmov r0, r6, d3
; CHECK-NEXT:    subs r0, r0, r7
; CHECK-NEXT:    sbcs r0, r6, r4
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movlt r0, #1
; CHECK-NEXT:    subs r4, r11, r9
; CHECK-NEXT:    sbcs r4, r10, r8
; CHECK-NEXT:    mov r4, #0
; CHECK-NEXT:    movlt r4, #1
; CHECK-NEXT:    subs r3, r3, r5
; CHECK-NEXT:    sbcs r3, r12, lr
; CHECK-NEXT:    mov r3, #0
; CHECK-NEXT:    movlt r3, #1
; CHECK-NEXT:    cmp r3, #0
; CHECK-NEXT:    mvnne r3, #0
; CHECK-NEXT:    cmp r4, #0
; CHECK-NEXT:    mvnne r4, #0
; CHECK-NEXT:    vdup.32 d10, r3
; CHECK-NEXT:    vdup.32 d1, r4
; CHECK-NEXT:    vorr q2, q5, q5
; CHECK-NEXT:    vdup.32 d0, r1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    vbsl q2, q4, q3
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    vbif q10, q12, q0
; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-NEXT:    vdup.32 d7, r0
; CHECK-NEXT:    add r0, r1, #80
; CHECK-NEXT:    vdup.32 d6, r2
; CHECK-NEXT:    vbit q11, q1, q3
; CHECK-NEXT:    vst1.64 {d4, d5}, [r0:128]
; CHECK-NEXT:    add r0, r1, #32
; CHECK-NEXT:    vst1.64 {d28, d29}, [r0:128]
; CHECK-NEXT:    add r0, r1, #48
; CHECK-NEXT:    vst1.64 {d30, d31}, [r0:128]
; CHECK-NEXT:    add r0, r1, #64
; CHECK-NEXT:    vst1.64 {d18, d19}, [r1:128]!
; CHECK-NEXT:    vst1.64 {d26, d27}, [r1:128]
; CHECK-NEXT:    mov r1, #32
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128], r1
; CHECK-NEXT:    vst1.64 {d20, d21}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d22, d23}, [r0:128]
; CHECK-NEXT:    add sp, sp, #8
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    add sp, sp, #4
; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT:    mov pc, lr
  %v0 = load %T0_20, %T0_20* %loadaddr
  %v1 = load %T0_20, %T0_20* %loadaddr2
  %c = icmp slt %T0_20 %v0, %v1
; COST: func_blend20
; COST: cost of 0 {{.*}} icmp
; COST: cost of 108 {{.*}} select
  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
  store %T0_20 %r, %T0_20* %storeaddr
  ret void
}
