; RUN: opt < %s -cost-model -analyze -mtriple=arm-apple-ios6.0.0 -mcpu=cortex-a8 | FileCheck %s --check-prefix=COST
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
; Make sure that ARM backend with NEON handles vselect.
;
; Two pipelines consume this file: the cost-model analysis (verified with the
; COST prefix) and llc code generation (verified with the default prefix).
; Every function below has the same shape: a signed integer vector compare
; whose result selects between the two compared operands.

; <4 x i32> signed-greater-than compare + select of the same operands; the
; code generator is expected to emit a single q-register vmax.s32.
define void @vmax_v4i32(<4 x i32>* %m, <4 x i32> %a, <4 x i32> %b) {
; CHECK: vmax.s32 {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
  %cmpres = icmp sgt <4 x i32> %a, %b
  %maxres = select <4 x i1> %cmpres, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %maxres, <4 x i32>* %m
  ret void
}

; <16 x i16> signed-less-than compare + select: two vmin.s16 are looked for in
; the generated code; the cost model is expected to report 0 for the icmp and
; 4 for the select. The %blend argument is not used by the body.
%T0_10 = type <16 x i16>
%T1_10 = type <16 x i1>
; CHECK-LABEL: func_blend10:
define void @func_blend10(%T0_10* %loadaddr, %T0_10* %loadaddr2,
                          %T1_10* %blend, %T0_10* %storeaddr) {
  %v0 = load %T0_10, %T0_10* %loadaddr
  %v1 = load %T0_10, %T0_10* %loadaddr2
  %c = icmp slt %T0_10 %v0, %v1
; CHECK: vmin.s16
; CHECK: vmin.s16
; COST: func_blend10
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
  %r = select %T1_10 %c, %T0_10 %v0, %T0_10 %v1
  store %T0_10 %r, %T0_10* %storeaddr
  ret void
}

; <8 x i32> variant: two vmin.s32 are looked for; expected select cost is 4.
%T0_14 = type <8 x i32>
%T1_14 = type <8 x i1>
; CHECK-LABEL: func_blend14:
define void @func_blend14(%T0_14* %loadaddr, %T0_14* %loadaddr2,
                          %T1_14* %blend, %T0_14* %storeaddr) {
  %v0 = load %T0_14, %T0_14* %loadaddr
  %v1 = load %T0_14, %T0_14* %loadaddr2
  %c = icmp slt %T0_14 %v0, %v1
; CHECK: vmin.s32
; CHECK: vmin.s32
; COST: func_blend14
; COST: cost of 0 {{.*}} icmp
; COST: cost of 4 {{.*}} select
  %r = select %T1_14 %c, %T0_14 %v0, %T0_14 %v1
  store %T0_14 %r, %T0_14* %storeaddr
  ret void
}

; <16 x i32> variant: two vmin.s32 are looked for; expected select cost is 8.
%T0_15 = type <16 x i32>
%T1_15 = type <16 x i1>
; CHECK-LABEL: func_blend15:
define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
                          %T1_15* %blend, %T0_15* %storeaddr) {
; CHECK: vmin.s32
; CHECK: vmin.s32
  %v0 = load %T0_15, %T0_15* %loadaddr
  %v1 = load %T0_15, %T0_15* %loadaddr2
  %c = icmp slt %T0_15 %v0, %v1
; COST: func_blend15
; COST: cost of 0 {{.*}} icmp
; COST: cost of 8 {{.*}} select
  %r = select %T1_15 %c, %T0_15 %v0, %T0_15 %v1
  store %T0_15 %r, %T0_15* %storeaddr
  ret void
}

; We adjusted the cost model of the following selects. When we improve code
; lowering we also need to adjust the cost.
;
; The i64-element functions below pin the complete instruction sequence: each
; 64-bit lane comparison appears as a subs/sbcs pair, the outcome is turned
; into a mask with movlt/mvnne + vdup.32, and the blend itself is done with
; vbit/vbif/vbsl.

; <4 x i64> variant; expected select cost is 21.
%T0_18 = type <4 x i64>
%T1_18 = type <4 x i1>
define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
                          %T1_18* %blend, %T0_18* %storeaddr) {
; CHECK-LABEL: func_blend18:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: vld1.64 {d16, d17}, [r1:128]!
; CHECK-NEXT: vld1.64 {d22, d23}, [r0:128]!
; CHECK-NEXT: vmov r4, r6, d16
; CHECK-NEXT: vld1.64 {d18, d19}, [r1:128]
; CHECK-NEXT: vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT: vmov lr, r12, d18
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: vmov r2, r1, d20
; CHECK-NEXT: subs r2, r2, lr
; CHECK-NEXT: vmov r7, lr, d17
; CHECK-NEXT: vmov r2, r5, d22
; CHECK-NEXT: sbcs r1, r1, r12
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: subs r2, r2, r4
; CHECK-NEXT: sbcs r6, r5, r6
; CHECK-NEXT: vmov r2, r12, d19
; CHECK-NEXT: vmov r5, r4, d21
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: movlt r6, #1
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvnne r6, #0
; CHECK-NEXT: subs r2, r5, r2
; CHECK-NEXT: sbcs r4, r4, r12
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: vmov r4, r5, d23
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: subs r7, r4, r7
; CHECK-NEXT: sbcs r7, r5, lr
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: vdup.32 d25, r0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d24, r6
; CHECK-NEXT: vdup.32 d27, r2
; CHECK-NEXT: vbit q8, q11, q12
; CHECK-NEXT: vdup.32 d26, r1
; CHECK-NEXT: vbit q9, q10, q13
; CHECK-NEXT: vst1.64 {d16, d17}, [r3:128]!
; CHECK-NEXT: vst1.64 {d18, d19}, [r3:128]
; CHECK-NEXT: pop {r4, r5, r6, r7, r11, lr}
; CHECK-NEXT: mov pc, lr
  %v0 = load %T0_18, %T0_18* %loadaddr
  %v1 = load %T0_18, %T0_18* %loadaddr2
  %c = icmp slt %T0_18 %v0, %v1
; COST: func_blend18
; COST: cost of 0 {{.*}} icmp
; COST: cost of 21 {{.*}} select
  %r = select %T1_18 %c, %T0_18 %v0, %T0_18 %v1
  store %T0_18 %r, %T0_18* %storeaddr
  ret void
}

; <8 x i64> variant; expected select cost is 54.
%T0_19 = type <8 x i64>
%T1_19 = type <8 x i1>
define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
                          %T1_19* %blend, %T0_19* %storeaddr) {
; CHECK-LABEL: func_blend19:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: add r2, r1, #48
; CHECK-NEXT: mov r8, #0
; CHECK-NEXT: vld1.64 {d16, d17}, [r2:128]
; CHECK-NEXT: add r2, r0, #48
; CHECK-NEXT: mov lr, #0
; CHECK-NEXT: vld1.64 {d18, d19}, [r2:128]
; CHECK-NEXT: vmov r2, r12, d16
; CHECK-NEXT: vmov r6, r7, d17
; CHECK-NEXT: vmov r4, r5, d18
; CHECK-NEXT: subs r2, r4, r2
; CHECK-NEXT: sbcs r2, r5, r12
; CHECK-NEXT: mov r12, #0
; CHECK-NEXT: vmov r2, r4, d19
; CHECK-NEXT: movlt r12, #1
; CHECK-NEXT: cmp r12, #0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: mvnne r12, #0
; CHECK-NEXT: vld1.64 {d24, d25}, [r5:128]!
; CHECK-NEXT: vld1.64 {d20, d21}, [r5:128]
; CHECK-NEXT: subs r2, r2, r6
; CHECK-NEXT: mov r2, r0
; CHECK-NEXT: add r0, r0, #32
; CHECK-NEXT: vld1.64 {d26, d27}, [r2:128]!
; CHECK-NEXT: vld1.64 {d22, d23}, [r2:128]
; CHECK-NEXT: sbcs r2, r4, r7
; CHECK-NEXT: vmov r4, r5, d21
; CHECK-NEXT: movlt r8, #1
; CHECK-NEXT: vmov r6, r7, d23
; CHECK-NEXT: cmp r8, #0
; CHECK-NEXT: mvnne r8, #0
; CHECK-NEXT: vld1.64 {d28, d29}, [r0:128]
; CHECK-NEXT: add r0, r1, #32
; CHECK-NEXT: vld1.64 {d30, d31}, [r0:128]
; CHECK-NEXT: vmov r0, r1, d20
; CHECK-NEXT: vdup.32 d7, r8
; CHECK-NEXT: vdup.32 d6, r12
; CHECK-NEXT: subs r4, r6, r4
; CHECK-NEXT: sbcs r4, r7, r5
; CHECK-NEXT: vmov r5, r6, d24
; CHECK-NEXT: vmov r7, r2, d26
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: vdup.32 d5, r4
; CHECK-NEXT: subs r5, r7, r5
; CHECK-NEXT: sbcs r2, r2, r6
; CHECK-NEXT: vmov r7, r6, d27
; CHECK-NEXT: vmov r2, r9, d25
; CHECK-NEXT: mov r5, #0
; CHECK-NEXT: movlt r5, #1
; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: mvnne r5, #0
; CHECK-NEXT: subs r2, r7, r2
; CHECK-NEXT: sbcs r2, r6, r9
; CHECK-NEXT: vmov r6, r7, d22
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d1, r2
; CHECK-NEXT: vdup.32 d0, r5
; CHECK-NEXT: vbit q12, q13, q0
; CHECK-NEXT: subs r0, r6, r0
; CHECK-NEXT: vmov r2, r6, d28
; CHECK-NEXT: sbcs r0, r7, r1
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: vmov r0, r1, d30
; CHECK-NEXT: movlt r7, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: vmov r2, r5, d29
; CHECK-NEXT: sbcs r0, r6, r1
; CHECK-NEXT: mov r6, #0
; CHECK-NEXT: vmov r0, r1, d31
; CHECK-NEXT: movlt r6, #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: sbcs r0, r5, r1
; CHECK-NEXT: movlt lr, #1
; CHECK-NEXT: cmp lr, #0
; CHECK-NEXT: mvnne lr, #0
; CHECK-NEXT: cmp r6, #0
; CHECK-NEXT: mvnne r6, #0
; CHECK-NEXT: vdup.32 d3, lr
; CHECK-NEXT: vdup.32 d2, r6
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: vorr q13, q1, q1
; CHECK-NEXT: mvnne r7, #0
; CHECK-NEXT: vdup.32 d4, r7
; CHECK-NEXT: add r0, r3, #32
; CHECK-NEXT: vbsl q13, q14, q15
; CHECK-NEXT: vbit q10, q11, q2
; CHECK-NEXT: vbit q8, q9, q3
; CHECK-NEXT: vst1.64 {d26, d27}, [r0:128]
; CHECK-NEXT: add r0, r3, #48
; CHECK-NEXT: vst1.64 {d24, d25}, [r3:128]!
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT: vst1.64 {d20, d21}, [r3:128]
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, lr}
; CHECK-NEXT: mov pc, lr
  %v0 = load %T0_19, %T0_19* %loadaddr
  %v1 = load %T0_19, %T0_19* %loadaddr2
  %c = icmp slt %T0_19 %v0, %v1
; COST: func_blend19
; COST: cost of 0 {{.*}} icmp
; COST: cost of 54 {{.*}} select
  %r = select %T1_19 %c, %T0_19 %v0, %T0_19 %v1
  store %T0_19 %r, %T0_19* %storeaddr
  ret void
}

; <16 x i64> variant; expected select cost is 108. The pinned sequence also
; saves d8-d11 and spills/reloads r3 (the store address) around the body.
%T0_20 = type <16 x i64>
%T1_20 = type <16 x i1>
define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
                          %T1_20* %blend, %T0_20* %storeaddr) {
; CHECK-LABEL: func_blend20:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: .pad #4
; CHECK-NEXT: sub sp, sp, #4
; CHECK-NEXT: .vsave {d8, d9, d10, d11}
; CHECK-NEXT: vpush {d8, d9, d10, d11}
; CHECK-NEXT: .pad #8
; CHECK-NEXT: sub sp, sp, #8
; CHECK-NEXT: add r9, r1, #64
; CHECK-NEXT: mov r2, #32
; CHECK-NEXT: add r8, r0, #64
; CHECK-NEXT: vld1.64 {d16, d17}, [r9:128], r2
; CHECK-NEXT: mov r10, r1
; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: vld1.64 {d18, d19}, [r8:128], r2
; CHECK-NEXT: vmov r7, r5, d17
; CHECK-NEXT: vmov r6, r2, d19
; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
; CHECK-NEXT: vld1.64 {d22, d23}, [r10:128]!
; CHECK-NEXT: subs r7, r6, r7
; CHECK-NEXT: sbcs r2, r2, r5
; CHECK-NEXT: vmov r5, r6, d16
; CHECK-NEXT: vmov r7, r4, d18
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d21, r2
; CHECK-NEXT: subs r5, r7, r5
; CHECK-NEXT: sbcs r4, r4, r6
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: vdup.32 d20, r4
; CHECK-NEXT: vmov r2, r4, d23
; CHECK-NEXT: vbit q8, q9, q10
; CHECK-NEXT: vld1.64 {d18, d19}, [r11:128]!
; CHECK-NEXT: vmov r7, r5, d19
; CHECK-NEXT: subs r2, r7, r2
; CHECK-NEXT: sbcs r2, r5, r4
; CHECK-NEXT: vmov r5, r7, d18
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d21, r2
; CHECK-NEXT: vmov r2, r4, d22
; CHECK-NEXT: subs r2, r5, r2
; CHECK-NEXT: sbcs r2, r7, r4
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d20, r2
; CHECK-NEXT: add r2, r0, #48
; CHECK-NEXT: vbif q9, q11, q10
; CHECK-NEXT: vld1.64 {d30, d31}, [r2:128]
; CHECK-NEXT: add r2, r1, #48
; CHECK-NEXT: vld1.64 {d2, d3}, [r2:128]
; CHECK-NEXT: vmov r5, r7, d30
; CHECK-NEXT: vmov r2, r4, d2
; CHECK-NEXT: vld1.64 {d26, d27}, [r11:128]
; CHECK-NEXT: vld1.64 {d0, d1}, [r10:128]
; CHECK-NEXT: vld1.64 {d24, d25}, [r9:128]!
; CHECK-NEXT: vld1.64 {d22, d23}, [r9:128]
; CHECK-NEXT: vld1.64 {d20, d21}, [r8:128]!
; CHECK-NEXT: vmov r11, r10, d21
; CHECK-NEXT: subs r2, r5, r2
; CHECK-NEXT: sbcs r2, r7, r4
; CHECK-NEXT: vmov r7, r6, d31
; CHECK-NEXT: vmov r2, r5, d3
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: subs r2, r7, r2
; CHECK-NEXT: mov r7, #0
; CHECK-NEXT: sbcs r2, r6, r5
; CHECK-NEXT: vmov r6, r5, d27
; CHECK-NEXT: vmov r2, r9, d1
; CHECK-NEXT: movlt r7, #1
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: mvnne r7, #0
; CHECK-NEXT: vdup.32 d7, r7
; CHECK-NEXT: vdup.32 d6, r4
; CHECK-NEXT: subs r2, r6, r2
; CHECK-NEXT: sbcs r2, r5, r9
; CHECK-NEXT: vmov r6, r5, d26
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d5, r2
; CHECK-NEXT: vmov r2, r9, d0
; CHECK-NEXT: subs r2, r6, r2
; CHECK-NEXT: sbcs r2, r5, r9
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: vdup.32 d4, r2
; CHECK-NEXT: add r2, r1, #32
; CHECK-NEXT: vld1.64 {d28, d29}, [r2:128]
; CHECK-NEXT: add r2, r0, #32
; CHECK-NEXT: vbif q13, q0, q2
; CHECK-NEXT: add r1, r1, #80
; CHECK-NEXT: vld1.64 {d0, d1}, [r2:128]
; CHECK-NEXT: vmov r4, r5, d28
; CHECK-NEXT: vbif q15, q1, q3
; CHECK-NEXT: add r0, r0, #80
; CHECK-NEXT: vmov r2, r6, d0
; CHECK-NEXT: vld1.64 {d2, d3}, [r8:128]
; CHECK-NEXT: vmov r9, r8, d25
; CHECK-NEXT: vld1.64 {d8, d9}, [r0:128]
; CHECK-NEXT: vld1.64 {d6, d7}, [r1:128]
; CHECK-NEXT: vmov r3, r12, d8
; CHECK-NEXT: subs r2, r2, r4
; CHECK-NEXT: sbcs r2, r6, r5
; CHECK-NEXT: vmov r4, r5, d29
; CHECK-NEXT: vmov r6, r7, d1
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: subs r4, r6, r4
; CHECK-NEXT: sbcs r4, r7, r5
; CHECK-NEXT: vmov r5, r6, d2
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: vdup.32 d5, r4
; CHECK-NEXT: vdup.32 d4, r2
; CHECK-NEXT: vmov r2, r4, d22
; CHECK-NEXT: vbit q14, q0, q2
; CHECK-NEXT: subs r2, r5, r2
; CHECK-NEXT: sbcs r2, r6, r4
; CHECK-NEXT: vmov r4, r5, d24
; CHECK-NEXT: vmov r6, r7, d20
; CHECK-NEXT: mov r2, #0
; CHECK-NEXT: movlt r2, #1
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: mvnne r2, #0
; CHECK-NEXT: subs r1, r6, r4
; CHECK-NEXT: vmov r0, r6, d9
; CHECK-NEXT: sbcs r1, r7, r5
; CHECK-NEXT: vmov r4, r5, d7
; CHECK-NEXT: mov r1, #0
; CHECK-NEXT: movlt r1, #1
; CHECK-NEXT: cmp r1, #0
; CHECK-NEXT: mvnne r1, #0
; CHECK-NEXT: subs r0, r0, r4
; CHECK-NEXT: vmov r7, r4, d23
; CHECK-NEXT: sbcs r0, r6, r5
; CHECK-NEXT: vmov r5, lr, d6
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vdup.32 d11, r0
; CHECK-NEXT: vmov r0, r6, d3
; CHECK-NEXT: subs r0, r0, r7
; CHECK-NEXT: sbcs r0, r6, r4
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: movlt r0, #1
; CHECK-NEXT: subs r4, r11, r9
; CHECK-NEXT: sbcs r4, r10, r8
; CHECK-NEXT: mov r4, #0
; CHECK-NEXT: movlt r4, #1
; CHECK-NEXT: subs r3, r3, r5
; CHECK-NEXT: sbcs r3, r12, lr
; CHECK-NEXT: mov r3, #0
; CHECK-NEXT: movlt r3, #1
; CHECK-NEXT: cmp r3, #0
; CHECK-NEXT: mvnne r3, #0
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: mvnne r4, #0
; CHECK-NEXT: vdup.32 d10, r3
; CHECK-NEXT: vdup.32 d1, r4
; CHECK-NEXT: vorr q2, q5, q5
; CHECK-NEXT: vdup.32 d0, r1
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: vbsl q2, q4, q3
; CHECK-NEXT: mvnne r0, #0
; CHECK-NEXT: vbif q10, q12, q0
; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vdup.32 d7, r0
; CHECK-NEXT: add r0, r1, #80
; CHECK-NEXT: vdup.32 d6, r2
; CHECK-NEXT: vbit q11, q1, q3
; CHECK-NEXT: vst1.64 {d4, d5}, [r0:128]
; CHECK-NEXT: add r0, r1, #32
; CHECK-NEXT: vst1.64 {d28, d29}, [r0:128]
; CHECK-NEXT: add r0, r1, #48
; CHECK-NEXT: vst1.64 {d30, d31}, [r0:128]
; CHECK-NEXT: add r0, r1, #64
; CHECK-NEXT: vst1.64 {d18, d19}, [r1:128]!
; CHECK-NEXT: vst1.64 {d26, d27}, [r1:128]
; CHECK-NEXT: mov r1, #32
; CHECK-NEXT: vst1.64 {d16, d17}, [r0:128], r1
; CHECK-NEXT: vst1.64 {d20, d21}, [r0:128]!
; CHECK-NEXT: vst1.64 {d22, d23}, [r0:128]
; CHECK-NEXT: add sp, sp, #8
; CHECK-NEXT: vpop {d8, d9, d10, d11}
; CHECK-NEXT: add sp, sp, #4
; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: mov pc, lr
  %v0 = load %T0_20, %T0_20* %loadaddr
  %v1 = load %T0_20, %T0_20* %loadaddr2
  %c = icmp slt %T0_20 %v0, %v1
; COST: func_blend20
; COST: cost of 0 {{.*}} icmp
; COST: cost of 108 {{.*}} select
  %r = select %T1_20 %c, %T0_20 %v0, %T0_20 %v1
  store %T0_20 %r, %T0_20* %storeaddr
  ret void
}