; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s


; Verify that each of the following test cases is folded into a single
; instruction which performs a blend operation.

; Operands of the OR: [a0, 0] | [0, b1].
define <2 x i64> @test1(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test1:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Operands of the OR: [0, 0, a2, a3] | [b0, b1, 0, 0].
define <4 x i32> @test2(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test2:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Operands of the OR: [0, a1] | [b0, 0].
define <2 x i64> @test3(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test3:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 1>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Operands of the OR: [a0, 0, 0, 0] | [0, b1, b2, b3].
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test4:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Operands of the OR: [0, a1, a2, a3] | [b0, 0, 0, 0].
define <4 x i32> @test5(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test5:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 1, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Operands of the OR: [a0, a1, 0, 0] | [0, 0, b2, b3].
define <4 x i32> @test6(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test6:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Same lane selection as test6, but expressed with AND masks instead of
; shuffles: [a0, a1, 0, 0] | [0, 0, b2, b3].
define <4 x i32> @test7(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test7:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <4 x i32> %a, <i32 -1, i32 -1, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 0, i32 -1, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; AND-mask form: [a0, 0] | [0, b1].
define <2 x i64> @test8(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test8:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <2 x i64> %a, <i64 -1, i64 0>
  %and2 = and <2 x i64> %b, <i64 0, i64 -1>
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}


; AND-mask form: [0, 0, a2, a3] | [b0, b1, 0, 0].
define <4 x i32> @test9(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test9:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <4 x i32> %a, <i32 0, i32 0, i32 -1, i32 -1>
  %and2 = and <4 x i32> %b, <i32 -1, i32 -1, i32 0, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; AND-mask form: [0, a1] | [b0, 0].
define <2 x i64> @test10(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test10:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <2 x i64> %a, <i64 0, i64 -1>
  %and2 = and <2 x i64> %b, <i64 -1, i64 0>
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}


; AND-mask form: [a0, 0, 0, 0] | [0, b1, b2, b3].
define <4 x i32> @test11(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test11:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; AND-mask form: [0, a1, a2, a3] | [b0, 0, 0, 0].
define <4 x i32> @test12(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test12:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %and1 = and <4 x i32> %a, <i32 0, i32 -1, i32 -1, i32 -1>
  %and2 = and <4 x i32> %b, <i32 -1, i32 0, i32 0, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}


; Verify that the following test cases are folded into single shuffles.

; Operands of the OR: [a1, a1, 0, 0] | [0, 0, b2, b3].
define <4 x i32> @test13(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test13:
; CHECK:       # BB#0:
; CHECK-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 1, i32 1, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Operands of the OR: [a0, 0] | [0, b0].
define <2 x i64> @test14(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test14:
; CHECK:       # BB#0:
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Operands of the OR: [0, 0, a2, a1] | [b2, b1, 0, 0].
define <4 x i32> @test15(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test15:
; CHECK:       # BB#0:
; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,1],xmm0[2,1]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 1>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 2, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Operands of the OR: [0, a0] | [b0, 0].
define <2 x i64> @test16(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test16:
; CHECK:       # BB#0:
; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; CHECK-NEXT:    movdqa %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Verify that the dag-combiner does not fold an OR of two shuffles into a single
; shuffle instruction when the shuffle indexes are not compatible.

; Operands of the OR: [0, a0, 0, a2] | [b0, b1, 0, 0]; lane 1 takes a
; non-zero element from both operands.
define <4 x i32> @test17(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test17:
; CHECK:       # BB#0:
; CHECK-NEXT:    xorps %xmm2, %xmm2
; CHECK-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,0]
; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,2]
; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; CHECK-NEXT:    orps %xmm1, %xmm2
; CHECK-NEXT:    movaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 2>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Operands of the OR: [0, a0, 0, 0] | [b0, 0, 0, 0].
define <4 x i32> @test18(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test18:
; CHECK:       # BB#0:
; CHECK-NEXT:    pxor %xmm2, %xmm2
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
; CHECK-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5,6,7]
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 4>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 4, i32 4>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Operands of the OR: [0, a0, 0, a3] | [b0, 0, b2, b2]; lane 3 takes a
; non-zero element from both operands.
define <4 x i32> @test19(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test19:
; CHECK:       # BB#0:
; CHECK-NEXT:    xorps %xmm2, %xmm2
; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm0[0,3]
; CHECK-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,2,1,3]
; CHECK-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],zero,xmm1[2,2]
; CHECK-NEXT:    orps %xmm1, %xmm2
; CHECK-NEXT:    movaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer, <4 x i32><i32 4, i32 0, i32 4, i32 3>
  %shuf2 = shufflevector <4 x i32> %b, <4 x i32> zeroinitializer, <4 x i32><i32 0, i32 4, i32 2, i32 2>
  %or = or <4 x i32> %shuf1, %shuf2
  ret <4 x i32> %or
}


; Operands of the OR: [a0, 0] | [b0, 0]; lane 0 takes a non-zero element
; from both operands.
define <2 x i64> @test20(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test20:
; CHECK:       # BB#0:
; CHECK-NEXT:    orps %xmm1, %xmm0
; CHECK-NEXT:    movq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 0, i32 2>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}


; Operands of the OR: [0, a0] | [0, b0]; lane 1 takes a non-zero element
; from both operands.
define <2 x i64> @test21(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test21:
; CHECK:       # BB#0:
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %shuf2 = shufflevector <2 x i64> %b, <2 x i64> zeroinitializer, <2 x i32><i32 2, i32 0>
  %or = or <2 x i64> %shuf1, %shuf2
  ret <2 x i64> %or
}

; Verify that the DAGCombiner doesn't crash in the attempt to check if a shuffle
; with illegal type has a legal mask. Method 'isShuffleMaskLegal' only knows how to
; handle legal vector value types.
; Operands of the OR: [0, 0, a2, a3] | [b0, b1, 0, 0], on <4 x i8> vectors.
define <4 x i8> @test_crash(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_crash:
; CHECK:       # BB#0:
; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; CHECK-NEXT:    retq
  %shuf1 = shufflevector <4 x i8> %a, <4 x i8> zeroinitializer, <4 x i32><i32 4, i32 4, i32 2, i32 3>
  %shuf2 = shufflevector <4 x i8> %b, <4 x i8> zeroinitializer, <4 x i32><i32 0, i32 1, i32 4, i32 4>
  %or = or <4 x i8> %shuf1, %shuf2
  ret <4 x i8> %or
}
