; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 -mcpu=corei7 | FileCheck %s

; Codegen tests for x86-64 vector shifts whose shift amount is the same
; constant in every lane.
;
; The first three sections cover one IR shift opcode each (shl, ashr, lshr).
; For every element width exercised there are three functions:
;   *_1  shift by 0            - no immediate-shift instruction is expected
;   *_2  shift by 1            - shl is expected as an add of the register to
;                                itself; ashr/lshr as an immediate shift by 1
;   *_3  shift by (width - 1)  - an immediate shift by the largest legal amount
;
; The final section covers chains of shifts (optionally separated by
; zext/trunc) and pins the instruction sequence expected for each chain.

; SSE2 Logical Shift Left

define <8 x i16> @test_sllw_1(<8 x i16> %InVec) {
entry:
  %shl = shl <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <8 x i16> %shl
}

; CHECK-LABEL: test_sllw_1:
; CHECK-NOT: psllw $0, %xmm0
; CHECK: ret

define <8 x i16> @test_sllw_2(<8 x i16> %InVec) {
entry:
  %shl = shl <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %shl
}

; CHECK-LABEL: test_sllw_2:
; CHECK: paddw %xmm0, %xmm0
; CHECK-NEXT: ret

define <8 x i16> @test_sllw_3(<8 x i16> %InVec) {
entry:
  %shl = shl <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %shl
}

; CHECK-LABEL: test_sllw_3:
; CHECK: psllw $15, %xmm0
; CHECK-NEXT: ret

define <4 x i32> @test_slld_1(<4 x i32> %InVec) {
entry:
  %shl = shl <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %shl
}

; CHECK-LABEL: test_slld_1:
; CHECK-NOT: pslld $0, %xmm0
; CHECK: ret

define <4 x i32> @test_slld_2(<4 x i32> %InVec) {
entry:
  %shl = shl <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %shl
}

; CHECK-LABEL: test_slld_2:
; CHECK: paddd %xmm0, %xmm0
; CHECK-NEXT: ret

define <4 x i32> @test_slld_3(<4 x i32> %InVec) {
entry:
  %shl = shl <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %shl
}

; CHECK-LABEL: test_slld_3:
; CHECK: pslld $31, %xmm0
; CHECK-NEXT: ret

define <2 x i64> @test_sllq_1(<2 x i64> %InVec) {
entry:
  %shl = shl <2 x i64> %InVec, <i64 0, i64 0>
  ret <2 x i64> %shl
}

; CHECK-LABEL: test_sllq_1:
; CHECK-NOT: psllq $0, %xmm0
; CHECK: ret

define <2 x i64> @test_sllq_2(<2 x i64> %InVec) {
entry:
  %shl = shl <2 x i64> %InVec, <i64 1, i64 1>
  ret <2 x i64> %shl
}

; CHECK-LABEL: test_sllq_2:
; CHECK: paddq %xmm0, %xmm0
; CHECK-NEXT: ret

define <2 x i64> @test_sllq_3(<2 x i64> %InVec) {
entry:
  %shl = shl <2 x i64> %InVec, <i64 63, i64 63>
  ret <2 x i64> %shl
}

; CHECK-LABEL: test_sllq_3:
; CHECK: psllq $63, %xmm0
; CHECK-NEXT: ret

; SSE2 Arithmetic Shift Right
; (only <8 x i16> and <4 x i32> are exercised in this section)

define <8 x i16> @test_sraw_1(<8 x i16> %InVec) {
entry:
  %sra = ashr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <8 x i16> %sra
}

; CHECK-LABEL: test_sraw_1:
; CHECK-NOT: psraw $0, %xmm0
; CHECK: ret

define <8 x i16> @test_sraw_2(<8 x i16> %InVec) {
entry:
  %sra = ashr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %sra
}

; CHECK-LABEL: test_sraw_2:
; CHECK: psraw $1, %xmm0
; CHECK-NEXT: ret

define <8 x i16> @test_sraw_3(<8 x i16> %InVec) {
entry:
  %sra = ashr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %sra
}

; CHECK-LABEL: test_sraw_3:
; CHECK: psraw $15, %xmm0
; CHECK-NEXT: ret

define <4 x i32> @test_srad_1(<4 x i32> %InVec) {
entry:
  %sra = ashr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %sra
}

; CHECK-LABEL: test_srad_1:
; CHECK-NOT: psrad $0, %xmm0
; CHECK: ret

define <4 x i32> @test_srad_2(<4 x i32> %InVec) {
entry:
  %sra = ashr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %sra
}

; CHECK-LABEL: test_srad_2:
; CHECK: psrad $1, %xmm0
; CHECK-NEXT: ret

define <4 x i32> @test_srad_3(<4 x i32> %InVec) {
entry:
  %sra = ashr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %sra
}

; CHECK-LABEL: test_srad_3:
; CHECK: psrad $31, %xmm0
; CHECK-NEXT: ret

; SSE2 Logical Shift Right

define <8 x i16> @test_srlw_1(<8 x i16> %InVec) {
entry:
  %srl = lshr <8 x i16> %InVec, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
  ret <8 x i16> %srl
}

; CHECK-LABEL: test_srlw_1:
; CHECK-NOT: psrlw $0, %xmm0
; CHECK: ret

define <8 x i16> @test_srlw_2(<8 x i16> %InVec) {
entry:
  %srl = lshr <8 x i16> %InVec, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  ret <8 x i16> %srl
}

; CHECK-LABEL: test_srlw_2:
; CHECK: psrlw $1, %xmm0
; CHECK-NEXT: ret

define <8 x i16> @test_srlw_3(<8 x i16> %InVec) {
entry:
  %srl = lshr <8 x i16> %InVec, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %srl
}

; CHECK-LABEL: test_srlw_3:
; CHECK: psrlw $15, %xmm0
; CHECK-NEXT: ret

define <4 x i32> @test_srld_1(<4 x i32> %InVec) {
entry:
  %srl = lshr <4 x i32> %InVec, <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i32> %srl
}

; CHECK-LABEL: test_srld_1:
; CHECK-NOT: psrld $0, %xmm0
; CHECK: ret

define <4 x i32> @test_srld_2(<4 x i32> %InVec) {
entry:
  %srl = lshr <4 x i32> %InVec, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %srl
}

; CHECK-LABEL: test_srld_2:
; CHECK: psrld $1, %xmm0
; CHECK-NEXT: ret

define <4 x i32> @test_srld_3(<4 x i32> %InVec) {
entry:
  %srl = lshr <4 x i32> %InVec, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %srl
}

; CHECK-LABEL: test_srld_3:
; CHECK: psrld $31, %xmm0
; CHECK-NEXT: ret

define <2 x i64> @test_srlq_1(<2 x i64> %InVec) {
entry:
  %srl = lshr <2 x i64> %InVec, <i64 0, i64 0>
  ret <2 x i64> %srl
}

; CHECK-LABEL: test_srlq_1:
; CHECK-NOT: psrlq $0, %xmm0
; CHECK: ret

define <2 x i64> @test_srlq_2(<2 x i64> %InVec) {
entry:
  %srl = lshr <2 x i64> %InVec, <i64 1, i64 1>
  ret <2 x i64> %srl
}

; CHECK-LABEL: test_srlq_2:
; CHECK: psrlq $1, %xmm0
; CHECK-NEXT: ret

define <2 x i64> @test_srlq_3(<2 x i64> %InVec) {
entry:
  %srl = lshr <2 x i64> %InVec, <i64 63, i64 63>
  ret <2 x i64> %srl
}

; CHECK-LABEL: test_srlq_3:
; CHECK: psrlq $63, %xmm0
; CHECK-NEXT: ret


; Chained shifts. In this section the expected output is listed ahead of each
; function rather than after it.

; ashr by 2 then ashr by 4: expected as a single arithmetic shift by 6.
; CHECK-LABEL: sra_sra_v4i32:
; CHECK: psrad $6, %xmm0
; CHECK-NEXT: retq
define <4 x i32> @sra_sra_v4i32(<4 x i32> %x) nounwind {
  %sra0 = ashr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %sra1 = ashr <4 x i32> %sra0, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %sra1
}

; lshr by 2 then lshr by 4: expected as a single logical shift by 6.
; CHECK-LABEL: @srl_srl_v4i32
; CHECK: psrld $6, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @srl_srl_v4i32(<4 x i32> %x) nounwind {
  %srl0 = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %srl1 = lshr <4 x i32> %srl0, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %srl1
}

; shl by 4 then lshr by 4 (clears the top four bits of each lane): expected as
; a single and with a mask, with no shift instructions.
; CHECK-LABEL: @srl_shl_v4i32
; CHECK: andps
; CHECK-NEXT: retq
define <4 x i32> @srl_shl_v4i32(<4 x i32> %x) nounwind {
  %shl = shl <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %srl = lshr <4 x i32> %shl, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %srl
}

; ashr by a variable amount then lshr by 31: only the sign bit of each lane
; survives, so a single logical shift by 31 of the original input is expected.
; CHECK-LABEL: @srl_sra_31_v4i32
; CHECK: psrld $31, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @srl_sra_31_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
  %sra = ashr <4 x i32> %x, %y
  %srl1 = lshr <4 x i32> %sra, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %srl1
}

; shl by 2 then shl by 4: expected as a single left shift by 6.
; CHECK-LABEL: @shl_shl_v4i32
; CHECK: pslld $6, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @shl_shl_v4i32(<4 x i32> %x) nounwind {
  %shl0 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %shl1 = shl <4 x i32> %shl0, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %shl1
}

; ashr by 4 then shl by 4 (clears the low four bits of each lane): expected as
; a single and with a mask.
; CHECK-LABEL: @shl_sra_v4i32
; CHECK: andps
; CHECK-NEXT: ret
define <4 x i32> @shl_sra_v4i32(<4 x i32> %x) nounwind {
  %sra = ashr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %shl = shl <4 x i32> %sra, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %shl
}

; lshr by 2 then shl by 5: expected as a left shift by the difference (3)
; followed by an and with a mask.
; CHECK-LABEL: @shl_srl_v4i32
; CHECK: pslld $3, %xmm0
; CHECK-NEXT: pand
; CHECK-NEXT: ret
define <4 x i32> @shl_srl_v4i32(<4 x i32> %x) nounwind {
  %srl = lshr <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  %shl = shl <4 x i32> %srl, <i32 5, i32 5, i32 5, i32 5>
  ret <4 x i32> %shl
}

; lshr by 2 on <4 x i16>, zext to <4 x i32>, then shl by 2: expected as a
; single and with a mask.
; CHECK-LABEL: @shl_zext_srl_v4i32
; CHECK: andps
; CHECK-NEXT: ret
define <4 x i32> @shl_zext_srl_v4i32(<4 x i16> %x) nounwind {
  %srl = lshr <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
  %zext = zext <4 x i16> %srl to <4 x i32>
  %shl = shl <4 x i32> %zext, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %shl
}

; lshr by 16, trunc to <4 x i16>, then ashr by 3: expected as a single
; arithmetic shift by 19 (16 + 3).
; CHECK-LABEL: @sra_trunc_srl_v4i32
; CHECK: psrad $19, %xmm0
; CHECK-NEXT: retq
define <4 x i16> @sra_trunc_srl_v4i32(<4 x i32> %x) nounwind {
  %srl = lshr <4 x i32> %x, <i32 16, i32 16, i32 16, i32 16>
  %trunc = trunc <4 x i32> %srl to <4 x i16>
  %sra = ashr <4 x i16> %trunc, <i16 3, i16 3, i16 3, i16 3>
  ret <4 x i16> %sra
}

; shl by 2 on <4 x i16>, zext to <4 x i32>, then shl by 17: expected as an and
; with a mask followed by a single left shift by 19 (2 + 17).
; CHECK-LABEL: @shl_zext_shl_v4i32
; CHECK: pand
; CHECK-NEXT: pslld $19, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @shl_zext_shl_v4i32(<4 x i16> %x) nounwind {
  %shl0 = shl <4 x i16> %x, <i16 2, i16 2, i16 2, i16 2>
  %ext = zext <4 x i16> %shl0 to <4 x i32>
  %shl1 = shl <4 x i32> %ext, <i32 17, i32 17, i32 17, i32 17>
  ret <4 x i32> %shl1
}

; Single shifts by 3: each is expected as exactly one immediate-shift
; instruction followed directly by the return.

; CHECK-LABEL: @sra_v4i32
; CHECK: psrad $3, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @sra_v4i32(<4 x i32> %x) nounwind {
  %sra = ashr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %sra
}

; CHECK-LABEL: @srl_v4i32
; CHECK: psrld $3, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @srl_v4i32(<4 x i32> %x) nounwind {
  %srl = lshr <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %srl
}

; CHECK-LABEL: @shl_v4i32
; CHECK: pslld $3, %xmm0
; CHECK-NEXT: ret
define <4 x i32> @shl_v4i32(<4 x i32> %x) nounwind {
  %shl = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %shl
}