; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2


; Verify that the following shifts are lowered into a sequence of two shifts plus
; a blend. On pre-avx2 targets, instead of scalarizing logical and arithmetic
; packed shift right by a constant build_vector the backend should always try to
; emit a simpler sequence of two shifts + blend when possible.

define <8 x i16> @test1(<8 x i16> %a) {
  %lshr = lshr <8 x i16> %a, <i16 3, i16 3, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
; CHECK-LABEL: test1
; SSE: psrlw
; SSE-NEXT: psrlw
; SSE-NEXT: movss
; AVX: vpsrlw
; AVX-NEXT: vpsrlw
; AVX-NEXT: vmovss
; AVX2: vpsrlw
; AVX2-NEXT: vpsrlw
; AVX2-NEXT: vmovss
; CHECK: ret


define <8 x i16> @test2(<8 x i16> %a) {
  %lshr = lshr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
; CHECK-LABEL: test2
; SSE: psrlw
; SSE-NEXT: psrlw
; SSE-NEXT: movsd
; AVX: vpsrlw
; AVX-NEXT: vpsrlw
; AVX-NEXT: vmovsd
; AVX2: vpsrlw
; AVX2-NEXT: vpsrlw
; AVX2-NEXT: vmovsd
; CHECK: ret


define <4 x i32> @test3(<4 x i32> %a) {
  %lshr = lshr <4 x i32> %a, <i32 3, i32 2, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test3
; SSE: psrld
; SSE-NEXT: psrld
; SSE-NEXT: movss
; AVX: vpsrld
; AVX-NEXT: vpsrld
; AVX-NEXT: vmovss
; AVX2: vpsrlvd
; CHECK: ret


define <4 x i32> @test4(<4 x i32> %a) {
  %lshr = lshr <4 x i32> %a, <i32 3, i32 3, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test4
; SSE: psrld
; SSE-NEXT: psrld
; SSE-NEXT: movsd
; AVX: vpsrld
; AVX-NEXT: vpsrld
; AVX-NEXT: vmovsd
; AVX2: vpsrlvd
; CHECK: ret


define <8 x i16> @test5(<8 x i16> %a) {
  %lshr = ashr <8 x i16> %a, <i16 3, i16 3, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
; NOTE(review): test5 has no FileCheck assertions, unlike every other function
; in this file, so its codegen is currently untested. By symmetry with test1
; (same shift amounts, arithmetic instead of logical) the expected lowering is
; presumably a psraw/psraw/movss (resp. vpsraw/vpsraw/vmovss) sequence —
; confirm against actual llc output before adding the CHECK directives.

define <8 x i16> @test6(<8 x i16> %a) {
  %lshr = ashr <8 x i16> %a, <i16 3, i16 3, i16 3, i16 3, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %lshr
}
; CHECK-LABEL: test6
; SSE: psraw
; SSE-NEXT: psraw
; SSE-NEXT: movsd
; AVX: vpsraw
; AVX-NEXT: vpsraw
; AVX-NEXT: vmovsd
; AVX2: vpsraw
; AVX2-NEXT: vpsraw
; AVX2-NEXT: vmovsd
; CHECK: ret


define <4 x i32> @test7(<4 x i32> %a) {
  %lshr = ashr <4 x i32> %a, <i32 3, i32 2, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test7
; SSE: psrad
; SSE-NEXT: psrad
; SSE-NEXT: movss
; AVX: vpsrad
; AVX-NEXT: vpsrad
; AVX-NEXT: vmovss
; AVX2: vpsravd
; CHECK: ret


define <4 x i32> @test8(<4 x i32> %a) {
  %lshr = ashr <4 x i32> %a, <i32 3, i32 3, i32 2, i32 2>
  ret <4 x i32> %lshr
}
; CHECK-LABEL: test8
; SSE: psrad
; SSE-NEXT: psrad
; SSE-NEXT: movsd
; AVX: vpsrad
; AVX-NEXT: vpsrad
; AVX-NEXT: vmovsd
; AVX2: vpsravd
; CHECK: ret