1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
3; RUN: opt < %s -vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX
4
5; x86 does not have a cheap v16i8 shuffle until SSSE3 (pshufb)
; The SSE2 run therefore expects the original <4 x i32> shuffle + bitcast to be
; left alone, while the AVX2 run expects the bitcast to be moved ahead of an
; equivalent <16 x i8> shuffle (each i32 mask index N expands to the four byte
; indices 4N..4N+3).
6
7define <16 x i8> @bitcast_shuf_narrow_element(<4 x i32> %v) {
8; SSE-LABEL: @bitcast_shuf_narrow_element(
9; SSE-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
10; SSE-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
11; SSE-NEXT:    ret <16 x i8> [[R]]
12;
13; AVX-LABEL: @bitcast_shuf_narrow_element(
14; AVX-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <16 x i8>
15; AVX-NEXT:    [[R:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
16; AVX-NEXT:    ret <16 x i8> [[R]]
17;
18  %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
19  %r = bitcast <4 x i32> %shuf to <16 x i8>
20  ret <16 x i8> %r
21}
22
23; v4f32 is the same cost as v4i32, so this always works
; Both RUN lines share the CHECK prefix here: the bitcast to <4 x float> is
; expected to be moved ahead of the shuffle, and the mask is unchanged because
; the element count stays at 4.
24
25define <4 x float> @bitcast_shuf_same_size(<4 x i32> %v) {
26; CHECK-LABEL: @bitcast_shuf_same_size(
27; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[V:%.*]] to <4 x float>
28; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
29; CHECK-NEXT:    ret <4 x float> [[R]]
30;
31  %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
32  %r = bitcast <4 x i32> %shuf to <4 x float>
33  ret <4 x float> %r
34}
35
36; Negative test - length-changing shuffle
; The shuffle takes a <2 x i32> input and produces a <4 x i32> result, so its
; source and destination vector types differ. The checks expect the IR to be
; left unchanged on both RUN lines.
37
38define <16 x i8> @bitcast_shuf_narrow_element_wrong_size(<2 x i32> %v) {
39; CHECK-LABEL: @bitcast_shuf_narrow_element_wrong_size(
40; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i32> [[V:%.*]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
41; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
42; CHECK-NEXT:    ret <16 x i8> [[R]]
43;
44  %shuf = shufflevector <2 x i32> %v, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
45  %r = bitcast <4 x i32> %shuf to <16 x i8>
46  ret <16 x i8> %r
47}
48
49; Negative test - must cast to vector type
; The bitcast destination is the scalar type i128 rather than a vector, so the
; checks expect the shuffle + bitcast to be left unchanged on both RUN lines.
50
51define i128 @bitcast_shuf_narrow_element_wrong_type(<4 x i32> %v) {
52; CHECK-LABEL: @bitcast_shuf_narrow_element_wrong_type(
53; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
54; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to i128
55; CHECK-NEXT:    ret i128 [[R]]
56;
57  %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
58  %r = bitcast <4 x i32> %shuf to i128
59  ret i128 %r
60}
61
62; Widen shuffle elements
; The bitcast goes from <8 x i16> to <4 x i32>. The i16 mask
; <0,1,0,1,2,3,2,3> moves adjacent i16 pairs as units, so the checks expect it
; to be rewritten as the i32 mask <0,0,1,1> applied after the bitcast, on both
; RUN lines.
63
64define <4 x i32> @bitcast_shuf_wide_element(<8 x i16> %v) {
65; CHECK-LABEL: @bitcast_shuf_wide_element(
66; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[V:%.*]] to <4 x i32>
67; CHECK-NEXT:    [[R:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
68; CHECK-NEXT:    ret <4 x i32> [[R]]
69;
70  %shuf = shufflevector <8 x i16> %v, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3>
71  %r = bitcast <8 x i16> %shuf to <4 x i32>
72  ret <4 x i32> %r
73}
74
; External function used below only to give a shuffle result a second user.
75declare void @use(<4 x i32>)
76
77; Negative test - don't create an extra shuffle
; %shuf is also passed to @use, so the original <4 x i32> shuffle has to stay;
; moving the bitcast ahead of a new shuffle would leave two shuffles instead of
; one. The checks expect the IR to be left unchanged on both RUN lines.
78
79define <16 x i8> @bitcast_shuf_uses(<4 x i32> %v) {
80; CHECK-LABEL: @bitcast_shuf_uses(
81; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
82; CHECK-NEXT:    call void @use(<4 x i32> [[SHUF]])
83; CHECK-NEXT:    [[R:%.*]] = bitcast <4 x i32> [[SHUF]] to <16 x i8>
84; CHECK-NEXT:    ret <16 x i8> [[R]]
85;
86  %shuf = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
87  call void @use(<4 x i32> %shuf)
88  %r = bitcast <4 x i32> %shuf to <16 x i8>
89  ret <16 x i8> %r
90}
91
; Longer chain: bitcast -> reversing <4 x i32> shuffle -> bitcast to <16 x i8>
; -> shl -> bitcast -> reversing <4 x i32> shuffle -> bitcast to <2 x i64>.
; NOTE(review): the name presumably refers to LLVM bug report PR35454 - confirm.
; Expected results, per the checks:
;   SSE: nothing changes.
;   AVX: the first shuffle + bitcast pair becomes a bitcast followed by a
;        <16 x i8> shuffle; the trailing shuffle + bitcast to <2 x i64> is kept
;        as written (NOTE(review): presumably because the reversed i32 mask has
;        no <2 x i64> equivalent - confirm against the pass).
92define <2 x i64> @PR35454_1(<2 x i64> %v) {
93; SSE-LABEL: @PR35454_1(
94; SSE-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
95; SSE-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
96; SSE-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <16 x i8>
97; SSE-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
98; SSE-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
99; SSE-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
100; SSE-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
101; SSE-NEXT:    ret <2 x i64> [[BC3]]
102;
103; AVX-LABEL: @PR35454_1(
104; AVX-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
105; AVX-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <16 x i8>
106; AVX-NEXT:    [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
107; AVX-NEXT:    [[ADD:%.*]] = shl <16 x i8> [[BC1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
108; AVX-NEXT:    [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32>
109; AVX-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
110; AVX-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
111; AVX-NEXT:    ret <2 x i64> [[BC3]]
112;
113  %bc = bitcast <2 x i64> %v to <4 x i32>
114  %permil = shufflevector <4 x i32> %bc, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
115  %bc1 = bitcast <4 x i32> %permil to <16 x i8>
116  %add = shl <16 x i8> %bc1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
117  %bc2 = bitcast <16 x i8> %add to <4 x i32>
118  %permil1 = shufflevector <4 x i32> %bc2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
119  %bc3 = bitcast <4 x i32> %permil1 to <2 x i64>
120  ret <2 x i64> %bc3
121}
122
; Same chain as @PR35454_1, but the middle operation works on <8 x i16>
; instead of <16 x i8>.
; NOTE(review): the name presumably refers to LLVM bug report PR35454 - confirm.
; Expected results, per the checks:
;   SSE: nothing changes.
;   AVX: the first shuffle + bitcast pair becomes a bitcast followed by an
;        <8 x i16> shuffle (each i32 mask index N expands to 2N, 2N+1); the
;        trailing shuffle + bitcast to <2 x i64> is kept as written.
123define <2 x i64> @PR35454_2(<2 x i64> %v) {
124; SSE-LABEL: @PR35454_2(
125; SSE-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
126; SSE-NEXT:    [[PERMIL:%.*]] = shufflevector <4 x i32> [[BC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
127; SSE-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[PERMIL]] to <8 x i16>
128; SSE-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
129; SSE-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>
130; SSE-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
131; SSE-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
132; SSE-NEXT:    ret <2 x i64> [[BC3]]
133;
134; AVX-LABEL: @PR35454_2(
135; AVX-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32>
136; AVX-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <8 x i16>
137; AVX-NEXT:    [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
138; AVX-NEXT:    [[ADD:%.*]] = shl <8 x i16> [[BC1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
139; AVX-NEXT:    [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32>
140; AVX-NEXT:    [[PERMIL1:%.*]] = shufflevector <4 x i32> [[BC2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
141; AVX-NEXT:    [[BC3:%.*]] = bitcast <4 x i32> [[PERMIL1]] to <2 x i64>
142; AVX-NEXT:    ret <2 x i64> [[BC3]]
143;
144  %bc = bitcast <2 x i64> %v to <4 x i32>
145  %permil = shufflevector <4 x i32> %bc, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
146  %bc1 = bitcast <4 x i32> %permil to <8 x i16>
147  %add = shl <8 x i16> %bc1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
148  %bc2 = bitcast <8 x i16> %add to <4 x i32>
149  %permil1 = shufflevector <4 x i32> %bc2, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
150  %bc3 = bitcast <4 x i32> %permil1 to <2 x i64>
151  ret <2 x i64> %bc3
152}
153