; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
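
; Tests for bitwise logic ops (and/or/xor) whose operands are both zero-extended
; or sign-extended from a narrower vector element type. Ideally the logic op can
; be performed on the narrow source type followed by a single extend of the result.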

define <8 x i32> @zext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @sext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

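; The same patterns with i8 elements extended to i16.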
define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    psraw $8, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    psraw $8, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    psraw $8, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

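; The same logic ops with boolean (<8 x i1>) operands extended to i32.
; Zero extension from i1 requires masking the low bit, and sign extension
; from i1 is lowered with a shift-left/arithmetic-shift-right pair.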
define <8 x i32> @bool_zext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pslld $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pslld $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pslld $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    pxor %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}