; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2       | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2     | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx        | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2       | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512dq,+avx512bw | FileCheck %s --check-prefix=AVX

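; Tests for a binary op of a loaded scalar and a constant whose result is inserted into element 0 of a vector.
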
define <4 x i32> @add_op1_constant(i32* %p) nounwind {
; SSE-LABEL: add_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = add i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

; Code and data size may increase by using more vector ops, so the transform is disabled here.

define <4 x i32> @add_op1_constant_optsize(i32* %p) nounwind optsize {
; SSE-LABEL: add_op1_constant_optsize:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant_optsize:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = add i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @add_op0_constant(i16* %p) nounwind {
; SSE-LABEL: add_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %b = add i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <2 x i64> @sub_op0_constant(i64* %p) nounwind {
; SSE-LABEL: sub_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    subq (%rdi), %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    subq (%rdi), %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %b = sub i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <16 x i8> @sub_op1_constant(i8* %p) nounwind {
; SSE-LABEL: sub_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movb (%rdi), %al
; SSE-NEXT:    addb $-42, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movb (%rdi), %al
; AVX-NEXT:    addb $-42, %al
; AVX-NEXT:    movzbl %al, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i8, i8* %p
  %b = sub i8 %x, 42
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

define <4 x i32> @mul_op1_constant(i32* %p) nounwind {
; SSE-LABEL: mul_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    imull $42, (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    imull $42, (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = mul i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @mul_op0_constant(i16* %p) nounwind {
; SSE-LABEL: mul_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    imull $42, %eax, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    imull $42, %eax, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %b = mul i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @and_op1_constant(i32* %p) nounwind {
; SSE-LABEL: and_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    andl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: and_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    andl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = and i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <2 x i64> @or_op1_constant(i64* %p) nounwind {
; SSE-LABEL: or_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    orq $42, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    orq $42, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %b = or i64 %x, 42
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <8 x i16> @xor_op1_constant(i16* %p) nounwind {
; SSE-LABEL: xor_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    xorl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    xorl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %b = xor i16 %x, 42
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @shl_op0_constant(i32* %p) nounwind {
; SSE-LABEL: shl_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movb (%rdi), %cl
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    shll %cl, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shl_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movb (%rdi), %cl
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    shll %cl, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = shl i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <16 x i8> @shl_op1_constant(i8* %p) nounwind {
; SSE-LABEL: shl_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movb (%rdi), %al
; SSE-NEXT:    shlb $5, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: shl_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movb (%rdi), %al
; AVX-NEXT:    shlb $5, %al
; AVX-NEXT:    movzbl %al, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i8, i8* %p
  %b = shl i8 %x, 5
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

define <2 x i64> @lshr_op0_constant(i64* %p) nounwind {
; SSE-LABEL: lshr_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movb (%rdi), %cl
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    shrq %cl, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: lshr_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movb (%rdi), %cl
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    shrq %cl, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %b = lshr i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <4 x i32> @lshr_op1_constant(i32* %p) nounwind {
; SSE-LABEL: lshr_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    shrl $17, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: lshr_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    shrl $17, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = lshr i32 %x, 17
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @ashr_op0_constant(i16* %p) nounwind {
; SSE-LABEL: ashr_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movb (%rdi), %cl
; SSE-NEXT:    movl $-42, %eax
; SSE-NEXT:    sarl %cl, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ashr_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movb (%rdi), %cl
; AVX-NEXT:    movl $-42, %eax
; AVX-NEXT:    sarl %cl, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %b = ashr i16 -42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <8 x i16> @ashr_op1_constant(i16* %p) nounwind {
; SSE-LABEL: ashr_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movswl (%rdi), %eax
; SSE-NEXT:    sarl $7, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: ashr_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movswl (%rdi), %eax
; AVX-NEXT:    sarl $7, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %b = ashr i16 %x, 7
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @sdiv_op0_constant(i32* %p) nounwind {
; SSE-LABEL: sdiv_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    idivl (%rdi)
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    idivl (%rdi)
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = sdiv i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <8 x i16> @sdiv_op1_constant(i16* %p) nounwind {
; SSE-LABEL: sdiv_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movswl (%rdi), %eax
; SSE-NEXT:    imull $-15603, %eax, %ecx # imm = 0xC30D
; SSE-NEXT:    shrl $16, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movzwl %cx, %eax
; SSE-NEXT:    movswl %ax, %ecx
; SSE-NEXT:    shrl $15, %eax
; SSE-NEXT:    sarl $5, %ecx
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    movd %ecx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sdiv_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movswl (%rdi), %eax
; AVX-NEXT:    imull $-15603, %eax, %ecx # imm = 0xC30D
; AVX-NEXT:    shrl $16, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    movzwl %cx, %eax
; AVX-NEXT:    movswl %ax, %ecx
; AVX-NEXT:    shrl $15, %eax
; AVX-NEXT:    sarl $5, %ecx
; AVX-NEXT:    addl %eax, %ecx
; AVX-NEXT:    vmovd %ecx, %xmm0
; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %b = sdiv i16 %x, 42
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <8 x i16> @srem_op0_constant(i16* %p) nounwind {
; SSE-LABEL: srem_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movw $42, %ax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    idivw (%rdi)
; SSE-NEXT:    # kill: def $dx killed $dx def $edx
; SSE-NEXT:    movd %edx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: srem_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movw $42, %ax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    idivw (%rdi)
; AVX-NEXT:    # kill: def $dx killed $dx def $edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    retq
  %x = load i16, i16* %p
  %b = srem i16 42, %x
  %r = insertelement <8 x i16> undef, i16 %b, i32 0
  ret <8 x i16> %r
}

define <4 x i32> @srem_op1_constant(i32* %p) nounwind {
; SSE-LABEL: srem_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movslq (%rdi), %rax
; SSE-NEXT:    imulq $818089009, %rax, %rcx # imm = 0x30C30C31
; SSE-NEXT:    movq %rcx, %rdx
; SSE-NEXT:    shrq $63, %rdx
; SSE-NEXT:    sarq $35, %rcx
; SSE-NEXT:    addl %edx, %ecx
; SSE-NEXT:    imull $42, %ecx, %ecx
; SSE-NEXT:    subl %ecx, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: srem_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movslq (%rdi), %rax
; AVX-NEXT:    imulq $818089009, %rax, %rcx # imm = 0x30C30C31
; AVX-NEXT:    movq %rcx, %rdx
; AVX-NEXT:    shrq $63, %rdx
; AVX-NEXT:    sarq $35, %rcx
; AVX-NEXT:    addl %edx, %ecx
; AVX-NEXT:    imull $42, %ecx, %ecx
; AVX-NEXT:    subl %ecx, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = srem i32 %x, 42
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <4 x i32> @udiv_op0_constant(i32* %p) nounwind {
; SSE-LABEL: udiv_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    divl (%rdi)
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: udiv_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    divl (%rdi)
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = udiv i32 42, %x
  %r = insertelement <4 x i32> undef, i32 %b, i32 0
  ret <4 x i32> %r
}

define <2 x i64> @udiv_op1_constant(i64* %p) nounwind {
; SSE-LABEL: udiv_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    shrq %rax
; SSE-NEXT:    movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
; SSE-NEXT:    mulq %rcx
; SSE-NEXT:    shrq $4, %rdx
; SSE-NEXT:    movq %rdx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: udiv_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    shrq %rax
; AVX-NEXT:    movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
; AVX-NEXT:    mulq %rcx
; AVX-NEXT:    shrq $4, %rdx
; AVX-NEXT:    vmovq %rdx, %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %b = udiv i64 %x, 42
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <2 x i64> @urem_op0_constant(i64* %p) nounwind {
; SSE-LABEL: urem_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    divq (%rdi)
; SSE-NEXT:    movq %rdx, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: urem_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    xorl %edx, %edx
; AVX-NEXT:    divq (%rdi)
; AVX-NEXT:    vmovq %rdx, %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %b = urem i64 42, %x
  %r = insertelement <2 x i64> undef, i64 %b, i32 0
  ret <2 x i64> %r
}

define <16 x i8> @urem_op1_constant(i8* %p) nounwind {
; SSE-LABEL: urem_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movb (%rdi), %al
; SSE-NEXT:    movl %eax, %ecx
; SSE-NEXT:    shrb %cl
; SSE-NEXT:    movzbl %cl, %ecx
; SSE-NEXT:    imull $49, %ecx, %ecx
; SSE-NEXT:    shrl $10, %ecx
; SSE-NEXT:    imull $42, %ecx, %ecx
; SSE-NEXT:    subb %cl, %al
; SSE-NEXT:    movzbl %al, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: urem_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    movb (%rdi), %al
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb %cl
; AVX-NEXT:    movzbl %cl, %ecx
; AVX-NEXT:    imull $49, %ecx, %ecx
; AVX-NEXT:    shrl $10, %ecx
; AVX-NEXT:    imull $42, %ecx, %ecx
; AVX-NEXT:    subb %cl, %al
; AVX-NEXT:    movzbl %al, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i8, i8* %p
  %b = urem i8 %x, 42
  %r = insertelement <16 x i8> undef, i8 %b, i32 0
  ret <16 x i8> %r
}

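; Same patterns with floating-point binops.
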
define <4 x float> @fadd_op1_constant(float* %p) nounwind {
; SSE-LABEL: fadd_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fadd_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %b = fadd float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <2 x double> @fsub_op1_constant(double* %p) nounwind {
; SSE-LABEL: fsub_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    addsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %b = fsub double %x, 42.0
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

define <4 x float> @fsub_op0_constant(float* %p) nounwind {
; SSE-LABEL: fsub_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    subss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fsub_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vsubss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %b = fsub float 42.0, %x
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <4 x float> @fmul_op1_constant(float* %p) nounwind {
; SSE-LABEL: fmul_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fmul_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %b = fmul float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <2 x double> @fdiv_op1_constant(double* %p) nounwind {
; SSE-LABEL: fdiv_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load double, double* %p
  %b = fdiv double %x, 42.0
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

define <4 x float> @fdiv_op0_constant(float* %p) nounwind {
; SSE-LABEL: fdiv_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    divss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: fdiv_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vdivss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = load float, float* %p
  %b = fdiv float 42.0, %x
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <4 x float> @frem_op1_constant(float* %p) nounwind {
; SSE-LABEL: frem_op1_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    callq fmodf@PLT
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: frem_op1_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    callq fmodf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    retq
  %x = load float, float* %p
  %b = frem float %x, 42.0
  %r = insertelement <4 x float> undef, float %b, i32 0
  ret <4 x float> %r
}

define <2 x double> @frem_op0_constant(double* %p) nounwind {
; SSE-LABEL: frem_op0_constant:
; SSE:       # %bb.0:
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    callq fmod@PLT
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: frem_op0_constant:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    callq fmod@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    retq
  %x = load double, double* %p
  %b = frem double 42.0, %x
  %r = insertelement <2 x double> undef, double %b, i32 0
  ret <2 x double> %r
}

; Try again with 256-bit types.

define <8 x i32> @add_op1_constant_v8i32(i32* %p) nounwind {
; SSE-LABEL: add_op1_constant_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = add i32 %x, 42
  %r = insertelement <8 x i32> undef, i32 %b, i32 0
  ret <8 x i32> %r
}

define <4 x i64> @sub_op0_constant_v4i64(i64* %p) nounwind {
; SSE-LABEL: sub_op0_constant_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    subq (%rdi), %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op0_constant_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    subq (%rdi), %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %b = sub i64 42, %x
  %r = insertelement <4 x i64> undef, i64 %b, i32 0
  ret <4 x i64> %r
}

define <8 x i32> @mul_op1_constant_v8i32(i32* %p) nounwind {
; SSE-LABEL: mul_op1_constant_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    imull $42, (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op1_constant_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    imull $42, (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = mul i32 %x, 42
  %r = insertelement <8 x i32> undef, i32 %b, i32 0
  ret <8 x i32> %r
}

define <4 x i64> @or_op1_constant_v4i64(i64* %p) nounwind {
; SSE-LABEL: or_op1_constant_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    orq $42, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_op1_constant_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    orq $42, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %b = or i64 %x, 42
  %r = insertelement <4 x i64> undef, i64 %b, i32 0
  ret <4 x i64> %r
}

; Try again with 512-bit types.

define <16 x i32> @add_op1_constant_v16i32(i32* %p) nounwind {
; SSE-LABEL: add_op1_constant_v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    addl $42, %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_op1_constant_v16i32:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rdi), %eax
; AVX-NEXT:    addl $42, %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = add i32 %x, 42
  %r = insertelement <16 x i32> undef, i32 %b, i32 0
  ret <16 x i32> %r
}

define <8 x i64> @sub_op0_constant_v8i64(i64* %p) nounwind {
; SSE-LABEL: sub_op0_constant_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movl $42, %eax
; SSE-NEXT:    subq (%rdi), %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_op0_constant_v8i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movl $42, %eax
; AVX-NEXT:    subq (%rdi), %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %b = sub i64 42, %x
  %r = insertelement <8 x i64> undef, i64 %b, i32 0
  ret <8 x i64> %r
}

define <16 x i32> @mul_op1_constant_v16i32(i32* %p) nounwind {
; SSE-LABEL: mul_op1_constant_v16i32:
; SSE:       # %bb.0:
; SSE-NEXT:    imull $42, (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: mul_op1_constant_v16i32:
; AVX:       # %bb.0:
; AVX-NEXT:    imull $42, (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    retq
  %x = load i32, i32* %p
  %b = mul i32 %x, 42
  %r = insertelement <16 x i32> undef, i32 %b, i32 0
  ret <16 x i32> %r
}

define <8 x i64> @or_op1_constant_v8i64(i64* %p) nounwind {
; SSE-LABEL: or_op1_constant_v8i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    orq $42, %rax
; SSE-NEXT:    movq %rax, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: or_op1_constant_v8i64:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    orq $42, %rax
; AVX-NEXT:    vmovq %rax, %xmm0
; AVX-NEXT:    retq
  %x = load i64, i64* %p
  %b = or i64 %x, 42
  %r = insertelement <8 x i64> undef, i64 %b, i32 0
  ret <8 x i64> %r
}