1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=i386-pc-win32       -mattr=+avx512bw  | FileCheck %s --check-prefix=X32
3; RUN: llc < %s -mtriple=x86_64-win32        -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
4; RUN: llc < %s -mtriple=x86_64-linux-gnu    -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64
5
6; Test regcall when receiving arguments of v64i1 type
define dso_local x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12)  {
; X32-LABEL: test_argv64i1:
; X32:       # %bb.0:
; X32-NEXT:    addl %edx, %eax
; X32-NEXT:    adcl %edi, %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv64i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    addq %rcx, %rax
; WIN64-NEXT:    addq %rdx, %rax
; WIN64-NEXT:    addq %rdi, %rax
; WIN64-NEXT:    addq %rsi, %rax
; WIN64-NEXT:    addq %r8, %rax
; WIN64-NEXT:    addq %r9, %rax
; WIN64-NEXT:    addq %r10, %rax
; WIN64-NEXT:    addq %r11, %rax
; WIN64-NEXT:    addq %r12, %rax
; WIN64-NEXT:    addq %r14, %rax
; WIN64-NEXT:    addq %r15, %rax
; WIN64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; WIN64-NEXT:    retq
;
; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    addq %rcx, %rax
; LINUXOSX64-NEXT:    addq %rdx, %rax
; LINUXOSX64-NEXT:    addq %rdi, %rax
; LINUXOSX64-NEXT:    addq %rsi, %rax
; LINUXOSX64-NEXT:    addq %r8, %rax
; LINUXOSX64-NEXT:    addq %r9, %rax
; LINUXOSX64-NEXT:    addq %r12, %rax
; LINUXOSX64-NEXT:    addq %r13, %rax
; LINUXOSX64-NEXT:    addq %r14, %rax
; LINUXOSX64-NEXT:    addq %r15, %rax
; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT:    retq
; Sum all 13 v64i1 mask arguments as 64-bit integers.  The checks above show
; the masks arriving in GPRs (register pairs with addl/adcl chains on the
; 32-bit target), with the overflow arguments coming from the stack.
  %y0 = bitcast <64 x i1> %x0 to i64
  %y1 = bitcast <64 x i1> %x1 to i64
  %y2 = bitcast <64 x i1> %x2 to i64
  %y3 = bitcast <64 x i1> %x3 to i64
  %y4 = bitcast <64 x i1> %x4 to i64
  %y5 = bitcast <64 x i1> %x5 to i64
  %y6 = bitcast <64 x i1> %x6 to i64
  %y7 = bitcast <64 x i1> %x7 to i64
  %y8 = bitcast <64 x i1> %x8 to i64
  %y9 = bitcast <64 x i1> %x9 to i64
  %y10 = bitcast <64 x i1> %x10 to i64
  %y11 = bitcast <64 x i1> %x11 to i64
  %y12 = bitcast <64 x i1> %x12 to i64
  %add1 = add i64 %y0, %y1
  %add2 = add i64 %add1, %y2
  %add3 = add i64 %add2, %y3
  %add4 = add i64 %add3, %y4
  %add5 = add i64 %add4, %y5
  %add6 = add i64 %add5, %y6
  %add7 = add i64 %add6, %y7
  %add8 = add i64 %add7, %y8
  %add9 = add i64 %add8, %y9
  %add10 = add i64 %add9, %y10
  %add11 = add i64 %add10, %y11
  %add12 = add i64 %add11, %y12
  ret i64 %add12
}
94
95; Test regcall when passing arguments of v64i1 type
define dso_local i64 @caller_argv64i1() #0 {
; X32-LABEL: caller_argv64i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %edi
; X32-NEXT:    subl $88, %esp
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [2,1,2,1]
; X32-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovaps {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
; X32-NEXT:    vmovups %zmm0, (%esp)
; X32-NEXT:    movl $1, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $2, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $2, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $2, %edx
; X32-NEXT:    movl $1, %edi
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv64i1
; X32-NEXT:    movl %ecx, %edx
; X32-NEXT:    addl $88, %esp
; X32-NEXT:    popl %edi
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv64i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %r15
; WIN64-NEXT:    .seh_pushreg %r15
; WIN64-NEXT:    pushq %r14
; WIN64-NEXT:    .seh_pushreg %r14
; WIN64-NEXT:    pushq %r12
; WIN64-NEXT:    .seh_pushreg %r12
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $48, %rsp
; WIN64-NEXT:    .seh_stackalloc 48
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 32
; WIN64-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 16
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; WIN64-NEXT:    movq %rax, (%rsp)
; WIN64-NEXT:    movq %rax, %rcx
; WIN64-NEXT:    movq %rax, %rdx
; WIN64-NEXT:    movq %rax, %rdi
; WIN64-NEXT:    movq %rax, %r8
; WIN64-NEXT:    movq %rax, %r9
; WIN64-NEXT:    movq %rax, %r10
; WIN64-NEXT:    movq %rax, %r11
; WIN64-NEXT:    movq %rax, %r12
; WIN64-NEXT:    movq %rax, %r14
; WIN64-NEXT:    movq %rax, %r15
; WIN64-NEXT:    movq %rax, %rsi
; WIN64-NEXT:    callq test_argv64i1
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $48, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    popq %r12
; WIN64-NEXT:    popq %r14
; WIN64-NEXT:    popq %r15
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv64i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %r15
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    pushq %r14
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT:    pushq %r13
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT:    pushq %r12
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 40
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT:    .cfi_offset %r12, -40
; LINUXOSX64-NEXT:    .cfi_offset %r13, -32
; LINUXOSX64-NEXT:    .cfi_offset %r14, -24
; LINUXOSX64-NEXT:    .cfi_offset %r15, -16
; LINUXOSX64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; LINUXOSX64-NEXT:    movq %rax, %rcx
; LINUXOSX64-NEXT:    movq %rax, %rdx
; LINUXOSX64-NEXT:    movq %rax, %rdi
; LINUXOSX64-NEXT:    movq %rax, %r8
; LINUXOSX64-NEXT:    movq %rax, %r9
; LINUXOSX64-NEXT:    movq %rax, %r12
; LINUXOSX64-NEXT:    movq %rax, %r13
; LINUXOSX64-NEXT:    movq %rax, %r14
; LINUXOSX64-NEXT:    movq %rax, %r15
; LINUXOSX64-NEXT:    movq %rax, %rsi
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT:    callq test_argv64i1
; LINUXOSX64-NEXT:    addq $24, %rsp
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset -24
; LINUXOSX64-NEXT:    popq %r12
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT:    popq %r13
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT:    popq %r14
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    popq %r15
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  ; 4294967298 == 0x100000002; the same constant mask is passed in all 13
  ; v64i1 argument slots of the regcall callee, forcing both register and
  ; stack argument lowering.
  %v0 = bitcast i64 4294967298 to <64 x i1>
  %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0)
  ret i64 %call
}
214
215; Test regcall when returning v64i1 type
define dso_local x86_regcallcc <64 x i1> @test_retv64i1()  {
; X32-LABEL: test_retv64i1:
; X32:       # %bb.0:
; X32-NEXT:    movl $2, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv64i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; CHECK64-NEXT:    retq
; Return the constant mask 0x100000002 bitcast to v64i1; the checks show it
; returned in rax on 64-bit targets and split across eax:ecx on 32-bit.
  %a = bitcast i64 4294967298 to <64 x i1>
 ret <64 x i1> %a
}
230
231; Test regcall when processing result of v64i1 type
define dso_local <64 x i1> @caller_retv64i1() #0 {
; X32-LABEL: caller_retv64i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv64i1
; X32-NEXT:    kmovd %eax, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kunpckdq %k0, %k1, %k0
; X32-NEXT:    vpmovm2b %k0, %zmm0
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv64i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv64i1
; WIN64-NEXT:    kmovq %rax, %k0
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv64i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv64i1
; LINUXOSX64-NEXT:    kmovq %rax, %k0
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm0
; LINUXOSX64-NEXT:    popq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  ; Forward the regcall v64i1 result to our own (default-CC) return; the
  ; checks show the GPR result being moved into a k-register and expanded.
  %call = call x86_regcallcc <64 x i1> @test_retv64i1()
  ret <64 x i1> %call
}
280
281; Test regcall when receiving arguments of v32i1 type
declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)  {
; X32-LABEL: test_argv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    subl $76, %esp
; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2b %k2, %zmm0
; X32-NEXT:    vpmovm2b %k1, %zmm1
; X32-NEXT:    vpmovm2b %k0, %zmm2
; X32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; X32-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
; X32-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
; X32-NEXT:    calll _test_argv32i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $76, %esp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rbp
; WIN64-NEXT:    .seh_pushreg %rbp
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg %r11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg %r10
; WIN64-NEXT:    subq $128, %rsp
; WIN64-NEXT:    .seh_stackalloc 128
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT:    .seh_setframe %rbp, 128
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    andq $-32, %rsp
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %eax, %k1
; WIN64-NEXT:    kmovd %ecx, %k2
; WIN64-NEXT:    vpmovm2b %k2, %zmm0
; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k1, %zmm0
; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv32i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    movq %rbp, %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    popq %rbp
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    subq $136, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
; LINUXOSX64-NEXT:    callq test_argv32i1helper@PLT
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $136, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    retq
entry:
  ; Forward the three regcall v32i1 mask arguments to a default-CC helper;
  ; the checks show them being expanded from k-registers to vectors for the
  ; non-regcall call.
  %res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
  ret i32 %res
}
391
392; Test regcall when passing arguments of v32i1 type
define dso_local i32 @caller_argv32i1() #0 {
; X32-LABEL: caller_argv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv32i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv32i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv32i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  ; Pass the constant mask 1 in each of the three v32i1 regcall argument
  ; slots; the checks show it materialized in eax/ecx/edx.
  %v0 = bitcast i32 1 to <32 x i1>
  %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
  ret i32 %call
}
443
444; Test regcall when returning v32i1 type
define dso_local x86_regcallcc <32 x i1> @test_retv32i1()  {
; X32-LABEL: test_retv32i1:
; X32:       # %bb.0:
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv32i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl $1, %eax
; CHECK64-NEXT:    retq
; Return the constant mask 1 bitcast to v32i1, materialized in eax.
  %a = bitcast i32 1 to <32 x i1>
  ret <32 x i1> %a
}
458
459; Test regcall when processing result of v32i1 type
define dso_local i32 @caller_retv32i1() #0 {
; X32-LABEL: caller_retv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv32i1
; X32-NEXT:    incl %eax
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv32i1
; WIN64-NEXT:    incl %eax
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv32i1
; LINUXOSX64-NEXT:    incl %eax
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  ; Add 1 to the i32 view of the v32i1 regcall result; the checks show a
  ; plain incl on the returned GPR, with no mask-register round-trip.
  %call = call x86_regcallcc <32 x i1> @test_retv32i1()
  %c = bitcast <32 x i1> %call to i32
  %add = add i32 %c, 1
  ret i32 %add
}
505
506; Test regcall when receiving arguments of v16i1 type
declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)  {
; X32-LABEL: test_argv16i1:
; X32:       # %bb.0:
; X32-NEXT:    subl $76, %esp
; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2b %k2, %zmm0
; X32-NEXT:    vpmovm2b %k1, %zmm1
; X32-NEXT:    vpmovm2b %k0, %zmm2
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv16i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $76, %esp
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv16i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg %r11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg %r10
; WIN64-NEXT:    subq $88, %rsp
; WIN64-NEXT:    .seh_stackalloc 88
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %eax, %k1
; WIN64-NEXT:    kmovd %ecx, %k2
; WIN64-NEXT:    vpmovm2b %k2, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k1, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv16i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    addq $88, %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv16i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    subq $136, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    callq test_argv16i1helper@PLT
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $136, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
; Forward the three regcall v16i1 mask arguments to a default-CC helper;
; the checks show them expanded from k-registers to xmm-sized vectors.
  %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
  ret i16 %res
}
609
610; Test regcall when passing arguments of v16i1 type
define dso_local i16 @caller_argv16i1() #0 {
; X32-LABEL: caller_argv16i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv16i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv16i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv16i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv16i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv16i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  ; Pass the constant mask 1 in each of the three v16i1 regcall argument
  ; slots; the checks show it materialized in eax/ecx/edx.
  %v0 = bitcast i16 1 to <16 x i1>
  %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
  ret i16 %call
}
661
662; Test regcall when returning v16i1 type
define dso_local x86_regcallcc <16 x i1> @test_retv16i1()  {
; X32-LABEL: test_retv16i1:
; X32:       # %bb.0:
; X32-NEXT:    movw $1, %ax
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv16i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movw $1, %ax
; CHECK64-NEXT:    retq
; Return the constant mask 1 bitcast to v16i1, materialized in ax.
  %a = bitcast i16 1 to <16 x i1>
  ret <16 x i1> %a
}
676
677; Test regcall when processing result of v16i1 type
define dso_local i16 @caller_retv16i1() #0 {
; X32-LABEL: caller_retv16i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv16i1
; X32-NEXT:    # kill: def $ax killed $ax def $eax
; X32-NEXT:    incl %eax
; X32-NEXT:    # kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv16i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv16i1
; WIN64-NEXT:    # kill: def $ax killed $ax def $eax
; WIN64-NEXT:    incl %eax
; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv16i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv16i1
; LINUXOSX64-NEXT:    # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT:    incl %eax
; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  ; Add 1 to the i16 view of the v16i1 regcall result; the checks show a
  ; plain incl on the returned GPR, with no mask-register round-trip.
  %call = call x86_regcallcc <16 x i1> @test_retv16i1()
  %c = bitcast <16 x i1> %call to i16
  %add = add i16 %c, 1
  ret i16 %add
}
729
730; Test regcall when receiving arguments of v8i1 type
731declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)  {
; X32-LABEL: test_argv8i1:
; X32:       # %bb.0:
; X32-NEXT:    subl $76, %esp
; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2w %k2, %zmm0
; X32-NEXT:    vpmovm2w %k1, %zmm1
; X32-NEXT:    vpmovm2w %k0, %zmm2
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv8i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $76, %esp
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv8i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg %r11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg %r10
; WIN64-NEXT:    subq $88, %rsp
; WIN64-NEXT:    .seh_stackalloc 88
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %eax, %k1
; WIN64-NEXT:    kmovd %ecx, %k2
; WIN64-NEXT:    vpmovm2w %k2, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2w %k1, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2w %k0, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv8i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    addq $88, %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv8i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    subq $136, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2w %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2w %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    callq test_argv8i1helper@PLT
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $136, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
; The v8i1 arguments arrive as integer mask values in eax/ecx/edx (per the
; regcall convention). Forwarding them to a non-regcall helper forces
; mask-register promotion to vector form (kmovd + vpmovm2w), plus
; save/restore of the XMM registers that regcall preserves across calls.
; On Win64 the vectors are additionally spilled to the stack and passed
; by pointer (leaq of stack slots into rcx/rdx/r8).
  %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
  ret i8 %res
}
833
834; Test regcall when passing arguments of v8i1 type
define dso_local i8 @caller_argv8i1() #0 {
; X32-LABEL: caller_argv8i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv8i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv8i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv8i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv8i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv8i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
; All three v8i1 arguments are the same constant mask (i8 1 bitcast), so the
; caller just materializes the immediate 1 in eax/ecx/edx — the integer
; registers regcall uses for mask arguments — with no vector code at all.
  %v0 = bitcast i8 1 to <8 x i1>
  %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
  ret i8 %call
}
885
886; Test regcall when returning v8i1 type
define dso_local x86_regcallcc <8 x i1> @test_retv8i1()  {
; X32-LABEL: test_retv8i1:
; X32:       # %bb.0:
; X32-NEXT:    movb $1, %al
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv8i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
; A constant v8i1 return value is handed back as an i8 immediate in al
; under regcall on every target — no mask or vector registers involved.
  %a = bitcast i8 1 to <8 x i1>
  ret <8 x i1> %a
}
900
901; Test regcall when processing result of v8i1 type
define dso_local <8 x i1> @caller_retv8i1() #0 {
; X32-LABEL: caller_retv8i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv8i1
; X32-NEXT:    # kill: def $al killed $al def $eax
; X32-NEXT:    kmovd %eax, %k0
; X32-NEXT:    vpmovm2w %k0, %zmm0
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv8i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv8i1
; WIN64-NEXT:    # kill: def $al killed $al def $eax
; WIN64-NEXT:    kmovd %eax, %k0
; WIN64-NEXT:    vpmovm2w %k0, %zmm0
; WIN64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv8i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv8i1
; LINUXOSX64-NEXT:    # kill: def $al killed $al def $eax
; LINUXOSX64-NEXT:    kmovd %eax, %k0
; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    popq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    retq
entry:
; The regcall callee returns the mask as an i8 in al; this function's own
; (default) calling convention returns <8 x i1> as a vector, so the mask is
; converted back with kmovd + vpmovm2w and narrowed into xmm0 for the return.
  %call = call x86_regcallcc <8 x i1> @test_retv8i1()
  ret <8 x i1> %call
}
957
958