; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=WIN64
; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+avx512bw | FileCheck %s --check-prefix=CHECK64 --check-prefix=LINUXOSX64

; Check prefixes used below: X32 covers the i386 Windows run, WIN64 and
; LINUXOSX64 cover the x86_64 Windows and Linux runs respectively, and CHECK64
; is shared by both 64-bit runs where their output is identical.

; Test regcall when receiving arguments of v64i1 type
; Bitcasts each of the thirteen <64 x i1> arguments to i64 and returns their sum.
define dso_local x86_regcallcc i64 @test_argv64i1(<64 x i1> %x0, <64 x i1> %x1, <64 x i1> %x2, <64 x i1> %x3, <64 x i1> %x4, <64 x i1> %x5, <64 x i1> %x6, <64 x i1> %x7, <64 x i1> %x8, <64 x i1> %x9, <64 x i1> %x10, <64 x i1> %x11, <64 x i1> %x12) {
; X32-LABEL: test_argv64i1:
; X32:       # %bb.0:
; X32-NEXT:    addl %edx, %eax
; X32-NEXT:    adcl %edi, %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv64i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    addq %rcx, %rax
; WIN64-NEXT:    addq %rdx, %rax
; WIN64-NEXT:    addq %rdi, %rax
; WIN64-NEXT:    addq %rsi, %rax
; WIN64-NEXT:    addq %r8, %rax
; WIN64-NEXT:    addq %r9, %rax
; WIN64-NEXT:    addq %r10, %rax
; WIN64-NEXT:    addq %r11, %rax
; WIN64-NEXT:    addq %r12, %rax
; WIN64-NEXT:    addq %r14, %rax
; WIN64-NEXT:    addq %r15, %rax
; WIN64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; WIN64-NEXT:    retq
;
; LINUXOSX64-LABEL: test_argv64i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    addq %rcx, %rax
; LINUXOSX64-NEXT:    addq %rdx, %rax
; LINUXOSX64-NEXT:    addq %rdi, %rax
; LINUXOSX64-NEXT:    addq %rsi, %rax
; LINUXOSX64-NEXT:    addq %r8, %rax
; LINUXOSX64-NEXT:    addq %r9, %rax
; LINUXOSX64-NEXT:    addq %r12, %rax
; LINUXOSX64-NEXT:    addq %r13, %rax
; LINUXOSX64-NEXT:    addq %r14, %rax
; LINUXOSX64-NEXT:    addq %r15, %rax
; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT:    addq {{[0-9]+}}(%rsp), %rax
; LINUXOSX64-NEXT:    retq
  %y0 = bitcast <64 x i1> %x0 to i64
  %y1 = bitcast <64 x i1> %x1 to i64
  %y2 = bitcast <64 x i1> %x2 to i64
  %y3 = bitcast <64 x i1> %x3 to i64
  %y4 = bitcast <64 x i1> %x4 to i64
  %y5 = bitcast <64 x i1> %x5 to i64
  %y6 = bitcast <64 x i1> %x6 to i64
  %y7 = bitcast <64 x i1> %x7 to i64
  %y8 = bitcast <64 x i1> %x8 to i64
  %y9 = bitcast <64 x i1> %x9 to i64
  %y10 = bitcast <64 x i1> %x10 to i64
  %y11 = bitcast <64 x i1> %x11 to i64
  %y12 = bitcast <64 x i1> %x12 to i64
  %add1 = add i64 %y0, %y1
  %add2 = add i64 %add1, %y2
  %add3 = add i64 %add2, %y3
  %add4 = add i64 %add3, %y4
  %add5 = add i64 %add4, %y5
  %add6 = add i64 %add5, %y6
  %add7 = add i64 %add6, %y7
  %add8 = add i64 %add7, %y8
  %add9 = add i64 %add8, %y9
  %add10 = add i64 %add9, %y10
  %add11 = add i64 %add10, %y11
  %add12 = add i64 %add11, %y12
  ret i64 %add12
}

; Test regcall when passing arguments of v64i1 type
; Passes the same constant mask (i64 4294967298 bitcast to <64 x i1>) in all
; thirteen argument positions of test_argv64i1 and returns the callee's result.
define dso_local i64 @caller_argv64i1() #0 {
; X32-LABEL: caller_argv64i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %edi
; X32-NEXT:    subl $88, %esp
; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [2,1,2,1]
; X32-NEXT:    vmovups %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT:    vmovaps {{.*#+}} zmm0 = [2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1]
; X32-NEXT:    vmovups %zmm0, (%esp)
; X32-NEXT:    movl $1, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $2, {{[0-9]+}}(%esp)
; X32-NEXT:    movl $2, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $2, %edx
; X32-NEXT:    movl $1, %edi
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv64i1
; X32-NEXT:    movl %ecx, %edx
; X32-NEXT:    addl $88, %esp
; X32-NEXT:    popl %edi
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv64i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %r15
; WIN64-NEXT:    .seh_pushreg %r15
; WIN64-NEXT:    pushq %r14
; WIN64-NEXT:    .seh_pushreg %r14
; WIN64-NEXT:    pushq %r12
; WIN64-NEXT:    .seh_pushreg %r12
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $48, %rsp
; WIN64-NEXT:    .seh_stackalloc 48
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 32
; WIN64-NEXT:    vmovaps %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 16
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; WIN64-NEXT:    movq %rax, (%rsp)
; WIN64-NEXT:    movq %rax, %rcx
; WIN64-NEXT:    movq %rax, %rdx
; WIN64-NEXT:    movq %rax, %rdi
; WIN64-NEXT:    movq %rax, %r8
; WIN64-NEXT:    movq %rax, %r9
; WIN64-NEXT:    movq %rax, %r10
; WIN64-NEXT:    movq %rax, %r11
; WIN64-NEXT:    movq %rax, %r12
; WIN64-NEXT:    movq %rax, %r14
; WIN64-NEXT:    movq %rax, %r15
; WIN64-NEXT:    movq %rax, %rsi
; WIN64-NEXT:    callq test_argv64i1
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $48, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    popq %r12
; WIN64-NEXT:    popq %r14
; WIN64-NEXT:    popq %r15
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv64i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %r15
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    pushq %r14
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT:    pushq %r13
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT:    pushq %r12
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 40
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 48
; LINUXOSX64-NEXT:    .cfi_offset %r12, -40
; LINUXOSX64-NEXT:    .cfi_offset %r13, -32
; LINUXOSX64-NEXT:    .cfi_offset %r14, -24
; LINUXOSX64-NEXT:    .cfi_offset %r15, -16
; LINUXOSX64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; LINUXOSX64-NEXT:    movq %rax, %rcx
; LINUXOSX64-NEXT:    movq %rax, %rdx
; LINUXOSX64-NEXT:    movq %rax, %rdi
; LINUXOSX64-NEXT:    movq %rax, %r8
; LINUXOSX64-NEXT:    movq %rax, %r9
; LINUXOSX64-NEXT:    movq %rax, %r12
; LINUXOSX64-NEXT:    movq %rax, %r13
; LINUXOSX64-NEXT:    movq %rax, %r14
; LINUXOSX64-NEXT:    movq %rax, %r15
; LINUXOSX64-NEXT:    movq %rax, %rsi
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset 8
; LINUXOSX64-NEXT:    callq test_argv64i1
; LINUXOSX64-NEXT:    addq $24, %rsp
; LINUXOSX64-NEXT:    .cfi_adjust_cfa_offset -24
; LINUXOSX64-NEXT:    popq %r12
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 32
; LINUXOSX64-NEXT:    popq %r13
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 24
; LINUXOSX64-NEXT:    popq %r14
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    popq %r15
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i64 4294967298 to <64 x i1>
  %call = call x86_regcallcc i64 @test_argv64i1(<64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0, <64 x i1> %v0, <64 x i1> %v0,
                                                <64 x i1> %v0)
  ret i64 %call
}

; Test regcall when returning v64i1 type
; Returns the constant i64 4294967298 bitcast to <64 x i1>.
define dso_local x86_regcallcc <64 x i1> @test_retv64i1() {
; X32-LABEL: test_retv64i1:
; X32:       # %bb.0:
; X32-NEXT:    movl $2, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv64i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movabsq $4294967298, %rax # imm = 0x100000002
; CHECK64-NEXT:    retq
  %a = bitcast i64 4294967298 to <64 x i1>
  ret <64 x i1> %a
}

; Test regcall when processing result of v64i1 type
; Calls test_retv64i1 and returns its <64 x i1> result unchanged from a
; function that does not itself use x86_regcallcc.
define dso_local <64 x i1> @caller_retv64i1() #0 {
; X32-LABEL: caller_retv64i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv64i1
; X32-NEXT:    kmovd %eax, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kunpckdq %k0, %k1, %k0
; X32-NEXT:    vpmovm2b %k0, %zmm0
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv64i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv64i1
; WIN64-NEXT:    kmovq %rax, %k0
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv64i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv64i1
; LINUXOSX64-NEXT:    kmovq %rax, %k0
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm0
; LINUXOSX64-NEXT:    popq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <64 x i1> @test_retv64i1()
  ret <64 x i1> %call
}

; Test regcall when receiving arguments of v32i1 type
; Forwards the three <32 x i1> arguments to test_argv32i1helper (declared
; without x86_regcallcc) and returns the helper's i32 result.
declare i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
define dso_local x86_regcallcc i32 @test_argv32i1(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2) {
; X32-LABEL: test_argv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    subl $76, %esp
; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2b %k2, %zmm0
; X32-NEXT:    vpmovm2b %k1, %zmm1
; X32-NEXT:    vpmovm2b %k0, %zmm2
; X32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; X32-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
; X32-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
; X32-NEXT:    calll _test_argv32i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $76, %esp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rbp
; WIN64-NEXT:    .seh_pushreg %rbp
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg %r11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg %r10
; WIN64-NEXT:    subq $128, %rsp
; WIN64-NEXT:    .seh_stackalloc 128
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rbp
; WIN64-NEXT:    .seh_setframe %rbp, 128
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    andq $-32, %rsp
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %eax, %k1
; WIN64-NEXT:    kmovd %ecx, %k2
; WIN64-NEXT:    vpmovm2b %k2, %zmm0
; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k1, %zmm0
; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovdqa %ymm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv32i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    movq %rbp, %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    popq %rbp
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    subq $136, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $ymm1 killed $ymm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $ymm2 killed $ymm2 killed $zmm2
; LINUXOSX64-NEXT:    callq test_argv32i1helper@PLT
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $136, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    retq
entry:
  %res = call i32 @test_argv32i1helper(<32 x i1> %x0, <32 x i1> %x1, <32 x i1> %x2)
  ret i32 %res
}

; Test regcall when passing arguments of v32i1 type
; Passes i32 1 bitcast to <32 x i1> as all three arguments of test_argv32i1
; and returns the callee's result.
define dso_local i32 @caller_argv32i1() #0 {
; X32-LABEL: caller_argv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv32i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv32i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv32i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i32 1 to <32 x i1>
  %call = call x86_regcallcc i32 @test_argv32i1(<32 x i1> %v0, <32 x i1> %v0, <32 x i1> %v0)
  ret i32 %call
}

; Test regcall when returning v32i1 type
; Returns the constant i32 1 bitcast to <32 x i1>.
define dso_local x86_regcallcc <32 x i1> @test_retv32i1() {
; X32-LABEL: test_retv32i1:
; X32:       # %bb.0:
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv32i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl $1, %eax
; CHECK64-NEXT:    retq
  %a = bitcast i32 1 to <32 x i1>
  ret <32 x i1> %a
}

; Test regcall when processing result of v32i1 type
; Calls test_retv32i1, bitcasts the <32 x i1> result to i32 and returns it
; incremented by one.
define dso_local i32 @caller_retv32i1() #0 {
; X32-LABEL: caller_retv32i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv32i1
; X32-NEXT:    incl %eax
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv32i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv32i1
; WIN64-NEXT:    incl %eax
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv32i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv32i1
; LINUXOSX64-NEXT:    incl %eax
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <32 x i1> @test_retv32i1()
  %c = bitcast <32 x i1> %call to i32
  %add = add i32 %c, 1
  ret i32 %add
}

; Test regcall when receiving arguments of v16i1 type
; Forwards the three <16 x i1> arguments to test_argv16i1helper (declared
; without x86_regcallcc) and returns the helper's i16 result.
declare i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
define dso_local x86_regcallcc i16 @test_argv16i1(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2) {
; X32-LABEL: test_argv16i1:
; X32:       # %bb.0:
; X32-NEXT:    subl $76, %esp
; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2b %k2, %zmm0
; X32-NEXT:    vpmovm2b %k1, %zmm1
; X32-NEXT:    vpmovm2b %k0, %zmm2
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv16i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $76, %esp
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv16i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg %r11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg %r10
; WIN64-NEXT:    subq $88, %rsp
; WIN64-NEXT:    .seh_stackalloc 88
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %eax, %k1
; WIN64-NEXT:    kmovd %ecx, %k2
; WIN64-NEXT:    vpmovm2b %k2, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k1, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2b %k0, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv16i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    addq $88, %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv16i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    subq $136, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2b %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2b %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2b %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    callq test_argv16i1helper@PLT
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $136, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
  %res = call i16 @test_argv16i1helper(<16 x i1> %x0, <16 x i1> %x1, <16 x i1> %x2)
  ret i16 %res
}

; Test regcall when passing arguments of v16i1 type
; Passes i16 1 bitcast to <16 x i1> as all three arguments of test_argv16i1
; and returns the callee's result.
define dso_local i16 @caller_argv16i1() #0 {
; X32-LABEL: caller_argv16i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv16i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv16i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv16i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv16i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv16i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i16 1 to <16 x i1>
  %call = call x86_regcallcc i16 @test_argv16i1(<16 x i1> %v0, <16 x i1> %v0, <16 x i1> %v0)
  ret i16 %call
}

; Test regcall when returning v16i1 type
; Returns the constant i16 1 bitcast to <16 x i1>.
define dso_local x86_regcallcc <16 x i1> @test_retv16i1() {
; X32-LABEL: test_retv16i1:
; X32:       # %bb.0:
; X32-NEXT:    movw $1, %ax
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv16i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movw $1, %ax
; CHECK64-NEXT:    retq
  %a = bitcast i16 1 to <16 x i1>
  ret <16 x i1> %a
}

; Test regcall when processing result of v16i1 type
; Calls test_retv16i1, bitcasts the <16 x i1> result to i16 and returns it
; incremented by one.
define dso_local i16 @caller_retv16i1() #0 {
; X32-LABEL: caller_retv16i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv16i1
; X32-NEXT:    # kill: def $ax killed $ax def $eax
; X32-NEXT:    incl %eax
; X32-NEXT:    # kill: def $ax killed $ax killed $eax
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv16i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv16i1
; WIN64-NEXT:    # kill: def $ax killed $ax def $eax
; WIN64-NEXT:    incl %eax
; WIN64-NEXT:    # kill: def $ax killed $ax killed $eax
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv16i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv16i1
; LINUXOSX64-NEXT:    # kill: def $ax killed $ax def $eax
; LINUXOSX64-NEXT:    incl %eax
; LINUXOSX64-NEXT:    # kill: def $ax killed $ax killed $eax
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <16 x i1> @test_retv16i1()
  %c = bitcast <16 x i1> %call to i16
  %add = add i16 %c, 1
  ret i16 %add
}

; Test regcall when receiving arguments of v8i1 type
; Forwards the three <8 x i1> arguments to test_argv8i1helper (declared
; without x86_regcallcc) and returns the helper's i8 result.
declare i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
define dso_local x86_regcallcc i8 @test_argv8i1(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2) {
; X32-LABEL: test_argv8i1:
; X32:       # %bb.0:
; X32-NEXT:    subl $76, %esp
; X32-NEXT:    vmovups %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm6, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm5, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X32-NEXT:    vmovups %xmm4, (%esp) # 16-byte Spill
; X32-NEXT:    kmovd %edx, %k0
; X32-NEXT:    kmovd %ecx, %k1
; X32-NEXT:    kmovd %eax, %k2
; X32-NEXT:    vpmovm2w %k2, %zmm0
; X32-NEXT:    vpmovm2w %k1, %zmm1
; X32-NEXT:    vpmovm2w %k0, %zmm2
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; X32-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; X32-NEXT:    vzeroupper
; X32-NEXT:    calll _test_argv8i1helper
; X32-NEXT:    vmovups (%esp), %xmm4 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm5 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm6 # 16-byte Reload
; X32-NEXT:    vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload
; X32-NEXT:    addl $76, %esp
; X32-NEXT:    retl
;
; WIN64-LABEL: test_argv8i1:
; WIN64:       # %bb.0:
; WIN64-NEXT:    pushq %r11
; WIN64-NEXT:    .seh_pushreg %r11
; WIN64-NEXT:    pushq %r10
; WIN64-NEXT:    .seh_pushreg %r10
; WIN64-NEXT:    subq $88, %rsp
; WIN64-NEXT:    .seh_stackalloc 88
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    kmovd %edx, %k0
; WIN64-NEXT:    kmovd %eax, %k1
; WIN64-NEXT:    kmovd %ecx, %k2
; WIN64-NEXT:    vpmovm2w %k2, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2w %k1, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    vpmovm2w %k0, %zmm0
; WIN64-NEXT:    vmovdqa %xmm0, {{[0-9]+}}(%rsp)
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    callq test_argv8i1helper
; WIN64-NEXT:    nop
; WIN64-NEXT:    addq $88, %rsp
; WIN64-NEXT:    popq %r10
; WIN64-NEXT:    popq %r11
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: test_argv8i1:
; LINUXOSX64:       # %bb.0:
; LINUXOSX64-NEXT:    subq $136, %rsp
; LINUXOSX64-NEXT:    vmovaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; LINUXOSX64-NEXT:    vmovaps %xmm8, (%rsp) # 16-byte Spill
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 144
; LINUXOSX64-NEXT:    .cfi_offset %xmm8, -144
; LINUXOSX64-NEXT:    .cfi_offset %xmm9, -128
; LINUXOSX64-NEXT:    .cfi_offset %xmm10, -112
; LINUXOSX64-NEXT:    .cfi_offset %xmm11, -96
; LINUXOSX64-NEXT:    .cfi_offset %xmm12, -80
; LINUXOSX64-NEXT:    .cfi_offset %xmm13, -64
; LINUXOSX64-NEXT:    .cfi_offset %xmm14, -48
; LINUXOSX64-NEXT:    .cfi_offset %xmm15, -32
; LINUXOSX64-NEXT:    kmovd %edx, %k0
; LINUXOSX64-NEXT:    kmovd %ecx, %k1
; LINUXOSX64-NEXT:    kmovd %eax, %k2
; LINUXOSX64-NEXT:    vpmovm2w %k2, %zmm0
; LINUXOSX64-NEXT:    vpmovm2w %k1, %zmm1
; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm2
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm1 killed $xmm1 killed $zmm1
; LINUXOSX64-NEXT:    # kill: def $xmm2 killed $xmm2 killed $zmm2
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    callq test_argv8i1helper@PLT
; LINUXOSX64-NEXT:    vmovaps (%rsp), %xmm8 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm10 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm11 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
; LINUXOSX64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
; LINUXOSX64-NEXT:    addq $136, %rsp
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
  %res = call i8 @test_argv8i1helper(<8 x i1> %x0, <8 x i1> %x1, <8 x i1> %x2)
  ret i8 %res
}

; Test regcall when passing arguments of v8i1 type
; Passes i8 1 bitcast to <8 x i1> as all three arguments of test_argv8i1 and
; returns the callee's result.
define dso_local i8 @caller_argv8i1() #0 {
; X32-LABEL: caller_argv8i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl $1, %eax
; X32-NEXT:    movl $1, %ecx
; X32-NEXT:    movl $1, %edx
; X32-NEXT:    calll _test_argv8i1
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_argv8i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    movl $1, %eax
; WIN64-NEXT:    movl $1, %ecx
; WIN64-NEXT:    movl $1, %edx
; WIN64-NEXT:    callq test_argv8i1
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_argv8i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    movl $1, %eax
; LINUXOSX64-NEXT:    movl $1, %ecx
; LINUXOSX64-NEXT:    movl $1, %edx
; LINUXOSX64-NEXT:    callq test_argv8i1
; LINUXOSX64-NEXT:    popq %rcx
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    retq
entry:
  %v0 = bitcast i8 1 to <8 x i1>
  %call = call x86_regcallcc i8 @test_argv8i1(<8 x i1> %v0, <8 x i1> %v0, <8 x i1> %v0)
  ret i8 %call
}

; Test regcall when returning v8i1 type
; Returns the constant i8 1 bitcast to <8 x i1>.
define dso_local x86_regcallcc <8 x i1> @test_retv8i1() {
; X32-LABEL: test_retv8i1:
; X32:       # %bb.0:
; X32-NEXT:    movb $1, %al
; X32-NEXT:    retl
;
; CHECK64-LABEL: test_retv8i1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movb $1, %al
; CHECK64-NEXT:    retq
  %a = bitcast i8 1 to <8 x i1>
  ret <8 x i1> %a
}

; Test regcall when processing result of v8i1 type
; Calls test_retv8i1 and returns its <8 x i1> result unchanged from a function
; that does not itself use x86_regcallcc.
define dso_local <8 x i1> @caller_retv8i1() #0 {
; X32-LABEL: caller_retv8i1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    calll _test_retv8i1
; X32-NEXT:    # kill: def $al killed $al def $eax
; X32-NEXT:    kmovd %eax, %k0
; X32-NEXT:    vpmovm2w %k0, %zmm0
; X32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; WIN64-LABEL: caller_retv8i1:
; WIN64:       # %bb.0: # %entry
; WIN64-NEXT:    pushq %rsi
; WIN64-NEXT:    .seh_pushreg %rsi
; WIN64-NEXT:    pushq %rdi
; WIN64-NEXT:    .seh_pushreg %rdi
; WIN64-NEXT:    subq $40, %rsp
; WIN64-NEXT:    .seh_stackalloc 40
; WIN64-NEXT:    vmovaps %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm7, 16
; WIN64-NEXT:    vmovaps %xmm6, (%rsp) # 16-byte Spill
; WIN64-NEXT:    .seh_savexmm %xmm6, 0
; WIN64-NEXT:    .seh_endprologue
; WIN64-NEXT:    callq test_retv8i1
; WIN64-NEXT:    # kill: def $al killed $al def $eax
; WIN64-NEXT:    kmovd %eax, %k0
; WIN64-NEXT:    vpmovm2w %k0, %zmm0
; WIN64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; WIN64-NEXT:    vmovaps (%rsp), %xmm6 # 16-byte Reload
; WIN64-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; WIN64-NEXT:    addq $40, %rsp
; WIN64-NEXT:    popq %rdi
; WIN64-NEXT:    popq %rsi
; WIN64-NEXT:    vzeroupper
; WIN64-NEXT:    retq
; WIN64-NEXT:    .seh_endproc
;
; LINUXOSX64-LABEL: caller_retv8i1:
; LINUXOSX64:       # %bb.0: # %entry
; LINUXOSX64-NEXT:    pushq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 16
; LINUXOSX64-NEXT:    callq test_retv8i1
; LINUXOSX64-NEXT:    # kill: def $al killed $al def $eax
; LINUXOSX64-NEXT:    kmovd %eax, %k0
; LINUXOSX64-NEXT:    vpmovm2w %k0, %zmm0
; LINUXOSX64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; LINUXOSX64-NEXT:    popq %rax
; LINUXOSX64-NEXT:    .cfi_def_cfa_offset 8
; LINUXOSX64-NEXT:    vzeroupper
; LINUXOSX64-NEXT:    retq
entry:
  %call = call x86_regcallcc <8 x i1> @test_retv8i1()
  ret <8 x i1> %call
}