; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512F
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512BW
; RUN: llc < %s -mtriple=i686-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,X86AVX2

; Each function below performs a single insertelement and returns the result.
; Apart from @undef_index, the insertion index is a function argument rather
; than a constant. The assertions are regenerated by the script named in the
; NOTE line and should not be edited by hand.

; With an undef insertion index, every configuration is expected to emit
; nothing but the return.
define <16 x i8> @undef_index(i8 %x) nounwind {
; ALL-LABEL: undef_index:
; ALL:       # %bb.0:
; ALL-NEXT:    ret{{[l|q]}}
  %ins = insertelement <16 x i8> undef, i8 %x, i64 undef
  ret <16 x i8> %ins
}

; With an undef inserted scalar, every configuration is expected to emit
; nothing but the return.
define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind {
; ALL-LABEL: undef_scalar:
; ALL:       # %bb.0:
; ALL-NEXT:    ret{{[l|q]}}
  %ins = insertelement <16 x i8> %x, i8 undef, i32 %index
  ret <16 x i8> %ins
}

;
; Insertion into undef vectors
;

; 128-bit result, scalar passed as an argument. The expected code splats the
; scalar and never reads the index %y.

define <16 x i8> @arg_i8_v16i8_undef(i8 %x, i32 %y) nounwind {
; SSE2-LABEL: arg_i8_v16i8_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: arg_i8_v16i8_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movd %edi, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pshufb %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: arg_i8_v16i8_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i8_v16i8_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: arg_i8_v16i8_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovd %edi, %xmm0
; AVX512F-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i8_v16i8_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %edi, %xmm0
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i8_v16i8_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}

define <8 x i16> @arg_i16_v8i16_undef(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v8i16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i16_v8i16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i16_v8i16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: arg_i16_v8i16_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovd %edi, %xmm0
; AVX512F-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i16_v8i16_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %edi, %xmm0
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i16_v8i16_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}

define <4 x i32> @arg_i32_v4i32_undef(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v4i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i32_v4i32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i32_v4i32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_i32_v4i32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i32_v4i32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}

define <2 x i64> @arg_i64_v2i64_undef(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v2i64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i64_v2i64_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i64_v2i64_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_i64_v2i64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %rdi, %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i64_v2i64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86AVX2-NEXT:    retl
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}

define <4 x float> @arg_f32_v4f32_undef(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v4f32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v4f32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v4f32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f32_v4f32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastss %xmm0, %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f32_v4f32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}

define <2 x double> @arg_f64_v2f64_undef(double %x, i32 %y) nounwind {
; SSE2-LABEL: arg_f64_v2f64_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: arg_f64_v2f64_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: arg_f64_v2f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: arg_f64_v2f64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86AVX2-NEXT:    retl
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}

; 128-bit result, scalar loaded from memory.

define <16 x i8> @load_i8_v16i8_undef(i8* %p, i32 %y) nounwind {
; SSE2-LABEL: load_i8_v16i8_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzbl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: load_i8_v16i8_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movzbl (%rdi), %eax
; SSE41-NEXT:    movd %eax, %xmm0
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pshufb %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: load_i8_v16i8_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i8_v16i8_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_i8_v16i8_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastb (%rdi), %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i8_v16i8_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastb (%eax), %xmm0
; X86AVX2-NEXT:    retl
  %x = load i8, i8* %p
  %ins = insertelement <16 x i8> undef, i8 %x, i32 %y
  ret <16 x i8> %ins
}

define <8 x i16> @load_i16_v8i16_undef(i16* %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v8i16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i16_v8i16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i16_v8i16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_i16_v8i16_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i16_v8i16_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastw (%eax), %xmm0
; X86AVX2-NEXT:    retl
  %x = load i16, i16* %p
  %ins = insertelement <8 x i16> undef, i16 %x, i32 %y
  ret <8 x i16> %ins
}

define <4 x i32> @load_i32_v4i32_undef(i32* %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v4i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i32_v4i32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_i32_v4i32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastss (%eax), %xmm0
; X86AVX2-NEXT:    retl
  %x = load i32, i32* %p
  %ins = insertelement <4 x i32> undef, i32 %x, i32 %y
  ret <4 x i32> %ins
}

define <2 x i64> @load_i64_v2i64_undef(i64* %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v2i64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i64_v2i64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_i64_v2i64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86AVX2-NEXT:    retl
  %x = load i64, i64* %p
  %ins = insertelement <2 x i64> undef, i64 %x, i32 %y
  ret <2 x i64> %ins
}

define <4 x float> @load_f32_v4f32_undef(float* %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v4f32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f32_v4f32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_f32_v4f32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastss (%eax), %xmm0
; X86AVX2-NEXT:    retl
  %x = load float, float* %p
  %ins = insertelement <4 x float> undef, float %x, i32 %y
  ret <4 x float> %ins
}

define <2 x double> @load_f64_v2f64_undef(double* %p, i32 %y) nounwind {
; SSE2-LABEL: load_f64_v2f64_undef:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: load_f64_v2f64_undef:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_f64_v2f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_f64_v2f64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; X86AVX2-NEXT:    retl
  %x = load double, double* %p
  %ins = insertelement <2 x double> undef, double %x, i32 %y
  ret <2 x double> %ins
}

; 256-bit result, scalar passed as an argument. The SSE expectations store the
; scalar to a masked, index-scaled stack slot and reload two xmm halves; the
; AVX expectations splat the scalar.

define <32 x i8> @arg_i8_v32i8_undef(i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v32i8_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %dil, -40(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i8_v32i8_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i8_v32i8_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: arg_i8_v32i8_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovd %edi, %xmm0
; AVX512F-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i8_v32i8_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %edi, %ymm0
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i8_v32i8_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastb {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}

define <16 x i16> @arg_i16_v16i16_undef(i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v16i16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %di, -40(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i16_v16i16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i16_v16i16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: arg_i16_v16i16_undef:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vmovd %edi, %xmm0
; AVX512F-NEXT:    vpbroadcastw %xmm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i16_v16i16_undef:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %edi, %ymm0
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i16_v16i16_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastw {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}

define <8 x i32> @arg_i32_v8i32_undef(i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v8i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %edi, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i32_v8i32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i32_v8i32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_i32_v8i32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i32_v8i32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}

define <4 x i64> @arg_i64_v4i64_undef(i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v4i64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rdi, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_i64_v4i64_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_i64_v4i64_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_i64_v4i64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastq %rdi, %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i64_v4i64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}

define <8 x float> @arg_f32_v8f32_undef(float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v8f32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $7, %edi
; SSE-NEXT:    movss %xmm0, -40(%rsp,%rdi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v8f32_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v8f32_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f32_v8f32_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastss %xmm0, %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f32_v8f32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}

define <4 x double> @arg_f64_v4f64_undef(double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v4f64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    andl $3, %edi
; SSE-NEXT:    movsd %xmm0, -40(%rsp,%rdi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f64_v4f64_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f64_v4f64_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f64_v4f64_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f64_v4f64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}

; 256-bit result, scalar loaded from memory.

define <32 x i8> @load_i8_v32i8_undef(i8* %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v32i8_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movb (%rdi), %al
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %al, -40(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i8_v32i8_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzbl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i8_v32i8_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_i8_v32i8_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastb (%rdi), %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i8_v32i8_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastb (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load i8, i8* %p
  %ins = insertelement <32 x i8> undef, i8 %x, i32 %y
  ret <32 x i8> %ins
}

define <16 x i16> @load_i16_v16i16_undef(i16* %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v16i16_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %ax, -40(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_i16_v16i16_undef:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movzwl (%rdi), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_i16_v16i16_undef:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_i16_v16i16_undef:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i16_v16i16_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastw (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load i16, i16* %p
  %ins = insertelement <16 x i16> undef, i16 %x, i32 %y
  ret <16 x i16> %ins
}

define <8 x i32> @load_i32_v8i32_undef(i32* %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v8i32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %eax, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i32_v8i32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_i32_v8i32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastss (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load i32, i32* %p
  %ins = insertelement <8 x i32> undef, i32 %x, i32 %y
  ret <8 x i32> %ins
}

define <4 x i64> @load_i64_v4i64_undef(i64* %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v4i64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rax, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_i64_v4i64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_i64_v4i64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastsd (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load i64, i64* %p
  %ins = insertelement <4 x i64> undef, i64 %x, i32 %y
  ret <4 x i64> %ins
}

define <8 x float> @load_f32_v8f32_undef(float* %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v8f32_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movss %xmm0, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f32_v8f32_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_f32_v8f32_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastss (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load float, float* %p
  %ins = insertelement <8 x float> undef, float %x, i32 %y
  ret <8 x float> %ins
}

define <4 x double> @load_f64_v4f64_undef(double* %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v4f64_undef:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movsd %xmm0, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_f64_v4f64_undef:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
;
; X86AVX2-LABEL: load_f64_v4f64_undef:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vbroadcastsd (%eax), %ymm0
; X86AVX2-NEXT:    retl
  %x = load double, double* %p
  %ins = insertelement <4 x double> undef, double %x, i32 %y
  ret <4 x double> %ins
}

;
; Insertion into arg vectors
;

; 128-bit vector argument %v, scalar passed as an argument. The existing
; elements must be preserved, so the index is actually used here, either for a
; masked stack store or for a compare-and-blend against a constant-pool vector.

define <16 x i8> @arg_i8_v16i8(<16 x i8> %v, i8 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i8_v16i8:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movb %dil, -24(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i8_v16i8:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $15, %esi
; AVX1OR2-NEXT:    movb %dil, -24(%rsp,%rsi)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: arg_i8_v16i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    andl $15, %esi
; AVX512F-NEXT:    movb %dil, -24(%rsp,%rsi)
; AVX512F-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i8_v16i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %esi, %xmm1
; AVX512BW-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512BW-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i8_v16i8:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $15, %eax
; X86AVX2-NEXT:    movb 8(%ebp), %cl
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <16 x i8> %v, i8 %x, i32 %y
  ret <16 x i8> %ins
}

define <8 x i16> @arg_i16_v8i16(<8 x i16> %v, i16 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i16_v8i16:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movw %di, -24(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i16_v8i16:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $7, %esi
; AVX1OR2-NEXT:    movw %di, -24(%rsp,%rsi,2)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: arg_i16_v8i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX512F-NEXT:    andl $7, %esi
; AVX512F-NEXT:    movw %di, -24(%rsp,%rsi,2)
; AVX512F-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: arg_i16_v8i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %esi, %xmm1
; AVX512BW-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512BW-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: arg_i16_v8i16:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $7, %eax
; X86AVX2-NEXT:    movzwl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    movw %cx, (%esp,%eax,2)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x i16> %v, i16 %x, i32 %y
  ret <8 x i16> %ins
}

define <4 x i32> @arg_i32_v4i32(<4 x i32> %v, i32 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i32_v4i32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movl %edi, -24(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i32_v4i32:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $3, %esi
; AVX1OR2-NEXT:    movl %edi, -24(%rsp,%rsi,4)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: arg_i32_v4i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %esi, %xmm1
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i32_v4i32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $32, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $3, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    movl %ecx, (%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x i32> %v, i32 %x, i32 %y
  ret <4 x i32> %ins
}

define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind {
; SSE-LABEL: arg_i64_v2i64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $1, %esi
; SSE-NEXT:    movq %rdi, -24(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: arg_i64_v2i64:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1OR2-NEXT:    andl $1, %esi
; AVX1OR2-NEXT:    movq %rdi, -24(%rsp,%rsi,8)
; AVX1OR2-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: arg_i64_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movslq %esi, %rax
; AVX512-NEXT:    vpbroadcastq %rax, %xmm1
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_i64_v2i64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    pushl %esi
; X86AVX2-NEXT:    andl $-16, %esp
; X86AVX2-NEXT:    subl $48, %esp
; X86AVX2-NEXT:    movl 8(%ebp), %eax
; X86AVX2-NEXT:    movl 12(%ebp), %ecx
; X86AVX2-NEXT:    movl 16(%ebp), %edx
; X86AVX2-NEXT:    vmovaps %xmm0, (%esp)
; X86AVX2-NEXT:    leal (%edx,%edx), %esi
; X86AVX2-NEXT:    andl $3, %esi
; X86AVX2-NEXT:    movl %eax, (%esp,%esi,4)
; X86AVX2-NEXT:    vmovaps (%esp), %xmm0
; X86AVX2-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
; X86AVX2-NEXT:    leal 1(%edx,%edx), %eax
; X86AVX2-NEXT:    andl $3, %eax
; X86AVX2-NEXT:    movl %ecx, 16(%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps {{[0-9]+}}(%esp), %xmm0
; X86AVX2-NEXT:    leal -4(%ebp), %esp
; X86AVX2-NEXT:    popl %esi
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <2 x i64> %v, i64 %x, i32 %y
  ret <2 x i64> %ins
}

define <4 x float> @arg_f32_v4f32(<4 x float> %v, float %x, i32 %y) nounwind {
; SSE2-LABEL: arg_f32_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    andl $3, %edi
; SSE2-NEXT:    movss %xmm1, -24(%rsp,%rdi,4)
; SSE2-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: arg_f32_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm2
; SSE41-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE41-NEXT:    movd %edi, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE41-NEXT:    pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    blendvps %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v4f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vmovd %edi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v4f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm1, %xmm1
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %xmm2
; AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX2-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f32_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %xmm2
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
; AVX512-NEXT:    vbroadcastss %xmm1, %xmm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f32_v4f32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %xmm1
; X86AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %xmm2
; X86AVX2-NEXT:    vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x float> %v, float %x, i32 %y
  ret <4 x float> %ins
}

define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind {
; SSE2-LABEL: arg_f64_v2f64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT:    andl $1, %edi
; SSE2-NEXT:    movsd %xmm1, -24(%rsp,%rdi,8)
; SSE2-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: arg_f64_v2f64:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movddup {{.*#+}} xmm1 = 
xmm1[0,0] 1104; SSE41-NEXT: movslq %edi, %rax 1105; SSE41-NEXT: movq %rax, %xmm0 1106; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1107; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1108; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 1109; SSE41-NEXT: movapd %xmm2, %xmm0 1110; SSE41-NEXT: retq 1111; 1112; AVX1-LABEL: arg_f64_v2f64: 1113; AVX1: # %bb.0: 1114; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] 1115; AVX1-NEXT: movslq %edi, %rax 1116; AVX1-NEXT: vmovq %rax, %xmm2 1117; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] 1118; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1119; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1120; AVX1-NEXT: retq 1121; 1122; AVX2-LABEL: arg_f64_v2f64: 1123; AVX2: # %bb.0: 1124; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] 1125; AVX2-NEXT: movslq %edi, %rax 1126; AVX2-NEXT: vmovq %rax, %xmm2 1127; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 1128; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1129; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1130; AVX2-NEXT: retq 1131; 1132; AVX512-LABEL: arg_f64_v2f64: 1133; AVX512: # %bb.0: 1134; AVX512-NEXT: movslq %edi, %rax 1135; AVX512-NEXT: vpbroadcastq %rax, %xmm2 1136; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1 1137; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] 1138; AVX512-NEXT: retq 1139; 1140; X86AVX2-LABEL: arg_f64_v2f64: 1141; X86AVX2: # %bb.0: 1142; X86AVX2-NEXT: pushl %ebp 1143; X86AVX2-NEXT: movl %esp, %ebp 1144; X86AVX2-NEXT: andl $-16, %esp 1145; X86AVX2-NEXT: subl $32, %esp 1146; X86AVX2-NEXT: movl 16(%ebp), %eax 1147; X86AVX2-NEXT: andl $1, %eax 1148; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1149; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1150; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8) 1151; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1152; X86AVX2-NEXT: movl %ebp, %esp 1153; X86AVX2-NEXT: popl %ebp 1154; X86AVX2-NEXT: retl 1155 %ins = insertelement <2 x double> %v, double %x, i32 %y 1156 ret <2 x 
double> %ins 1157} 1158 1159define <16 x i8> @load_i8_v16i8(<16 x i8> %v, i8* %p, i32 %y) nounwind { 1160; SSE-LABEL: load_i8_v16i8: 1161; SSE: # %bb.0: 1162; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1163; SSE-NEXT: movb (%rdi), %al 1164; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1165; SSE-NEXT: andl $15, %esi 1166; SSE-NEXT: movb %al, -24(%rsp,%rsi) 1167; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1168; SSE-NEXT: retq 1169; 1170; AVX1OR2-LABEL: load_i8_v16i8: 1171; AVX1OR2: # %bb.0: 1172; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1173; AVX1OR2-NEXT: movb (%rdi), %al 1174; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1175; AVX1OR2-NEXT: andl $15, %esi 1176; AVX1OR2-NEXT: movb %al, -24(%rsp,%rsi) 1177; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1178; AVX1OR2-NEXT: retq 1179; 1180; AVX512F-LABEL: load_i8_v16i8: 1181; AVX512F: # %bb.0: 1182; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1183; AVX512F-NEXT: movb (%rdi), %al 1184; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1185; AVX512F-NEXT: andl $15, %esi 1186; AVX512F-NEXT: movb %al, -24(%rsp,%rsi) 1187; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1188; AVX512F-NEXT: retq 1189; 1190; AVX512BW-LABEL: load_i8_v16i8: 1191; AVX512BW: # %bb.0: 1192; AVX512BW-NEXT: vpbroadcastb %esi, %xmm1 1193; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1194; AVX512BW-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} 1195; AVX512BW-NEXT: retq 1196; 1197; X86AVX2-LABEL: load_i8_v16i8: 1198; X86AVX2: # %bb.0: 1199; X86AVX2-NEXT: pushl %ebp 1200; X86AVX2-NEXT: movl %esp, %ebp 1201; X86AVX2-NEXT: andl $-16, %esp 1202; X86AVX2-NEXT: subl $32, %esp 1203; X86AVX2-NEXT: movl 12(%ebp), %eax 1204; X86AVX2-NEXT: andl $15, %eax 1205; X86AVX2-NEXT: movl 8(%ebp), %ecx 1206; X86AVX2-NEXT: movb (%ecx), %cl 1207; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1208; X86AVX2-NEXT: movb %cl, (%esp,%eax) 1209; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1210; X86AVX2-NEXT: movl %ebp, %esp 1211; X86AVX2-NEXT: popl %ebp 1212; 
X86AVX2-NEXT: retl 1213 %x = load i8, i8* %p 1214 %ins = insertelement <16 x i8> %v, i8 %x, i32 %y 1215 ret <16 x i8> %ins 1216} 1217 1218define <8 x i16> @load_i16_v8i16(<8 x i16> %v, i16* %p, i32 %y) nounwind { 1219; SSE-LABEL: load_i16_v8i16: 1220; SSE: # %bb.0: 1221; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1222; SSE-NEXT: movzwl (%rdi), %eax 1223; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1224; SSE-NEXT: andl $7, %esi 1225; SSE-NEXT: movw %ax, -24(%rsp,%rsi,2) 1226; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1227; SSE-NEXT: retq 1228; 1229; AVX1OR2-LABEL: load_i16_v8i16: 1230; AVX1OR2: # %bb.0: 1231; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1232; AVX1OR2-NEXT: movzwl (%rdi), %eax 1233; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1234; AVX1OR2-NEXT: andl $7, %esi 1235; AVX1OR2-NEXT: movw %ax, -24(%rsp,%rsi,2) 1236; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1237; AVX1OR2-NEXT: retq 1238; 1239; AVX512F-LABEL: load_i16_v8i16: 1240; AVX512F: # %bb.0: 1241; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1242; AVX512F-NEXT: movzwl (%rdi), %eax 1243; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1244; AVX512F-NEXT: andl $7, %esi 1245; AVX512F-NEXT: movw %ax, -24(%rsp,%rsi,2) 1246; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1247; AVX512F-NEXT: retq 1248; 1249; AVX512BW-LABEL: load_i16_v8i16: 1250; AVX512BW: # %bb.0: 1251; AVX512BW-NEXT: vpbroadcastw %esi, %xmm1 1252; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1253; AVX512BW-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} 1254; AVX512BW-NEXT: retq 1255; 1256; X86AVX2-LABEL: load_i16_v8i16: 1257; X86AVX2: # %bb.0: 1258; X86AVX2-NEXT: pushl %ebp 1259; X86AVX2-NEXT: movl %esp, %ebp 1260; X86AVX2-NEXT: andl $-16, %esp 1261; X86AVX2-NEXT: subl $32, %esp 1262; X86AVX2-NEXT: movl 12(%ebp), %eax 1263; X86AVX2-NEXT: andl $7, %eax 1264; X86AVX2-NEXT: movl 8(%ebp), %ecx 1265; X86AVX2-NEXT: movzwl (%ecx), %ecx 1266; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1267; X86AVX2-NEXT: movw %cx, 
(%esp,%eax,2) 1268; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1269; X86AVX2-NEXT: movl %ebp, %esp 1270; X86AVX2-NEXT: popl %ebp 1271; X86AVX2-NEXT: retl 1272 %x = load i16, i16* %p 1273 %ins = insertelement <8 x i16> %v, i16 %x, i32 %y 1274 ret <8 x i16> %ins 1275} 1276 1277define <4 x i32> @load_i32_v4i32(<4 x i32> %v, i32* %p, i32 %y) nounwind { 1278; SSE-LABEL: load_i32_v4i32: 1279; SSE: # %bb.0: 1280; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1281; SSE-NEXT: movl (%rdi), %eax 1282; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1283; SSE-NEXT: andl $3, %esi 1284; SSE-NEXT: movl %eax, -24(%rsp,%rsi,4) 1285; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1286; SSE-NEXT: retq 1287; 1288; AVX1OR2-LABEL: load_i32_v4i32: 1289; AVX1OR2: # %bb.0: 1290; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1291; AVX1OR2-NEXT: movl (%rdi), %eax 1292; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1293; AVX1OR2-NEXT: andl $3, %esi 1294; AVX1OR2-NEXT: movl %eax, -24(%rsp,%rsi,4) 1295; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1296; AVX1OR2-NEXT: retq 1297; 1298; AVX512-LABEL: load_i32_v4i32: 1299; AVX512: # %bb.0: 1300; AVX512-NEXT: vpbroadcastd %esi, %xmm1 1301; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1302; AVX512-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} 1303; AVX512-NEXT: retq 1304; 1305; X86AVX2-LABEL: load_i32_v4i32: 1306; X86AVX2: # %bb.0: 1307; X86AVX2-NEXT: pushl %ebp 1308; X86AVX2-NEXT: movl %esp, %ebp 1309; X86AVX2-NEXT: andl $-16, %esp 1310; X86AVX2-NEXT: subl $32, %esp 1311; X86AVX2-NEXT: movl 12(%ebp), %eax 1312; X86AVX2-NEXT: andl $3, %eax 1313; X86AVX2-NEXT: movl 8(%ebp), %ecx 1314; X86AVX2-NEXT: movl (%ecx), %ecx 1315; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1316; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4) 1317; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1318; X86AVX2-NEXT: movl %ebp, %esp 1319; X86AVX2-NEXT: popl %ebp 1320; X86AVX2-NEXT: retl 1321 %x = load i32, i32* %p 1322 %ins = insertelement <4 x i32> %v, i32 %x, i32 %y 1323 ret <4 x i32> %ins 1324} 
1325 1326define <2 x i64> @load_i64_v2i64(<2 x i64> %v, i64* %p, i32 %y) nounwind { 1327; SSE-LABEL: load_i64_v2i64: 1328; SSE: # %bb.0: 1329; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1330; SSE-NEXT: movq (%rdi), %rax 1331; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1332; SSE-NEXT: andl $1, %esi 1333; SSE-NEXT: movq %rax, -24(%rsp,%rsi,8) 1334; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1335; SSE-NEXT: retq 1336; 1337; AVX1OR2-LABEL: load_i64_v2i64: 1338; AVX1OR2: # %bb.0: 1339; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1340; AVX1OR2-NEXT: movq (%rdi), %rax 1341; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1342; AVX1OR2-NEXT: andl $1, %esi 1343; AVX1OR2-NEXT: movq %rax, -24(%rsp,%rsi,8) 1344; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1345; AVX1OR2-NEXT: retq 1346; 1347; AVX512-LABEL: load_i64_v2i64: 1348; AVX512: # %bb.0: 1349; AVX512-NEXT: movslq %esi, %rax 1350; AVX512-NEXT: vpbroadcastq %rax, %xmm1 1351; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1352; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} 1353; AVX512-NEXT: retq 1354; 1355; X86AVX2-LABEL: load_i64_v2i64: 1356; X86AVX2: # %bb.0: 1357; X86AVX2-NEXT: pushl %ebp 1358; X86AVX2-NEXT: movl %esp, %ebp 1359; X86AVX2-NEXT: pushl %esi 1360; X86AVX2-NEXT: andl $-16, %esp 1361; X86AVX2-NEXT: subl $48, %esp 1362; X86AVX2-NEXT: movl 12(%ebp), %eax 1363; X86AVX2-NEXT: movl 8(%ebp), %ecx 1364; X86AVX2-NEXT: movl (%ecx), %edx 1365; X86AVX2-NEXT: movl 4(%ecx), %ecx 1366; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1367; X86AVX2-NEXT: leal (%eax,%eax), %esi 1368; X86AVX2-NEXT: andl $3, %esi 1369; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) 1370; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1371; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) 1372; X86AVX2-NEXT: leal 1(%eax,%eax), %eax 1373; X86AVX2-NEXT: andl $3, %eax 1374; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4) 1375; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 1376; X86AVX2-NEXT: leal -4(%ebp), %esp 1377; X86AVX2-NEXT: popl %esi 1378; 
X86AVX2-NEXT: popl %ebp 1379; X86AVX2-NEXT: retl 1380 %x = load i64, i64* %p 1381 %ins = insertelement <2 x i64> %v, i64 %x, i32 %y 1382 ret <2 x i64> %ins 1383} 1384 1385define <4 x float> @load_f32_v4f32(<4 x float> %v, float* %p, i32 %y) nounwind { 1386; SSE2-LABEL: load_f32_v4f32: 1387; SSE2: # %bb.0: 1388; SSE2-NEXT: # kill: def $esi killed $esi def $rsi 1389; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1390; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1391; SSE2-NEXT: andl $3, %esi 1392; SSE2-NEXT: movss %xmm1, -24(%rsp,%rsi,4) 1393; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1394; SSE2-NEXT: retq 1395; 1396; SSE41-LABEL: load_f32_v4f32: 1397; SSE41: # %bb.0: 1398; SSE41-NEXT: movaps %xmm0, %xmm1 1399; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 1400; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0,0,0] 1401; SSE41-NEXT: movd %esi, %xmm0 1402; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1403; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1404; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 1405; SSE41-NEXT: movaps %xmm1, %xmm0 1406; SSE41-NEXT: retq 1407; 1408; AVX1-LABEL: load_f32_v4f32: 1409; AVX1: # %bb.0: 1410; AVX1-NEXT: vbroadcastss (%rdi), %xmm1 1411; AVX1-NEXT: vmovd %esi, %xmm2 1412; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] 1413; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1414; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 1415; AVX1-NEXT: retq 1416; 1417; AVX2-LABEL: load_f32_v4f32: 1418; AVX2: # %bb.0: 1419; AVX2-NEXT: vbroadcastss (%rdi), %xmm1 1420; AVX2-NEXT: vmovd %esi, %xmm2 1421; AVX2-NEXT: vpbroadcastd %xmm2, %xmm2 1422; AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1423; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 1424; AVX2-NEXT: retq 1425; 1426; AVX512-LABEL: load_f32_v4f32: 1427; AVX512: # %bb.0: 1428; AVX512-NEXT: vpbroadcastd %esi, %xmm1 1429; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1430; AVX512-NEXT: vbroadcastss (%rdi), %xmm0 
{%k1} 1431; AVX512-NEXT: retq 1432; 1433; X86AVX2-LABEL: load_f32_v4f32: 1434; X86AVX2: # %bb.0: 1435; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 1436; X86AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1 1437; X86AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 1438; X86AVX2-NEXT: vbroadcastss (%eax), %xmm2 1439; X86AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 1440; X86AVX2-NEXT: retl 1441 %x = load float, float* %p 1442 %ins = insertelement <4 x float> %v, float %x, i32 %y 1443 ret <4 x float> %ins 1444} 1445 1446define <2 x double> @load_f64_v2f64(<2 x double> %v, double* %p, i32 %y) nounwind { 1447; SSE2-LABEL: load_f64_v2f64: 1448; SSE2: # %bb.0: 1449; SSE2-NEXT: # kill: def $esi killed $esi def $rsi 1450; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 1451; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1452; SSE2-NEXT: andl $1, %esi 1453; SSE2-NEXT: movsd %xmm1, -24(%rsp,%rsi,8) 1454; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1455; SSE2-NEXT: retq 1456; 1457; SSE41-LABEL: load_f64_v2f64: 1458; SSE41: # %bb.0: 1459; SSE41-NEXT: movapd %xmm0, %xmm1 1460; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0] 1461; SSE41-NEXT: movslq %esi, %rax 1462; SSE41-NEXT: movq %rax, %xmm0 1463; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1464; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1465; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 1466; SSE41-NEXT: movapd %xmm1, %xmm0 1467; SSE41-NEXT: retq 1468; 1469; AVX1-LABEL: load_f64_v2f64: 1470; AVX1: # %bb.0: 1471; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 1472; AVX1-NEXT: movslq %esi, %rax 1473; AVX1-NEXT: vmovq %rax, %xmm2 1474; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] 1475; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1476; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1477; AVX1-NEXT: retq 1478; 1479; AVX2-LABEL: load_f64_v2f64: 1480; AVX2: # %bb.0: 1481; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 1482; AVX2-NEXT: movslq %esi, %rax 1483; AVX2-NEXT: vmovq %rax, %xmm2 1484; 
AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 1485; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1486; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1487; AVX2-NEXT: retq 1488; 1489; AVX512-LABEL: load_f64_v2f64: 1490; AVX512: # %bb.0: 1491; AVX512-NEXT: movslq %esi, %rax 1492; AVX512-NEXT: vpbroadcastq %rax, %xmm1 1493; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1494; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] 1495; AVX512-NEXT: retq 1496; 1497; X86AVX2-LABEL: load_f64_v2f64: 1498; X86AVX2: # %bb.0: 1499; X86AVX2-NEXT: pushl %ebp 1500; X86AVX2-NEXT: movl %esp, %ebp 1501; X86AVX2-NEXT: andl $-16, %esp 1502; X86AVX2-NEXT: subl $32, %esp 1503; X86AVX2-NEXT: movl 12(%ebp), %eax 1504; X86AVX2-NEXT: andl $1, %eax 1505; X86AVX2-NEXT: movl 8(%ebp), %ecx 1506; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1507; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1508; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8) 1509; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1510; X86AVX2-NEXT: movl %ebp, %esp 1511; X86AVX2-NEXT: popl %ebp 1512; X86AVX2-NEXT: retl 1513 %x = load double, double* %p 1514 %ins = insertelement <2 x double> %v, double %x, i32 %y 1515 ret <2 x double> %ins 1516} 1517 1518define <32 x i8> @arg_i8_v32i8(<32 x i8> %v, i8 %x, i32 %y) nounwind { 1519; SSE-LABEL: arg_i8_v32i8: 1520; SSE: # %bb.0: 1521; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1522; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1523; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1524; SSE-NEXT: andl $31, %esi 1525; SSE-NEXT: movb %dil, -40(%rsp,%rsi) 1526; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1527; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1528; SSE-NEXT: retq 1529; 1530; AVX1OR2-LABEL: arg_i8_v32i8: 1531; AVX1OR2: # %bb.0: 1532; AVX1OR2-NEXT: pushq %rbp 1533; AVX1OR2-NEXT: movq %rsp, %rbp 1534; AVX1OR2-NEXT: andq $-32, %rsp 1535; AVX1OR2-NEXT: subq $64, %rsp 1536; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1537; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1538; 
AVX1OR2-NEXT: andl $31, %esi 1539; AVX1OR2-NEXT: movb %dil, (%rsp,%rsi) 1540; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1541; AVX1OR2-NEXT: movq %rbp, %rsp 1542; AVX1OR2-NEXT: popq %rbp 1543; AVX1OR2-NEXT: retq 1544; 1545; AVX512F-LABEL: arg_i8_v32i8: 1546; AVX512F: # %bb.0: 1547; AVX512F-NEXT: pushq %rbp 1548; AVX512F-NEXT: movq %rsp, %rbp 1549; AVX512F-NEXT: andq $-32, %rsp 1550; AVX512F-NEXT: subq $64, %rsp 1551; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1552; AVX512F-NEXT: vmovaps %ymm0, (%rsp) 1553; AVX512F-NEXT: andl $31, %esi 1554; AVX512F-NEXT: movb %dil, (%rsp,%rsi) 1555; AVX512F-NEXT: vmovaps (%rsp), %ymm0 1556; AVX512F-NEXT: movq %rbp, %rsp 1557; AVX512F-NEXT: popq %rbp 1558; AVX512F-NEXT: retq 1559; 1560; AVX512BW-LABEL: arg_i8_v32i8: 1561; AVX512BW: # %bb.0: 1562; AVX512BW-NEXT: vpbroadcastb %esi, %ymm1 1563; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 1564; AVX512BW-NEXT: vpbroadcastb %edi, %ymm0 {%k1} 1565; AVX512BW-NEXT: retq 1566; 1567; X86AVX2-LABEL: arg_i8_v32i8: 1568; X86AVX2: # %bb.0: 1569; X86AVX2-NEXT: pushl %ebp 1570; X86AVX2-NEXT: movl %esp, %ebp 1571; X86AVX2-NEXT: andl $-32, %esp 1572; X86AVX2-NEXT: subl $64, %esp 1573; X86AVX2-NEXT: movl 12(%ebp), %eax 1574; X86AVX2-NEXT: andl $31, %eax 1575; X86AVX2-NEXT: movb 8(%ebp), %cl 1576; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1577; X86AVX2-NEXT: movb %cl, (%esp,%eax) 1578; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1579; X86AVX2-NEXT: movl %ebp, %esp 1580; X86AVX2-NEXT: popl %ebp 1581; X86AVX2-NEXT: retl 1582 %ins = insertelement <32 x i8> %v, i8 %x, i32 %y 1583 ret <32 x i8> %ins 1584} 1585 1586define <16 x i16> @arg_i16_v16i16(<16 x i16> %v, i16 %x, i32 %y) nounwind { 1587; SSE-LABEL: arg_i16_v16i16: 1588; SSE: # %bb.0: 1589; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1590; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1591; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1592; SSE-NEXT: andl $15, %esi 1593; SSE-NEXT: movw %di, -40(%rsp,%rsi,2) 1594; SSE-NEXT: movaps 
-{{[0-9]+}}(%rsp), %xmm0 1595; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1596; SSE-NEXT: retq 1597; 1598; AVX1OR2-LABEL: arg_i16_v16i16: 1599; AVX1OR2: # %bb.0: 1600; AVX1OR2-NEXT: pushq %rbp 1601; AVX1OR2-NEXT: movq %rsp, %rbp 1602; AVX1OR2-NEXT: andq $-32, %rsp 1603; AVX1OR2-NEXT: subq $64, %rsp 1604; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1605; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1606; AVX1OR2-NEXT: andl $15, %esi 1607; AVX1OR2-NEXT: movw %di, (%rsp,%rsi,2) 1608; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1609; AVX1OR2-NEXT: movq %rbp, %rsp 1610; AVX1OR2-NEXT: popq %rbp 1611; AVX1OR2-NEXT: retq 1612; 1613; AVX512F-LABEL: arg_i16_v16i16: 1614; AVX512F: # %bb.0: 1615; AVX512F-NEXT: pushq %rbp 1616; AVX512F-NEXT: movq %rsp, %rbp 1617; AVX512F-NEXT: andq $-32, %rsp 1618; AVX512F-NEXT: subq $64, %rsp 1619; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1620; AVX512F-NEXT: vmovaps %ymm0, (%rsp) 1621; AVX512F-NEXT: andl $15, %esi 1622; AVX512F-NEXT: movw %di, (%rsp,%rsi,2) 1623; AVX512F-NEXT: vmovaps (%rsp), %ymm0 1624; AVX512F-NEXT: movq %rbp, %rsp 1625; AVX512F-NEXT: popq %rbp 1626; AVX512F-NEXT: retq 1627; 1628; AVX512BW-LABEL: arg_i16_v16i16: 1629; AVX512BW: # %bb.0: 1630; AVX512BW-NEXT: vpbroadcastw %esi, %ymm1 1631; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 1632; AVX512BW-NEXT: vpbroadcastw %edi, %ymm0 {%k1} 1633; AVX512BW-NEXT: retq 1634; 1635; X86AVX2-LABEL: arg_i16_v16i16: 1636; X86AVX2: # %bb.0: 1637; X86AVX2-NEXT: pushl %ebp 1638; X86AVX2-NEXT: movl %esp, %ebp 1639; X86AVX2-NEXT: andl $-32, %esp 1640; X86AVX2-NEXT: subl $64, %esp 1641; X86AVX2-NEXT: movl 12(%ebp), %eax 1642; X86AVX2-NEXT: andl $15, %eax 1643; X86AVX2-NEXT: movzwl 8(%ebp), %ecx 1644; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1645; X86AVX2-NEXT: movw %cx, (%esp,%eax,2) 1646; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1647; X86AVX2-NEXT: movl %ebp, %esp 1648; X86AVX2-NEXT: popl %ebp 1649; X86AVX2-NEXT: retl 1650 %ins = insertelement <16 x i16> %v, i16 %x, i32 %y 1651 ret 
<16 x i16> %ins 1652} 1653 1654define <8 x i32> @arg_i32_v8i32(<8 x i32> %v, i32 %x, i32 %y) nounwind { 1655; SSE-LABEL: arg_i32_v8i32: 1656; SSE: # %bb.0: 1657; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1658; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1659; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1660; SSE-NEXT: andl $7, %esi 1661; SSE-NEXT: movl %edi, -40(%rsp,%rsi,4) 1662; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1663; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1664; SSE-NEXT: retq 1665; 1666; AVX1OR2-LABEL: arg_i32_v8i32: 1667; AVX1OR2: # %bb.0: 1668; AVX1OR2-NEXT: pushq %rbp 1669; AVX1OR2-NEXT: movq %rsp, %rbp 1670; AVX1OR2-NEXT: andq $-32, %rsp 1671; AVX1OR2-NEXT: subq $64, %rsp 1672; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1673; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1674; AVX1OR2-NEXT: andl $7, %esi 1675; AVX1OR2-NEXT: movl %edi, (%rsp,%rsi,4) 1676; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1677; AVX1OR2-NEXT: movq %rbp, %rsp 1678; AVX1OR2-NEXT: popq %rbp 1679; AVX1OR2-NEXT: retq 1680; 1681; AVX512-LABEL: arg_i32_v8i32: 1682; AVX512: # %bb.0: 1683; AVX512-NEXT: vpbroadcastd %esi, %ymm1 1684; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 1685; AVX512-NEXT: vpbroadcastd %edi, %ymm0 {%k1} 1686; AVX512-NEXT: retq 1687; 1688; X86AVX2-LABEL: arg_i32_v8i32: 1689; X86AVX2: # %bb.0: 1690; X86AVX2-NEXT: pushl %ebp 1691; X86AVX2-NEXT: movl %esp, %ebp 1692; X86AVX2-NEXT: andl $-32, %esp 1693; X86AVX2-NEXT: subl $64, %esp 1694; X86AVX2-NEXT: movl 12(%ebp), %eax 1695; X86AVX2-NEXT: andl $7, %eax 1696; X86AVX2-NEXT: movl 8(%ebp), %ecx 1697; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1698; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4) 1699; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1700; X86AVX2-NEXT: movl %ebp, %esp 1701; X86AVX2-NEXT: popl %ebp 1702; X86AVX2-NEXT: retl 1703 %ins = insertelement <8 x i32> %v, i32 %x, i32 %y 1704 ret <8 x i32> %ins 1705} 1706 1707define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind { 1708; SSE-LABEL: 
arg_i64_v4i64: 1709; SSE: # %bb.0: 1710; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1711; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1712; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1713; SSE-NEXT: andl $3, %esi 1714; SSE-NEXT: movq %rdi, -40(%rsp,%rsi,8) 1715; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1716; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1717; SSE-NEXT: retq 1718; 1719; AVX1OR2-LABEL: arg_i64_v4i64: 1720; AVX1OR2: # %bb.0: 1721; AVX1OR2-NEXT: pushq %rbp 1722; AVX1OR2-NEXT: movq %rsp, %rbp 1723; AVX1OR2-NEXT: andq $-32, %rsp 1724; AVX1OR2-NEXT: subq $64, %rsp 1725; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1726; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1727; AVX1OR2-NEXT: andl $3, %esi 1728; AVX1OR2-NEXT: movq %rdi, (%rsp,%rsi,8) 1729; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1730; AVX1OR2-NEXT: movq %rbp, %rsp 1731; AVX1OR2-NEXT: popq %rbp 1732; AVX1OR2-NEXT: retq 1733; 1734; AVX512-LABEL: arg_i64_v4i64: 1735; AVX512: # %bb.0: 1736; AVX512-NEXT: movslq %esi, %rax 1737; AVX512-NEXT: vpbroadcastq %rax, %ymm1 1738; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 1739; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} 1740; AVX512-NEXT: retq 1741; 1742; X86AVX2-LABEL: arg_i64_v4i64: 1743; X86AVX2: # %bb.0: 1744; X86AVX2-NEXT: pushl %ebp 1745; X86AVX2-NEXT: movl %esp, %ebp 1746; X86AVX2-NEXT: pushl %esi 1747; X86AVX2-NEXT: andl $-32, %esp 1748; X86AVX2-NEXT: subl $96, %esp 1749; X86AVX2-NEXT: movl 8(%ebp), %eax 1750; X86AVX2-NEXT: movl 12(%ebp), %ecx 1751; X86AVX2-NEXT: movl 16(%ebp), %edx 1752; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1753; X86AVX2-NEXT: leal (%edx,%edx), %esi 1754; X86AVX2-NEXT: andl $7, %esi 1755; X86AVX2-NEXT: movl %eax, (%esp,%esi,4) 1756; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1757; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) 1758; X86AVX2-NEXT: leal 1(%edx,%edx), %eax 1759; X86AVX2-NEXT: andl $7, %eax 1760; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4) 1761; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0 1762; X86AVX2-NEXT: leal 
-4(%ebp), %esp 1763; X86AVX2-NEXT: popl %esi 1764; X86AVX2-NEXT: popl %ebp 1765; X86AVX2-NEXT: retl 1766 %ins = insertelement <4 x i64> %v, i64 %x, i32 %y 1767 ret <4 x i64> %ins 1768} 1769 1770define <8 x float> @arg_f32_v8f32(<8 x float> %v, float %x, i32 %y) nounwind { 1771; SSE-LABEL: arg_f32_v8f32: 1772; SSE: # %bb.0: 1773; SSE-NEXT: # kill: def $edi killed $edi def $rdi 1774; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1775; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1776; SSE-NEXT: andl $7, %edi 1777; SSE-NEXT: movss %xmm2, -40(%rsp,%rdi,4) 1778; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1779; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1780; SSE-NEXT: retq 1781; 1782; AVX1-LABEL: arg_f32_v8f32: 1783; AVX1: # %bb.0: 1784; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[0,0,0,0] 1785; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 1786; AVX1-NEXT: vmovd %edi, %xmm2 1787; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] 1788; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 1789; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2 1790; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1791; AVX1-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 1792; AVX1-NEXT: retq 1793; 1794; AVX2-LABEL: arg_f32_v8f32: 1795; AVX2: # %bb.0: 1796; AVX2-NEXT: vbroadcastss %xmm1, %ymm1 1797; AVX2-NEXT: vmovd %edi, %xmm2 1798; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2 1799; AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 1800; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 1801; AVX2-NEXT: retq 1802; 1803; AVX512-LABEL: arg_f32_v8f32: 1804; AVX512: # %bb.0: 1805; AVX512-NEXT: vpbroadcastd %edi, %ymm2 1806; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1 1807; AVX512-NEXT: vbroadcastss %xmm1, %ymm0 {%k1} 1808; AVX512-NEXT: retq 1809; 1810; X86AVX2-LABEL: arg_f32_v8f32: 1811; X86AVX2: # %bb.0: 1812; X86AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm1 1813; X86AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 1814; X86AVX2-NEXT: 
vbroadcastss {{[0-9]+}}(%esp), %ymm2 1815; X86AVX2-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0 1816; X86AVX2-NEXT: retl 1817 %ins = insertelement <8 x float> %v, float %x, i32 %y 1818 ret <8 x float> %ins 1819} 1820 1821define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind { 1822; SSE-LABEL: arg_f64_v4f64: 1823; SSE: # %bb.0: 1824; SSE-NEXT: # kill: def $edi killed $edi def $rdi 1825; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1826; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1827; SSE-NEXT: andl $3, %edi 1828; SSE-NEXT: movsd %xmm2, -40(%rsp,%rdi,8) 1829; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1830; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1831; SSE-NEXT: retq 1832; 1833; AVX1-LABEL: arg_f64_v4f64: 1834; AVX1: # %bb.0: 1835; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] 1836; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1 1837; AVX1-NEXT: movslq %edi, %rax 1838; AVX1-NEXT: vmovq %rax, %xmm2 1839; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] 1840; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 1841; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2 1842; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 1843; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 1844; AVX1-NEXT: retq 1845; 1846; AVX2-LABEL: arg_f64_v4f64: 1847; AVX2: # %bb.0: 1848; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1 1849; AVX2-NEXT: movslq %edi, %rax 1850; AVX2-NEXT: vmovq %rax, %xmm2 1851; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 1852; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 1853; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 1854; AVX2-NEXT: retq 1855; 1856; AVX512-LABEL: arg_f64_v4f64: 1857; AVX512: # %bb.0: 1858; AVX512-NEXT: movslq %edi, %rax 1859; AVX512-NEXT: vpbroadcastq %rax, %ymm2 1860; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1 1861; AVX512-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1} 1862; AVX512-NEXT: retq 1863; 1864; X86AVX2-LABEL: arg_f64_v4f64: 1865; X86AVX2: # %bb.0: 1866; X86AVX2-NEXT: 
pushl %ebp 1867; X86AVX2-NEXT: movl %esp, %ebp 1868; X86AVX2-NEXT: andl $-32, %esp 1869; X86AVX2-NEXT: subl $64, %esp 1870; X86AVX2-NEXT: movl 16(%ebp), %eax 1871; X86AVX2-NEXT: andl $3, %eax 1872; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1873; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1874; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8) 1875; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1876; X86AVX2-NEXT: movl %ebp, %esp 1877; X86AVX2-NEXT: popl %ebp 1878; X86AVX2-NEXT: retl 1879 %ins = insertelement <4 x double> %v, double %x, i32 %y 1880 ret <4 x double> %ins 1881} 1882; Variable-index insert of an i8 loaded from %p into <32 x i8> %v (index %y): the AVX512BW assertions expect an index-compare mask plus a masked vpbroadcastb from memory, every other run expects a stack spill with the index masked by 31. 1883define <32 x i8> @load_i8_v32i8(<32 x i8> %v, i8* %p, i32 %y) nounwind { 1884; SSE-LABEL: load_i8_v32i8: 1885; SSE: # %bb.0: 1886; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1887; SSE-NEXT: movb (%rdi), %al 1888; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1889; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1890; SSE-NEXT: andl $31, %esi 1891; SSE-NEXT: movb %al, -40(%rsp,%rsi) 1892; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1893; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1894; SSE-NEXT: retq 1895; 1896; AVX1OR2-LABEL: load_i8_v32i8: 1897; AVX1OR2: # %bb.0: 1898; AVX1OR2-NEXT: pushq %rbp 1899; AVX1OR2-NEXT: movq %rsp, %rbp 1900; AVX1OR2-NEXT: andq $-32, %rsp 1901; AVX1OR2-NEXT: subq $64, %rsp 1902; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1903; AVX1OR2-NEXT: movb (%rdi), %al 1904; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1905; AVX1OR2-NEXT: andl $31, %esi 1906; AVX1OR2-NEXT: movb %al, (%rsp,%rsi) 1907; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1908; AVX1OR2-NEXT: movq %rbp, %rsp 1909; AVX1OR2-NEXT: popq %rbp 1910; AVX1OR2-NEXT: retq 1911; 1912; AVX512F-LABEL: load_i8_v32i8: 1913; AVX512F: # %bb.0: 1914; AVX512F-NEXT: pushq %rbp 1915; AVX512F-NEXT: movq %rsp, %rbp 1916; AVX512F-NEXT: andq $-32, %rsp 1917; AVX512F-NEXT: subq $64, %rsp 1918; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1919; AVX512F-NEXT: movb (%rdi), %al 1920; AVX512F-NEXT: vmovaps %ymm0, (%rsp) 1921; AVX512F-NEXT: andl $31, %esi 
1922; AVX512F-NEXT: movb %al, (%rsp,%rsi) 1923; AVX512F-NEXT: vmovaps (%rsp), %ymm0 1924; AVX512F-NEXT: movq %rbp, %rsp 1925; AVX512F-NEXT: popq %rbp 1926; AVX512F-NEXT: retq 1927; 1928; AVX512BW-LABEL: load_i8_v32i8: 1929; AVX512BW: # %bb.0: 1930; AVX512BW-NEXT: vpbroadcastb %esi, %ymm1 1931; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 1932; AVX512BW-NEXT: vpbroadcastb (%rdi), %ymm0 {%k1} 1933; AVX512BW-NEXT: retq 1934; 1935; X86AVX2-LABEL: load_i8_v32i8: 1936; X86AVX2: # %bb.0: 1937; X86AVX2-NEXT: pushl %ebp 1938; X86AVX2-NEXT: movl %esp, %ebp 1939; X86AVX2-NEXT: andl $-32, %esp 1940; X86AVX2-NEXT: subl $64, %esp 1941; X86AVX2-NEXT: movl 12(%ebp), %eax 1942; X86AVX2-NEXT: andl $31, %eax 1943; X86AVX2-NEXT: movl 8(%ebp), %ecx 1944; X86AVX2-NEXT: movb (%ecx), %cl 1945; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1946; X86AVX2-NEXT: movb %cl, (%esp,%eax) 1947; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1948; X86AVX2-NEXT: movl %ebp, %esp 1949; X86AVX2-NEXT: popl %ebp 1950; X86AVX2-NEXT: retl 1951 %x = load i8, i8* %p 1952 %ins = insertelement <32 x i8> %v, i8 %x, i32 %y 1953 ret <32 x i8> %ins 1954} 1955; Variable-index insert of an i16 loaded from %p into <16 x i16> %v (index %y): the AVX512BW assertions expect an index-compare mask plus a masked vpbroadcastw from memory, every other run expects a stack spill with the index masked by 15. 1956define <16 x i16> @load_i16_v16i16(<16 x i16> %v, i16* %p, i32 %y) nounwind { 1957; SSE-LABEL: load_i16_v16i16: 1958; SSE: # %bb.0: 1959; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1960; SSE-NEXT: movzwl (%rdi), %eax 1961; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1962; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1963; SSE-NEXT: andl $15, %esi 1964; SSE-NEXT: movw %ax, -40(%rsp,%rsi,2) 1965; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1966; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1967; SSE-NEXT: retq 1968; 1969; AVX1OR2-LABEL: load_i16_v16i16: 1970; AVX1OR2: # %bb.0: 1971; AVX1OR2-NEXT: pushq %rbp 1972; AVX1OR2-NEXT: movq %rsp, %rbp 1973; AVX1OR2-NEXT: andq $-32, %rsp 1974; AVX1OR2-NEXT: subq $64, %rsp 1975; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1976; AVX1OR2-NEXT: movzwl (%rdi), %eax 1977; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1978; 
AVX1OR2-NEXT: andl $15, %esi 1979; AVX1OR2-NEXT: movw %ax, (%rsp,%rsi,2) 1980; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1981; AVX1OR2-NEXT: movq %rbp, %rsp 1982; AVX1OR2-NEXT: popq %rbp 1983; AVX1OR2-NEXT: retq 1984; 1985; AVX512F-LABEL: load_i16_v16i16: 1986; AVX512F: # %bb.0: 1987; AVX512F-NEXT: pushq %rbp 1988; AVX512F-NEXT: movq %rsp, %rbp 1989; AVX512F-NEXT: andq $-32, %rsp 1990; AVX512F-NEXT: subq $64, %rsp 1991; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1992; AVX512F-NEXT: movzwl (%rdi), %eax 1993; AVX512F-NEXT: vmovaps %ymm0, (%rsp) 1994; AVX512F-NEXT: andl $15, %esi 1995; AVX512F-NEXT: movw %ax, (%rsp,%rsi,2) 1996; AVX512F-NEXT: vmovaps (%rsp), %ymm0 1997; AVX512F-NEXT: movq %rbp, %rsp 1998; AVX512F-NEXT: popq %rbp 1999; AVX512F-NEXT: retq 2000; 2001; AVX512BW-LABEL: load_i16_v16i16: 2002; AVX512BW: # %bb.0: 2003; AVX512BW-NEXT: vpbroadcastw %esi, %ymm1 2004; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 2005; AVX512BW-NEXT: vpbroadcastw (%rdi), %ymm0 {%k1} 2006; AVX512BW-NEXT: retq 2007; 2008; X86AVX2-LABEL: load_i16_v16i16: 2009; X86AVX2: # %bb.0: 2010; X86AVX2-NEXT: pushl %ebp 2011; X86AVX2-NEXT: movl %esp, %ebp 2012; X86AVX2-NEXT: andl $-32, %esp 2013; X86AVX2-NEXT: subl $64, %esp 2014; X86AVX2-NEXT: movl 12(%ebp), %eax 2015; X86AVX2-NEXT: andl $15, %eax 2016; X86AVX2-NEXT: movl 8(%ebp), %ecx 2017; X86AVX2-NEXT: movzwl (%ecx), %ecx 2018; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 2019; X86AVX2-NEXT: movw %cx, (%esp,%eax,2) 2020; X86AVX2-NEXT: vmovaps (%esp), %ymm0 2021; X86AVX2-NEXT: movl %ebp, %esp 2022; X86AVX2-NEXT: popl %ebp 2023; X86AVX2-NEXT: retl 2024 %x = load i16, i16* %p 2025 %ins = insertelement <16 x i16> %v, i16 %x, i32 %y 2026 ret <16 x i16> %ins 2027} 2028; Variable-index insert of an i32 loaded from %p into <8 x i32> %v (index %y): both AVX512 runs share assertions expecting an index-compare mask plus a masked vpbroadcastd from memory, the remaining runs expect a stack spill with the index masked by 7. 2029define <8 x i32> @load_i32_v8i32(<8 x i32> %v, i32* %p, i32 %y) nounwind { 2030; SSE-LABEL: load_i32_v8i32: 2031; SSE: # %bb.0: 2032; SSE-NEXT: # kill: def $esi killed $esi def $rsi 2033; SSE-NEXT: movl (%rdi), %eax 2034; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 
2035; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 2036; SSE-NEXT: andl $7, %esi 2037; SSE-NEXT: movl %eax, -40(%rsp,%rsi,4) 2038; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 2039; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 2040; SSE-NEXT: retq 2041; 2042; AVX1OR2-LABEL: load_i32_v8i32: 2043; AVX1OR2: # %bb.0: 2044; AVX1OR2-NEXT: pushq %rbp 2045; AVX1OR2-NEXT: movq %rsp, %rbp 2046; AVX1OR2-NEXT: andq $-32, %rsp 2047; AVX1OR2-NEXT: subq $64, %rsp 2048; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 2049; AVX1OR2-NEXT: movl (%rdi), %eax 2050; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 2051; AVX1OR2-NEXT: andl $7, %esi 2052; AVX1OR2-NEXT: movl %eax, (%rsp,%rsi,4) 2053; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 2054; AVX1OR2-NEXT: movq %rbp, %rsp 2055; AVX1OR2-NEXT: popq %rbp 2056; AVX1OR2-NEXT: retq 2057; 2058; AVX512-LABEL: load_i32_v8i32: 2059; AVX512: # %bb.0: 2060; AVX512-NEXT: vpbroadcastd %esi, %ymm1 2061; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 2062; AVX512-NEXT: vpbroadcastd (%rdi), %ymm0 {%k1} 2063; AVX512-NEXT: retq 2064; 2065; X86AVX2-LABEL: load_i32_v8i32: 2066; X86AVX2: # %bb.0: 2067; X86AVX2-NEXT: pushl %ebp 2068; X86AVX2-NEXT: movl %esp, %ebp 2069; X86AVX2-NEXT: andl $-32, %esp 2070; X86AVX2-NEXT: subl $64, %esp 2071; X86AVX2-NEXT: movl 12(%ebp), %eax 2072; X86AVX2-NEXT: andl $7, %eax 2073; X86AVX2-NEXT: movl 8(%ebp), %ecx 2074; X86AVX2-NEXT: movl (%ecx), %ecx 2075; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 2076; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4) 2077; X86AVX2-NEXT: vmovaps (%esp), %ymm0 2078; X86AVX2-NEXT: movl %ebp, %esp 2079; X86AVX2-NEXT: popl %ebp 2080; X86AVX2-NEXT: retl 2081 %x = load i32, i32* %p 2082 %ins = insertelement <8 x i32> %v, i32 %x, i32 %y 2083 ret <8 x i32> %ins 2084} 2085; Variable-index insert of an i64 loaded from %p into <4 x i64> %v (index %y): the AVX512 assertions expect an index-compare mask plus a masked vpbroadcastq, the SSE and AVX1OR2 assertions a stack spill with the index masked by 3, and the 32-bit X86AVX2 assertions two 32-bit stack stores at 2*%y and 2*%y+1 (each masked by 7). 2086define <4 x i64> @load_i64_v4i64(<4 x i64> %v, i64* %p, i32 %y) nounwind { 2087; SSE-LABEL: load_i64_v4i64: 2088; SSE: # %bb.0: 2089; SSE-NEXT: # kill: def $esi killed $esi def $rsi 2090; SSE-NEXT: movq (%rdi), %rax 2091; SSE-NEXT: movaps %xmm1, 
-{{[0-9]+}}(%rsp) 2092; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 2093; SSE-NEXT: andl $3, %esi 2094; SSE-NEXT: movq %rax, -40(%rsp,%rsi,8) 2095; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 2096; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 2097; SSE-NEXT: retq 2098; 2099; AVX1OR2-LABEL: load_i64_v4i64: 2100; AVX1OR2: # %bb.0: 2101; AVX1OR2-NEXT: pushq %rbp 2102; AVX1OR2-NEXT: movq %rsp, %rbp 2103; AVX1OR2-NEXT: andq $-32, %rsp 2104; AVX1OR2-NEXT: subq $64, %rsp 2105; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 2106; AVX1OR2-NEXT: movq (%rdi), %rax 2107; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 2108; AVX1OR2-NEXT: andl $3, %esi 2109; AVX1OR2-NEXT: movq %rax, (%rsp,%rsi,8) 2110; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 2111; AVX1OR2-NEXT: movq %rbp, %rsp 2112; AVX1OR2-NEXT: popq %rbp 2113; AVX1OR2-NEXT: retq 2114; 2115; AVX512-LABEL: load_i64_v4i64: 2116; AVX512: # %bb.0: 2117; AVX512-NEXT: movslq %esi, %rax 2118; AVX512-NEXT: vpbroadcastq %rax, %ymm1 2119; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 2120; AVX512-NEXT: vpbroadcastq (%rdi), %ymm0 {%k1} 2121; AVX512-NEXT: retq 2122; 2123; X86AVX2-LABEL: load_i64_v4i64: 2124; X86AVX2: # %bb.0: 2125; X86AVX2-NEXT: pushl %ebp 2126; X86AVX2-NEXT: movl %esp, %ebp 2127; X86AVX2-NEXT: pushl %esi 2128; X86AVX2-NEXT: andl $-32, %esp 2129; X86AVX2-NEXT: subl $96, %esp 2130; X86AVX2-NEXT: movl 12(%ebp), %eax 2131; X86AVX2-NEXT: movl 8(%ebp), %ecx 2132; X86AVX2-NEXT: movl (%ecx), %edx 2133; X86AVX2-NEXT: movl 4(%ecx), %ecx 2134; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 2135; X86AVX2-NEXT: leal (%eax,%eax), %esi 2136; X86AVX2-NEXT: andl $7, %esi 2137; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) 2138; X86AVX2-NEXT: vmovaps (%esp), %ymm0 2139; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) 2140; X86AVX2-NEXT: leal 1(%eax,%eax), %eax 2141; X86AVX2-NEXT: andl $7, %eax 2142; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4) 2143; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0 2144; X86AVX2-NEXT: leal -4(%ebp), %esp 2145; X86AVX2-NEXT: popl 
%esi 2146; X86AVX2-NEXT: popl %ebp 2147; X86AVX2-NEXT: retl 2148 %x = load i64, i64* %p 2149 %ins = insertelement <4 x i64> %v, i64 %x, i32 %y 2150 ret <4 x i64> %ins 2151} 2152; Variable-index insert of a float loaded from %p into <8 x float> %v (index %y): the SSE assertions expect a stack spill with the index masked by 7, the AVX1, AVX2 and X86AVX2 assertions a vbroadcastss plus index compare plus vblendvps, and the AVX512 assertions a masked vbroadcastss. 2153define <8 x float> @load_f32_v8f32(<8 x float> %v, float* %p, i32 %y) nounwind { 2154; SSE-LABEL: load_f32_v8f32: 2155; SSE: # %bb.0: 2156; SSE-NEXT: # kill: def $esi killed $esi def $rsi 2157; SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 2158; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 2159; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 2160; SSE-NEXT: andl $7, %esi 2161; SSE-NEXT: movss %xmm2, -40(%rsp,%rsi,4) 2162; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 2163; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 2164; SSE-NEXT: retq 2165; 2166; AVX1-LABEL: load_f32_v8f32: 2167; AVX1: # %bb.0: 2168; AVX1-NEXT: vmovd %esi, %xmm1 2169; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] 2170; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 2171; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1 2172; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 2173; AVX1-NEXT: vbroadcastss (%rdi), %ymm2 2174; AVX1-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0 2175; AVX1-NEXT: retq 2176; 2177; AVX2-LABEL: load_f32_v8f32: 2178; AVX2: # %bb.0: 2179; AVX2-NEXT: vbroadcastss (%rdi), %ymm1 2180; AVX2-NEXT: vmovd %esi, %xmm2 2181; AVX2-NEXT: vpbroadcastd %xmm2, %ymm2 2182; AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 2183; AVX2-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 2184; AVX2-NEXT: retq 2185; 2186; AVX512-LABEL: load_f32_v8f32: 2187; AVX512: # %bb.0: 2188; AVX512-NEXT: vpbroadcastd %esi, %ymm1 2189; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 2190; AVX512-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} 2191; AVX512-NEXT: retq 2192; 2193; X86AVX2-LABEL: load_f32_v8f32: 2194; X86AVX2: # %bb.0: 2195; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 2196; X86AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %ymm1 2197; X86AVX2-NEXT: vpcmpeqd 
{{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 2198; X86AVX2-NEXT: vbroadcastss (%eax), %ymm2 2199; X86AVX2-NEXT: vblendvps %ymm1, %ymm2, %ymm0, %ymm0 2200; X86AVX2-NEXT: retl 2201 %x = load float, float* %p 2202 %ins = insertelement <8 x float> %v, float %x, i32 %y 2203 ret <8 x float> %ins 2204} 2205; Variable-index insert of a double loaded from %p into <4 x double> %v (index %y): the SSE and X86AVX2 assertions expect a stack spill with the index masked by 3, the AVX1 and AVX2 assertions a vbroadcastsd plus index compare plus vblendvpd, and the AVX512 assertions a masked vbroadcastsd. 2206define <4 x double> @load_f64_v4f64(<4 x double> %v, double* %p, i32 %y) nounwind { 2207; SSE-LABEL: load_f64_v4f64: 2208; SSE: # %bb.0: 2209; SSE-NEXT: # kill: def $esi killed $esi def $rsi 2210; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero 2211; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 2212; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 2213; SSE-NEXT: andl $3, %esi 2214; SSE-NEXT: movsd %xmm2, -40(%rsp,%rsi,8) 2215; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 2216; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 2217; SSE-NEXT: retq 2218; 2219; AVX1-LABEL: load_f64_v4f64: 2220; AVX1: # %bb.0: 2221; AVX1-NEXT: movslq %esi, %rax 2222; AVX1-NEXT: vmovq %rax, %xmm1 2223; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1] 2224; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 2225; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1 2226; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 2227; AVX1-NEXT: vbroadcastsd (%rdi), %ymm2 2228; AVX1-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 2229; AVX1-NEXT: retq 2230; 2231; AVX2-LABEL: load_f64_v4f64: 2232; AVX2: # %bb.0: 2233; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1 2234; AVX2-NEXT: movslq %esi, %rax 2235; AVX2-NEXT: vmovq %rax, %xmm2 2236; AVX2-NEXT: vpbroadcastq %xmm2, %ymm2 2237; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 2238; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 2239; AVX2-NEXT: retq 2240; 2241; AVX512-LABEL: load_f64_v4f64: 2242; AVX512: # %bb.0: 2243; AVX512-NEXT: movslq %esi, %rax 2244; AVX512-NEXT: vpbroadcastq %rax, %ymm1 2245; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 2246; AVX512-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} 2247; AVX512-NEXT: retq 2248; 
2249; X86AVX2-LABEL: load_f64_v4f64: 2250; X86AVX2: # %bb.0: 2251; X86AVX2-NEXT: pushl %ebp 2252; X86AVX2-NEXT: movl %esp, %ebp 2253; X86AVX2-NEXT: andl $-32, %esp 2254; X86AVX2-NEXT: subl $64, %esp 2255; X86AVX2-NEXT: movl 12(%ebp), %eax 2256; X86AVX2-NEXT: andl $3, %eax 2257; X86AVX2-NEXT: movl 8(%ebp), %ecx 2258; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 2259; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 2260; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8) 2261; X86AVX2-NEXT: vmovaps (%esp), %ymm0 2262; X86AVX2-NEXT: movl %ebp, %esp 2263; X86AVX2-NEXT: popl %ebp 2264; X86AVX2-NEXT: retl 2265 %x = load double, double* %p 2266 %ins = insertelement <4 x double> %v, double %x, i32 %y 2267 ret <4 x double> %ins 2268} 2269 2270; Don't die trying to insert to an invalid index. 2271; The insertelement index below is %C3, an i1 produced by an icmp of two constants; none of the asserted sequences store %I4 back to %p, only the srem/udiv chain on element 0 remains (NOTE(review): presumably the insert is folded away as out of range - confirm against the PR). 2272define i32 @PR44139(<16 x i64>* %p) { 2273; SSE-LABEL: PR44139: 2274; SSE: # %bb.0: 2275; SSE-NEXT: movl (%rdi), %eax 2276; SSE-NEXT: leal 2147483647(%rax), %ecx 2277; SSE-NEXT: testl %eax, %eax 2278; SSE-NEXT: cmovnsl %eax, %ecx 2279; SSE-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 2280; SSE-NEXT: addl %eax, %ecx 2281; SSE-NEXT: # kill: def $eax killed $eax killed $rax 2282; SSE-NEXT: xorl %edx, %edx 2283; SSE-NEXT: divl %ecx 2284; SSE-NEXT: retq 2285; 2286; AVX-LABEL: PR44139: 2287; AVX: # %bb.0: 2288; AVX-NEXT: movl (%rdi), %eax 2289; AVX-NEXT: leal 2147483647(%rax), %ecx 2290; AVX-NEXT: testl %eax, %eax 2291; AVX-NEXT: cmovnsl %eax, %ecx 2292; AVX-NEXT: andl $-2147483648, %ecx # imm = 0x80000000 2293; AVX-NEXT: addl %eax, %ecx 2294; AVX-NEXT: # kill: def $eax killed $eax killed $rax 2295; AVX-NEXT: xorl %edx, %edx 2296; AVX-NEXT: divl %ecx 2297; AVX-NEXT: retq 2298; 2299; X86AVX2-LABEL: PR44139: 2300; X86AVX2: # %bb.0: 2301; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 2302; X86AVX2-NEXT: movl (%eax), %eax 2303; X86AVX2-NEXT: leal 2147483647(%eax), %ecx 2304; X86AVX2-NEXT: testl %eax, %eax 2305; X86AVX2-NEXT: cmovnsl %eax, %ecx 2306; X86AVX2-NEXT: andl $-2147483648, %ecx # imm = 
0x80000000 2307; X86AVX2-NEXT: addl %eax, %ecx 2308; X86AVX2-NEXT: xorl %edx, %edx 2309; X86AVX2-NEXT: divl %ecx 2310; X86AVX2-NEXT: retl 2311 %L = load <16 x i64>, <16 x i64>* %p 2312 %E1 = extractelement <16 x i64> %L, i64 0 2313 %tempvector = insertelement <16 x i64> undef, i64 %E1, i32 0 2314 %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer 2315 %C3 = icmp sgt i64 9223372036854775807, -9223372036854775808 2316 %t0 = trunc <16 x i64> %vector to <16 x i32> 2317 %I4 = insertelement <16 x i64> %vector, i64 %E1, i1 %C3 2318 store <16 x i64> %I4, <16 x i64>* %p 2319 %elt = extractelement <16 x i32> %t0, i32 0 2320 %B = srem i32 %elt, -2147483648 2321 %B9 = udiv i32 %elt, %B 2322 ret i32 %B9 2323} 2324