; RUN: llc < %s -mcpu=x86-64 -mattr=+avx -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
;
; Regression tests for the experimental x86 vector-shuffle lowering path on
; 256-bit vectors with 4 x 64-bit elements. Each function performs a single
; shufflevector whose mask is encoded in the function name (e.g. _0300 is
; mask <0,3,0,0>; digits >= 4 select from the second operand; 'u' is undef),
; and the CHECK lines pin the exact instruction sequence expected from llc
; for AVX1 (ymm float-domain ops) and AVX2 (vpermq/vpblendd etc.).

target triple = "x86_64-unknown-unknown"

define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0423:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0423:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0462:
; ALL: # BB#0:
; ALL-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL: # BB#0:
; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL: # BB#0:
; ALL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL: # BB#0:
; ALL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL: # BB#0:
; ALL-NEXT: vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0145:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4501:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1054:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_3254:
; ALL: # BB#0:
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_3276:
; ALL: # BB#0:
; ALL-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1076:
; ALL: # BB#0:
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2: # BB#0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}

; Same shuffle masks as above, but on the integer domain (<4 x i64>); AVX2
; is expected to use the integer forms (vpermq, vpblendd, vpunpckhqdq, ...).

define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0112:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0112:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0300:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0300:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1: # BB#0:
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[1,1,2,2]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,0,3,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3330:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3210:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3210:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0124:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0124:
; AVX2: # BB#0:
; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0142:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0142:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0412:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0412:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4012:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_0145:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0451:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0451:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: shuffle_v4i64_4501:
; ALL: # BB#0:
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4015:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4015:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2u35:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_2u35:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,1,3,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1251:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1251:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1054:
; AVX1: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1054:
; AVX2: # BB#0:
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3254:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3254:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3276:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_3276:
; AVX2: # BB#0:
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1076:
; AVX1: # BB#0:
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_1076:
; AVX2: # BB#0:
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0415:
; AVX1: # BB#0:
; AVX1-NEXT: vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0415:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT: retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

; Chained-shuffle stress test plus insert/splat patterns (zero-blend of an
; inserted scalar, broadcast from memory, and splat of a 128-bit register).

define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: stress_test1:
; AVX1: # BB#0:
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm1[2,3,0,1]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm2 = ymm0[1,0,3,2]
; AVX1-NEXT: vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: stress_test1:
; AVX2: # BB#0:
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm1[3,1,1,0]
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[3,3,1,3]
; AVX2-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX2-NEXT: retq
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
  %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
  %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>

  ret <4 x i64> %f
}

define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; AVX1-LABEL: insert_reg_and_zero_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq %rdi, %xmm0
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_reg_and_zero_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq %rdi, %xmm0
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: retq
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_mem_and_zero_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovq (%rdi), %xmm0
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT: retq
;
; AVX2-LABEL: insert_mem_and_zero_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vmovq (%rdi), %xmm0
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX2-NEXT: retq
  %a = load i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x i64> %shuffle
}

define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; ALL-LABEL: insert_reg_and_zero_v4f64:
; ALL: # BB#0:
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; ALL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
; ALL-NEXT: retq
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; ALL-LABEL: insert_mem_and_zero_v4f64:
; ALL: # BB#0:
; ALL-NEXT: vmovsd (%rdi), %xmm0
; ALL-NEXT: retq
  %a = load double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @splat_mem_v4f64(double* %ptr) {
; ALL-LABEL: splat_mem_v4f64:
; ALL: # BB#0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %a = load double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: splat_mem_v4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vmovddup (%rdi), %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_mem_v4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT: retq
  %a = load i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat_mem_v4f64_2(double* %p) {
; ALL-LABEL: splat_mem_v4f64_2:
; ALL: # BB#0:
; ALL-NEXT: vbroadcastsd (%rdi), %ymm0
; ALL-NEXT: retq
  %1 = load double* %p
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1: # BB#0:
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: splat_v4f64:
; AVX2: # BB#0:
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
  %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %1
}