; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

; Codegen tests for the pattern
;   extractelement (narrow lane) -> zext -> insertelement (into a wider vector)
;
; Naming scheme used by the functions below:
;   extract<N>  - index of the source lane that is read
;   i32 / i16   - element type of the source vector (<4 x i32> / <8 x i16>)
;   insert<M>   - index of the <2 x i64> lane that is written
;   undef/zero  - base vector the widened scalar is inserted into
;
; The expected assembly is autogenerated; regenerate it with the script named
; on the first line rather than editing it by hand.

;------------------------------------------------------------------------------
; <4 x i32> source lane, zero-extended into lane 0 of <2 x i64>
;------------------------------------------------------------------------------

; i32 lane 0 -> i64 lane 0; the remaining result lane is undef.
define <2 x i64> @extract0_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract0_i32_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    zip1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 0
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i32 lane 0 -> i64 lane 0; the remaining result lane is zero.
define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 0
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i32 lane 1 -> i64 lane 0; the remaining result lane is undef.
define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    zip1 v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 1
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i32 lane 1 -> i64 lane 0; the remaining result lane is zero.
define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 1
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i32 lane 2 -> i64 lane 0; the remaining result lane is undef.
define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v0.4s
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 2
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i32 lane 2 -> i64 lane 0; the remaining result lane is zero.
define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[2]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 2
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i32 lane 3 -> i64 lane 0; the remaining result lane is undef.
define <2 x i64> @extract3_i32_zext_insert0_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract3_i32_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 3
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i32 lane 3 -> i64 lane 0; the remaining result lane is zero.
define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[3]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 3
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  ret <2 x i64> %ins
}

;------------------------------------------------------------------------------
; <4 x i32> source lane, zero-extended into lane 1 of <2 x i64>
;------------------------------------------------------------------------------

; i32 lane 0 -> i64 lane 1; the remaining result lane is undef.
define <2 x i64> @extract0_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract0_i32_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    zip1 v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 0
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i32 lane 0 -> i64 lane 1; the remaining result lane is zero.
define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 0
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i32 lane 1 -> i64 lane 1; the remaining result lane is undef.
define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 1
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i32 lane 1 -> i64 lane 1; the remaining result lane is zero.
define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[1]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 1
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i32 lane 2 -> i64 lane 1; the remaining result lane is undef.
; The source lane already sits in the low half of the destination lane, so the
; expected code only clears 32-bit lane 3.
define <2 x i64> @extract2_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract2_i32_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.s[3], wzr
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 2
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i32 lane 2 -> i64 lane 1; the remaining result lane is zero.
define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[2]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 2
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i32 lane 3 -> i64 lane 1; the remaining result lane is undef.
define <2 x i64> @extract3_i32_zext_insert1_i64_undef(<4 x i32> %x) {
; CHECK-LABEL: extract3_i32_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #4
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 3
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i32 lane 3 -> i64 lane 1; the remaining result lane is zero.
define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) {
; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v0.s[3]
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i32> %x, i32 3
  %wide = zext i32 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  ret <2 x i64> %ins
}

;------------------------------------------------------------------------------
; <8 x i16> source lane, zero-extended into lane 0 of <2 x i64>
;------------------------------------------------------------------------------

; i16 lane 0 -> i64 lane 0; the remaining result lane is undef.
define <2 x i64> @extract0_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract0_i16_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[0]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 0
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i16 lane 0 -> i64 lane 0; the remaining result lane is zero.
define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[0]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 0
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i16 lane 1 -> i64 lane 0; the remaining result lane is undef.
define <2 x i64> @extract1_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract1_i16_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 1
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i16 lane 1 -> i64 lane 0; the remaining result lane is zero.
define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 1
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i16 lane 2 -> i64 lane 0; the remaining result lane is undef.
define <2 x i64> @extract2_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract2_i16_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 2
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i16 lane 2 -> i64 lane 0; the remaining result lane is zero.
define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 2
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i16 lane 3 -> i64 lane 0; the remaining result lane is undef.
define <2 x i64> @extract3_i16_zext_insert0_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract3_i16_zext_insert0_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 3
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 0
  ret <2 x i64> %ins
}

; i16 lane 3 -> i64 lane 0; the remaining result lane is zero.
define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[0], x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 3
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 0
  ret <2 x i64> %ins
}

;------------------------------------------------------------------------------
; <8 x i16> source lane, zero-extended into lane 1 of <2 x i64>
;------------------------------------------------------------------------------

; i16 lane 0 -> i64 lane 1; the remaining result lane is undef.
define <2 x i64> @extract0_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract0_i16_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[0]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    dup v0.2d, x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 0
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i16 lane 0 -> i64 lane 1; the remaining result lane is zero.
define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[0]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 0
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i16 lane 1 -> i64 lane 1; the remaining result lane is undef.
define <2 x i64> @extract1_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract1_i16_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    dup v0.2d, x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 1
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i16 lane 1 -> i64 lane 1; the remaining result lane is zero.
define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[1]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 1
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i16 lane 2 -> i64 lane 1; the remaining result lane is undef.
define <2 x i64> @extract2_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract2_i16_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    dup v0.2d, x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 2
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i16 lane 2 -> i64 lane 1; the remaining result lane is zero.
define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 2
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i16 lane 3 -> i64 lane 1; the remaining result lane is undef.
define <2 x i64> @extract3_i16_zext_insert1_i64_undef(<8 x i16> %x) {
; CHECK-LABEL: extract3_i16_zext_insert1_i64_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    dup v0.2d, x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 3
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> undef, i64 %wide, i32 1
  ret <2 x i64> %ins
}

; i16 lane 3 -> i64 lane 1; the remaining result lane is zero.
define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) {
; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v0.h[3]
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    movi v0.2d, #0000000000000000
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %elt = extractelement <8 x i16> %x, i32 3
  %wide = zext i16 %elt to i64
  %ins = insertelement <2 x i64> zeroinitializer, i64 %wide, i32 1
  ret <2 x i64> %ins
}

;------------------------------------------------------------------------------
; Result vector wider than the source vector
;------------------------------------------------------------------------------

; This would crash because we did not expect to create
; a shuffle for a vector where the source operand is
; not the same size as the result.
; TODO: Should we handle this pattern? I.e., is moving to/from
;       registers the optimal code?

; i16 lane 2 of a 64-bit <4 x i16> -> i32 lane 0 of a 128-bit <4 x i32>;
; the remaining result lanes are undef.
define <4 x i32> @larger_bv_than_source(<4 x i16> %t0) {
; CHECK-LABEL: larger_bv_than_source:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v0.h[2]
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %elt = extractelement <4 x i16> %t0, i32 2
  %wide = zext i16 %elt to i32
  %ins = insertelement <4 x i32> undef, i32 %wide, i64 0
  ret <4 x i32> %ins
}
