; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECKBE

; Codegen tests for MVE lane-interleaving shuffles (with and without a
; following trunc). Each function is compiled twice: once for the thumbv8.1m
; triple (first set of assertions) and once for the thumbebv8.1m triple
; (second set of assertions). The comment above each function summarises the
; shuffle mask under test and what the recorded assertions expect.

; Interleave src1/src2 (mask 0,4,1,5,...) then truncate i32 -> i16.
; Expected: a single vmovnt.i32 q0, q1; the thumbeb run wraps it in vrev64s.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i32 q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_trunc1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q2, q1
; CHECKBE-NEXT: vrev64.32 q1, q0
; CHECKBE-NEXT: vmovnt.i32 q1, q2
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Same as vmovn32_trunc1 but with the sources swapped in the mask
; (4,0,5,1,...). Expected: vmovnt.i32 q1, q0 followed by a vmov of the result.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i32 q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_trunc2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q2, q0
; CHECKBE-NEXT: vrev64.32 q3, q1
; CHECKBE-NEXT: vmovnt.i32 q3, q2
; CHECKBE-NEXT: vrev64.16 q0, q3
; CHECKBE-NEXT: bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Single-source variant: every src1 lane is duplicated (0,0,1,1,...) before
; the trunc. Expected: vmovnt.i32 with the same register as both operands.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc3(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_trunc3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i32 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_trunc3:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q1, q0
; CHECKBE-NEXT: vmovnt.i32 q1, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> undef, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}


; i16 -> i8 counterpart of vmovn32_trunc1: interleave (0,8,1,9,...) then
; trunc. Expected: a single vmovnt.i16 q0, q1.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i16 q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_trunc1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q2, q1
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vmovnt.i16 q1, q2
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; i16 -> i8 counterpart of vmovn32_trunc2 (mask 8,0,9,1,...).
; Expected: vmovnt.i16 q1, q0 followed by a vmov of the result.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i16 q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_trunc2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q2, q0
; CHECKBE-NEXT: vrev64.16 q3, q1
; CHECKBE-NEXT: vmovnt.i16 q3, q2
; CHECKBE-NEXT: vrev64.8 q0, q3
; CHECKBE-NEXT: bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; i16 -> i8 counterpart of vmovn32_trunc3: single source with duplicated
; lanes. Expected: vmovnt.i16 with the same register as both operands.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc3(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_trunc3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i16 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_trunc3:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vmovnt.i16 q1, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}



; <2 x i64> shuffles (no trunc). For all vmovn64_* tests the assertions
; contain only vmov.f32 / vmov register copies, no vmovn instruction, and are
; identical for both runs.
; Mask <0,2>.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_t1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.f32 s2, s4
; CHECKBE-NEXT: vmov.f32 s3, s5
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <2,0>; result is built in q1 and copied to q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s6, s0
; CHECK-NEXT: vmov.f32 s7, s1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_t2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.f32 s6, s0
; CHECKBE-NEXT: vmov.f32 s7, s1
; CHECKBE-NEXT: vmov q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <0,3>.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s2, s6
; CHECK-NEXT: vmov.f32 s3, s7
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_b1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.f32 s2, s6
; CHECKBE-NEXT: vmov.f32 s3, s7
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <3,0>; needs four lane copies plus a vmov.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s4, s6
; CHECK-NEXT: vmov.f32 s6, s0
; CHECK-NEXT: vmov.f32 s5, s7
; CHECK-NEXT: vmov.f32 s7, s1
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_b2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.f32 s4, s6
; CHECKBE-NEXT: vmov.f32 s6, s0
; CHECKBE-NEXT: vmov.f32 s5, s7
; CHECKBE-NEXT: vmov.f32 s7, s1
; CHECKBE-NEXT: vmov q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <1,2>; four lane copies performed in q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s0, s2
; CHECK-NEXT: vmov.f32 s2, s4
; CHECK-NEXT: vmov.f32 s1, s3
; CHECK-NEXT: vmov.f32 s3, s5
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_b3:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.f32 s0, s2
; CHECKBE-NEXT: vmov.f32 s2, s4
; CHECKBE-NEXT: vmov.f32 s1, s3
; CHECKBE-NEXT: vmov.f32 s3, s5
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %out
}

; <2 x i64> shuffle with mask <2,1>; result is built in q1 and copied to q0.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s6, s2
; CHECK-NEXT: vmov.f32 s7, s3
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn64_b4:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vmov.f32 s6, s2
; CHECKBE-NEXT: vmov.f32 s7, s3
; CHECKBE-NEXT: vmov q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %out
}



; <4 x i32> shuffles (no trunc). For all vmovn32_* shuffle tests below the
; assertions contain vmov.f32 lane copies rather than a vmovn instruction;
; the thumbeb run additionally wraps the copies in vrev64.32.
; Mask <0,4,2,6>.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s1, s4
; CHECK-NEXT: vmov.f32 s3, s6
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_t1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q2, q1
; CHECKBE-NEXT: vrev64.32 q1, q0
; CHECKBE-NEXT: vmov.f32 s5, s8
; CHECKBE-NEXT: vmov.f32 s7, s10
; CHECKBE-NEXT: vrev64.32 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <4,0,6,2>.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s5, s0
; CHECK-NEXT: vmov.f32 s7, s2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_t2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q2, q0
; CHECKBE-NEXT: vrev64.32 q3, q1
; CHECKBE-NEXT: vmov.f32 s13, s8
; CHECKBE-NEXT: vmov.f32 s15, s10
; CHECKBE-NEXT: vrev64.32 q0, q3
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <0,5,2,7>.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s1, s5
; CHECK-NEXT: vmov.f32 s3, s7
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_b1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q2, q1
; CHECKBE-NEXT: vrev64.32 q1, q0
; CHECKBE-NEXT: vmov.f32 s5, s9
; CHECKBE-NEXT: vmov.f32 s7, s11
; CHECKBE-NEXT: vrev64.32 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <5,0,7,2>; needs four lane copies.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s4, s5
; CHECK-NEXT: vmov.f32 s6, s7
; CHECK-NEXT: vmov.f32 s5, s0
; CHECK-NEXT: vmov.f32 s7, s2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_b2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q2, q0
; CHECKBE-NEXT: vrev64.32 q0, q1
; CHECKBE-NEXT: vmov.f32 s4, s1
; CHECKBE-NEXT: vmov.f32 s5, s8
; CHECKBE-NEXT: vmov.f32 s6, s3
; CHECKBE-NEXT: vmov.f32 s7, s10
; CHECKBE-NEXT: vrev64.32 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <1,4,3,6>; needs four lane copies.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmov.f32 s1, s4
; CHECK-NEXT: vmov.f32 s3, s6
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_b3:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q2, q1
; CHECKBE-NEXT: vrev64.32 q1, q0
; CHECKBE-NEXT: vmov.f32 s4, s5
; CHECKBE-NEXT: vmov.f32 s6, s7
; CHECKBE-NEXT: vmov.f32 s5, s8
; CHECKBE-NEXT: vmov.f32 s7, s10
; CHECKBE-NEXT: vrev64.32 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
  ret <4 x i32> %out
}

; <4 x i32> shuffle with mask <4,1,6,3>.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s5, s1
; CHECK-NEXT: vmov.f32 s7, s3
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_b4:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q2, q0
; CHECKBE-NEXT: vrev64.32 q3, q1
; CHECKBE-NEXT: vmov.f32 s13, s9
; CHECKBE-NEXT: vmov.f32 s15, s11
; CHECKBE-NEXT: vrev64.32 q0, q3
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %out
}

; Single-source <4 x i32> shuffle duplicating the even lanes (<0,0,2,2>).
define arm_aapcs_vfpcc <4 x i32> @vmovn32_single_t(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_single_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s1, s0
; CHECK-NEXT: vmov.f32 s3, s2
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_single_t:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q1, q0
; CHECKBE-NEXT: vmov.f32 s5, s4
; CHECKBE-NEXT: vmov.f32 s7, s6
; CHECKBE-NEXT: vrev64.32 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x i32> %out
}




; <8 x i16> shuffles (no trunc).
; Mask <0,8,2,10,...>: expected to select a single vmovnt.i32 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i32 q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_t1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q2, q1
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vmovnt.i32 q1, q2
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i16> %out
}

; Mask <8,0,10,2,...>: expected vmovnt.i32 q1, q0 plus a vmov of the result.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i32 q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_t2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q2, q0
; CHECKBE-NEXT: vrev64.16 q3, q1
; CHECKBE-NEXT: vmovnt.i32 q3, q2
; CHECKBE-NEXT: vrev64.16 q0, q3
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  ret <8 x i16> %out
}

; Mask <0,9,2,11,...>: expected vmovnb.i32 q1, q0 plus a vmov of the result.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnb.i32 q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_b1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q2, q0
; CHECKBE-NEXT: vrev64.16 q3, q1
; CHECKBE-NEXT: vmovnb.i32 q3, q2
; CHECKBE-NEXT: vrev64.16 q0, q3
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

; Mask <9,0,11,2,...>: no vmovn is selected; the assertions record a
; vmovx.f16 / vins.f16 sequence instead.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovx.f16 s5, s5
; CHECK-NEXT: vmovx.f16 s4, s4
; CHECK-NEXT: vmovx.f16 s6, s6
; CHECK-NEXT: vmovx.f16 s7, s7
; CHECK-NEXT: vins.f16 s5, s1
; CHECK-NEXT: vins.f16 s4, s0
; CHECK-NEXT: vins.f16 s6, s2
; CHECK-NEXT: vins.f16 s7, s3
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_b2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q2, q0
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: vmovx.f16 s5, s1
; CHECKBE-NEXT: vmovx.f16 s4, s0
; CHECKBE-NEXT: vmovx.f16 s6, s2
; CHECKBE-NEXT: vmovx.f16 s7, s3
; CHECKBE-NEXT: vins.f16 s5, s9
; CHECKBE-NEXT: vins.f16 s4, s8
; CHECKBE-NEXT: vins.f16 s6, s10
; CHECKBE-NEXT: vins.f16 s7, s11
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
  ret <8 x i16> %out
}

; Mask <1,8,3,10,...>: no vmovn is selected; the assertions record a
; vmovx.f16 / vins.f16 sequence instead.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovx.f16 s1, s1
; CHECK-NEXT: vmovx.f16 s0, s0
; CHECK-NEXT: vmovx.f16 s2, s2
; CHECK-NEXT: vmovx.f16 s3, s3
; CHECK-NEXT: vins.f16 s1, s5
; CHECK-NEXT: vins.f16 s0, s4
; CHECK-NEXT: vins.f16 s2, s6
; CHECK-NEXT: vins.f16 s3, s7
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_b3:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q2, q1
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vmovx.f16 s5, s5
; CHECKBE-NEXT: vmovx.f16 s4, s4
; CHECKBE-NEXT: vmovx.f16 s6, s6
; CHECKBE-NEXT: vmovx.f16 s7, s7
; CHECKBE-NEXT: vins.f16 s5, s9
; CHECKBE-NEXT: vins.f16 s4, s8
; CHECKBE-NEXT: vins.f16 s6, s10
; CHECKBE-NEXT: vins.f16 s7, s11
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
  ret <8 x i16> %out
}

; Mask <8,1,10,3,...>: expected to select a single vmovnb.i32 q0, q1.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnb.i32 q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_b4:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q2, q1
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vmovnb.i32 q1, q2
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i16> %out
}

; Single-source <8 x i16> shuffle duplicating the even lanes (<0,0,2,2,...>).
; Expected: vmovnt.i32 with the same register as both operands.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_single_t(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_single_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i32 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16_single_t:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vmovnt.i32 q1, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x i16> %out
}


; <16 x i8> shuffles (no trunc).
; Mask <0,16,2,18,...>: expected to select a single vmovnt.i16 q0, q1.
; NOTE(review): the _b1/_b2 tests here expect vmovnt while _t1/_t4 expect
; vmovnb, i.e. the t/b suffixes look swapped relative to the vmovn16_* names.
; Confirm before renaming; the labels are tied to the function names.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i16 q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn8_b1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q2, q1
; CHECKBE-NEXT: vrev64.8 q1, q0
; CHECKBE-NEXT: vmovnt.i16 q1, q2
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i8> %out
}

; Mask <16,0,18,2,...>: expected vmovnt.i16 q1, q0 plus a vmov of the result.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i16 q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn8_b2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q2, q0
; CHECKBE-NEXT: vrev64.8 q3, q1
; CHECKBE-NEXT: vmovnt.i16 q3, q2
; CHECKBE-NEXT: vrev64.8 q0, q3
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
  ret <16 x i8> %out
}

; Mask <0,17,2,19,...>: expected vmovnb.i16 q1, q0 plus a vmov of the result.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnb.i16 q1, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn8_t1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q2, q0
; CHECKBE-NEXT: vrev64.8 q3, q1
; CHECKBE-NEXT: vmovnb.i16 q3, q2
; CHECKBE-NEXT: vrev64.8 q0, q3
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %out
}

; Mask <17,0,19,2,...>: no vmovn is selected; the assertions record a
; per-lane vmov.u8 extract / vmov.8 insert for each of the 16 result lanes.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q2, q0
; CHECK-NEXT: vmov.u8 r0, q1[1]
; CHECK-NEXT: vmov.8 q0[0], r0
; CHECK-NEXT: vmov.u8 r0, q2[0]
; CHECK-NEXT: vmov.8 q0[1], r0
; CHECK-NEXT: vmov.u8 r0, q1[3]
; CHECK-NEXT: vmov.8 q0[2], r0
; CHECK-NEXT: vmov.u8 r0, q2[2]
; CHECK-NEXT: vmov.8 q0[3], r0
; CHECK-NEXT: vmov.u8 r0, q1[5]
; CHECK-NEXT: vmov.8 q0[4], r0
; CHECK-NEXT: vmov.u8 r0, q2[4]
; CHECK-NEXT: vmov.8 q0[5], r0
; CHECK-NEXT: vmov.u8 r0, q1[7]
; CHECK-NEXT: vmov.8 q0[6], r0
; CHECK-NEXT: vmov.u8 r0, q2[6]
; CHECK-NEXT: vmov.8 q0[7], r0
; CHECK-NEXT: vmov.u8 r0, q1[9]
; CHECK-NEXT: vmov.8 q0[8], r0
; CHECK-NEXT: vmov.u8 r0, q2[8]
; CHECK-NEXT: vmov.8 q0[9], r0
; CHECK-NEXT: vmov.u8 r0, q1[11]
; CHECK-NEXT: vmov.8 q0[10], r0
; CHECK-NEXT: vmov.u8 r0, q2[10]
; CHECK-NEXT: vmov.8 q0[11], r0
; CHECK-NEXT: vmov.u8 r0, q1[13]
; CHECK-NEXT: vmov.8 q0[12], r0
; CHECK-NEXT: vmov.u8 r0, q2[12]
; CHECK-NEXT: vmov.8 q0[13], r0
; CHECK-NEXT: vmov.u8 r0, q1[15]
; CHECK-NEXT: vmov.8 q0[14], r0
; CHECK-NEXT: vmov.u8 r0, q2[14]
; CHECK-NEXT: vmov.8 q0[15], r0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn8_t2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q2, q1
; CHECKBE-NEXT: vrev64.8 q3, q0
; CHECKBE-NEXT: vmov.u8 r0, q2[1]
; CHECKBE-NEXT: vmov.8 q1[0], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[0]
; CHECKBE-NEXT: vmov.8 q1[1], r0
; CHECKBE-NEXT: vmov.u8 r0, q2[3]
; CHECKBE-NEXT: vmov.8 q1[2], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[2]
; CHECKBE-NEXT: vmov.8 q1[3], r0
; CHECKBE-NEXT: vmov.u8 r0, q2[5]
; CHECKBE-NEXT: vmov.8 q1[4], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[4]
; CHECKBE-NEXT: vmov.8 q1[5], r0
; CHECKBE-NEXT: vmov.u8 r0, q2[7]
; CHECKBE-NEXT: vmov.8 q1[6], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[6]
; CHECKBE-NEXT: vmov.8 q1[7], r0
; CHECKBE-NEXT: vmov.u8 r0, q2[9]
; CHECKBE-NEXT: vmov.8 q1[8], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[8]
; CHECKBE-NEXT: vmov.8 q1[9], r0
; CHECKBE-NEXT: vmov.u8 r0, q2[11]
; CHECKBE-NEXT: vmov.8 q1[10], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[10]
; CHECKBE-NEXT: vmov.8 q1[11], r0
; CHECKBE-NEXT: vmov.u8 r0, q2[13]
; CHECKBE-NEXT: vmov.8 q1[12], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[12]
; CHECKBE-NEXT: vmov.8 q1[13], r0
; CHECKBE-NEXT: vmov.u8 r0, q2[15]
; CHECKBE-NEXT: vmov.8 q1[14], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[14]
; CHECKBE-NEXT: vmov.8 q1[15], r0
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
  ret <16 x i8> %out
}

; Mask <1,16,3,18,...>: no vmovn is selected; the assertions record a
; per-lane vmov.u8 extract / vmov.8 insert for each of the 16 result lanes.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.u8 r0, q0[1]
; CHECK-NEXT: vmov q2, q0
; CHECK-NEXT: vmov.8 q0[0], r0
; CHECK-NEXT: vmov.u8 r0, q1[0]
; CHECK-NEXT: vmov.8 q0[1], r0
; CHECK-NEXT: vmov.u8 r0, q2[3]
; CHECK-NEXT: vmov.8 q0[2], r0
; CHECK-NEXT: vmov.u8 r0, q1[2]
; CHECK-NEXT: vmov.8 q0[3], r0
; CHECK-NEXT: vmov.u8 r0, q2[5]
; CHECK-NEXT: vmov.8 q0[4], r0
; CHECK-NEXT: vmov.u8 r0, q1[4]
; CHECK-NEXT: vmov.8 q0[5], r0
; CHECK-NEXT: vmov.u8 r0, q2[7]
; CHECK-NEXT: vmov.8 q0[6], r0
; CHECK-NEXT: vmov.u8 r0, q1[6]
; CHECK-NEXT: vmov.8 q0[7], r0
; CHECK-NEXT: vmov.u8 r0, q2[9]
; CHECK-NEXT: vmov.8 q0[8], r0
; CHECK-NEXT: vmov.u8 r0, q1[8]
; CHECK-NEXT: vmov.8 q0[9], r0
; CHECK-NEXT: vmov.u8 r0, q2[11]
; CHECK-NEXT: vmov.8 q0[10], r0
; CHECK-NEXT: vmov.u8 r0, q1[10]
; CHECK-NEXT: vmov.8 q0[11], r0
; CHECK-NEXT: vmov.u8 r0, q2[13]
; CHECK-NEXT: vmov.8 q0[12], r0
; CHECK-NEXT: vmov.u8 r0, q1[12]
; CHECK-NEXT: vmov.8 q0[13], r0
; CHECK-NEXT: vmov.u8 r0, q2[15]
; CHECK-NEXT: vmov.8 q0[14], r0
; CHECK-NEXT: vmov.u8 r0, q1[14]
; CHECK-NEXT: vmov.8 q0[15], r0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn8_t3:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q3, q0
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: vmov.u8 r0, q3[1]
; CHECKBE-NEXT: vmov.8 q2[0], r0
; CHECKBE-NEXT: vmov.u8 r0, q0[0]
; CHECKBE-NEXT: vmov.8 q2[1], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[3]
; CHECKBE-NEXT: vmov.8 q2[2], r0
; CHECKBE-NEXT: vmov.u8 r0, q0[2]
; CHECKBE-NEXT: vmov.8 q2[3], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[5]
; CHECKBE-NEXT: vmov.8 q2[4], r0
; CHECKBE-NEXT: vmov.u8 r0, q0[4]
; CHECKBE-NEXT: vmov.8 q2[5], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[7]
; CHECKBE-NEXT: vmov.8 q2[6], r0
; CHECKBE-NEXT: vmov.u8 r0, q0[6]
; CHECKBE-NEXT: vmov.8 q2[7], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[9]
; CHECKBE-NEXT: vmov.8 q2[8], r0
; CHECKBE-NEXT: vmov.u8 r0, q0[8]
; CHECKBE-NEXT: vmov.8 q2[9], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[11]
; CHECKBE-NEXT: vmov.8 q2[10], r0
; CHECKBE-NEXT: vmov.u8 r0, q0[10]
; CHECKBE-NEXT: vmov.8 q2[11], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[13]
; CHECKBE-NEXT: vmov.8 q2[12], r0
; CHECKBE-NEXT: vmov.u8 r0, q0[12]
; CHECKBE-NEXT: vmov.8 q2[13], r0
; CHECKBE-NEXT: vmov.u8 r0, q3[15]
; CHECKBE-NEXT: vmov.8 q2[14], r0
; CHECKBE-NEXT: vmov.u8 r0, q0[14]
; CHECKBE-NEXT: vmov.8 q2[15], r0
; CHECKBE-NEXT: vrev64.8 q0, q2
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
  ret <16 x i8> %out
}

; Mask <16,1,18,3,...>: expected to select a single vmovnb.i16 q0, q1.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnb.i16 q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn8_t4:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q2, q1
; CHECKBE-NEXT: vrev64.8 q1, q0
; CHECKBE-NEXT: vmovnb.i16 q1, q2
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i8> %out
}

; Single-source <16 x i8> shuffle duplicating the even lanes (<0,0,2,2,...>).
; Expected: vmovnt.i16 with the same register as both operands.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_single_t(<16 x i8> %src1) {
; CHECK-LABEL: vmovn8_single_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i16 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn8_single_t:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q1, q0
; CHECKBE-NEXT: vmovnt.i16 q1, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x i8> %out
}


; Interleave+trunc where the second shuffle operand is a vreinterpretq of
; undef. Expected: no vector instruction at all (just the return) in both runs.
define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef2(<8 x i16> %a) {
; CHECK-LABEL: vmovn32trunct_undef2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32trunct_undef2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: bx lr
entry:
  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Interleave+trunc where the first shuffle operand is a vreinterpretq of
; undef. Expected: vmovnt.i32 with the same register as both operands.
define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef1(<8 x i16> %a) {
; CHECK-LABEL: vmovn32trunct_undef1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i32 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32trunct_undef1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.16 q1, q0
; CHECKBE-NEXT: vmovnt.i32 q1, q1
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; <8 x i16> shuffle (mask 0,9,2,11,...) whose second operand is a
; vreinterpretq of undef. Expected: nothing but the return on the thumb run;
; only a vrev64.8 / vrev64.16 pair on the thumbeb run.
define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef2(<16 x i8> %a) {
; CHECK-LABEL: vmovn16b_undef2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16b_undef2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q1, q0
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

; Same shuffle as vmovn16b_undef2 but with the first operand being the
; vreinterpretq of undef. The recorded assertions are the same as for
; vmovn16b_undef2.
define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef1(<16 x i8> %a) {
; CHECK-LABEL: vmovn16b_undef1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16b_undef1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q1, q0
; CHECKBE-NEXT: vrev64.16 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

; Irregular mask <4,0,5,1,6,1,7,2> with an undef second operand, followed by
; a trunc. No vmovn is selected; the assertions record scalar vmov.16 lane
; inserts instead.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_badlanes(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_badlanes:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r0, r1, d0
; CHECK-NEXT: vmov.16 q1[1], r0
; CHECK-NEXT: vmov r0, s2
; CHECK-NEXT: vmov.16 q1[3], r1
; CHECK-NEXT: vmov.16 q1[5], r1
; CHECK-NEXT: vmov.16 q1[7], r0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn32_badlanes:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.32 q1, q0
; CHECKBE-NEXT: vmov r0, r1, d2
; CHECKBE-NEXT: vmov.16 q2[1], r0
; CHECKBE-NEXT: vmov r0, s6
; CHECKBE-NEXT: vmov.16 q2[3], r1
; CHECKBE-NEXT: vmov.16 q2[5], r1
; CHECKBE-NEXT: vmov.16 q2[7], r0
; CHECKBE-NEXT: vrev64.16 q0, q2
; CHECKBE-NEXT: bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> undef, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 1, i32 7, i32 2>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; i16 -> i8 counterpart of vmovn32trunct_undef2: second shuffle operand is a
; vreinterpretq of undef. Expected: nothing but the return in both runs.
define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef2(<16 x i8> %a) {
; CHECK-LABEL: vmovn16trunct_undef2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16trunct_undef2:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; i16 -> i8 counterpart of vmovn32trunct_undef1: first shuffle operand is a
; vreinterpretq of undef. Expected: vmovnt.i16 with the same register as both
; operands.
define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef1(<16 x i8> %a) {
; CHECK-LABEL: vmovn16trunct_undef1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovnt.i16 q0, q0
; CHECK-NEXT: bx lr
;
; CHECKBE-LABEL: vmovn16trunct_undef1:
; CHECKBE: @ %bb.0: @ %entry
; CHECKBE-NEXT: vrev64.8 q1, q0
; CHECKBE-NEXT: vmovnt.i16 q1, q1
; CHECKBE-NEXT: vrev64.8 q0, q1
; CHECKBE-NEXT: bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; MVE reinterpret intrinsics used by the *_undef* tests above.
declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>)
declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>)