; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP

; Basic vector add/sub/mul codegen tests for MVE. Integer ops and (with
; +mve.fp) fp ops should select single vector instructions; i64 and f64
; element types are not natively supported and expand to scalar code or
; libcalls. Several tests commute the operands (%src2 op %src1) on purpose.

define arm_aapcs_vfpcc <16 x i8> @add_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: add_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <16 x i8> %src1, %src2
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @add_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: add_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add <8 x i16> %src1, %src2
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @add_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: add_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = add nsw <4 x i32> %src1, %src2
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: add_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov lr, r12, d3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    vmov r1, r0, d2
; CHECK-NEXT:    vmov r4, r5, d0
; CHECK-NEXT:    adds.w r2, r2, lr
; CHECK-NEXT:    adc.w r3, r3, r12
; CHECK-NEXT:    adds r1, r1, r4
; CHECK-NEXT:    adcs r0, r5
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r3
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = add nsw <2 x i64> %src1, %src2
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: add_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vadd.f32 s11, s7, s3
; CHECK-MVE-NEXT:    vadd.f32 s10, s6, s2
; CHECK-MVE-NEXT:    vadd.f32 s9, s5, s1
; CHECK-MVE-NEXT:    vadd.f32 s8, s4, s0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: add_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vadd.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fadd nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @add_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: add_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q2, q0
; CHECK-MVE-NEXT:    vmovx.f16 s2, s4
; CHECK-MVE-NEXT:    vmovx.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s14, s5
; CHECK-MVE-NEXT:    vadd.f16 s12, s2, s0
; CHECK-MVE-NEXT:    vadd.f16 s0, s4, s8
; CHECK-MVE-NEXT:    vins.f16 s0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s9
; CHECK-MVE-NEXT:    vadd.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vadd.f16 s1, s5, s9
; CHECK-MVE-NEXT:    vins.f16 s1, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s10
; CHECK-MVE-NEXT:    vmovx.f16 s14, s6
; CHECK-MVE-NEXT:    vadd.f16 s2, s6, s10
; CHECK-MVE-NEXT:    vadd.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmovx.f16 s14, s7
; CHECK-MVE-NEXT:    vins.f16 s2, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s11
; CHECK-MVE-NEXT:    vadd.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vadd.f16 s3, s7, s11
; CHECK-MVE-NEXT:    vins.f16 s3, s12
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: add_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vadd.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fadd nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: add_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    vmov lr, r12, d8
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dadd
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fadd nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}


define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: sub_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i8 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <16 x i8> %src2, %src1
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @sub_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: sub_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub <8 x i16> %src2, %src1
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @sub_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: sub_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsub.i32 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sub nsw <4 x i32> %src2, %src1
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: sub_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    vmov lr, r12, d1
; CHECK-NEXT:    vmov r2, r3, d3
; CHECK-NEXT:    vmov r1, r0, d0
; CHECK-NEXT:    vmov r4, r5, d2
; CHECK-NEXT:    subs.w r2, r2, lr
; CHECK-NEXT:    sbc.w r3, r3, r12
; CHECK-NEXT:    subs r1, r4, r1
; CHECK-NEXT:    sbc.w r0, r5, r0
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r3
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = sub nsw <2 x i64> %src2, %src1
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: sub_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vsub.f32 s11, s7, s3
; CHECK-MVE-NEXT:    vsub.f32 s10, s6, s2
; CHECK-MVE-NEXT:    vsub.f32 s9, s5, s1
; CHECK-MVE-NEXT:    vsub.f32 s8, s4, s0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: sub_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vsub.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fsub nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <8 x half> @sub_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: sub_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q2, q0
; CHECK-MVE-NEXT:    vmovx.f16 s2, s4
; CHECK-MVE-NEXT:    vmovx.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s14, s5
; CHECK-MVE-NEXT:    vsub.f16 s12, s2, s0
; CHECK-MVE-NEXT:    vsub.f16 s0, s4, s8
; CHECK-MVE-NEXT:    vins.f16 s0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s9
; CHECK-MVE-NEXT:    vsub.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vsub.f16 s1, s5, s9
; CHECK-MVE-NEXT:    vins.f16 s1, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s10
; CHECK-MVE-NEXT:    vmovx.f16 s14, s6
; CHECK-MVE-NEXT:    vsub.f16 s2, s6, s10
; CHECK-MVE-NEXT:    vsub.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmovx.f16 s14, s7
; CHECK-MVE-NEXT:    vins.f16 s2, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s11
; CHECK-MVE-NEXT:    vsub.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vsub.f16 s3, s7, s11
; CHECK-MVE-NEXT:    vins.f16 s3, s12
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: sub_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vsub.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fsub nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: sub_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    bl __aeabi_dsub
; CHECK-NEXT:    vmov lr, r12, d8
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dsub
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fsub nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}


define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: mul_int8_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul <16 x i8> %src1, %src2
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @mul_int16_t(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: mul_int16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul <8 x i16> %src1, %src2
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @mul_int32_t(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: mul_int32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmul.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = mul nsw <4 x i32> %src1, %src2
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: mul_int64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    vmov r0, r1, d2
; CHECK-NEXT:    vmov r2, lr, d0
; CHECK-NEXT:    vmov r4, r5, d3
; CHECK-NEXT:    umull r12, r3, r2, r0
; CHECK-NEXT:    mla r1, r2, r1, r3
; CHECK-NEXT:    vmov r2, r3, d1
; CHECK-NEXT:    mla r0, lr, r0, r1
; CHECK-NEXT:    umull r6, r7, r2, r4
; CHECK-NEXT:    mla r2, r2, r5, r7
; CHECK-NEXT:    vmov q0[2], q0[0], r12, r6
; CHECK-NEXT:    mla r2, r3, r4, r2
; CHECK-NEXT:    vmov q0[3], q0[1], r0, r2
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
entry:
  %0 = mul nsw <2 x i64> %src1, %src2
  ret <2 x i64> %0
}

define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-MVE-LABEL: mul_float16_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q2, q0
; CHECK-MVE-NEXT:    vmovx.f16 s2, s4
; CHECK-MVE-NEXT:    vmovx.f16 s0, s8
; CHECK-MVE-NEXT:    vmovx.f16 s14, s5
; CHECK-MVE-NEXT:    vmul.f16 s12, s2, s0
; CHECK-MVE-NEXT:    vmul.f16 s0, s4, s8
; CHECK-MVE-NEXT:    vins.f16 s0, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s9
; CHECK-MVE-NEXT:    vmul.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmul.f16 s1, s5, s9
; CHECK-MVE-NEXT:    vins.f16 s1, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s10
; CHECK-MVE-NEXT:    vmovx.f16 s14, s6
; CHECK-MVE-NEXT:    vmul.f16 s2, s6, s10
; CHECK-MVE-NEXT:    vmul.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmovx.f16 s14, s7
; CHECK-MVE-NEXT:    vins.f16 s2, s12
; CHECK-MVE-NEXT:    vmovx.f16 s12, s11
; CHECK-MVE-NEXT:    vmul.f16 s12, s14, s12
; CHECK-MVE-NEXT:    vmul.f16 s3, s7, s11
; CHECK-MVE-NEXT:    vins.f16 s3, s12
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: mul_float16_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmul.f16 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fmul nnan ninf nsz <8 x half> %src2, %src1
  ret <8 x half> %0
}

define arm_aapcs_vfpcc <4 x float> @mul_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-MVE-LABEL: mul_float32_t:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmul.f32 s11, s7, s3
; CHECK-MVE-NEXT:    vmul.f32 s10, s6, s2
; CHECK-MVE-NEXT:    vmul.f32 s9, s5, s1
; CHECK-MVE-NEXT:    vmul.f32 s8, s4, s0
; CHECK-MVE-NEXT:    vmov q0, q2
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: mul_float32_t:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmul.f32 q0, q1, q0
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %0 = fmul nnan ninf nsz <4 x float> %src2, %src1
  ret <4 x float> %0
}

define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: mul_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    vmov r2, r3, d11
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    vmov lr, r12, d8
; CHECK-NEXT:    vmov r2, r3, d10
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl __aeabi_dmul
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = fmul nnan ninf nsz <2 x double> %src2, %src1
  ret <2 x double> %0
}