; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-eabi -mattr=+bf16 | FileCheck %s

; Code generation test for <4 x bfloat> / <8 x bfloat> lane manipulation on
; AArch64 with the +bf16 attribute: create, dup, combine, get high/low,
; get/set lane and copy lane. Each function is preceded by the C-level
; intrinsic call it models, and the assertions inside each function pin the
; exact instruction sequence expected from llc.
;
; Reading the shufflevector masks below: mask indices 0..N-1 select lanes of
; the first operand and N..2N-1 select lanes of the second operand, where N is
; the lane count of the input vectors.
;
; NOTE(review): placeholder operands and unused mask elements are spelled
; `undef`. Newer LLVM guidance prefers `poison` for such placeholders; confirm
; the generated code is unchanged (and regenerate the assertions) before
; switching.

; bfloat16x4_t test_vcreate_bf16(uint64_t a) { return vcreate_bf16(a); }
define <4 x bfloat> @test_vcreate_bf16(i64 %a) nounwind {
; CHECK-LABEL: test_vcreate_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
entry:
  ; Reinterpret the 64 integer bits as four bfloat lanes; no value conversion.
  %0 = bitcast i64 %a to <4 x bfloat>
  ret <4 x bfloat> %0
}

; bfloat16x4_t test_vdup_n_bf16(bfloat16_t v) { return vdup_n_bf16(v); }
define <4 x bfloat> @test_vdup_n_bf16(bfloat %v) nounwind {
; CHECK-LABEL: test_vdup_n_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[0]
; CHECK-NEXT:    ret
entry:
  ; Place %v in lane 0, then splat lane 0 with an all-zero mask.
  %vecinit.i = insertelement <4 x bfloat> undef, bfloat %v, i32 0
  %vecinit3.i = shufflevector <4 x bfloat> %vecinit.i, <4 x bfloat> undef, <4 x i32> zeroinitializer
  ret <4 x bfloat> %vecinit3.i
}

; bfloat16x8_t test_vdupq_n_bf16(bfloat16_t v) { return vdupq_n_bf16(v); }
define <8 x bfloat> @test_vdupq_n_bf16(bfloat %v) nounwind {
; CHECK-LABEL: test_vdupq_n_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
entry:
  ; Same insert-then-splat pattern as test_vdup_n_bf16, with eight lanes.
  %vecinit.i = insertelement <8 x bfloat> undef, bfloat %v, i32 0
  %vecinit7.i = shufflevector <8 x bfloat> %vecinit.i, <8 x bfloat> undef, <8 x i32> zeroinitializer
  ret <8 x bfloat> %vecinit7.i
}

; bfloat16x4_t test_vdup_lane_bf16(bfloat16x4_t v) { return vdup_lane_bf16(v, 1); }
define <4 x bfloat> @test_vdup_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdup_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[1]
; CHECK-NEXT:    ret
entry:
  ; Broadcast lane 1 of %v to all four result lanes.
  %lane = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x bfloat> %lane
}

; bfloat16x8_t test_vdupq_lane_bf16(bfloat16x4_t v) { return vdupq_lane_bf16(v, 1); }
define <8 x bfloat> @test_vdupq_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdupq_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.8h, v0.h[1]
; CHECK-NEXT:    ret
entry:
  ; Broadcast lane 1 of the 4-lane input into an 8-lane result.
  %lane = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x bfloat> %lane
}

; bfloat16x4_t test_vdup_laneq_bf16(bfloat16x8_t v) { return vdup_laneq_bf16(v, 7); }
define <4 x bfloat> @test_vdup_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdup_laneq_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v0.4h, v0.h[7]
; CHECK-NEXT:    ret
entry:
  ; Broadcast lane 7 of the 8-lane input into a 4-lane result.
  %lane = shufflevector <8 x bfloat> %v, <8 x bfloat> undef, <4 x i32> <i32 7, i32 7, i32 7, i32 7>
  ret <4 x bfloat> %lane
}

; bfloat16x8_t test_vdupq_laneq_bf16(bfloat16x8_t v) { return vdupq_laneq_bf16(v, 7); }
define <8 x bfloat> @test_vdupq_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vdupq_laneq_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v0.8h, v0.h[7]
; CHECK-NEXT:    ret
entry:
  ; Broadcast lane 7 of %v to all eight result lanes.
  %lane = shufflevector <8 x bfloat> %v, <8 x bfloat> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x bfloat> %lane
}

; bfloat16x8_t test_vcombine_bf16(bfloat16x4_t low, bfloat16x4_t high) { return vcombine_bf16(low, high); }
define <8 x bfloat> @test_vcombine_bf16(<4 x bfloat> %low, <4 x bfloat> %high) nounwind {
; CHECK-LABEL: test_vcombine_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
entry:
  ; Identity mask over both operands: lanes 0-3 come from %low and lanes 4-7
  ; from %high, i.e. a concatenation.
  %shuffle.i = shufflevector <4 x bfloat> %low, <4 x bfloat> %high, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %shuffle.i
}

; bfloat16x4_t test_vget_high_bf16(bfloat16x8_t a) { return vget_high_bf16(a); }
define <4 x bfloat> @test_vget_high_bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: test_vget_high_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  ; Extract lanes 4-7 (the upper half) of %a.
  %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ret <4 x bfloat> %shuffle.i
}

; bfloat16x4_t test_vget_low_bf16(bfloat16x8_t a) { return vget_low_bf16(a); }
define <4 x bfloat> @test_vget_low_bf16(<8 x bfloat> %a) nounwind {
; CHECK-LABEL: test_vget_low_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  ; Extract lanes 0-3 (the lower half) of %a; no instruction other than the
  ; return is expected.
  %shuffle.i = shufflevector <8 x bfloat> %a, <8 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x bfloat> %shuffle.i
}

; bfloat16_t test_vget_lane_bf16(bfloat16x4_t v) { return vget_lane_bf16(v, 1); }
define bfloat @test_vget_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vget_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov h0, v0.h[1]
; CHECK-NEXT:    ret
entry:
  ; Read scalar lane 1 of the 4-lane vector.
  %vget_lane = extractelement <4 x bfloat> %v, i32 1
  ret bfloat %vget_lane
}

; bfloat16_t test_vgetq_lane_bf16(bfloat16x8_t v) { return vgetq_lane_bf16(v, 7); }
define bfloat @test_vgetq_lane_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vgetq_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov h0, v0.h[7]
; CHECK-NEXT:    ret
entry:
  ; Read scalar lane 7 of the 8-lane vector.
  %vgetq_lane = extractelement <8 x bfloat> %v, i32 7
  ret bfloat %vgetq_lane
}

; bfloat16x4_t test_vset_lane_bf16(bfloat16_t a, bfloat16x4_t v) { return vset_lane_bf16(a, v, 1); }
define <4 x bfloat> @test_vset_lane_bf16(bfloat %a, <4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vset_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    mov v1.h[1], v0.h[0]
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ret
entry:
  ; Overwrite lane 1 of %v with the scalar %a.
  %vset_lane = insertelement <4 x bfloat> %v, bfloat %a, i32 1
  ret <4 x bfloat> %vset_lane
}

; bfloat16x8_t test_vsetq_lane_bf16(bfloat16_t a, bfloat16x8_t v) { return vsetq_lane_bf16(a, v, 7); }
define <8 x bfloat> @test_vsetq_lane_bf16(bfloat %a, <8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vsetq_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    mov v1.h[7], v0.h[0]
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  ; Overwrite lane 7 of %v with the scalar %a.
  %vset_lane = insertelement <8 x bfloat> %v, bfloat %a, i32 7
  ret <8 x bfloat> %vset_lane
}

; bfloat16_t test_vduph_lane_bf16(bfloat16x4_t v) { return vduph_lane_bf16(v, 1); }
define bfloat @test_vduph_lane_bf16(<4 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vduph_lane_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov h0, v0.h[1]
; CHECK-NEXT:    ret
entry:
  ; Same IR body as test_vget_lane_bf16: extract lane 1.
  %vget_lane = extractelement <4 x bfloat> %v, i32 1
  ret bfloat %vget_lane
}

; bfloat16_t test_vduph_laneq_bf16(bfloat16x8_t v) { return vduph_laneq_bf16(v, 7); }
define bfloat @test_vduph_laneq_bf16(<8 x bfloat> %v) nounwind {
; CHECK-LABEL: test_vduph_laneq_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov h0, v0.h[7]
; CHECK-NEXT:    ret
entry:
  ; Same IR body as test_vgetq_lane_bf16: extract lane 7.
  %vgetq_lane = extractelement <8 x bfloat> %v, i32 7
  ret bfloat %vgetq_lane
}

; vcopy_lane_bf16(a, 1, b, 3);
define <4 x bfloat> @test_vcopy_lane_bf16_v1(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_lane_bf16_v1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.h[1], v1.h[3]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  ; Mask index 7 selects lane 3 of %b (indices 4-7 address the second operand)
  ; and places it in result lane 1; the other lanes come from %a unchanged.
  %vset_lane = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
  ret <4 x bfloat> %vset_lane
}

; vcopy_lane_bf16(a, 2, b, 0);
define <4 x bfloat> @test_vcopy_lane_bf16_v2(<4 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_lane_bf16_v2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.h[2], v1.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  ; Mask index 4 selects lane 0 of %b and places it in result lane 2.
  %vset_lane = shufflevector <4 x bfloat> %a, <4 x bfloat> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
  ret <4 x bfloat> %vset_lane
}

; vcopyq_lane_bf16(a, 0, b, 2);
define <8 x bfloat> @test_vcopyq_lane_bf16_v1(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_lane_bf16_v1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.h[0], v1.h[2]
; CHECK-NEXT:    ret
entry:
  ; Widen %b to eight lanes so it can be shuffled with %a; only lane 2 of the
  ; widened value is defined (it holds lane 2 of %b).
  %0 = shufflevector <4 x bfloat> %b, <4 x bfloat> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; Mask index 10 selects lane 2 of %0 (indices 8-15 address the second
  ; operand) and places it in result lane 0.
  %vset_lane = shufflevector <8 x bfloat> %a, <8 x bfloat> %0, <8 x i32> <i32 10, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %vset_lane
}

; vcopyq_lane_bf16(a, 6, b, 0);
define <8 x bfloat> @test_vcopyq_lane_bf16_v2(<8 x bfloat> %a, <4 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_lane_bf16_v2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.h[6], v1.h[0]
; CHECK-NEXT:    ret
entry:
  ; Widen %b to eight lanes; only lane 0 of the widened value is defined (it
  ; holds lane 0 of %b).
  %0 = shufflevector <4 x bfloat> %b, <4 x bfloat> undef, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ; Mask index 8 selects lane 0 of %0 and places it in result lane 6.
  %vset_lane = shufflevector <8 x bfloat> %a, <8 x bfloat> %0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 7>
  ret <8 x bfloat> %vset_lane
}

; vcopy_laneq_bf16(a, 0, b, 7);
define <4 x bfloat> @test_vcopy_laneq_bf16_v1(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_laneq_bf16_v1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v0.h[0], v1.h[7]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  ; The operands have different lane counts, so the copy is written as an
  ; extract from %b (lane 7) followed by an insert into %a (lane 0).
  %vgetq_lane = extractelement <8 x bfloat> %b, i32 7
  %vset_lane = insertelement <4 x bfloat> %a, bfloat %vgetq_lane, i32 0
  ret <4 x bfloat> %vset_lane
}

; vcopy_laneq_bf16(a, 3, b, 4);
define <4 x bfloat> @test_vcopy_laneq_bf16_v2(<4 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopy_laneq_bf16_v2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov v0.h[3], v1.h[4]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
entry:
  ; Extract lane 4 of %b, then insert it into lane 3 of %a.
  %vgetq_lane = extractelement <8 x bfloat> %b, i32 4
  %vset_lane = insertelement <4 x bfloat> %a, bfloat %vgetq_lane, i32 3
  ret <4 x bfloat> %vset_lane
}

; vcopyq_laneq_bf16(a, 3, b, 7);
define <8 x bfloat> @test_vcopyq_laneq_bf16_v1(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_laneq_bf16_v1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.h[3], v1.h[7]
; CHECK-NEXT:    ret
entry:
  ; Mask index 15 selects lane 7 of %b (indices 8-15 address the second
  ; operand) and places it in result lane 3.
  %vset_lane = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %vset_lane
}

; vcopyq_laneq_bf16(a, 6, b, 2);
define <8 x bfloat> @test_vcopyq_laneq_bf16_v2(<8 x bfloat> %a, <8 x bfloat> %b) nounwind {
; CHECK-LABEL: test_vcopyq_laneq_bf16_v2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.h[6], v1.h[2]
; CHECK-NEXT:    ret
entry:
  ; Mask index 10 selects lane 2 of %b and places it in result lane 6.
  %vset_lane = shufflevector <8 x bfloat> %a, <8 x bfloat> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 10, i32 7>
  ret <8 x bfloat> %vset_lane
}