; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

;
; ST1B
;

define void @st1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: st1b_i8:
; CHECK: st1b { z0.b }, p0, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> %data,
                                          <vscale x 16 x i1> %pred,
                                          i8* %base)
  ret void
}

define void @st1b_h(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: st1b_h:
; CHECK: st1b { z0.h }, p0, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %trunc = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
  call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> %trunc,
                                         <vscale x 8 x i1> %pred,
                                         i8* %base)
  ret void
}

define void @st1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: st1b_s:
; CHECK: st1b { z0.s }, p0, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
  call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> %trunc,
                                         <vscale x 4 x i1> %pred,
                                         i8* %base)
  ret void
}

define void @st1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i8* %a, i64 %index) {
; CHECK-LABEL: st1b_d:
; CHECK: st1b { z0.d }, p0, [x0, x1]
; CHECK-NEXT: ret
  %base = getelementptr i8, i8* %a, i64 %index
  %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
  call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> %trunc,
                                         <vscale x 2 x i1> %pred,
                                         i8* %base)
  ret void
}

;
; ST1H
;

define void @st1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, i16* %a, i64 %index) {
; CHECK-LABEL: st1h_i16:
; CHECK: st1h { z0.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr i16, i16* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %data,
                                          <vscale x 8 x i1> %pred,
                                          i16* %base)
  ret void
}

define void @st1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, half* %a, i64 %index) {
; CHECK-LABEL: st1h_f16:
; CHECK: st1h { z0.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr half, half* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> %data,
                                          <vscale x 8 x i1> %pred,
                                          half* %base)
  ret void
}

define void @st1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, bfloat* %a, i64 %index) #0 {
; CHECK-LABEL: st1h_bf16:
; CHECK: st1h { z0.h }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr bfloat, bfloat* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> %data,
                                           <vscale x 8 x i1> %pred,
                                           bfloat* %base)
  ret void
}

define void @st1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i16* %addr) {
; CHECK-LABEL: st1h_s:
; CHECK: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
  call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> %trunc,
                                          <vscale x 4 x i1> %pred,
                                          i16* %addr)
  ret void
}

define void @st1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i16* %a, i64 %index) {
; CHECK-LABEL: st1h_d:
; CHECK: st1h { z0.d }, p0, [x0, x1, lsl #1]
; CHECK-NEXT: ret
  %base = getelementptr i16, i16* %a, i64 %index
  %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
  call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> %trunc,
                                          <vscale x 2 x i1> %pred,
                                          i16* %base)
  ret void
}

;
; ST1W
;

define void @st1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, i32* %a, i64 %index) {
; CHECK-LABEL: st1w_i32:
; CHECK: st1w { z0.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %base = getelementptr i32, i32* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %data,
                                          <vscale x 4 x i1> %pred,
                                          i32* %base)
  ret void
}

define void @st1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, float* %a, i64 %index) {
; CHECK-LABEL: st1w_f32:
; CHECK: st1w { z0.s }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %base = getelementptr float, float* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> %data,
                                          <vscale x 4 x i1> %pred,
                                          float* %base)
  ret void
}

define void @st1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i32* %a, i64 %index) {
; CHECK-LABEL: st1w_d:
; CHECK: st1w { z0.d }, p0, [x0, x1, lsl #2]
; CHECK-NEXT: ret
  %base = getelementptr i32, i32* %a, i64 %index
  %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
  call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> %trunc,
                                          <vscale x 2 x i1> %pred,
                                          i32* %base)
  ret void
}

;
; ST1D
;

define void @st1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, i64* %a, i64 %index) {
; CHECK-LABEL: st1d_i64:
; CHECK: st1d { z0.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %base = getelementptr i64, i64* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> %data,
                                          <vscale x 2 x i1> %pred,
                                          i64* %base)
  ret void
}

define void @st1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, double* %a, i64 %index) {
; CHECK-LABEL: st1d_f64:
; CHECK: st1d { z0.d }, p0, [x0, x1, lsl #3]
; CHECK-NEXT: ret
  %base = getelementptr double, double* %a, i64 %index
  call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> %data,
                                          <vscale x 2 x i1> %pred,
                                          double* %base)
  ret void
}

declare void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8*)

declare void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, i8*)
declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
declare void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half*)
declare void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat*)

declare void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, i8*)
declare void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i16*)
declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
declare void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*)

declare void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i8*)
declare void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, i16*)
declare void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, i32*)
declare void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64*)
declare void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }