; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s

;
; Masked Loads
;

define <vscale x 2 x i64> @masked_load_nxv2i64(<vscale x 2 x i64> *%a, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv2i64:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %load
}

define <vscale x 4 x i32> @masked_load_nxv4i32(<vscale x 4 x i32> *%a, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv4i32:
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32> *%a, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  ret <vscale x 4 x i32> %load
}

define <vscale x 8 x i16> @masked_load_nxv8i16(<vscale x 8 x i16> *%a, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv8i16:
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> undef)
  ret <vscale x 8 x i16> %load
}

define <vscale x 16 x i8> @masked_load_nxv16i8(<vscale x 16 x i8> *%a, <vscale x 16 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv16i8:
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8> *%a, i32 1, <vscale x 16 x i1> %mask, <vscale x 16 x i8> undef)
  ret <vscale x 16 x i8> %load
}

define <vscale x 2 x double> @masked_load_nxv2f64(<vscale x 2 x double> *%a, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv2f64:
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.masked.load.nxv2f64(<vscale x 2 x double> *%a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x float> @masked_load_nxv2f32(<vscale x 2 x float> *%a, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv2f32:
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x float> @llvm.masked.load.nxv2f32(<vscale x 2 x float> *%a, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %load
}

define <vscale x 2 x half> @masked_load_nxv2f16(<vscale x 2 x half> *%a, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv2f16:
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x half> @llvm.masked.load.nxv2f16(<vscale x 2 x half> *%a, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %load
}

define <vscale x 4 x float> @masked_load_nxv4f32(<vscale x 4 x float> *%a, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv4f32:
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float> *%a, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> undef)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x half> @masked_load_nxv4f16(<vscale x 4 x half> *%a, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv4f16:
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half> *%a, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x half> undef)
  ret <vscale x 4 x half> %load
}

define <vscale x 8 x half> @masked_load_nxv8f16(<vscale x 8 x half> *%a, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_load_nxv8f16:
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ret
  %load = call <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half> *%a, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x half> undef)
  ret <vscale x 8 x half> %load
}

;
; Masked Stores
;

define void @masked_store_nxv2i64(<vscale x 2 x i64> *%a, <vscale x 2 x i64> %val, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv2i64:
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64> *%a, i32 8, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_store_nxv4i32(<vscale x 4 x i32> *%a, <vscale x 4 x i32> %val, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv4i32:
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv4i32(<vscale x 4 x i32> %val, <vscale x 4 x i32> *%a, i32 4, <vscale x 4 x i1> %mask)
  ret void
}

define void @masked_store_nxv8i16(<vscale x 8 x i16> *%a, <vscale x 8 x i16> %val, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv8i16:
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16> *%a, i32 2, <vscale x 8 x i1> %mask)
  ret void
}

define void @masked_store_nxv16i8(<vscale x 16 x i8> *%a, <vscale x 16 x i8> %val, <vscale x 16 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv16i8:
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv16i8(<vscale x 16 x i8> %val, <vscale x 16 x i8> *%a, i32 1, <vscale x 16 x i1> %mask)
  ret void
}

define void @masked_store_nxv2f64(<vscale x 2 x double> *%a, <vscale x 2 x double> %val, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv2f64:
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x double> *%a, i32 8, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_store_nxv2f32(<vscale x 2 x float> *%a, <vscale x 2 x float> %val, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv2f32:
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float> *%a, i32 4, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_store_nxv2f16(<vscale x 2 x half> *%a, <vscale x 2 x half> %val, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv2f16:
; CHECK-NEXT: st1h { z0.d }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv2f16(<vscale x 2 x half> %val, <vscale x 2 x half> *%a, i32 4, <vscale x 2 x i1> %mask)
  ret void
}

define void @masked_store_nxv4f32(<vscale x 4 x float> *%a, <vscale x 4 x float> %val, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv4f32:
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv4f32(<vscale x 4 x float> %val, <vscale x 4 x float> *%a, i32 4, <vscale x 4 x i1> %mask)
  ret void
}

define void @masked_store_nxv4f16(<vscale x 4 x half> *%a, <vscale x 4 x half> %val, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv4f16:
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv4f16(<vscale x 4 x half> %val, <vscale x 4 x half> *%a, i32 2, <vscale x 4 x i1> %mask)
  ret void
}

define void @masked_store_nxv8f16(<vscale x 8 x half> *%a, <vscale x 8 x half> %val, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: masked_store_nxv8f16:
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
; CHECK-NEXT: ret
  call void @llvm.masked.store.nxv8f16(<vscale x 8 x half> %val, <vscale x 8 x half> *%a, i32 2, <vscale x 8 x i1> %mask)
  ret void
}

declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(<vscale x 4 x i32>*, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16(<vscale x 8 x i16>*, i32, <vscale x 8 x i1>, <vscale x 8 x i16>)
declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8(<vscale x 16 x i8>*, i32, <vscale x 16 x i1>, <vscale x 16 x i8>)

declare <vscale x 2 x double> @llvm.masked.load.nxv2f64(<vscale x 2 x double>*, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
declare <vscale x 2 x float> @llvm.masked.load.nxv2f32(<vscale x 2 x float>*, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x half> @llvm.masked.load.nxv2f16(<vscale x 2 x half>*, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 4 x float> @llvm.masked.load.nxv4f32(<vscale x 4 x float>*, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 4 x half> @llvm.masked.load.nxv4f16(<vscale x 4 x half>*, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
declare <vscale x 8 x half> @llvm.masked.load.nxv8f16(<vscale x 8 x half>*, i32, <vscale x 8 x i1>, <vscale x 8 x half>)

declare void @llvm.masked.store.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>*, i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32, <vscale x 4 x i1>)
declare void @llvm.masked.store.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>*, i32, <vscale x 8 x i1>)
declare void @llvm.masked.store.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32, <vscale x 16 x i1>)

declare void @llvm.masked.store.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>*, i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>*, i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>*, i32, <vscale x 2 x i1>)
declare void @llvm.masked.store.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>*, i32, <vscale x 4 x i1>)
declare void @llvm.masked.store.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>*, i32, <vscale x 4 x i1>)
declare void @llvm.masked.store.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>*, i32, <vscale x 8 x i1>)