; RUN: llc -aarch64-sve-vector-bits-min=128 -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
; RUN: llc -aarch64-sve-vector-bits-min=384 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=512 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048

target triple = "aarch64-unknown-linux-gnu"

; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: ptrue

;
; CEIL -> FRINTP
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintp_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintp_v4f16:
; CHECK: frintp v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.ceil.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintp_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintp_v8f16:
; CHECK: frintp v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.ceil.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintp_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintp_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.ceil.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}

define void @frintp_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintp_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.ceil.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}

define void @frintp_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintp_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.ceil.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}

define void @frintp_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintp_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.ceil.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintp_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintp_v2f32:
; CHECK: frintp v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.ceil.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintp_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintp_v4f32:
; CHECK: frintp v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.ceil.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintp_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintp_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.ceil.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}

define void @frintp_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintp_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.ceil.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}

define void @frintp_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintp_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.ceil.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}

define void @frintp_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintp_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.ceil.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintp_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintp_v1f64:
; CHECK: frintp d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintp_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintp_v2f64:
; CHECK: frintp v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintp_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintp_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.ceil.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}

define void @frintp_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintp_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintp [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintp [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.ceil.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}

define void @frintp_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintp_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.ceil.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}

define void @frintp_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintp_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintp [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.ceil.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}

;
; FLOOR -> FRINTM
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintm_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintm_v4f16:
; CHECK: frintm v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.floor.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintm_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintm_v8f16:
; CHECK: frintm v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.floor.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintm_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintm_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.floor.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}

define void @frintm_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintm_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.floor.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}

define void @frintm_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintm_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.floor.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}

define void @frintm_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintm_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.floor.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintm_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintm_v2f32:
; CHECK: frintm v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.floor.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintm_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintm_v4f32:
; CHECK: frintm v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.floor.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintm_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintm_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.floor.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}

define void @frintm_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintm_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.floor.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}

define void @frintm_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintm_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.floor.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}

define void @frintm_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintm_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.floor.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintm_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintm_v1f64:
; CHECK: frintm d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintm_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintm_v2f64:
; CHECK: frintm v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintm_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintm_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.floor.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}

define void @frintm_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintm_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintm [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintm [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.floor.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}

define void @frintm_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintm_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.floor.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}

define void @frintm_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintm_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintm [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.floor.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}

;
; FNEARBYINT -> FRINTI
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frinti_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frinti_v4f16:
; CHECK: frinti v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.nearbyint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frinti_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frinti_v8f16:
; CHECK: frinti v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.nearbyint.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frinti_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frinti_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.nearbyint.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}

define void @frinti_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frinti_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.nearbyint.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}

define void @frinti_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frinti_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.nearbyint.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}

define void @frinti_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frinti_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.nearbyint.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frinti_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frinti_v2f32:
; CHECK: frinti v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frinti_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frinti_v4f32:
; CHECK: frinti v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frinti_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frinti_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}

define void @frinti_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frinti_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}

define void @frinti_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frinti_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.nearbyint.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}

define void @frinti_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frinti_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.nearbyint.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frinti_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frinti_v1f64:
; CHECK: frinti d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frinti_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frinti_v2f64:
; CHECK: frinti v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frinti_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frinti_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}

define void @frinti_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frinti_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frinti [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frinti [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}

define void @frinti_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frinti_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.nearbyint.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}

define void @frinti_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frinti_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frinti [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.nearbyint.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}

;
; RINT -> FRINTX
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintx_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintx_v4f16:
; CHECK: frintx v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.rint.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintx_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintx_v8f16:
; CHECK: frintx v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.rint.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

define void @frintx_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintx_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.rint.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}

define void @frintx_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintx_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frintx [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintx [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.rint.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}

define void @frintx_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintx_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.rint.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}

define void @frintx_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintx_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintx [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.rint.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintx_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintx_v2f32:
; CHECK: frintx v0.2s, v0.2s
; CHECK-NEXT: ret
  %rounded = call <2 x float> @llvm.rint.v2f32(<2 x float> %op)
  ret <2 x float> %rounded
}

; 128-bit vectors likewise stay on NEON.
define <4 x float> @frintx_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintx_v4f32:
; CHECK: frintx v0.4s, v0.4s
; CHECK-NEXT: ret
  %rounded = call <4 x float> @llvm.rint.v4f32(<4 x float> %op)
  ret <4 x float> %rounded
}

define void @frintx_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintx_v8f32:
; CHECK: ptrue [[PRED:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[VEC:z[0-9]+]].s }, [[PRED]]/z, [x0]
; CHECK-NEXT: frintx [[OUT:z[0-9]+]].s, [[PRED]]/m, [[VEC]].s
; CHECK-NEXT: st1w { [[OUT]].s }, [[PRED]], [x0]
; CHECK-NEXT: ret
  %in = load <8 x float>, <8 x float>* %a
  %rounded = call <8 x float> @llvm.rint.v8f32(<8 x float> %in)
  store <8 x float> %rounded, <8 x float>* %a
  ret void
}

define void @frintx_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintx_v16f32:
; VBITS_GE_512: ptrue [[PRED:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[VEC:z[0-9]+]].s }, [[PRED]]/z, [x0]
; VBITS_GE_512-NEXT: frintx [[OUT:z[0-9]+]].s, [[PRED]]/m, [[VEC]].s
; VBITS_GE_512-NEXT: st1w { [[OUT]].s }, [[PRED]], [x0]
; VBITS_GE_512-NEXT: ret

; With only 256-bit registers the operation is legalised as two halves.
; VBITS_EQ_256-DAG: ptrue [[PRED:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[IDX:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PRED]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PRED]]/z, [x0, x[[IDX]], lsl #2]
; VBITS_EQ_256-DAG: frintx [[OUT_LO:z[0-9]+]].s, [[PRED]]/m, [[LO]].s
; VBITS_EQ_256-DAG: frintx [[OUT_HI:z[0-9]+]].s, [[PRED]]/m, [[HI]].s
; VBITS_EQ_256-DAG: st1w { [[OUT_LO]].s }, [[PRED]], [x0]
; VBITS_EQ_256-DAG: st1w { [[OUT_HI]].s }, [[PRED]], [x0, x[[IDX]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %in = load <16 x float>, <16 x float>* %a
  %rounded = call <16 x float> @llvm.rint.v16f32(<16 x float> %in)
  store <16 x float> %rounded, <16 x float>* %a
  ret void
}

define void @frintx_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintx_v32f32:
; VBITS_GE_1024: ptrue [[PRED:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[VEC:z[0-9]+]].s }, [[PRED]]/z, [x0]
; VBITS_GE_1024-NEXT: frintx [[OUT:z[0-9]+]].s, [[PRED]]/m, [[VEC]].s
; VBITS_GE_1024-NEXT: st1w { [[OUT]].s }, [[PRED]], [x0]
; VBITS_GE_1024-NEXT: ret
  %in = load <32 x float>, <32 x float>* %a
  %rounded = call <32 x float> @llvm.rint.v32f32(<32 x float> %in)
  store <32 x float> %rounded, <32 x float>* %a
  ret void
}

define void @frintx_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintx_v64f32:
; VBITS_GE_2048: ptrue [[PRED:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[VEC:z[0-9]+]].s }, [[PRED]]/z, [x0]
; VBITS_GE_2048-NEXT: frintx [[OUT:z[0-9]+]].s, [[PRED]]/m, [[VEC]].s
; VBITS_GE_2048-NEXT: st1w { [[OUT]].s }, [[PRED]], [x0]
; VBITS_GE_2048-NEXT: ret
  %in = load <64 x float>, <64 x float>* %a
  %rounded = call <64 x float> @llvm.rint.v64f32(<64 x float> %in)
  store <64 x float> %rounded, <64 x float>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintx_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintx_v1f64:
; CHECK: frintx d0, d0
; CHECK-NEXT: ret
  %rounded = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
  ret <1 x double> %rounded
}

; 128-bit vectors likewise stay on NEON.
define <2 x double> @frintx_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintx_v2f64:
; CHECK: frintx v0.2d, v0.2d
; CHECK-NEXT: ret
  %rounded = call <2 x double> @llvm.rint.v2f64(<2 x double> %op)
  ret <2 x double> %rounded
}

define void @frintx_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintx_v4f64:
; CHECK: ptrue [[PRED:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[VEC:z[0-9]+]].d }, [[PRED]]/z, [x0]
; CHECK-NEXT: frintx [[OUT:z[0-9]+]].d, [[PRED]]/m, [[VEC]].d
; CHECK-NEXT: st1d { [[OUT]].d }, [[PRED]], [x0]
; CHECK-NEXT: ret
  %in = load <4 x double>, <4 x double>* %a
  %rounded = call <4 x double> @llvm.rint.v4f64(<4 x double> %in)
  store <4 x double> %rounded, <4 x double>* %a
  ret void
}

define void @frintx_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintx_v8f64:
; VBITS_GE_512: ptrue [[PRED:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[VEC:z[0-9]+]].d }, [[PRED]]/z, [x0]
; VBITS_GE_512-NEXT: frintx [[OUT:z[0-9]+]].d, [[PRED]]/m, [[VEC]].d
; VBITS_GE_512-NEXT: st1d { [[OUT]].d }, [[PRED]], [x0]
; VBITS_GE_512-NEXT: ret

; With only 256-bit registers the operation is legalised as two halves.
; VBITS_EQ_256-DAG: ptrue [[PRED:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[IDX:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PRED]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PRED]]/z, [x0, x[[IDX]], lsl #3]
; VBITS_EQ_256-DAG: frintx [[OUT_LO:z[0-9]+]].d, [[PRED]]/m, [[LO]].d
; VBITS_EQ_256-DAG: frintx [[OUT_HI:z[0-9]+]].d, [[PRED]]/m, [[HI]].d
; VBITS_EQ_256-DAG: st1d { [[OUT_LO]].d }, [[PRED]], [x0]
; VBITS_EQ_256-DAG: st1d { [[OUT_HI]].d }, [[PRED]], [x0, x[[IDX]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %in = load <8 x double>, <8 x double>* %a
  %rounded = call <8 x double> @llvm.rint.v8f64(<8 x double> %in)
  store <8 x double> %rounded, <8 x double>* %a
  ret void
}

define void @frintx_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintx_v16f64:
; VBITS_GE_1024: ptrue [[PRED:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[VEC:z[0-9]+]].d }, [[PRED]]/z, [x0]
; VBITS_GE_1024-NEXT: frintx [[OUT:z[0-9]+]].d, [[PRED]]/m, [[VEC]].d
; VBITS_GE_1024-NEXT: st1d { [[OUT]].d }, [[PRED]], [x0]
; VBITS_GE_1024-NEXT: ret
  %in = load <16 x double>, <16 x double>* %a
  %rounded = call <16 x double> @llvm.rint.v16f64(<16 x double> %in)
  store <16 x double> %rounded, <16 x double>* %a
  ret void
}

define void @frintx_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintx_v32f64:
; VBITS_GE_2048: ptrue [[PRED:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[VEC:z[0-9]+]].d }, [[PRED]]/z, [x0]
; VBITS_GE_2048-NEXT: frintx [[OUT:z[0-9]+]].d, [[PRED]]/m, [[VEC]].d
; VBITS_GE_2048-NEXT: st1d { [[OUT]].d }, [[PRED]], [x0]
; VBITS_GE_2048-NEXT: ret
  %in = load <32 x double>, <32 x double>* %a
  %rounded = call <32 x double> @llvm.rint.v32f64(<32 x double> %in)
  store <32 x double> %rounded, <32 x double>* %a
  ret void
}

;
; ROUND -> FRINTA
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frinta_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frinta_v4f16:
; CHECK: frinta v0.4h, v0.4h
; CHECK-NEXT: ret
  %rounded = call <4 x half> @llvm.round.v4f16(<4 x half> %op)
  ret <4 x half> %rounded
}

; 128-bit vectors likewise stay on NEON.
define <8 x half> @frinta_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frinta_v8f16:
; CHECK: frinta v0.8h, v0.8h
; CHECK-NEXT: ret
  %rounded = call <8 x half> @llvm.round.v8f16(<8 x half> %op)
  ret <8 x half> %rounded
}

define void @frinta_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frinta_v16f16:
; CHECK: ptrue [[PRED:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[VEC:z[0-9]+]].h }, [[PRED]]/z, [x0]
; CHECK-NEXT: frinta [[OUT:z[0-9]+]].h, [[PRED]]/m, [[VEC]].h
; CHECK-NEXT: st1h { [[OUT]].h }, [[PRED]], [x0]
; CHECK-NEXT: ret
  %in = load <16 x half>, <16 x half>* %a
  %rounded = call <16 x half> @llvm.round.v16f16(<16 x half> %in)
  store <16 x half> %rounded, <16 x half>* %a
  ret void
}

define void @frinta_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frinta_v32f16:
; VBITS_GE_512: ptrue [[PRED:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[VEC:z[0-9]+]].h }, [[PRED]]/z, [x0]
; VBITS_GE_512-NEXT: frinta [[OUT:z[0-9]+]].h, [[PRED]]/m, [[VEC]].h
; VBITS_GE_512-NEXT: st1h { [[OUT]].h }, [[PRED]], [x0]
; VBITS_GE_512-NEXT: ret

; With only 256-bit registers the operation is legalised as two halves.
; VBITS_EQ_256-DAG: ptrue [[PRED:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[IDX:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PRED]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PRED]]/z, [x0, x[[IDX]], lsl #1]
; VBITS_EQ_256-DAG: frinta [[OUT_LO:z[0-9]+]].h, [[PRED]]/m, [[LO]].h
; VBITS_EQ_256-DAG: frinta [[OUT_HI:z[0-9]+]].h, [[PRED]]/m, [[HI]].h
; VBITS_EQ_256-DAG: st1h { [[OUT_LO]].h }, [[PRED]], [x0]
; VBITS_EQ_256-DAG: st1h { [[OUT_HI]].h }, [[PRED]], [x0, x[[IDX]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %in = load <32 x half>, <32 x half>* %a
  %rounded = call <32 x half> @llvm.round.v32f16(<32 x half> %in)
  store <32 x half> %rounded, <32 x half>* %a
  ret void
}

define void @frinta_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frinta_v64f16:
; VBITS_GE_1024: ptrue [[PRED:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[VEC:z[0-9]+]].h }, [[PRED]]/z, [x0]
; VBITS_GE_1024-NEXT: frinta [[OUT:z[0-9]+]].h, [[PRED]]/m, [[VEC]].h
; VBITS_GE_1024-NEXT: st1h { [[OUT]].h }, [[PRED]], [x0]
; VBITS_GE_1024-NEXT: ret
  %in = load <64 x half>, <64 x half>* %a
  %rounded = call <64 x half> @llvm.round.v64f16(<64 x half> %in)
  store <64 x half> %rounded, <64 x half>* %a
  ret void
}

define void @frinta_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frinta_v128f16:
; VBITS_GE_2048: ptrue [[PRED:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[VEC:z[0-9]+]].h }, [[PRED]]/z, [x0]
; VBITS_GE_2048-NEXT: frinta [[OUT:z[0-9]+]].h, [[PRED]]/m, [[VEC]].h
; VBITS_GE_2048-NEXT: st1h { [[OUT]].h }, [[PRED]], [x0]
; VBITS_GE_2048-NEXT: ret
  %in = load <128 x half>, <128 x half>* %a
  %rounded = call <128 x half> @llvm.round.v128f16(<128 x half> %in)
  store <128 x half> %rounded, <128 x half>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frinta_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frinta_v2f32:
; CHECK: frinta v0.2s, v0.2s
; CHECK-NEXT: ret
  %rounded = call <2 x float> @llvm.round.v2f32(<2 x float> %op)
  ret <2 x float> %rounded
}

; 128-bit vectors likewise stay on NEON.
define <4 x float> @frinta_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frinta_v4f32:
; CHECK: frinta v0.4s, v0.4s
; CHECK-NEXT: ret
  %rounded = call <4 x float> @llvm.round.v4f32(<4 x float> %op)
  ret <4 x float> %rounded
}

define void @frinta_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frinta_v8f32:
; CHECK: ptrue [[PRED:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[VEC:z[0-9]+]].s }, [[PRED]]/z, [x0]
; CHECK-NEXT: frinta [[OUT:z[0-9]+]].s, [[PRED]]/m, [[VEC]].s
; CHECK-NEXT: st1w { [[OUT]].s }, [[PRED]], [x0]
; CHECK-NEXT: ret
  %in = load <8 x float>, <8 x float>* %a
  %rounded = call <8 x float> @llvm.round.v8f32(<8 x float> %in)
  store <8 x float> %rounded, <8 x float>* %a
  ret void
}

define void @frinta_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frinta_v16f32:
; VBITS_GE_512: ptrue [[PRED:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[VEC:z[0-9]+]].s }, [[PRED]]/z, [x0]
; VBITS_GE_512-NEXT: frinta [[OUT:z[0-9]+]].s, [[PRED]]/m, [[VEC]].s
; VBITS_GE_512-NEXT: st1w { [[OUT]].s }, [[PRED]], [x0]
; VBITS_GE_512-NEXT: ret

; With only 256-bit registers the operation is legalised as two halves.
; VBITS_EQ_256-DAG: ptrue [[PRED:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[IDX:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PRED]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PRED]]/z, [x0, x[[IDX]], lsl #2]
; VBITS_EQ_256-DAG: frinta [[OUT_LO:z[0-9]+]].s, [[PRED]]/m, [[LO]].s
; VBITS_EQ_256-DAG: frinta [[OUT_HI:z[0-9]+]].s, [[PRED]]/m, [[HI]].s
; VBITS_EQ_256-DAG: st1w { [[OUT_LO]].s }, [[PRED]], [x0]
; VBITS_EQ_256-DAG: st1w { [[OUT_HI]].s }, [[PRED]], [x0, x[[IDX]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %in = load <16 x float>, <16 x float>* %a
  %rounded = call <16 x float> @llvm.round.v16f32(<16 x float> %in)
  store <16 x float> %rounded, <16 x float>* %a
  ret void
}

define void @frinta_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frinta_v32f32:
; VBITS_GE_1024: ptrue [[PRED:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[VEC:z[0-9]+]].s }, [[PRED]]/z, [x0]
; VBITS_GE_1024-NEXT: frinta [[OUT:z[0-9]+]].s, [[PRED]]/m, [[VEC]].s
; VBITS_GE_1024-NEXT: st1w { [[OUT]].s }, [[PRED]], [x0]
; VBITS_GE_1024-NEXT: ret
  %in = load <32 x float>, <32 x float>* %a
  %rounded = call <32 x float> @llvm.round.v32f32(<32 x float> %in)
  store <32 x float> %rounded, <32 x float>* %a
  ret void
}

define void @frinta_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frinta_v64f32:
; VBITS_GE_2048: ptrue [[PRED:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[VEC:z[0-9]+]].s }, [[PRED]]/z, [x0]
; VBITS_GE_2048-NEXT: frinta [[OUT:z[0-9]+]].s, [[PRED]]/m, [[VEC]].s
; VBITS_GE_2048-NEXT: st1w { [[OUT]].s }, [[PRED]], [x0]
; VBITS_GE_2048-NEXT: ret
  %in = load <64 x float>, <64 x float>* %a
  %rounded = call <64 x float> @llvm.round.v64f32(<64 x float> %in)
  store <64 x float> %rounded, <64 x float>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frinta_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frinta_v1f64:
; CHECK: frinta d0, d0
; CHECK-NEXT: ret
  %rounded = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
  ret <1 x double> %rounded
}

; 128-bit vectors likewise stay on NEON.
define <2 x double> @frinta_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frinta_v2f64:
; CHECK: frinta v0.2d, v0.2d
; CHECK-NEXT: ret
  %rounded = call <2 x double> @llvm.round.v2f64(<2 x double> %op)
  ret <2 x double> %rounded
}

define void @frinta_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frinta_v4f64:
; CHECK: ptrue [[PRED:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[VEC:z[0-9]+]].d }, [[PRED]]/z, [x0]
; CHECK-NEXT: frinta [[OUT:z[0-9]+]].d, [[PRED]]/m, [[VEC]].d
; CHECK-NEXT: st1d { [[OUT]].d }, [[PRED]], [x0]
; CHECK-NEXT: ret
  %in = load <4 x double>, <4 x double>* %a
  %rounded = call <4 x double> @llvm.round.v4f64(<4 x double> %in)
  store <4 x double> %rounded, <4 x double>* %a
  ret void
}

define void @frinta_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frinta_v8f64:
; VBITS_GE_512: ptrue [[PRED:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[VEC:z[0-9]+]].d }, [[PRED]]/z, [x0]
; VBITS_GE_512-NEXT: frinta [[OUT:z[0-9]+]].d, [[PRED]]/m, [[VEC]].d
; VBITS_GE_512-NEXT: st1d { [[OUT]].d }, [[PRED]], [x0]
; VBITS_GE_512-NEXT: ret

; With only 256-bit registers the operation is legalised as two halves.
; VBITS_EQ_256-DAG: ptrue [[PRED:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[IDX:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PRED]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PRED]]/z, [x0, x[[IDX]], lsl #3]
; VBITS_EQ_256-DAG: frinta [[OUT_LO:z[0-9]+]].d, [[PRED]]/m, [[LO]].d
; VBITS_EQ_256-DAG: frinta [[OUT_HI:z[0-9]+]].d, [[PRED]]/m, [[HI]].d
; VBITS_EQ_256-DAG: st1d { [[OUT_LO]].d }, [[PRED]], [x0]
; VBITS_EQ_256-DAG: st1d { [[OUT_HI]].d }, [[PRED]], [x0, x[[IDX]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %in = load <8 x double>, <8 x double>* %a
  %rounded = call <8 x double> @llvm.round.v8f64(<8 x double> %in)
  store <8 x double> %rounded, <8 x double>* %a
  ret void
}

define void @frinta_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frinta_v16f64:
; VBITS_GE_1024: ptrue [[PRED:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[VEC:z[0-9]+]].d }, [[PRED]]/z, [x0]
; VBITS_GE_1024-NEXT: frinta [[OUT:z[0-9]+]].d, [[PRED]]/m, [[VEC]].d
; VBITS_GE_1024-NEXT: st1d { [[OUT]].d }, [[PRED]], [x0]
; VBITS_GE_1024-NEXT: ret
  %in = load <16 x double>, <16 x double>* %a
  %rounded = call <16 x double> @llvm.round.v16f64(<16 x double> %in)
  store <16 x double> %rounded, <16 x double>* %a
  ret void
}

define void @frinta_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frinta_v32f64:
; VBITS_GE_2048: ptrue [[PRED:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[VEC:z[0-9]+]].d }, [[PRED]]/z, [x0]
; VBITS_GE_2048-NEXT: frinta [[OUT:z[0-9]+]].d, [[PRED]]/m, [[VEC]].d
; VBITS_GE_2048-NEXT: st1d { [[OUT]].d }, [[PRED]], [x0]
; VBITS_GE_2048-NEXT: ret
  %in = load <32 x double>, <32 x double>* %a
  %rounded = call <32 x double> @llvm.round.v32f64(<32 x double> %in)
  store <32 x double> %rounded, <32 x double>* %a
  ret void
}

;
; ROUNDEVEN -> FRINTN
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintn_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintn_v4f16:
; CHECK: frintn v0.4h, v0.4h
; CHECK-NEXT: ret
  %res = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %op)
  ret <4 x half> %res
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @frintn_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintn_v8f16:
; CHECK: frintn v0.8h, v0.8h
; CHECK-NEXT: ret
  %res = call <8 x half> @llvm.roundeven.v8f16(<8 x half> %op)
  ret <8 x half> %res
}

; NOTE(review): the ld1h checks in the FRINTN section used -DAG while every
; other section in this file uses -NEXT; tightened to -NEXT for consistency.
define void @frintn_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintn_v16f16:
; CHECK: ptrue [[PG:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; CHECK-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <16 x half>, <16 x half>* %a
  %res = call <16 x half> @llvm.roundeven.v16f16(<16 x half> %op)
  store <16 x half> %res, <16 x half>* %a
  ret void
}

define void @frintn_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintn_v32f16:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_512-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[OP_LO:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[OP_HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].h, [[PG]]/m, [[OP_LO]].h
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].h, [[PG]]/m, [[OP_HI]].h
; VBITS_EQ_256-DAG: st1h { [[RES_LO]].h }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1h { [[RES_HI]].h }, [[PG]], [x0, x[[NUMELTS]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %op = load <32 x half>, <32 x half>* %a
  %res = call <32 x half> @llvm.roundeven.v32f16(<32 x half> %op)
  store <32 x half> %res, <32 x half>* %a
  ret void
}

define void @frintn_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintn_v64f16:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_1024-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <64 x half>, <64 x half>* %a
  %res = call <64 x half> @llvm.roundeven.v64f16(<64 x half> %op)
  store <64 x half> %res, <64 x half>* %a
  ret void
}

define void @frintn_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintn_v128f16:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[OP:z[0-9]+]].h }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].h, [[PG]]/m, [[OP]].h
; VBITS_GE_2048-NEXT: st1h { [[RES]].h }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <128 x half>, <128 x half>* %a
  %res = call <128 x half> @llvm.roundeven.v128f16(<128 x half> %op)
  store <128 x half> %res, <128 x half>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x float> @frintn_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintn_v2f32:
; CHECK: frintn v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintn_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintn_v4f32:
; CHECK: frintn v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

; NOTE(review): the ld1w checks here used -DAG while every other section in
; this file uses -NEXT; tightened to -NEXT for consistency.
define void @frintn_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintn_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.roundeven.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}

define void @frintn_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintn_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.roundeven.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}

define void @frintn_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintn_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.roundeven.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}

define void @frintn_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintn_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.roundeven.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x double> @frintn_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintn_v1f64:
; CHECK: frintn d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintn_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintn_v2f64:
; CHECK: frintn v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

; NOTE(review): the ld1d checks here used -DAG while every other section in
; this file uses -NEXT; tightened to -NEXT for consistency.
define void @frintn_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintn_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.roundeven.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}

define void @frintn_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintn_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintn [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintn [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}

define void @frintn_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintn_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.roundeven.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}

define void @frintn_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintn_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintn [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.roundeven.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}

;
; TRUNC -> FRINTZ
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @frintz_v4f16(<4 x half> %op) #0 {
; CHECK-LABEL: frintz_v4f16:
; CHECK: frintz v0.4h, v0.4h
; CHECK-NEXT: ret
  %truncated = call <4 x half> @llvm.trunc.v4f16(<4 x half> %op)
  ret <4 x half> %truncated
}

; 128-bit vectors likewise stay on NEON.
define <8 x half> @frintz_v8f16(<8 x half> %op) #0 {
; CHECK-LABEL: frintz_v8f16:
; CHECK: frintz v0.8h, v0.8h
; CHECK-NEXT: ret
  %truncated = call <8 x half> @llvm.trunc.v8f16(<8 x half> %op)
  ret <8 x half> %truncated
}

define void @frintz_v16f16(<16 x half>* %a) #0 {
; CHECK-LABEL: frintz_v16f16:
; CHECK: ptrue [[PRED:p[0-9]+]].h, vl16
; CHECK-NEXT: ld1h { [[VEC:z[0-9]+]].h }, [[PRED]]/z, [x0]
; CHECK-NEXT: frintz [[OUT:z[0-9]+]].h, [[PRED]]/m, [[VEC]].h
; CHECK-NEXT: st1h { [[OUT]].h }, [[PRED]], [x0]
; CHECK-NEXT: ret
  %in = load <16 x half>, <16 x half>* %a
  %truncated = call <16 x half> @llvm.trunc.v16f16(<16 x half> %in)
  store <16 x half> %truncated, <16 x half>* %a
  ret void
}

define void @frintz_v32f16(<32 x half>* %a) #0 {
; CHECK-LABEL: frintz_v32f16:
; VBITS_GE_512: ptrue [[PRED:p[0-9]+]].h, vl32
; VBITS_GE_512-NEXT: ld1h { [[VEC:z[0-9]+]].h }, [[PRED]]/z, [x0]
; VBITS_GE_512-NEXT: frintz [[OUT:z[0-9]+]].h, [[PRED]]/m, [[VEC]].h
; VBITS_GE_512-NEXT: st1h { [[OUT]].h }, [[PRED]], [x0]
; VBITS_GE_512-NEXT: ret

; With only 256-bit registers the operation is legalised as two halves.
; VBITS_EQ_256-DAG: ptrue [[PRED:p[0-9]+]].h, vl16
; VBITS_EQ_256-DAG: mov x[[IDX:[0-9]+]], #16
; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PRED]]/z, [x0]
; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PRED]]/z, [x0, x[[IDX]], lsl #1]
; VBITS_EQ_256-DAG: frintz [[OUT_LO:z[0-9]+]].h, [[PRED]]/m, [[LO]].h
; VBITS_EQ_256-DAG: frintz [[OUT_HI:z[0-9]+]].h, [[PRED]]/m, [[HI]].h
; VBITS_EQ_256-DAG: st1h { [[OUT_LO]].h }, [[PRED]], [x0]
; VBITS_EQ_256-DAG: st1h { [[OUT_HI]].h }, [[PRED]], [x0, x[[IDX]], lsl #1]
; VBITS_EQ_256-NEXT: ret
  %in = load <32 x half>, <32 x half>* %a
  %truncated = call <32 x half> @llvm.trunc.v32f16(<32 x half> %in)
  store <32 x half> %truncated, <32 x half>* %a
  ret void
}

define void @frintz_v64f16(<64 x half>* %a) #0 {
; CHECK-LABEL: frintz_v64f16:
; VBITS_GE_1024: ptrue [[PRED:p[0-9]+]].h, vl64
; VBITS_GE_1024-NEXT: ld1h { [[VEC:z[0-9]+]].h }, [[PRED]]/z, [x0]
; VBITS_GE_1024-NEXT: frintz [[OUT:z[0-9]+]].h, [[PRED]]/m, [[VEC]].h
; VBITS_GE_1024-NEXT: st1h { [[OUT]].h }, [[PRED]], [x0]
; VBITS_GE_1024-NEXT: ret
  %in = load <64 x half>, <64 x half>* %a
  %truncated = call <64 x half> @llvm.trunc.v64f16(<64 x half> %in)
  store <64 x half> %truncated, <64 x half>* %a
  ret void
}

define void @frintz_v128f16(<128 x half>* %a) #0 {
; CHECK-LABEL: frintz_v128f16:
; VBITS_GE_2048: ptrue [[PRED:p[0-9]+]].h, vl128
; VBITS_GE_2048-NEXT: ld1h { [[VEC:z[0-9]+]].h }, [[PRED]]/z, [x0]
; VBITS_GE_2048-NEXT: frintz [[OUT:z[0-9]+]].h, [[PRED]]/m, [[VEC]].h
; VBITS_GE_2048-NEXT: st1h { [[OUT]].h }, [[PRED]], [x0]
; VBITS_GE_2048-NEXT: ret
  %in = load <128 x half>, <128 x half>* %a
  %truncated = call <128 x half> @llvm.trunc.v128f16(<128 x half> %in)
  store <128 x half> %truncated, <128 x half>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
; TRUNC (round toward zero) -> FRINTZ, f32 element type.
define <2 x float> @frintz_v2f32(<2 x float> %op) #0 {
; CHECK-LABEL: frintz_v2f32:
; CHECK: frintz v0.2s, v0.2s
; CHECK-NEXT: ret
  %res = call <2 x float> @llvm.trunc.v2f32(<2 x float> %op)
  ret <2 x float> %res
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @frintz_v4f32(<4 x float> %op) #0 {
; CHECK-LABEL: frintz_v4f32:
; CHECK: frintz v0.4s, v0.4s
; CHECK-NEXT: ret
  %res = call <4 x float> @llvm.trunc.v4f32(<4 x float> %op)
  ret <4 x float> %res
}

define void @frintz_v8f32(<8 x float>* %a) #0 {
; CHECK-LABEL: frintz_v8f32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; CHECK-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; CHECK-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <8 x float>, <8 x float>* %a
  %res = call <8 x float> @llvm.trunc.v8f32(<8 x float> %op)
  store <8 x float> %res, <8 x float>* %a
  ret void
}

define void @frintz_v16f32(<16 x float>* %a) #0 {
; CHECK-LABEL: frintz_v16f32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_512-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; With 256-bit registers the operation is split in two; the high half is
; addressed at an element offset of 8 (x0 + 8 * 4 bytes).
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].s, vl8
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
; VBITS_EQ_256-DAG: ld1w { [[OP_LO:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1w { [[OP_HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].s, [[PG]]/m, [[OP_LO]].s
; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].s, [[PG]]/m, [[OP_HI]].s
; VBITS_EQ_256-DAG: st1w { [[RES_LO]].s }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1w { [[RES_HI]].s }, [[PG]], [x0, x[[NUMELTS]], lsl #2]
; VBITS_EQ_256-NEXT: ret
  %op = load <16 x float>, <16 x float>* %a
  %res = call <16 x float> @llvm.trunc.v16f32(<16 x float> %op)
  store <16 x float> %res, <16 x float>* %a
  ret void
}

define void @frintz_v32f32(<32 x float>* %a) #0 {
; CHECK-LABEL: frintz_v32f32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_1024-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <32 x float>, <32 x float>* %a
  %res = call <32 x float> @llvm.trunc.v32f32(<32 x float> %op)
  store <32 x float> %res, <32 x float>* %a
  ret void
}

define void @frintz_v64f32(<64 x float>* %a) #0 {
; CHECK-LABEL: frintz_v64f32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048-NEXT: ld1w { [[OP:z[0-9]+]].s }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].s, [[PG]]/m, [[OP]].s
; VBITS_GE_2048-NEXT: st1w { [[RES]].s }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <64 x float>, <64 x float>* %a
  %res = call <64 x float> @llvm.trunc.v64f32(<64 x float> %op)
  store <64 x float> %res, <64 x float>* %a
  ret void
}

; Don't use SVE for 64-bit vectors.
; TRUNC (round toward zero) -> FRINTZ, f64 element type.
; Note the <1 x double> case lowers to the scalar form (frintz d0, d0).
define <1 x double> @frintz_v1f64(<1 x double> %op) #0 {
; CHECK-LABEL: frintz_v1f64:
; CHECK: frintz d0, d0
; CHECK-NEXT: ret
  %res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
  ret <1 x double> %res
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @frintz_v2f64(<2 x double> %op) #0 {
; CHECK-LABEL: frintz_v2f64:
; CHECK: frintz v0.2d, v0.2d
; CHECK-NEXT: ret
  %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %op)
  ret <2 x double> %res
}

define void @frintz_v4f64(<4 x double>* %a) #0 {
; CHECK-LABEL: frintz_v4f64:
; CHECK: ptrue [[PG:p[0-9]+]].d, vl4
; CHECK-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; CHECK-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; CHECK-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; CHECK-NEXT: ret
  %op = load <4 x double>, <4 x double>* %a
  %res = call <4 x double> @llvm.trunc.v4f64(<4 x double> %op)
  store <4 x double> %res, <4 x double>* %a
  ret void
}

define void @frintz_v8f64(<8 x double>* %a) #0 {
; CHECK-LABEL: frintz_v8f64:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].d, vl8
; VBITS_GE_512-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_512-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_512-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_512-NEXT: ret

; Ensure sensible type legalisation.
; With 256-bit registers the operation is split in two; the high half is
; addressed at an element offset of 4 (x0 + 4 * 8 bytes).
; VBITS_EQ_256-DAG: ptrue [[PG:p[0-9]+]].d, vl4
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
; VBITS_EQ_256-DAG: ld1d { [[OP_LO:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_EQ_256-DAG: ld1d { [[OP_HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-DAG: frintz [[RES_LO:z[0-9]+]].d, [[PG]]/m, [[OP_LO]].d
; VBITS_EQ_256-DAG: frintz [[RES_HI:z[0-9]+]].d, [[PG]]/m, [[OP_HI]].d
; VBITS_EQ_256-DAG: st1d { [[RES_LO]].d }, [[PG]], [x0]
; VBITS_EQ_256-DAG: st1d { [[RES_HI]].d }, [[PG]], [x0, x[[NUMELTS]], lsl #3]
; VBITS_EQ_256-NEXT: ret
  %op = load <8 x double>, <8 x double>* %a
  %res = call <8 x double> @llvm.trunc.v8f64(<8 x double> %op)
  store <8 x double> %res, <8 x double>* %a
  ret void
}

define void @frintz_v16f64(<16 x double>* %a) #0 {
; CHECK-LABEL: frintz_v16f64:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].d, vl16
; VBITS_GE_1024-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_1024-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_1024-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_1024-NEXT: ret
  %op = load <16 x double>, <16 x double>* %a
  %res = call <16 x double> @llvm.trunc.v16f64(<16 x double> %op)
  store <16 x double> %res, <16 x double>* %a
  ret void
}

define void @frintz_v32f64(<32 x double>* %a) #0 {
; CHECK-LABEL: frintz_v32f64:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].d, vl32
; VBITS_GE_2048-NEXT: ld1d { [[OP:z[0-9]+]].d }, [[PG]]/z, [x0]
; VBITS_GE_2048-NEXT: frintz [[RES:z[0-9]+]].d, [[PG]]/m, [[OP]].d
; VBITS_GE_2048-NEXT: st1d { [[RES]].d }, [[PG]], [x0]
; VBITS_GE_2048-NEXT: ret
  %op = load <32 x double>, <32 x double>* %a
  %res = call <32 x double> @llvm.trunc.v32f64(<32 x double> %op)
  store <32 x double> %res, <32 x double>* %a
  ret void
}

; All test functions require SVE ("+sve") via this attribute group.
attributes #0 = { "target-features"="+sve" }

declare <4 x half> @llvm.ceil.v4f16(<4 x half>)
; Declarations of the rounding intrinsics exercised by the tests above,
; grouped per rounding mode (ceil, floor, nearbyint, rint, round,
; roundeven, trunc), one declaration per tested vector type.
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
declare <16 x half> @llvm.ceil.v16f16(<16 x half>)
declare <32 x half> @llvm.ceil.v32f16(<32 x half>)
declare <64 x half> @llvm.ceil.v64f16(<64 x half>)
declare <128 x half> @llvm.ceil.v128f16(<128 x half>)
declare <2 x float> @llvm.ceil.v2f32(<2 x float>)
declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
declare <32 x float> @llvm.ceil.v32f32(<32 x float>)
declare <64 x float> @llvm.ceil.v64f32(<64 x float>)
declare <1 x double> @llvm.ceil.v1f64(<1 x double>)
declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
declare <16 x double> @llvm.ceil.v16f64(<16 x double>)
declare <32 x double> @llvm.ceil.v32f64(<32 x double>)

declare <4 x half> @llvm.floor.v4f16(<4 x half>)
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
declare <16 x half> @llvm.floor.v16f16(<16 x half>)
declare <32 x half> @llvm.floor.v32f16(<32 x half>)
declare <64 x half> @llvm.floor.v64f16(<64 x half>)
declare <128 x half> @llvm.floor.v128f16(<128 x half>)
declare <2 x float> @llvm.floor.v2f32(<2 x float>)
declare <4 x float> @llvm.floor.v4f32(<4 x float>)
declare <8 x float> @llvm.floor.v8f32(<8 x float>)
declare <16 x float> @llvm.floor.v16f32(<16 x float>)
declare <32 x float> @llvm.floor.v32f32(<32 x float>)
declare <64 x float> @llvm.floor.v64f32(<64 x float>)
declare <1 x double> @llvm.floor.v1f64(<1 x double>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
declare <4 x double> @llvm.floor.v4f64(<4 x double>)
declare <8 x double> @llvm.floor.v8f64(<8 x double>)
declare <16 x double> @llvm.floor.v16f64(<16 x double>)
declare <32 x double> @llvm.floor.v32f64(<32 x double>)

declare <4 x half> @llvm.nearbyint.v4f16(<4 x half>)
declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>)
declare <16 x half> @llvm.nearbyint.v16f16(<16 x half>)
declare <32 x half> @llvm.nearbyint.v32f16(<32 x half>)
declare <64 x half> @llvm.nearbyint.v64f16(<64 x half>)
declare <128 x half> @llvm.nearbyint.v128f16(<128 x half>)
declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>)
declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
declare <32 x float> @llvm.nearbyint.v32f32(<32 x float>)
declare <64 x float> @llvm.nearbyint.v64f32(<64 x float>)
declare <1 x double> @llvm.nearbyint.v1f64(<1 x double>)
declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
declare <16 x double> @llvm.nearbyint.v16f64(<16 x double>)
declare <32 x double> @llvm.nearbyint.v32f64(<32 x double>)

declare <4 x half> @llvm.rint.v4f16(<4 x half>)
declare <8 x half> @llvm.rint.v8f16(<8 x half>)
declare <16 x half> @llvm.rint.v16f16(<16 x half>)
declare <32 x half> @llvm.rint.v32f16(<32 x half>)
declare <64 x half> @llvm.rint.v64f16(<64 x half>)
declare <128 x half> @llvm.rint.v128f16(<128 x half>)
declare <2 x float> @llvm.rint.v2f32(<2 x float>)
declare <4 x float> @llvm.rint.v4f32(<4 x float>)
declare <8 x float> @llvm.rint.v8f32(<8 x float>)
declare <16 x float> @llvm.rint.v16f32(<16 x float>)
declare <32 x float> @llvm.rint.v32f32(<32 x float>)
declare <64 x float> @llvm.rint.v64f32(<64 x float>)
declare <1 x double> @llvm.rint.v1f64(<1 x double>)
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
declare <4 x double> @llvm.rint.v4f64(<4 x double>)
declare <8 x double> @llvm.rint.v8f64(<8 x double>)
declare <16 x double> @llvm.rint.v16f64(<16 x double>)
declare <32 x double> @llvm.rint.v32f64(<32 x double>)

declare <4 x half> @llvm.round.v4f16(<4 x half>)
declare <8 x half> @llvm.round.v8f16(<8 x half>)
declare <16 x half> @llvm.round.v16f16(<16 x half>)
declare <32 x half> @llvm.round.v32f16(<32 x half>)
declare <64 x half> @llvm.round.v64f16(<64 x half>)
declare <128 x half> @llvm.round.v128f16(<128 x half>)
declare <2 x float> @llvm.round.v2f32(<2 x float>)
declare <4 x float> @llvm.round.v4f32(<4 x float>)
declare <8 x float> @llvm.round.v8f32(<8 x float>)
declare <16 x float> @llvm.round.v16f32(<16 x float>)
declare <32 x float> @llvm.round.v32f32(<32 x float>)
declare <64 x float> @llvm.round.v64f32(<64 x float>)
declare <1 x double> @llvm.round.v1f64(<1 x double>)
declare <2 x double> @llvm.round.v2f64(<2 x double>)
declare <4 x double> @llvm.round.v4f64(<4 x double>)
declare <8 x double> @llvm.round.v8f64(<8 x double>)
declare <16 x double> @llvm.round.v16f64(<16 x double>)
declare <32 x double> @llvm.round.v32f64(<32 x double>)

declare <4 x half> @llvm.roundeven.v4f16(<4 x half>)
declare <8 x half> @llvm.roundeven.v8f16(<8 x half>)
declare <16 x half> @llvm.roundeven.v16f16(<16 x half>)
declare <32 x half> @llvm.roundeven.v32f16(<32 x half>)
declare <64 x half> @llvm.roundeven.v64f16(<64 x half>)
declare <128 x half> @llvm.roundeven.v128f16(<128 x half>)
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>)
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
declare <32 x float> @llvm.roundeven.v32f32(<32 x float>)
declare <64 x float> @llvm.roundeven.v64f32(<64 x float>)
declare <1 x double> @llvm.roundeven.v1f64(<1 x double>)
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
declare <16 x double> @llvm.roundeven.v16f64(<16 x double>)
declare <32 x double> @llvm.roundeven.v32f64(<32 x double>)

declare <4 x half> @llvm.trunc.v4f16(<4 x half>)
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
declare <16 x half> @llvm.trunc.v16f16(<16 x half>)
declare <32 x half> @llvm.trunc.v32f16(<32 x half>)
declare <64 x half> @llvm.trunc.v64f16(<64 x half>)
declare <128 x half> @llvm.trunc.v128f16(<128 x half>)
declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
declare <32 x float> @llvm.trunc.v32f32(<32 x float>)
declare <64 x float> @llvm.trunc.v64f32(<64 x float>)
declare <1 x double> @llvm.trunc.v1f64(<1 x double>)
declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
declare <8 x double> @llvm.trunc.v8f64(<8 x double>)
declare <16 x double> @llvm.trunc.v16f64(<16 x double>)
declare <32 x double> @llvm.trunc.v32f64(<32 x double>)