//===--- arm_cde.td - ACLE intrinsic functions for CDE --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the set of ACLE-specified source-level intrinsic
// functions wrapping the CDE instructions.
//
//===----------------------------------------------------------------------===//

include "arm_mve_defs.td"

// arm_mve_defs.td does not provide f64, because MVE instructions only work
// with f16 and f32, so the 64-bit floating-point primitive is declared here.
def f64 : PrimitiveType<"f", 64>;

// Float<t> expects t to be a scalar type, and expands to the floating-point
// type of the same width.
class Float<Type t> : ComplexType<(CTO_CopyKind t, f32)>;
// FScalar: Float<> applied to the intrinsic's Scalar type parameter.
def FScalar : Float<Scalar>;

// Base class of every ACLE CDE intrinsic: an Intrinsic whose builtin
// extension is "cde".
class CDEIntrinsic<Type ret, dag args, dag codegen>
    : Intrinsic<ret, args, codegen> {
  let builtinExtension = "cde";
}

// Range bound for an immediate that must lie in [0, 2^numBits - 1].
class IB_ConstBits<int numBits>
    : IB_ConstRange<0, !add(!shl(1, numBits), -1)>;

// A u32 immediate that is numBits wide.
class CDEImmediateBits<int numBits>
    : Immediate<u32, IB_ConstBits<numBits>>;

// Reference to an LLVM IR CDE intrinsic; "arm_cde_" is prepended to the name.
class CDEIRInt<string name, list<Type> params = [], bit appendKind = 0>
    : IRIntBase<"arm_cde_" # name, params, appendKind>;

// Describes a function-like macro to be generated in arm_cde.h:
//   "#define <name>(<params>) <definition>"
class FunctionMacro<list<string> params_, string definition_> {
  list<string> params = params_;
  string definition = definition_;
}

// Coprocessor number: a signed-int immediate in the range [0, 7].
def imm_coproc : Immediate<sint, IB_ConstRange<0, 7>>;

// Unsigned immediate operands of the various widths used below.
def imm_3b  : CDEImmediateBits<3>;
def imm_4b  : CDEImmediateBits<4>;
def imm_6b  : CDEImmediateBits<6>;
def imm_7b  : CDEImmediateBits<7>;
def imm_9b  : CDEImmediateBits<9>;
def imm_11b : CDEImmediateBits<11>;
def imm_12b : CDEImmediateBits<12>;
def imm_13b : CDEImmediateBits<13>;

//===----------------------------------------------------------------------===//
// CX* instructions operating on GPRs
//===----------------------------------------------------------------------===//

// Emits four intrinsics per instruction NAME:
//   NAME    - u32 result
//   NAMEa   - u32 result, with a u32 accumulator argument
//   NAMEd   - u64 result
//   NAMEda  - u64 result, with a u64 accumulator argument
// argsImm/argsReg are the immediate and register arguments of the intrinsic;
// cgArgs are the register operands forwarded to the IR intrinsic.
multiclass CDE_CX_m<dag argsImm, dag argsReg, dag cgArgs> {
  // Every variant takes the coprocessor number first.
  defvar coprocArg = (args imm_coproc:$cp);

  // The dual-register IR intrinsics yield a pair ($pair); rebuild the u64
  // result with element 1 in the upper 32 bits and element 0 in the lower.
  defvar pairAsU64 = (or (shl (u64 (xval $pair, 1)), (u64 32)),
                         (u64 (xval $pair, 0)));

  let pnt = PNT_None, params = T.None in {
    def "" :
      CDEIntrinsic<u32,
                   !con(coprocArg, argsReg, argsImm),
                   !con((CDEIRInt<NAME> $cp), cgArgs, (? $imm))>;

    def a :
      CDEIntrinsic<u32,
                   !con(coprocArg, (args u32:$acc), argsReg, argsImm),
                   !con((CDEIRInt<NAME # "a"> $cp, $acc), cgArgs, (? $imm))>;

    def d :
      CDEIntrinsic<u64,
                   !con(coprocArg, argsReg, argsImm),
                   (seq !con((CDEIRInt<NAME # "d"> $cp), cgArgs,
                             (? $imm)):$pair,
                        pairAsU64)>;

    // The u64 accumulator is split into its high and low 32-bit halves
    // before being handed to the IR intrinsic (low half first).
    def da :
      CDEIntrinsic<u64,
                   !con(coprocArg, (args u64:$acc), argsReg, argsImm),
                   (seq (u32 (lshr $acc, (u64 32))):$acc_hi,
                        (u32 $acc):$acc_lo,
                        !con((CDEIRInt<NAME # "da"> $cp, $acc_lo, $acc_hi),
                             cgArgs, (? $imm)):$pair,
                        pairAsU64)>;
  }
}

defm cx1 : CDE_CX_m<(args imm_13b:$imm), (args), (?)>;
defm cx2 : CDE_CX_m<(args imm_9b:$imm), (args u32:$n), (? $n)>;
defm cx3 : CDE_CX_m<(args imm_6b:$imm), (args u32:$n, u32:$m), (? $n, $m)>;

//===----------------------------------------------------------------------===//
// VCX* instructions operating on VFP registers
//===----------------------------------------------------------------------===//

// Emits NAME / NAMEa (u32 in and out, IR intrinsic overloaded on f32) and
// NAMEd / NAMEda (u64 in and out, IR intrinsic overloaded on f64). The
// integer values are bitcast to FScalar on the way in and back to Scalar on
// the way out.
multiclass CDE_VCXFP_m<dag argsImm, dag argsReg32, dag argsReg64, dag cgArgs> {
  defvar coprocArg = (args imm_coproc:$cp);

  let pnt = PNT_None, params = [u32] in {
    def "" :
      CDEIntrinsic<u32,
                   !con(coprocArg, argsReg32, argsImm),
                   (bitcast !con((CDEIRInt<NAME, [f32]> $cp), cgArgs,
                                 (? $imm)),
                            Scalar)>;

    def a :
      CDEIntrinsic<u32,
                   !con(coprocArg, (args u32:$acc), argsReg32, argsImm),
                   (bitcast !con((CDEIRInt<NAME # "a", [f32]> $cp,
                                  (bitcast $acc, FScalar)),
                                 cgArgs, (? $imm)),
                            Scalar)>;
  }

  let pnt = PNT_None, params = [u64] in {
    def d :
      CDEIntrinsic<u64,
                   !con(coprocArg, argsReg64, argsImm),
                   (bitcast !con((CDEIRInt<NAME, [f64]> $cp), cgArgs,
                                 (? $imm)),
                            Scalar)>;

    def da :
      CDEIntrinsic<u64,
                   !con(coprocArg, (args u64:$acc), argsReg64, argsImm),
                   (bitcast !con((CDEIRInt<NAME # "a", [f64]> $cp,
                                  (bitcast $acc, FScalar)),
                                 cgArgs, (? $imm)),
                            Scalar)>;
  }
}

defm vcx1 : CDE_VCXFP_m<(args imm_11b:$imm), (args), (args), (?)>;
defm vcx2 : CDE_VCXFP_m<(args imm_6b:$imm),
                        (args u32:$n), (args u64:$n),
                        (? (bitcast $n, FScalar))>;
defm vcx3 : CDE_VCXFP_m<(args imm_3b:$imm),
                        (args u32:$n, u32:$m), (args u64:$n, u64:$m),
                        (? (bitcast $n, FScalar), (bitcast $m, FScalar))>;

//===----------------------------------------------------------------------===//
// VCX* instructions operating on Q vector registers
//===----------------------------------------------------------------------===//

// Vector of u8; the type the Q-register IR intrinsics below are called with.
def v16u8 : VecOf<u8>;

// vcx1q has no vector operand; it is emitted for the u8 parameter only.
def vcx1q :
  CDEIntrinsic<Vector, (args imm_coproc:$cp, imm_12b:$imm),
               (CDEIRInt<"vcx1q"> $cp, $imm)> {
  let pnt = PNT_None;
  let params = [u8];
}

// Polymorphic-only intrinsics over all vector types. Vector operands are
// bitcast to v16u8 for the IR intrinsic and the result is bitcast back to the
// intrinsic's own Vector type (the *_u8 forms return v16u8 directly).
let pnt = PNT_Type, params = T.All, polymorphicOnly = 1 in {
  def vcx1qa :
    CDEIntrinsic<Vector,
                 (args imm_coproc:$cp, Vector:$acc, imm_12b:$imm),
                 (bitcast (CDEIRInt<"vcx1qa"> $cp, (bitcast $acc, v16u8),
                                              $imm),
                          Vector)>;

  def vcx2q :
    CDEIntrinsic<Vector,
                 (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
                 (bitcast (CDEIRInt<"vcx2q"> $cp, (bitcast $n, VecOf<u8>),
                                             $imm),
                          Vector)>;

  def vcx2q_u8 :
    CDEIntrinsic<v16u8,
                 (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
                 (CDEIRInt<"vcx2q"> $cp, (bitcast $n, VecOf<u8>), $imm)>;

  // The *_impl intrinsics are polymorphic in their first vector argument
  // only; the remaining vector arguments are already v16u8. The function
  // macros further down convert those arguments with __arm_vreinterpretq_u8.
  def vcx2qa_impl :
    CDEIntrinsic<Vector,
                 (args imm_coproc:$cp, Vector:$acc, v16u8:$n, imm_7b:$imm),
                 (bitcast (CDEIRInt<"vcx2qa"> $cp, (bitcast $acc, v16u8), $n,
                                              $imm),
                          Vector)>;

  def vcx3q_impl :
    CDEIntrinsic<Vector,
                 (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
                 (bitcast (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m,
                                             $imm),
                          Vector)>;

  def vcx3q_u8_impl :
    CDEIntrinsic<v16u8,
                 (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
                 (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m, $imm)>;

  def vcx3qa_impl :
    CDEIntrinsic<Vector,
                 (args imm_coproc:$cp, Vector:$acc, v16u8:$n, v16u8:$m,
                       imm_4b:$imm),
                 (bitcast (CDEIRInt<"vcx3qa"> $cp, (bitcast $acc, v16u8), $n,
                                              $m, $imm),
                          Vector)>;
}

// Reinterpret intrinsics required to implement __arm_vcx*q with 2 or 3
// polymorphic parameters.
def vreinterpretq_u8 :
  Intrinsic<v16u8, (args Vector:$x), (vreinterpret $x, v16u8)> {
  let params = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64, f16, f32];
  let headerOnly = 1;
  let polymorphicOnly = 1;
}

// A u8 -> u8 vreinterpretq_u8 (an identity) is also provided, so the macros
// can apply __arm_vreinterpretq_u8 unconditionally instead of doing smart
// tricks.
def vreinterpretq_u8_cde :
  CDEIntrinsic<v16u8, (args Vector:$x), (id $x)>,
  NameOverride<"vreinterpretq_u8"> {
  let params = [u8];
  let polymorphicOnly = 1;
}

// User-facing macros wrapping the *_impl intrinsics: each converts the
// trailing vector operands with __arm_vreinterpretq_u8.
def vcx2qa : FunctionMacro<
  ["cp", "acc", "n", "imm"],
  "__arm_vcx2qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm))">;

def vcx3q : FunctionMacro<
  ["cp", "n", "m", "imm"],
  "__arm_vcx3q_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;

def vcx3q_u8 : FunctionMacro<
  ["cp", "n", "m", "imm"],
  "__arm_vcx3q_u8_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;

def vcx3qa : FunctionMacro<
  ["cp", "acc", "n", "m", "imm"],
  "__arm_vcx3qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), "
  "__arm_vreinterpretq_u8(m), (imm))">;

// Predicated Q-register intrinsic. The arguments are the coprocessor number,
// an inactive-or-accumulator vector, the register operands, the immediate and
// finally the predicate. Codegen calls the "<irname>_predicated" IR intrinsic,
// parameterised on [Vector, Predicate].
class CDEIntrinsicMasked<string irname, dag argsReg, dag imm, dag cgArgs>
    : CDEIntrinsic<Vector,
                   !con((args imm_coproc:$cp, Vector:$inactive_or_acc),
                        argsReg, imm, (args Predicate:$pred)),
                   !con((CDEIRInt<irname # "_predicated", [Vector, Predicate]>
                         $cp, $inactive_or_acc),
                        cgArgs, (? $imm, $pred))> {
  let params = T.All;
  let polymorphicOnly = 1;
}

def vcx1q_m  : CDEIntrinsicMasked<"vcx1q",  (args), (args imm_12b:$imm), (?)>;
def vcx1qa_m : CDEIntrinsicMasked<"vcx1qa", (args), (args imm_12b:$imm), (?)>;

// For instruction NAME, emits the predicated implementation intrinsics
// NAME_m_impl and NAMEa_m_impl together with the NAME_m and NAMEa_m function
// macros that call them. `macro` is the text inserted into the macro body for
// the register operands named in macroArgs.
multiclass VCXPredicated<dag argsReg, dag imm, dag cgArgs,
                         list<string> macroArgs, string macro> {
  def _m_impl  : CDEIntrinsicMasked<NAME, argsReg, imm, cgArgs>;
  def a_m_impl : CDEIntrinsicMasked<NAME # "a", argsReg, imm, cgArgs>;

  def _m : FunctionMacro<
    !listconcat(["cp", "inactive"], macroArgs, ["imm", "pred"]),
    "__arm_" # NAME # "_m_impl((cp), (inactive), " # macro # " (imm), (pred))">;

  def a_m : FunctionMacro<
    !listconcat(["cp", "acc"], macroArgs, ["imm", "pred"]),
    "__arm_" # NAME # "a_m_impl((cp), (acc), " # macro # " (imm), (pred))">;
}

defm vcx2q :
  VCXPredicated<(args v16u8:$n), (args imm_7b:$imm), (? $n),
                ["n"],
                "__arm_vreinterpretq_u8(n),">;

defm vcx3q :
  VCXPredicated<(args v16u8:$n, v16u8:$m), (args imm_4b:$imm), (? $n, $m),
                ["n", "m"],
                "__arm_vreinterpretq_u8(n), "
                "__arm_vreinterpretq_u8(m),">;

// vreinterpretq intrinsics required by the ACLE CDE specification: one
// header-only conversion from a u8 vector to each of the listed element types.
foreach desttype = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64, f16, f32] in {
  let params = [u8], headerOnly = 1, pnt = PNT_None in
  def "vreinterpretq_" # desttype :
    Intrinsic<VecOf<desttype>, (args Vector:$x),
              (vreinterpret $x, VecOf<desttype>)>;
}