1*13fbcb42Sjoerg//===--- arm_cde.td - ACLE intrinsic functions for CDE --------------------===//
2*13fbcb42Sjoerg//
3*13fbcb42Sjoerg// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*13fbcb42Sjoerg// See https://llvm.org/LICENSE.txt for license information.
5*13fbcb42Sjoerg// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*13fbcb42Sjoerg//
7*13fbcb42Sjoerg//===----------------------------------------------------------------------===//
8*13fbcb42Sjoerg//
9*13fbcb42Sjoerg// This file defines the set of ACLE-specified source-level intrinsic
10*13fbcb42Sjoerg// functions wrapping the CDE instructions.
11*13fbcb42Sjoerg//
12*13fbcb42Sjoerg//===----------------------------------------------------------------------===//
13*13fbcb42Sjoerg
14*13fbcb42Sjoerginclude "arm_mve_defs.td"
15*13fbcb42Sjoerg
16*13fbcb42Sjoerg// f64 is not defined in arm_mve_defs.td because MVE instructions only work with
17*13fbcb42Sjoerg// f16 and f32
// In this file f64 appears only as the overload type of the 64-bit VCX*
// floating-point IR intrinsics (the `d` and `da` records of CDE_VCXFP_m).
18*13fbcb42Sjoergdef f64: PrimitiveType<"f", 64>;
19*13fbcb42Sjoerg
20*13fbcb42Sjoerg// Float<t> expects t to be a scalar type, and expands to the floating-point
21*13fbcb42Sjoerg// type of the same width.
// It is built as a ComplexType over (CTO_CopyKind t, f32).
22*13fbcb42Sjoergclass Float<Type t>: ComplexType<(CTO_CopyKind t, f32)>;
// FScalar is Float applied to the intrinsic's Scalar type. It is used as the
// bitcast target for integer operands/accumulators of the VCX* FP intrinsics.
23*13fbcb42Sjoergdef FScalar: Float<Scalar>;
24*13fbcb42Sjoerg
25*13fbcb42Sjoerg// ACLE CDE intrinsic
// Forwards ret, args and codegen unchanged to Intrinsic and sets
// builtinExtension to "cde" on every record derived from it.
26*13fbcb42Sjoergclass CDEIntrinsic<Type ret, dag args, dag codegen>
27*13fbcb42Sjoerg  : Intrinsic<ret, args, codegen> {
28*13fbcb42Sjoerg  let builtinExtension = "cde";
29*13fbcb42Sjoerg}
30*13fbcb42Sjoerg
31*13fbcb42Sjoerg// Immediate (in range [0, 2^numBits - 1])
// The upper bound is computed as (1 << numBits) + (-1).
32*13fbcb42Sjoergclass IB_ConstBits<int numBits> : IB_ConstRange<0, !add(!shl(1, numBits), -1)>;
33*13fbcb42Sjoerg// numBits-wide immediate of type u32
34*13fbcb42Sjoergclass CDEImmediateBits<int numBits> : Immediate<u32, IB_ConstBits<numBits>>;
35*13fbcb42Sjoerg
36*13fbcb42Sjoerg// LLVM IR CDE intrinsic
// Prepends "arm_cde_" to `name` and forwards `params` (default: empty list)
// and `appendKind` (default: 0) to IRIntBase.
37*13fbcb42Sjoergclass CDEIRInt<string name, list<Type> params = [], bit appendKind = 0>
38*13fbcb42Sjoerg      : IRIntBase<"arm_cde_" # name, params, appendKind>;
39*13fbcb42Sjoerg
40*13fbcb42Sjoerg// Class for generating function macros in arm_cde.h:
41*13fbcb42Sjoerg// "#define <name>(<params>) <definition>"
//   params_     - the macro's parameter names, in order.
//   definition_ - the replacement text of the macro.
// Both are stored unchanged in the `params` and `definition` fields.
42*13fbcb42Sjoergclass FunctionMacro<list<string> params_, string definition_> {
43*13fbcb42Sjoerg  list<string> params = params_;
44*13fbcb42Sjoerg  string definition = definition_;
45*13fbcb42Sjoerg}
46*13fbcb42Sjoerg
47*13fbcb42Sjoerg// Coprocessor immediate
// Declared with type sint and constrained by IB_ConstRange<0, 7>; it is the
// first argument ($cp) of every CDE intrinsic defined in this file.
48*13fbcb42Sjoergdef imm_coproc : Immediate<sint, IB_ConstRange<0, 7>>;
49*13fbcb42Sjoerg
50*13fbcb42Sjoerg// Immediate integer parameters
// imm_<N>b is a CDEImmediateBits<N>, i.e. a u32 immediate limited to N bits.
// Which width an intrinsic uses is chosen where the intrinsic is defined.
51*13fbcb42Sjoergdef imm_3b : CDEImmediateBits<3>;
52*13fbcb42Sjoergdef imm_4b : CDEImmediateBits<4>;
53*13fbcb42Sjoergdef imm_6b :  CDEImmediateBits<6>;
54*13fbcb42Sjoergdef imm_7b :  CDEImmediateBits<7>;
55*13fbcb42Sjoergdef imm_9b :  CDEImmediateBits<9>;
56*13fbcb42Sjoergdef imm_11b : CDEImmediateBits<11>;
57*13fbcb42Sjoergdef imm_12b : CDEImmediateBits<12>;
58*13fbcb42Sjoergdef imm_13b : CDEImmediateBits<13>;
59*13fbcb42Sjoerg
60*13fbcb42Sjoerg// CX* instructions operating on GPRs
//
// Parameters:
//   argsImm - args dag declaring the immediate operand; it must bind the name
//             $imm because every codegen dag below refers to $imm.
//   argsReg - args dag declaring the register operands (may be empty).
//   cgArgs  - codegen dag fragment supplying those register operands to the
//             IR intrinsic.
//
// For `defm X` this produces four intrinsics: X and Xa returning u32, and Xd
// and Xda returning u64. All take the coprocessor immediate $cp first and all
// are defined with pnt = PNT_None and params = T.None.
61*13fbcb42Sjoergmulticlass CDE_CX_m<dag argsImm, dag argsReg, dag cgArgs> {
62*13fbcb42Sjoerg  defvar cp = (args imm_coproc:$cp);
63*13fbcb42Sjoerg  let pnt = PNT_None, params = T.None in {
    // Plain form: IR intrinsic arm_cde_<NAME>($cp, <cgArgs>, $imm).
64*13fbcb42Sjoerg    def "" : CDEIntrinsic<u32, !con(cp, argsReg, argsImm),
65*13fbcb42Sjoerg                               !con((CDEIRInt<NAME> $cp), cgArgs, (? $imm))>;
    // Accumulating form: adds a u32 $acc argument right after $cp.
66*13fbcb42Sjoerg    def a  : CDEIntrinsic<u32, !con(cp, (args u32:$acc), argsReg, argsImm),
67*13fbcb42Sjoerg                               !con((CDEIRInt<NAME # "a"> $cp, $acc),
68*13fbcb42Sjoerg                                    cgArgs, (? $imm))>;
69*13fbcb42Sjoerg
    // 64-bit form: the IR intrinsic result is bound to $pair; the u64 return
    // value is (u64(value 1 of $pair) << 32) | u64(value 0 of $pair).
70*13fbcb42Sjoerg    def d :
71*13fbcb42Sjoerg      CDEIntrinsic<u64, !con(cp, argsReg, argsImm),
72*13fbcb42Sjoerg            (seq !con((CDEIRInt<NAME # "d"> $cp), cgArgs, (? $imm)):$pair,
73*13fbcb42Sjoerg                 (or (shl (u64 (xval $pair, 1)), (u64 32)),
74*13fbcb42Sjoerg                          (u64 (xval $pair, 0))))>;
    // 64-bit accumulating form: the u64 $acc is split into $acc_hi
    // (u32 of $acc >> 32) and $acc_lo (u32 of $acc). They are passed to the
    // IR intrinsic in the order lo, hi, and the result pair is recombined
    // exactly as in `d`.
75*13fbcb42Sjoerg    def da :
76*13fbcb42Sjoerg      CDEIntrinsic<u64, !con(cp, (args u64:$acc), argsReg, argsImm),
77*13fbcb42Sjoerg            (seq (u32 (lshr $acc, (u64 32))):$acc_hi,
78*13fbcb42Sjoerg                 (u32 $acc):$acc_lo,
79*13fbcb42Sjoerg                 !con((CDEIRInt<NAME # "da"> $cp, $acc_lo, $acc_hi), cgArgs,
80*13fbcb42Sjoerg                       (? $imm)):$pair,
81*13fbcb42Sjoerg                 (or (shl (u64 (xval $pair, 1)), (u64 32)),
82*13fbcb42Sjoerg                          (u64 (xval $pair, 0))))>;
83*13fbcb42Sjoerg  }
84*13fbcb42Sjoerg}
85*13fbcb42Sjoerg
// cx1: 13-bit immediate, no register operands.
// cx2:  9-bit immediate, one u32 register operand ($n).
// cx3:  6-bit immediate, two u32 register operands ($n, $m).
86*13fbcb42Sjoergdefm cx1 : CDE_CX_m<(args imm_13b:$imm), (args), (?)>;
87*13fbcb42Sjoergdefm cx2 : CDE_CX_m<(args imm_9b:$imm), (args u32:$n), (? $n)>;
88*13fbcb42Sjoergdefm cx3 : CDE_CX_m<(args imm_6b:$imm), (args u32:$n, u32:$m), (? $n, $m)>;
89*13fbcb42Sjoerg
90*13fbcb42Sjoerg// VCX* instructions operating on VFP registers
//
// Parameters:
//   argsImm   - args dag declaring the immediate operand (must bind $imm).
//   argsReg32 - args dag with the register operands of the 32-bit forms.
//   argsReg64 - args dag with the register operands of the 64-bit forms.
//   cgArgs    - codegen dag fragment for the register operands; the same
//               fragment is shared by the 32-bit and 64-bit forms.
//
// For `defm X` this produces X and Xa (u32, params = [u32]) and Xd and Xda
// (u64, params = [u64]). The source-level operands are integers; the IR
// intrinsic result is bitcast back to Scalar, and accumulators are bitcast to
// FScalar before being passed in.
91*13fbcb42Sjoergmulticlass CDE_VCXFP_m<dag argsImm, dag argsReg32, dag argsReg64, dag cgArgs> {
92*13fbcb42Sjoerg  defvar cp = (args imm_coproc:$cp);
93*13fbcb42Sjoerg  let pnt = PNT_None, params = [u32] in {
94*13fbcb42Sjoerg    def "" : CDEIntrinsic<u32, !con(cp, argsReg32, argsImm),
95*13fbcb42Sjoerg          (bitcast !con((CDEIRInt<NAME, [f32]> $cp), cgArgs, (? $imm)),
96*13fbcb42Sjoerg                   Scalar)>;
97*13fbcb42Sjoerg    def a  : CDEIntrinsic<u32, !con(cp, (args u32:$acc), argsReg32, argsImm),
98*13fbcb42Sjoerg          (bitcast !con((CDEIRInt<NAME # "a", [f32]> $cp,
99*13fbcb42Sjoerg                         (bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
100*13fbcb42Sjoerg  }
  // The 64-bit forms use the same IR intrinsic names as the 32-bit forms
  // (NAME and NAME # "a", with no "d" suffix); only the type list changes
  // from [f32] to [f64].
101*13fbcb42Sjoerg  let pnt = PNT_None, params = [u64] in {
102*13fbcb42Sjoerg    def d  : CDEIntrinsic<u64, !con(cp, argsReg64, argsImm),
103*13fbcb42Sjoerg          (bitcast !con((CDEIRInt<NAME, [f64]> $cp), cgArgs, (? $imm)),
104*13fbcb42Sjoerg                   Scalar)>;
105*13fbcb42Sjoerg    def da : CDEIntrinsic<u64, !con(cp, (args u64:$acc), argsReg64, argsImm),
106*13fbcb42Sjoerg          (bitcast !con((CDEIRInt<NAME # "a", [f64]> $cp,
107*13fbcb42Sjoerg                         (bitcast $acc, FScalar)), cgArgs, (? $imm)), Scalar)>;
108*13fbcb42Sjoerg  }
109*13fbcb42Sjoerg}
110*13fbcb42Sjoerg
// vcx1: 11-bit immediate, no register operands.
// vcx2:  6-bit immediate, one operand ($n), bitcast to FScalar for codegen.
// vcx3:  3-bit immediate, two operands ($n, $m), each bitcast to FScalar.
// Register operands are u32 in the 32-bit forms and u64 in the 64-bit forms.
111*13fbcb42Sjoergdefm vcx1: CDE_VCXFP_m<(args imm_11b:$imm), (args), (args), (?)>;
112*13fbcb42Sjoergdefm vcx2: CDE_VCXFP_m<(args imm_6b:$imm), (args u32:$n), (args u64:$n),
113*13fbcb42Sjoerg                       (? (bitcast $n, FScalar))>;
114*13fbcb42Sjoergdefm vcx3: CDE_VCXFP_m<(args imm_3b:$imm),
115*13fbcb42Sjoerg                       (args u32:$n, u32:$m), (args u64:$n, u64:$m),
116*13fbcb42Sjoerg                       (? (bitcast $n, FScalar), (bitcast $m, FScalar))>;
117*13fbcb42Sjoerg
118*13fbcb42Sjoerg// VCX* instructions operating on Q vector registers
119*13fbcb42Sjoerg
// Named shorthand for VecOf<u8>. The Q-register IR intrinsics below are
// always given their vector operands bitcast to this type.
120*13fbcb42Sjoergdef v16u8 : VecOf<u8>;
121*13fbcb42Sjoerg
// vcx1q has no vector input, so it is not polymorphic (pnt = PNT_None) and is
// instantiated for u8 only. It takes the coprocessor number and a 12-bit
// immediate and returns the IR intrinsic result directly, with no bitcast.
122*13fbcb42Sjoerglet pnt = PNT_None, params = [u8] in
123*13fbcb42Sjoergdef vcx1q : CDEIntrinsic<Vector, (args imm_coproc:$cp, imm_12b:$imm),
124*13fbcb42Sjoerg                         (CDEIRInt<"vcx1q"> $cp, $imm)>;
125*13fbcb42Sjoerg
// Q-register intrinsics instantiated for every type in T.All and exposed only
// in polymorphic form (pnt = PNT_Type, polymorphicOnly = 1).
//
// Common pattern: the operand typed `Vector` is bitcast to a u8 vector before
// being passed to the IR intrinsic. Records returning `Vector` bitcast the
// result back; the *_u8 records return the v16u8 result as-is.
//
// Records named *_impl take their remaining vector operands already typed as
// v16u8; the FunctionMacro definitions later in this file wrap them under the
// un-suffixed names.
126*13fbcb42Sjoerglet pnt = PNT_Type, params = T.All, polymorphicOnly = 1 in {
127*13fbcb42Sjoerg  def vcx1qa :
128*13fbcb42Sjoerg    CDEIntrinsic<Vector, (args imm_coproc:$cp, Vector:$acc, imm_12b:$imm),
129*13fbcb42Sjoerg            (bitcast (CDEIRInt<"vcx1qa"> $cp, (bitcast $acc, v16u8), $imm),
130*13fbcb42Sjoerg                     Vector)>;
131*13fbcb42Sjoerg
  // NOTE(review): vcx2q and vcx2q_u8 spell the bitcast target as VecOf<u8>
  // while the other records here use v16u8, which is itself defined as
  // VecOf<u8>.
132*13fbcb42Sjoerg  def vcx2q :
133*13fbcb42Sjoerg    CDEIntrinsic<Vector, (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
134*13fbcb42Sjoerg            (bitcast (CDEIRInt<"vcx2q"> $cp, (bitcast $n, VecOf<u8>), $imm),
135*13fbcb42Sjoerg                      Vector)>;
136*13fbcb42Sjoerg  def vcx2q_u8 :
137*13fbcb42Sjoerg    CDEIntrinsic<v16u8, (args imm_coproc:$cp, Vector:$n, imm_7b:$imm),
138*13fbcb42Sjoerg            (CDEIRInt<"vcx2q"> $cp, (bitcast $n, VecOf<u8>), $imm)>;
139*13fbcb42Sjoerg
140*13fbcb42Sjoerg  def vcx2qa_impl :
141*13fbcb42Sjoerg    CDEIntrinsic<Vector,
142*13fbcb42Sjoerg            (args imm_coproc:$cp, Vector:$acc, v16u8:$n, imm_7b:$imm),
143*13fbcb42Sjoerg            (bitcast (CDEIRInt<"vcx2qa"> $cp, (bitcast $acc, v16u8), $n, $imm),
144*13fbcb42Sjoerg                     Vector)>;
145*13fbcb42Sjoerg
146*13fbcb42Sjoerg  def vcx3q_impl :
147*13fbcb42Sjoerg    CDEIntrinsic<Vector,
148*13fbcb42Sjoerg            (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
149*13fbcb42Sjoerg            (bitcast (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m, $imm),
150*13fbcb42Sjoerg                     Vector)>;
151*13fbcb42Sjoerg  def vcx3q_u8_impl :
152*13fbcb42Sjoerg    CDEIntrinsic<v16u8,
153*13fbcb42Sjoerg            (args imm_coproc:$cp, Vector:$n, v16u8:$m, imm_4b:$imm),
154*13fbcb42Sjoerg            (CDEIRInt<"vcx3q"> $cp, (bitcast $n, v16u8), $m, $imm)>;
155*13fbcb42Sjoerg  def vcx3qa_impl :
156*13fbcb42Sjoerg    CDEIntrinsic<Vector,
157*13fbcb42Sjoerg            (args imm_coproc:$cp, Vector:$acc, v16u8:$n, v16u8:$m, imm_4b:$imm),
158*13fbcb42Sjoerg            (bitcast (CDEIRInt<"vcx3qa"> $cp, (bitcast $acc, v16u8), $n, $m,
159*13fbcb42Sjoerg                                         $imm),
160*13fbcb42Sjoerg                     Vector)>;
161*13fbcb42Sjoerg}
162*13fbcb42Sjoerg
163*13fbcb42Sjoerg// Reinterpret intrinsics required to implement __arm_vcx*q with 2 or 3
164*13fbcb42Sjoerg// polymorphic parameters.
// Instantiated for every listed element type except u8; each instance
// reinterprets its Vector argument as v16u8. Marked headerOnly and
// polymorphicOnly, and derived from Intrinsic rather than CDEIntrinsic.
165*13fbcb42Sjoerglet params = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64, f16, f32],
166*13fbcb42Sjoerg    headerOnly = 1, polymorphicOnly = 1 in
167*13fbcb42Sjoergdef vreinterpretq_u8 :
168*13fbcb42Sjoerg    Intrinsic<v16u8, (args Vector:$x), (vreinterpret $x, v16u8)>;
169*13fbcb42Sjoerg
170*13fbcb42Sjoerg// We need vreinterpretq_u8_u8 to avoid doing smart tricks in the macros
// This is the u8 case left out of the vreinterpretq_u8 params list: it returns
// its argument unchanged via (id $x). NameOverride gives it the name
// "vreinterpretq_u8" even though the record is called vreinterpretq_u8_cde.
171*13fbcb42Sjoerglet params = [u8], polymorphicOnly = 1 in
172*13fbcb42Sjoergdef vreinterpretq_u8_cde :
173*13fbcb42Sjoerg    CDEIntrinsic<v16u8, (args Vector:$x), (id $x)>,
174*13fbcb42Sjoerg    NameOverride<"vreinterpretq_u8">;
175*13fbcb42Sjoerg
176*13fbcb42Sjoerg
// Function macros wrapping the *_impl intrinsics. Each forwards its arguments
// to __arm_<name>_impl. The first vector argument (acc, or n for vcx3q and
// vcx3q_u8) is passed through parenthesised; every later vector argument is
// wrapped in __arm_vreinterpretq_u8(...), matching the v16u8 parameters the
// *_impl intrinsics declare.
177*13fbcb42Sjoergdef vcx2qa : FunctionMacro<
178*13fbcb42Sjoerg  ["cp", "acc", "n", "imm"],
179*13fbcb42Sjoerg  "__arm_vcx2qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), (imm))">;
180*13fbcb42Sjoerg
181*13fbcb42Sjoergdef vcx3q : FunctionMacro<
182*13fbcb42Sjoerg  ["cp", "n", "m", "imm"],
183*13fbcb42Sjoerg  "__arm_vcx3q_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;
184*13fbcb42Sjoergdef vcx3q_u8 : FunctionMacro<
185*13fbcb42Sjoerg  ["cp", "n", "m", "imm"],
186*13fbcb42Sjoerg  "__arm_vcx3q_u8_impl((cp), (n), __arm_vreinterpretq_u8(m), (imm))">;
187*13fbcb42Sjoergdef vcx3qa : FunctionMacro<
188*13fbcb42Sjoerg  ["cp", "acc", "n", "m", "imm"],
189*13fbcb42Sjoerg  "__arm_vcx3qa_impl((cp), (acc), __arm_vreinterpretq_u8(n), "
190*13fbcb42Sjoerg                     "__arm_vreinterpretq_u8(m), (imm))">;
191*13fbcb42Sjoerg
// Predicated Q-register CDE intrinsic.
//
// Parameters:
//   irname  - base IR intrinsic name; "_predicated" is appended to it, and
//             CDEIRInt adds the "arm_cde_" prefix.
//   argsReg - args dag with the extra vector register operands (may be empty).
//   imm     - args dag declaring the immediate operand (must bind $imm).
//   cgArgs  - codegen dag fragment for the operands in argsReg.
//
// Source-level argument order: $cp, $inactive_or_acc, <argsReg>, <imm>, $pred.
// IR intrinsic argument order:  $cp, $inactive_or_acc, <cgArgs>, $imm, $pred.
// The IR intrinsic is overloaded on [Vector, Predicate]. The intrinsic is
// instantiated for all of T.All and is polymorphic-only.
192*13fbcb42Sjoergclass CDEIntrinsicMasked<string irname, dag argsReg, dag imm, dag cgArgs>
193*13fbcb42Sjoerg  : CDEIntrinsic<Vector,
194*13fbcb42Sjoerg      !con((args imm_coproc:$cp, Vector:$inactive_or_acc),
195*13fbcb42Sjoerg           argsReg, imm, (args Predicate:$pred)),
196*13fbcb42Sjoerg      !con((CDEIRInt<irname # "_predicated", [Vector,Predicate]>
197*13fbcb42Sjoerg            $cp, $inactive_or_acc), cgArgs, (? $imm, $pred))> {
198*13fbcb42Sjoerg  let params = T.All;
199*13fbcb42Sjoerg  let polymorphicOnly = 1;
200*13fbcb42Sjoerg}
201*13fbcb42Sjoerg
// Predicated vcx1q / vcx1qa: no extra register operands, 12-bit immediate.
// They are defined directly, with no *_impl record or wrapping macro.
202*13fbcb42Sjoergdef vcx1q_m : CDEIntrinsicMasked<"vcx1q", (args), (args imm_12b:$imm), (?)>;
203*13fbcb42Sjoergdef vcx1qa_m : CDEIntrinsicMasked<"vcx1qa", (args), (args imm_12b:$imm), (?)>;
204*13fbcb42Sjoerg
// Predicated intrinsics plus wrapping macros for instructions that take extra
// vector operands.
//
// Parameters:
//   argsReg, imm, cgArgs - forwarded unchanged to CDEIntrinsicMasked.
//   macroArgs - names of the extra macro parameters, inserted between the
//               second parameter and "imm".
//   macro     - text spliced into the macro body between the second argument
//               and " (imm)". Nothing is inserted after it, so it must supply
//               its own trailing comma.
//
// For `defm X` this produces:
//   X_m_impl, Xa_m_impl - CDEIntrinsicMasked records using IR names X and Xa.
//   X_m, Xa_m           - FunctionMacros forwarding to __arm_X_m_impl and
//                         __arm_Xa_m_impl; the second macro parameter is named
//                         "inactive" for X_m and "acc" for Xa_m.
205*13fbcb42Sjoergmulticlass VCXPredicated<dag argsReg, dag imm, dag cgArgs,
206*13fbcb42Sjoerg                         list<string> macroArgs, string macro> {
207*13fbcb42Sjoerg  def _m_impl : CDEIntrinsicMasked<NAME, argsReg, imm, cgArgs>;
208*13fbcb42Sjoerg  def a_m_impl : CDEIntrinsicMasked<NAME#"a", argsReg, imm, cgArgs>;
209*13fbcb42Sjoerg
210*13fbcb42Sjoerg  def _m: FunctionMacro<
211*13fbcb42Sjoerg    !listconcat(["cp", "inactive"], macroArgs, ["imm", "pred"]),
212*13fbcb42Sjoerg    "__arm_"#NAME#"_m_impl((cp), (inactive), "#macro#" (imm), (pred))">;
213*13fbcb42Sjoerg  def a_m: FunctionMacro<
214*13fbcb42Sjoerg    !listconcat(["cp", "acc"], macroArgs, ["imm", "pred"]),
215*13fbcb42Sjoerg    "__arm_"#NAME#"a_m_impl((cp), (acc), "#macro#" (imm), (pred))">;
216*13fbcb42Sjoerg}
217*13fbcb42Sjoerg
// Predicated vcx2q/vcx2qa: one v16u8 operand ($n), 7-bit immediate.
// Predicated vcx3q/vcx3qa: two v16u8 operands ($n, $m), 4-bit immediate.
// Each macro fragment wraps the operand(s) in __arm_vreinterpretq_u8 and ends
// with a comma, since VCXPredicated splices it directly before " (imm)".
218*13fbcb42Sjoergdefm vcx2q :
219*13fbcb42Sjoerg  VCXPredicated<(args v16u8:$n), (args imm_7b:$imm), (? $n), ["n"],
220*13fbcb42Sjoerg                "__arm_vreinterpretq_u8(n),">;
221*13fbcb42Sjoergdefm vcx3q :
222*13fbcb42Sjoerg  VCXPredicated<(args v16u8:$n, v16u8:$m), (args imm_4b:$imm), (? $n, $m),
223*13fbcb42Sjoerg                ["n", "m"], "__arm_vreinterpretq_u8(n), "
224*13fbcb42Sjoerg                            "__arm_vreinterpretq_u8(m),">;
225*13fbcb42Sjoerg
226*13fbcb42Sjoerg// vreinterpretq intrinsics required by the ACLE CDE specification
227*13fbcb42Sjoerg
// For each destination element type except u8, defines a record named
// vreinterpretq_<desttype> that reinterprets a u8 vector (params = [u8]) as
// VecOf<desttype>. The records are headerOnly and not polymorphic
// (pnt = PNT_None).
228*13fbcb42Sjoergforeach desttype = [/* no u8 */ s8, u16, s16, u32, s32, u64, s64, f16, f32] in {
229*13fbcb42Sjoerg  let params = [u8], headerOnly = 1, pnt = PNT_None in
230*13fbcb42Sjoerg  def "vreinterpretq_" # desttype : Intrinsic<
231*13fbcb42Sjoerg    VecOf<desttype>, (args Vector:$x), (vreinterpret $x, VecOf<desttype>)>;
232*13fbcb42Sjoerg}
233