1//===- arm_mve_defs.td - definitions and infrastructure for arm_mve.td ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The definitions in this file are designed to work in close conjunction with
10// clang/utils/TableGen/MveEmitter.cpp. Comments in there will probably be
11// useful as well.
12//
13//===----------------------------------------------------------------------===//
14
15// -----------------------------------------------------------------------------
// Forward declarations.
//
// Type is the common base class of every type record defined further down
// (Void, PrimitiveType, ComplexType, Immediate, ...). No fields are declared
// for it here.
class Type;
18
19// -----------------------------------------------------------------------------
// 'args' is a placeholder record whose only job is to serve as the operator
// of the dag holding an intrinsic's argument list.
//
// Arguments live in a dag instead of a list<Type> because dag operands can
// carry names, and codegen needs those names. For instance,
// (args Vector:$a, Scalar:$b) introduces $a and $b, which the codegen
// specification of that intrinsic can then refer to.

def args;
28
29// -----------------------------------------------------------------------------
// Family of nodes for use in the codegen dag for an intrinsic, corresponding
// to function calls that return LLVM IR nodes.
//
// First come the helper classes that describe call arguments needing special
// handling. Each one records the position of the argument it describes.
class IRBuilderParam<int index_> {
  int index = index_;
}

// Carries nothing beyond the argument index.
// NOTE(review): presumably flags an argument that must be passed as an
// Address (see the 'address' node) -- confirm against MveEmitter.cpp.
class IRBuilderAddrParam<int index_> : IRBuilderParam<index_>;

// Additionally records a type name as a string (the uses in this file pass
// "unsigned", "bool" and "uint64_t").
class IRBuilderIntParam<int index_, string type_> : IRBuilderParam<index_> {
  string type = type_;
}
// Common base of the codegen dag nodes that expand to a function call.
class IRBuilderBase {
  // Text that starts the function call; it includes an open parenthesis.
  string prefix;

  // Parameters whose types need special treatment from the Tablegen back
  // end. In general those are types other than llvm::Value *, though some
  // other types (e.g. llvm::Type *) need no special treatment either.
  list<IRBuilderParam> special_params = [];
}
// The usual case: the call is a method invoked on the code gen function's
// instance of llvm::IRBuilder, so the prefix is "Builder.<func>(".
class IRBuilder<string func> : IRBuilderBase {
  let prefix = !strconcat("Builder.", func, "(");
}
// A call to some other function that does not use the IRBuilder at all, so
// the prefix is just "<func>(".
class IRFunction<string func> : IRBuilderBase {
  let prefix = !strconcat(func, "(");
}
// A call to a helper function defined in CGBuiltin.cpp. Such helpers take the
// IRBuilder as an argument, so the prefix already supplies it:
// "<func>(Builder, ".
class CGHelperFn<string func> : IRBuilderBase {
  let prefix = !strconcat(func, "(Builder, ");
}
// Integer arithmetic, bitwise and shift nodes.
def add  : IRBuilder<"CreateAdd">;
def mul  : IRBuilder<"CreateMul">;
def not  : IRBuilder<"CreateNot">;
def or   : IRBuilder<"CreateOr">;
def and  : IRBuilder<"CreateAnd">;
def xor  : IRBuilder<"CreateXor">;
def sub  : IRBuilder<"CreateSub">;
def shl  : IRBuilder<"CreateShl">;
def lshr : IRBuilder<"CreateLShr">;

// Immediate shift right, done by the MVEImmediateShr helper. Arguments 1 and
// 2 are flagged as special parameters of type 'unsigned' and 'bool'.
def immshr : CGHelperFn<"MVEImmediateShr"> {
  let special_params = [
    IRBuilderIntParam<1, "unsigned">,
    IRBuilderIntParam<2, "bool">
  ];
}

// Floating-point arithmetic nodes.
def fadd : IRBuilder<"CreateFAdd">;
def fmul : IRBuilder<"CreateFMul">;
def fsub : IRBuilder<"CreateFSub">;
// Memory access nodes. The address operand (argument 0 of a load, argument 1
// of a store) is flagged as an IRBuilderAddrParam.
def load  : IRBuilder<"CreateLoad">  { let special_params = [IRBuilderAddrParam<0>]; }
def store : IRBuilder<"CreateStore"> { let special_params = [IRBuilderAddrParam<1>]; }
// Aggregate and vector element access nodes.

// Extract from an aggregate; argument 1 is flagged as an 'unsigned'.
def xval : IRBuilder<"CreateExtractValue"> {
  let special_params = [IRBuilderIntParam<1, "unsigned">];
}

// Insert a vector element; in the _const flavour argument 2 is flagged as a
// 'uint64_t', whereas the _var flavour has no special parameters.
def ielt_const : IRBuilder<"CreateInsertElement"> {
  let special_params = [IRBuilderIntParam<2, "uint64_t">];
}
def ielt_var : IRBuilder<"CreateInsertElement">;

// Extract a vector element, with no special parameters.
def xelt_var : IRBuilder<"CreateExtractElement">;
// Conversion nodes.
def trunc   : IRBuilder<"CreateTrunc">;
def bitcast : IRBuilder<"CreateBitCast">;

// Extension done by the SignOrZeroExtend helper; argument 2 is flagged as a
// 'bool'.
def extend : CGHelperFn<"SignOrZeroExtend"> {
  let special_params = [IRBuilderIntParam<2, "bool">];
}
// Constant-producing nodes. These do not go through the IRBuilder; they call
// a static function directly. Both names are fully qualified so that the
// generated call does not depend on a using-directive being in effect where
// the generated code is included.
def zeroinit: IRFunction<"llvm::Constant::getNullValue">;
def undef: IRFunction<"llvm::UndefValue::get">;
// Integer comparison nodes: equality, then unsigned and signed orderings.
def icmp_eq  : IRBuilder<"CreateICmpEQ">;
def icmp_ne  : IRBuilder<"CreateICmpNE">;
def icmp_ugt : IRBuilder<"CreateICmpUGT">;
def icmp_uge : IRBuilder<"CreateICmpUGE">;
def icmp_ult : IRBuilder<"CreateICmpULT">;
def icmp_ule : IRBuilder<"CreateICmpULE">;
def icmp_sgt : IRBuilder<"CreateICmpSGT">;
def icmp_sge : IRBuilder<"CreateICmpSGE">;
def icmp_slt : IRBuilder<"CreateICmpSLT">;
def icmp_sle : IRBuilder<"CreateICmpSLE">;

// Floating-point comparison nodes. All of them use the ordered (O) predicate
// except fcmp_ne, which uses the unordered one because it must return true
// on NaNs.
def fcmp_eq : IRBuilder<"CreateFCmpOEQ">;
def fcmp_ne : IRBuilder<"CreateFCmpUNE">;
def fcmp_gt : IRBuilder<"CreateFCmpOGT">;
def fcmp_ge : IRBuilder<"CreateFCmpOGE">;
def fcmp_lt : IRBuilder<"CreateFCmpOLT">;
def fcmp_le : IRBuilder<"CreateFCmpOLE">;
// Vector splat (through the ARMMVEVectorSplat helper) and select.
def splat  : CGHelperFn<"ARMMVEVectorSplat">;
def select : IRBuilder<"CreateSelect">;
115
// The 'address' node turns a pointer-typed Value into an Address. Its second
// argument supplies the alignment.
def address;
119
// A further class of node usable in the codegen dag: a call to an IR
// intrinsic function, which must be specialized to a particular list of
// types.
class IRIntBase<string name_, list<Type> params_ = [], bit appendKind_ = 0> {
  // Base name of the intrinsic.
  string intname = name_;

  // Types the intrinsic is specialized to.
  list<Type> params = params_;

  // When set, the IR intrinsic name gains a suffix _s, _u or _f according to
  // whether the main parameter type of the ACLE intrinsic being generated is
  // a signed integer, an unsigned integer, or a float.
  //
  // This matters mostly for signed versus unsigned integers. ACLE intrinsics
  // and source-level integer types keep the two apart, but in IR the
  // distinction lives in the operations rather than the type system, where
  // there is only i32, i16 and so on. Setting this bit lets an IR intrinsic
  // vary with signedness while the signed and unsigned versions still share
  // one subclass of Intrinsic: the Tablegen backend appends _s or _u as
  // appropriate for each instance.
  bit appendKind = appendKind_;
}
139
// Most uses are @llvm.arm.mve.* intrinsics, so this trivial subclass adds the
// "arm_mve_" prefix to the given name.
class IRInt<string name, list<Type> params = [], bit appendKind = 0>
    : IRIntBase<!strconcat("arm_mve_", name), params, appendKind>;
144
145// The 'seq' node in a codegen dag specifies a set of IR operations to be
146// performed in order. It has the special ability to define extra variable
147// names, on top of the ones that refer to the intrinsic's parameters. For
148// example:
149//
//   (seq (foo this, that):$a,
//        (bar this, $a):$b,
//        (add $a, $b))
153//
154// defines the name $a to refer to the return value of the 'foo' operation;
155// then the 'bar' operation uses $a as one of its arguments, and the return
156// value of that is assigned the name $b; finally, $a and $b are added to give
157// the return value of the seq construction as a whole.
def seq;

// 'unsignedflag' is another special operation. Its argument is a scalar
// _type_; it expands to 1 when that type is unsigned, and to 0 when it is
// signed (or floating).
def unsignedflag;
164
165// If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
166// indicates that the IR generation for that intrinsic is done by handwritten
167// C++ and not autogenerated at all. The effect in the MVE builtin codegen
168// function is to break out of the main switch and fall through to the
169// manual-codegen cases below it, having set the CustomCodeGenType enumerated
170// variable to the value given by the 'type' string here.
class CustomCodegen<string type_> {
  // Value that the CustomCodeGenType enumerated variable is set to.
  string type = type_;
}
172
173// -----------------------------------------------------------------------------
174// System for building up complex instances of Type from simple ones.
175
// ComplexType represents every more complicated type: vectors, multivectors,
// pointers and so on. The dag it holds says how to build the type out of
// simpler ones, and the operator of that dag is always an instance of
// ComplexTypeOp, defined below.
class ComplexType<dag spec_> : Type {
  dag spec = spec_;
}
181
182// Operators you can use in the ComplexType spec dag. These are an intermediate
183// layer, interpreted by MveEmitter::getType() in the Tablegen backend, and
184// only used in the definitions below. Actual intrinsic definitions in
185// arm_mve.td will use the defs defined below here.
class ComplexTypeOp;

// Operator behind Scalar.
def CTO_Parameter : ComplexTypeOp;

// Operator behind VecOf and NarrowedVecOf.
def CTO_Vec : ComplexTypeOp;

// Operator behind PredOf.
def CTO_Pred : ComplexTypeOp;

// Operator behind MultiVector; n is the number of vectors in the tuple.
class CTO_Tuple<int n_> : ComplexTypeOp {
  int n = n_;
}

// Operator behind Ptr (const = 0) and CPtr (const = 1).
class CTO_Pointer<bit const_> : ComplexTypeOp {
  bit const = const_;
}

// Operator behind CopyKind, Unsigned and Signed.
def CTO_CopyKind : ComplexTypeOp;

// Operator behind DoubleSize (2/1) and HalfSize (1/2).
class CTO_ScaleSize<int num_, int denom_> : ComplexTypeOp {
  int num = num_;
  int denom = denom_;
}
197
198// -----------------------------------------------------------------------------
199// Instances of Type intended to be used directly in the specification of an
200// intrinsic in arm_mve.td.
201
// Void serves two purposes: as the return type of an intrinsic, and as the
// parameter type of an intrinsic that is not actually parameterised by any
// _s32 / _f16 / _u8 style suffix.
def Void : Type;
206
// Wrap an intrinsic's argument type in unpromoted<...> to stop it from being
// automatically promoted from a smaller integer type to i32.
class unpromoted<Type t> : Type {
  Type underlying_type = t;
}
210
// Base class of the primitive types, i.e. the scalar integer and floating
// types that MVE uses.
class PrimitiveType<string kind_, int size_> : Type {
  // Kind letter; the instances in this file use "u", "s" and "f".
  string kind = kind_;

  // Size, as it appears in names like s32 or f16.
  int size = size_;

  // Alternative name for the type in C; empty when there is none.
  string nameOverride = "";
}
218
// Instantiate the primitive type records, whose names look like s32, f16, u8:
// integers (unsigned, then signed) at 8, 16, 32 and 64, followed by floats at
// 16 and 32.
foreach size = [8, 16, 32, 64] in {
  def "u" # size : PrimitiveType<"u", size>;
  def "s" # size : PrimitiveType<"s", size>;
}
foreach size = [16, 32] in
  def "f" # size : PrimitiveType<"f", size>;
226
// In places ACLE declares a function parameter as something like 'unsigned'
// instead of uint32_t. These records are 32-bit integer types that go by
// that different name in C.
def uint : PrimitiveType<"u", 32> {
  let nameOverride = "unsigned";
}
def sint : PrimitiveType<"s", 32> {
  let nameOverride = "int";
}
232
// VecOf<t>: given a scalar t, the 128-bit vector of that scalar.
class VecOf<Type t> : ComplexType<(CTO_Vec t)>;
236
// NarrowedVecOf<t,v>: given a scalar type t and a vector type v, the vector
// type with element type t and the same lane count as v.
//
// This is the intermediate value type in the IR form of a widening load,
// where a vector of small things is loaded from memory and then zext/sext'ed
// into a full 128-bit output vector.
class NarrowedVecOf<Type t, Type v> : ComplexType<(CTO_Vec t, v)>;
244
// PredOf<t>: given a scalar t, the predicate vector that (logically speaking)
// has as many lanes as VecOf<t> would.
class PredOf<Type t> : ComplexType<(CTO_Pred t)>;
248
// Scalar stands for the main parameter type of the current intrinsic, whatever
// that is. Vector and Predicate are the vector type and the predicate type
// that correspond to it.
def Scalar    : ComplexType<(CTO_Parameter)>;
def Vector    : VecOf<Scalar>;
def Predicate : PredOf<Scalar>;
255
// MultiVector<n>: a type holding n instances of Vector. It is not defined
// over a general underlying vector type because its only users, vld2q and
// friends, do not need that generality.
class MultiVector<int n> : ComplexType<(CTO_Tuple<n> Vector)>;
260
// Ptr<t> is a pointer to t; CPtr<t> is a pointer to const t.
class Ptr<Type t>  : ComplexType<(CTO_Pointer<0> t)>;
class CPtr<Type t> : ComplexType<(CTO_Pointer<1> t)>;
265
// CopyKind<s,k>: given scalar types s and k, the scalar type that takes its
// size from s and its kind (signed, unsigned or float) from k.
class CopyKind<Type s, Type k> : ComplexType<(CTO_CopyKind s, k)>;
270
// DoubleSize<k>: given a scalar type k, the scalar type of the same kind
// (signed, unsigned or float) whose size is double that of k, if possible.
// HalfSize<k> is the counterpart that scales the size by 1/2 instead.
class DoubleSize<Type k> : ComplexType<(CTO_ScaleSize<2, 1> k)>;
class HalfSize<Type k> : ComplexType<(CTO_ScaleSize<1, 2> k)>;
276
// Unsigned<t>: given a scalar type t, the unsigned integer scalar of the same
// size -- u16 for any of s16, f16 or u16. Signed<t> likewise gives the signed
// one. Both are CopyKind specs that take the size from t and the kind from a
// fixed integer type.
class Unsigned<Type t> : ComplexType<(CTO_CopyKind t, u32)>;
class Signed<Type t> : ComplexType<(CTO_CopyKind t, s32)>;
282
// Unsigned-integer (U...) and signed-integer (S...) counterparts of Scalar
// and Vector.
def UScalar : Unsigned<Scalar>;
def UVector : VecOf<UScalar>;
def SScalar : Signed<Scalar>;
def SVector : VecOf<SScalar>;
289
// Vectors of rescaled scalars: DblVector has elements twice the size of
// Scalar, HalfVector has elements half that size, and UHalfVector has
// half-sized _unsigned integer_ elements.
def DblVector   : VecOf<DoubleSize<Scalar>>;
def HalfVector  : VecOf<HalfSize<Scalar>>;
def UHalfVector : VecOf<Unsigned<HalfSize<Scalar>>>;
296
// The 32-bit and 64-bit integers that have the same signedness as Scalar.
def Scalar32 : CopyKind<u32, Scalar>;
def Scalar64 : CopyKind<u64, Scalar>;
301
302// -----------------------------------------------------------------------------
303// Internal definitions for specifying immediate arguments for an intrinsic.
304
// Base class of the records that describe which values an immediate may take.
class ImmediateBounds;

// An immediate argument: a type together with the bounds on its value.
class Immediate<Type type_, ImmediateBounds bounds_> : Type {
  Type type = type_;
  ImmediateBounds bounds = bounds_;

  // Optional name of a further constraint (values used in this file include
  // "ShiftedByte", "Power2" and "Multiple"), and an optional argument to it.
  string extra;
  string extraarg;
}
// A fixed range, from lo to hi.
class IB_ConstRange<int lo_, int hi_> : ImmediateBounds {
  int lo = lo_;
  int hi = hi_;
}

// Bounds used by the imm_simd_* immediates.
// NOTE(review): presumably any value that fits an unsigned element -- confirm
// against MveEmitter.cpp.
def IB_UEltValue : ImmediateBounds;

// Bounds used by imm_lane: the index of a vector lane.
def IB_LaneIndex : ImmediateBounds;

// A range determined by the number of bits in a type (Scalar unless stated
// otherwise), starting from 'base': imm_1toN uses base 1 and imm_0toNm1 uses
// base 0.
class IB_EltBit<int base_, Type type_ = Scalar> : ImmediateBounds {
  int base = base_;
  Type type = type_;
}
322
323// -----------------------------------------------------------------------------
324// End-user definitions for immediate arguments.
325
// imm_simd_restrictive and imm_simd_vmvn are used for the immediate operands
// to intrinsics like vmvnq or vorrq. imm_simd_restrictive has to be an 8-bit
// value shifted left by a whole number of bytes; imm_simd_vmvn can also be of
// the form 0xXXFF for some byte value XX.
def imm_simd_restrictive : Immediate<u32, IB_UEltValue> { let extra = "ShiftedByte"; }
def imm_simd_vmvn : Immediate<u32, IB_UEltValue> { let extra = "ShiftedByteOrXXFF"; }
336
// Immediates whose range depends on N, the number of bits in the main
// parameter type:
//
//   imm_1toN      1 to N inclusive (e.g. an immediate shift count, in an
//                 intrinsic that shifts every lane of a vector by the same
//                 amount)
//   imm_0toNm1    the same range offset by 1, i.e. 0 to N-1 inclusive
//   imm_1toHalfN  like imm_1toN, but applied to a half-width type (so if
//                 Scalar is s16, for example, the range is 1 to 8)
def imm_1toN     : Immediate<sint, IB_EltBit<1>>;
def imm_0toNm1   : Immediate<sint, IB_EltBit<0>>;
def imm_1toHalfN : Immediate<sint, IB_EltBit<1, HalfSize<Scalar>>>;
349
// imm_lane has to be the index of a vector lane in the main vector type, i.e.
// it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
def imm_lane : Immediate<sint, IB_LaneIndex>;

// imm_1to32 is unconditionally in the range 1 to 32. (e.g. scalar shift
// intrinsics)
def imm_1to32 : Immediate<sint, IB_ConstRange<1, 32>>;

// imm_1248 is one of 1, 2, 4 or 8 (e.g. vidupq): the range 1 to 8 with the
// extra "Power2" constraint.
def imm_1248 : Immediate<u32, IB_ConstRange<1, 8>> { let extra = "Power2"; }
362
// imm_mem7bit<n> is a valid immediate offset for a load/store intrinsic whose
// memory access size is n bytes (e.g. 1 for vldrb_[whatever], 2 for vldrh,
// ...). The valid immediates are {-127*n, ..., -1*n, 0*n, 1*n, ..., 127*n}.
//
// That is expressed as the range -127*n to 127*n plus, unless n is 1, the
// extra constraint "Multiple". extraarg is always n in string form.
class imm_mem7bit<int membytes>
    : Immediate<sint,
                IB_ConstRange<!mul(membytes, -127), !mul(membytes, 127)>> {
  let extraarg = !cast<string>(membytes);
  let extra = !if(!eq(membytes, 1), ?, "Multiple");
}
372
373// -----------------------------------------------------------------------------
374// Specification of ways that the full name of an intrinsic can be mapped to
375// its shorter polymorphic name.
376
class PolymorphicNameType<int nt_, string x_> {
  // Number of type suffixes dropped from the full name.
  int NumTypeSuffixesToDiscard = nt_;

  // A further suffix, written without its underscore, that is dropped as
  // well; unset when there is none.
  string ExtraSuffixToDiscard = x_;
}
381
// PNT_None: the intrinsic is not polymorphic at all, so its short name equals
// its long name. (E.g. scalar shift intrinsics such as uqshl.)
def PNT_None : PolymorphicNameType<0, ?>;

// PNT_Type: the usual case. Dropping the type suffix gives the polymorphic
// name, which therefore matches the Tablegen record name. E.g.
// vaddq_u16 -> vaddq.
def PNT_Type : PolymorphicNameType<1, ?>;

// PNT_2Type: _two_ type suffixes are dropped. E.g. vcvtq_f16_u16 -> vcvtq.
def PNT_2Type : PolymorphicNameType<2, ?>;

// PNT_NType: an "_n" suffix and a type are dropped. E.g.
// vaddq_n_u16 -> vaddq.
def PNT_NType : PolymorphicNameType<1, "n">;
398
// PNT_N: the polymorphic name is made by just dropping an "_n" suffix
// (even if it isn't at the end of the name). E.g. vidupq_n_u16 -> vidupq_u16.
def PNT_N : PolymorphicNameType<0, "n">;

// PNT_WBType: an "_wb" suffix and a type are dropped. E.g.
// vidupq_m_wb_u16 -> vidupq_m.
def PNT_WBType : PolymorphicNameType<1, "wb">;

// PNT_WB: only "_wb" is dropped. E.g. vidupq_wb_u16 -> vidupq_u16.
def PNT_WB : PolymorphicNameType<0, "wb">;
410
411// -----------------------------------------------------------------------------
// The main class Intrinsic. Define one of these for each family of ACLE
// intrinsics which are the same apart from some final type suffix (e.g.
// vaddq_{s8,u8,f16,...}).
415//
416// The record's name plus that type suffix is taken to be the full unambiguous
417// name of the function. Its shorter polymorphic name is constructed from that
418// in turn, in a way specified by the PolymorphicNameType system above.
419
class Intrinsic<Type ret_, dag args_, dag codegen_> {
  // Parameter types to suffix to this intrinsic's name; each one yields a
  // separate actual ACLE intrinsic. Use [Void] for an intrinsic that is not
  // polymorphic at all.
  list<Type> params;

  // Return type of the intrinsic.
  Type ret = ret_;

  // Arguments of the intrinsic.
  dag args = args_;

  // How to generate the intrinsic's IR.
  dag codegen = codegen_;

  // How the polymorphic name is derived. PNT_Type is the default because it
  // is by far the most common case.
  PolymorphicNameType pnt = PNT_Type;

  // Set for the very few intrinsics that _only_ have a polymorphic name.
  bit polymorphicOnly = 0;

  // Set if the builtin has to avoid evaluating its arguments.
  bit nonEvaluating = 0;

  // Overrides the suffix letter, e.g. an override letter of "p" makes
  // vfooq_p16.
  string overrideKindLetter = "";
}
446
// Intrinsics that logically form one family sometimes need two separate
// Intrinsic declarations (e.g. vaddq, which must expand to an Add or an FAdd
// IR node depending on type). Deriving from NameOverride lets such a
// declaration specify the intrinsic's base name independently of the
// Tablegen record name.

class NameOverride<string basename_> { string basename = basename_; }
457
// Defines the _m version of a predicated intrinsic and, when wantXVariant is
// 1, its _x version as well. nameSuffix is appended to both record names.
multiclass IntrinsicMX<Type rettype, dag arguments, dag cg,
                       int wantXVariant = 1,
                       string nameSuffix = "",
                       PolymorphicNameType pnt_x = PNT_Type> {
  // _m variant: its argument list starts with an extra parameter, $inactive,
  // which supplies the input value of the output register. Every inactive
  // lane of the predicated operation takes its value from it.
  def "_m" # nameSuffix
      : Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg>;

  // _x variant: has no such parameter; $inactive is bound to an undef value
  // of the same type instead. The list iterated over has one element when
  // the variant is wanted and none otherwise, so the loop acts as a
  // conditional and its variable is never used.
  foreach ignored = !if(!eq(wantXVariant, 1), [1], []<int>) in {
    def "_x" # nameSuffix
        : Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> {
      // The polymorphic name type can be overridden here because the _m and
      // _x variants sometimes polymorph differently (typically because the
      // type of the inactive parameter, when present, can serve as a
      // disambiguator).
      let pnt = pnt_x;
    }
  }
}
484
485// -----------------------------------------------------------------------------
// Convenience lists of parameter types. 'T' is just a container record, so you
// can define a typical intrinsic with 'let params = T.Usual', or similar,
// instead of having to repeat a long list every time.
489
def T {
  // Integer types of up to 32 bits, by signedness and combined.
  list<Type> Signed   = [s8, s16, s32];
  list<Type> Unsigned = [u8, u16, u32];
  list<Type> Int      = !listconcat(Signed, Unsigned);

  // Floating types, and the usual integer-plus-float set.
  list<Type> Float = [f16, f32];
  list<Type> Usual = !listconcat(Int, Float);

  // Integer types of one particular size.
  list<Type> Int8  = [s8, u8];
  list<Type> Int16 = [s16, u16];
  list<Type> Int32 = [s32, u32];
  list<Type> Int64 = [s64, u64];

  // Stand-ins for the polynomial types: actually p8 and p16.
  list<Type> Poly = [u8, u16];

  // Every type of one particular size, floats included where they exist.
  list<Type> All8  = Int8;
  list<Type> All16 = !listconcat(Int16, [f16]);
  list<Type> All32 = !listconcat(Int32, [f32]);
  list<Type> All64 = Int64;

  // Everything: the usual set followed by the 64-bit integers.
  list<Type> All = !listconcat(Usual, All64);
}
507
// -----------------------------------------------------------------------------
// 'V' is a container record for DAG constant values. They exist because a
// bit/int class/multiclass parameter cannot be used to produce a dag node:
// the Tablegen parser turns (u32 x) with x equal to 0 into (u32 { 0 }), for
// example.
def V {
  dag False = (u32 0);
  dag True = (u32 1);
}
517