1 //== llvm/CodeGen/GlobalISel/LegalizerHelper.h ---------------- -*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file A pass to convert the target-illegal operations created by IR -> MIR
10 /// translation into ones the target expects to be able to select. This may
11 /// occur in multiple phases, for example G_ADD <2 x i8> -> G_ADD <2 x i16> ->
12 /// G_ADD <4 x i16>.
13 ///
14 /// The LegalizerHelper class is where most of the work happens, and is
15 /// designed to be callable from other passes that find themselves with an
16 /// illegal instruction.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZERHELPER_H
21 #define LLVM_CODEGEN_GLOBALISEL_LEGALIZERHELPER_H
22 
23 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
24 #include "llvm/CodeGen/RuntimeLibcalls.h"
25 #include "llvm/CodeGen/TargetOpcodes.h"
26 
27 namespace llvm {
28 // Forward declarations of types that this header only names in declarations.
   // NOTE(review): LLT, Register, ArrayRef, Align, TypeSize and CallingConv are
   // used further down without being declared here, so they are presumably
   // provided transitively by the includes above. std::initializer_list is also
   // used without a visible <initializer_list> include -- confirm.
29 class APInt;
30 class GAnyLoad;
31 class GLoadStore;
32 class GStore;
33 class GenericMachineInstr;
34 class MachineFunction;
35 class MachineIRBuilder;
36 class MachineInstr;
37 class MachineInstrBuilder;
38 struct MachinePointerInfo;
39 template <typename T> class SmallVectorImpl;
40 class LegalizerInfo;
41 class MachineRegisterInfo;
42 class GISelChangeObserver;
43 class LostDebugLocObserver;
44 class TargetLowering;
45 
/// Declares the individual legalization actions (libcall, narrowScalar,
/// widenScalar, bitcast, lower, fewerElementsVector, moreElementsVector) that
/// can be applied to a single MachineInstr. Per the file header, the class is
/// designed to be callable from other passes that find themselves with an
/// illegal instruction.
46 class LegalizerHelper {
47 public:
48   /// Expose MIRBuilder so clients can set their own RecordInsertInstruction
49   /// functions.
50   MachineIRBuilder &MIRBuilder;
51 
52   /// Observer used to keep track of changes made by the LegalizerHelper.
53   GISelChangeObserver &Observer;
54 
55 private:
56   MachineRegisterInfo &MRI;
57   const LegalizerInfo &LI; ///< Returned by getLegalizerInfo().
58   const TargetLowering &TLI; ///< Returned by getTargetLowering().
59 
60 public:
     /// Outcome reported by the legalization entry points of this class.
61   enum LegalizeResult {
62     /// Instruction was already legal and no change was made to the
63     /// MachineFunction.
64     AlreadyLegal,
65 
66     /// Instruction has been legalized and the MachineFunction changed.
67     Legalized,
68 
69     /// Some kind of error has occurred and we could not legalize this
70     /// instruction.
71     UnableToLegalize,
72   };
73 
74   /// Expose the LegalizerInfo so that clients can reuse it.
getLegalizerInfo()75   const LegalizerInfo &getLegalizerInfo() const { return LI; }
     /// Expose the TargetLowering held by this helper.
getTargetLowering()76   const TargetLowering &getTargetLowering() const { return TLI; }
77 
     /// Construct a helper for \p MF. \p Observer and \p B have the same types as
     /// the public Observer and MIRBuilder members, which they presumably
     /// initialize. NOTE(review): this overload takes no LegalizerInfo, so it
     /// presumably obtains one from \p MF -- confirm in the implementation.
78   LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer,
79                   MachineIRBuilder &B);
     /// Same as above, but with the LegalizerInfo \p LI supplied by the caller.
80   LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
81                   GISelChangeObserver &Observer, MachineIRBuilder &B);
82 
83   /// Replace \p MI by a sequence of legal instructions that can implement the
84   /// same operation. Note that this means \p MI may be deleted, so any iterator
85   /// steps should be performed before calling this function. This helper should
86   /// have been constructed for the MachineFunction containing \p MI.
87   ///
88   /// Considered as an opaque blob, the legal code will use and define the same
89   /// registers as \p MI.
90   LegalizeResult legalizeInstrStep(MachineInstr &MI,
91                                    LostDebugLocObserver &LocObserver);
92 
93   /// Legalize an instruction by emitting a runtime library call instead.
94   LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver);
95 
96   /// Legalize an instruction by reducing the width of the underlying scalar
97   /// type.
98   LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
99 
100   /// Legalize an instruction by performing the operation on a wider scalar type
101   /// (for example a 16-bit addition can be safely performed at 32-bits
102   /// precision, ignoring the unused bits).
103   LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
104 
105   /// Legalize an instruction by replacing the value type.
106   LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
107 
108   /// Legalize an instruction by splitting it into simpler parts, hopefully
109   /// understood by the target.
110   LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
111 
112   /// Legalize a vector instruction by splitting into multiple components, each
113   /// acting on the same scalar type as the original but with fewer elements.
114   LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
115                                      LLT NarrowTy);
116 
117   /// Legalize a vector instruction by increasing the number of vector elements
118   /// involved and ignoring the added elements later.
119   LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
120                                     LLT MoreTy);
121 
122   /// Cast the given value to an LLT::scalar with an equivalent size. Returns
123   /// the register to use if an instruction was inserted. Returns the original
124   /// register if no coercion was necessary.
125   ///
126   /// This may also fail and return Register() if there is no legal way to cast.
127   Register coerceToScalar(Register Val);
128 
129   /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
130   /// Use by extending the operand's type to \p WideTy using the specified \p
131   /// ExtOpcode for the extension instruction, and replacing the vreg of the
132   /// operand in place.
133   void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx,
134                       unsigned ExtOpcode);
135 
136   /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
137   /// Use by truncating the operand's type to \p NarrowTy using G_TRUNC, and
138   /// replacing the vreg of the operand in place.
139   void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx);
140 
141   /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
142   /// Def by extending the operand's type to \p WideTy and truncating it back
143   /// with the \p TruncOpcode, and replacing the vreg of the operand in place.
144   void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx = 0,
145                       unsigned TruncOpcode = TargetOpcode::G_TRUNC);
146 
147   /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
148   /// Def by truncating the operand's type to \p NarrowTy, replacing in place and
149   /// extending back with \p ExtOpcode.
150   void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx,
151                        unsigned ExtOpcode);
152   /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
153   /// Def by performing it with additional vector elements and extracting the
154   /// result elements, and replacing the vreg of the operand in place.
155   void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx);
156 
157   /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
158   /// Use by producing a vector with undefined high elements, extracting the
159   /// original vector type, and replacing the vreg of the operand in place.
160   void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx);
161 
162   /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
163   /// Use by inserting a G_BITCAST to \p CastTy.
164   void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx);
165 
166   /// Legalize a single operand \p OpIdx of the machine instruction \p MI as a
167   /// Def by inserting a G_BITCAST from \p CastTy.
168   void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx);
169 
170 private:
      // NOTE(review): the following helpers share widenScalar()'s signature and
      // naming, so they are presumably its opcode-specific pieces -- confirm in
      // the implementation.
171   LegalizeResult
172   widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
173   LegalizeResult
174   widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
175   LegalizeResult
176   widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
177   LegalizeResult
178   widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
179   LegalizeResult widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
180                                            LLT WideTy);
181   LegalizeResult widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
182                                          LLT WideTy);
183   LegalizeResult widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
184                                  LLT WideTy);
185 
186   /// Helper function to split a wide generic register \p Reg into \p NumParts
187   /// bitwise blocks of type \p Ty. The generic registers created are appended
188   /// to \p VRegs, starting at bit 0 of \p Reg.
189   void extractParts(Register Reg, LLT Ty, int NumParts,
190                     SmallVectorImpl<Register> &VRegs);
191 
192   /// Version which handles irregular splits.
193   bool extractParts(Register Reg, LLT RegTy, LLT MainTy,
194                     LLT &LeftoverTy,
195                     SmallVectorImpl<Register> &VRegs,
196                     SmallVectorImpl<Register> &LeftoverVRegs);
197 
198   /// Version which handles irregular sub-vector splits.
      /// NOTE(review): the parameter name `NumElst` looks like a typo for
      /// `NumElts`, the spelling used elsewhere in this class.
199   void extractVectorParts(Register Reg, unsigned NumElst,
200                           SmallVectorImpl<Register> &VRegs);
201 
202   /// Helper function to build a wide generic register \p DstReg of type \p
203   /// ResultTy from smaller parts. This will produce a G_MERGE_VALUES,
204   /// G_BUILD_VECTOR, G_CONCAT_VECTORS, or sequence of G_INSERT as appropriate
205   /// for the types.
206   ///
207   /// \p PartRegs must be registers of type \p PartTy.
208   ///
209   /// If \p ResultTy does not evenly break into \p PartTy sized pieces, the
210   /// remainder must be specified with \p LeftoverRegs of type \p LeftoverTy.
211   void insertParts(Register DstReg, LLT ResultTy,
212                    LLT PartTy, ArrayRef<Register> PartRegs,
213                    LLT LeftoverTy = LLT(), ArrayRef<Register> LeftoverRegs = {});
214 
215   /// Merge \p PartRegs with different types into \p DstReg.
216   void mergeMixedSubvectors(Register DstReg, ArrayRef<Register> PartRegs);
217 
      /// NOTE(review): undocumented upstream; presumably appends the elements of
      /// the vector register \p Reg to \p Elts -- confirm in the implementation.
218   void appendVectorElts(SmallVectorImpl<Register> &Elts, Register Reg);
219 
220   /// Unmerge \p SrcReg into smaller sized values, and append them to \p
221   /// Parts. The elements of \p Parts will be the greatest common divisor type
222   /// of \p DstTy, \p NarrowTy and the type of \p SrcReg. This will compute and
223   /// return the GCD type.
224   LLT extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
225                      LLT NarrowTy, Register SrcReg);
226 
227   /// Unmerge \p SrcReg into \p GCDTy typed registers. This will append all of
228   /// the unpacked registers to \p Parts. This version is for when the common
229   /// unmerge type is already known.
230   void extractGCDType(SmallVectorImpl<Register> &Parts, LLT GCDTy,
231                       Register SrcReg);
232 
233   /// Produce a merge of values in \p VRegs to define \p DstReg. Perform a merge
234   /// from the least common multiple type, and convert as appropriate to \p
235   /// DstReg.
236   ///
237   /// \p VRegs should each have type \p GCDTy. This type should be the greatest
238   /// common divisor type of \p DstReg, \p NarrowTy, and an undetermined source
239   /// type.
240   ///
241   /// \p NarrowTy is the desired result merge source type. If the source value
242   /// needs to be widened to evenly cover \p DstReg, inserts high bits
243   /// corresponding to the extension opcode \p PadStrategy.
244   ///
245   /// \p VRegs will be cleared, and the result \p NarrowTy register pieces
246   /// will replace it. Returns the complete LCMTy that \p VRegs will cover when
247   /// merged.
248   LLT buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
249                           SmallVectorImpl<Register> &VRegs,
250                           unsigned PadStrategy = TargetOpcode::G_ANYEXT);
251 
252   /// Merge the values in \p RemergeRegs to an \p LCMTy typed value. Extract the
253   /// low bits into \p DstReg. This is intended to use the outputs from
254   /// buildLCMMergePieces after processing.
255   void buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
256                                 ArrayRef<Register> RemergeRegs);
257 
258   /// Perform generic multiplication of values held in multiple registers.
259   /// Generated instructions use only types \p NarrowTy and i1.
260   /// The destination can be the same size as, or twice the size of, the source.
261   void multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
262                          ArrayRef<Register> Src1Regs,
263                          ArrayRef<Register> Src2Regs, LLT NarrowTy);
264 
      /// NOTE(review): undocumented upstream; presumably switches \p MI to
      /// \p NewOpcode in place -- confirm in the implementation.
265   void changeOpcode(MachineInstr &MI, unsigned NewOpcode);
266 
267   LegalizeResult tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
268                                         LLT SrcTy, LLT NarrowTy,
269                                         unsigned ScalarOpc);
270 
271   // Memcpy family legalization helpers.
272   LegalizeResult lowerMemset(MachineInstr &MI, Register Dst, Register Val,
273                              uint64_t KnownLen, Align Alignment,
274                              bool IsVolatile);
275   LegalizeResult lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
276                                    uint64_t KnownLen, Align DstAlign,
277                                    Align SrcAlign, bool IsVolatile);
278   LegalizeResult lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
279                              uint64_t KnownLen, uint64_t Limit, Align DstAlign,
280                              Align SrcAlign, bool IsVolatile);
281   LegalizeResult lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
282                               uint64_t KnownLen, Align DstAlign, Align SrcAlign,
283                               bool IsVolatile);
284 
285 public:
286   /// Return the alignment to use for a stack temporary object with the given
287   /// type.
288   Align getStackTemporaryAlignment(LLT Type, Align MinAlign = Align()) const;
289 
290   /// Create a stack temporary based on the size in bytes and the alignment.
      /// NOTE(review): \p PtrInfo is taken by non-const reference, so it is
      /// presumably filled in to describe the new stack slot -- confirm.
291   MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment,
292                                            MachinePointerInfo &PtrInfo);
293 
294   /// Get a pointer to vector element \p Index located in memory for a vector of
295   /// type \p VecTy starting at a base address of \p VecPtr. If \p Index is out
296   /// of bounds the returned pointer is unspecified, but will be within the
297   /// vector bounds.
298   Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index);
299 
300   /// Handles most opcodes. Split \p MI into same instruction on sub-vectors or
301   /// scalars with \p NumElts elements (1 for scalar). Supports uneven splits:
302   /// there can be a leftover sub-vector with fewer than \p NumElts or a leftover
303   /// scalar. To avoid this use moreElements first and set MI number of elements
304   /// to multiple of \p NumElts. Non-vector operands that should be used on all
305   /// sub-instructions without split are listed in \p NonVecOpIndices.
306   LegalizeResult fewerElementsVectorMultiEltType(
307       GenericMachineInstr &MI, unsigned NumElts,
308       std::initializer_list<unsigned> NonVecOpIndices = {});
309 
310   LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI,
311                                         unsigned NumElts);
312 
313   LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
314                                        LLT MoreTy);
315   LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx,
316                                            LLT MoreTy);
317 
318   LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI,
319                                                   unsigned TypeIdx,
320                                                   LLT NarrowTy);
321   LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
322                                           LLT NarrowTy);
323   LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
324                                                            unsigned TypeIdx,
325                                                            LLT NarrowTy);
326 
327   LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx,
328                                       LLT NarrowTy);
329 
330   LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
331                                              LLT HalfTy, LLT ShiftAmtTy);
332 
333   LegalizeResult fewerElementsVectorReductions(MachineInstr &MI,
334                                                unsigned TypeIdx, LLT NarrowTy);
335 
336   LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx,
337                                             LLT NarrowTy);
338 
339   LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
340   LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
341                                     LLT NarrowTy);
342   LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty);
343   LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
344   LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
345   LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
346 
347   LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
348   LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
349   LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
350   LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
351   LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
352   LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
353 
354   /// Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
355   LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
356                                          LLT CastTy);
357 
358   /// Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
359   LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
360                                         LLT CastTy);
361 
      // Per-operation lowering routines. NOTE(review): only the declarations are
      // visible in this header; see the implementation file for what each one
      // expands \p MI into.
362   LegalizeResult lowerBitcast(MachineInstr &MI);
363   LegalizeResult lowerLoad(GAnyLoad &MI);
364   LegalizeResult lowerStore(GStore &MI);
365   LegalizeResult lowerBitCount(MachineInstr &MI);
366   LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI);
367   LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI);
368   LegalizeResult lowerFunnelShift(MachineInstr &MI);
369   LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI);
370   LegalizeResult lowerRotate(MachineInstr &MI);
371 
372   LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI);
373   LegalizeResult lowerUITOFP(MachineInstr &MI);
374   LegalizeResult lowerSITOFP(MachineInstr &MI);
375   LegalizeResult lowerFPTOUI(MachineInstr &MI);
376   LegalizeResult lowerFPTOSI(MachineInstr &MI);
377 
378   LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI);
379   LegalizeResult lowerFPTRUNC(MachineInstr &MI);
380   LegalizeResult lowerFPOWI(MachineInstr &MI);
381 
382   LegalizeResult lowerISFPCLASS(MachineInstr &MI);
383 
384   LegalizeResult lowerMinMax(MachineInstr &MI);
385   LegalizeResult lowerFCopySign(MachineInstr &MI);
386   LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI);
387   LegalizeResult lowerFMad(MachineInstr &MI);
388   LegalizeResult lowerIntrinsicRound(MachineInstr &MI);
389   LegalizeResult lowerFFloor(MachineInstr &MI);
390   LegalizeResult lowerMergeValues(MachineInstr &MI);
391   LegalizeResult lowerUnmergeValues(MachineInstr &MI);
392   LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI);
393   LegalizeResult lowerShuffleVector(MachineInstr &MI);
394   LegalizeResult lowerDynStackAlloc(MachineInstr &MI);
395   LegalizeResult lowerExtract(MachineInstr &MI);
396   LegalizeResult lowerInsert(MachineInstr &MI);
397   LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI);
398   LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI);
399   LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI);
400   LegalizeResult lowerShlSat(MachineInstr &MI);
401   LegalizeResult lowerBswap(MachineInstr &MI);
402   LegalizeResult lowerBitreverse(MachineInstr &MI);
403   LegalizeResult lowerReadWriteRegister(MachineInstr &MI);
404   LegalizeResult lowerSMULH_UMULH(MachineInstr &MI);
405   LegalizeResult lowerSelect(MachineInstr &MI);
406   LegalizeResult lowerDIVREM(MachineInstr &MI);
407   LegalizeResult lowerAbsToAddXor(MachineInstr &MI);
408   LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI);
409   LegalizeResult lowerVectorReduction(MachineInstr &MI);
410   LegalizeResult lowerMemcpyInline(MachineInstr &MI);
411   LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
412 };
413 
414 /// Helper function that creates a libcall to the given \p Name using the given
415 /// calling convention \p CC.
    /// \p Result and \p Args are CallLowering::ArgInfo values which, going by
    /// their names, describe the call's result and its arguments. Instructions
    /// are presumably emitted through \p MIRBuilder (implementation not visible
    /// in this header). The outcome is reported as a LegalizeResult.
416 LegalizerHelper::LegalizeResult
417 createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
418               const CallLowering::ArgInfo &Result,
419               ArrayRef<CallLowering::ArgInfo> Args, CallingConv::ID CC);
420 
421 /// Helper function that creates the given libcall.
    /// Unlike the overload that takes a name, the callee is identified by the
    /// RTLIB::Libcall value \p Libcall and no calling convention is passed in.
    /// NOTE(review): the name and calling convention are presumably looked up
    /// from \p Libcall -- confirm in the implementation.
422 LegalizerHelper::LegalizeResult
423 createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
424               const CallLowering::ArgInfo &Result,
425               ArrayRef<CallLowering::ArgInfo> Args);
426 
427 /// Create a libcall to memcpy et al.
    /// NOTE(review): \p MI is presumably the memory instruction that the call
    /// replaces, with \p MRI and \p LocObserver supporting that replacement --
    /// the header does not say; confirm in the implementation.
428 LegalizerHelper::LegalizeResult
429 createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
430                  MachineInstr &MI, LostDebugLocObserver &LocObserver);
431 
432 } // End namespace llvm.
433 
434 #endif
435