//===- subzero/src/IceTargetLoweringARM32.h - ARM32 lowering ----*- C++ -*-===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Declares the TargetLoweringARM32 class, which implements the
/// TargetLowering interface for the ARM 32-bit architecture.
///
//===----------------------------------------------------------------------===//

#ifndef SUBZERO_SRC_ICETARGETLOWERINGARM32_H
#define SUBZERO_SRC_ICETARGETLOWERINGARM32_H

#include "IceAssemblerARM32.h"
#include "IceDefs.h"
#include "IceInstARM32.h"
#include "IceRegistersARM32.h"
#include "IceTargetLowering.h"

#include <utility>

namespace Ice {
namespace ARM32 {

// Class encapsulating ARM cpu features / instruction set.
class TargetARM32Features {
  TargetARM32Features() = delete;
  TargetARM32Features(const TargetARM32Features &) = delete;
  TargetARM32Features &operator=(const TargetARM32Features &) = delete;

public:
  explicit TargetARM32Features(const ClFlags &Flags);

  enum ARM32InstructionSet {
    Begin,
    // Neon is the PNaCl baseline instruction set.
    Neon = Begin,
    HWDivArm, // HW divide in ARM mode (not just Thumb mode).
    End
  };

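  // Note: the enumerators are ordered so that each instruction set subsumes
  // the ones before it; hasFeature() below relies on this ordering.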
  bool hasFeature(ARM32InstructionSet I) const { return I <= InstructionSet; }

private:
  ARM32InstructionSet InstructionSet = ARM32InstructionSet::Begin;
};

// The target lowering logic for ARM32.
class TargetARM32 : public TargetLowering {
  TargetARM32() = delete;
  TargetARM32(const TargetARM32 &) = delete;
  TargetARM32 &operator=(const TargetARM32 &) = delete;

public:
  static void staticInit(GlobalContext *Ctx);

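  // Floating-point constants are pooled (emitted in a read-only section and
  // loaded via a constant-pool reference) except for +0.0 doubles, which the
  // lowering can materialize directly in a register.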
  static bool shouldBePooled(const Constant *C) {
    if (auto *ConstDouble = llvm::dyn_cast<ConstantDouble>(C)) {
      return !Utils::isPositiveZero(ConstDouble->getValue());
    }
    if (llvm::isa<ConstantFloat>(C))
      return true;
    return false;
  }

  static ::Ice::Type getPointerType() { return ::Ice::IceType_i32; }

  // TODO(jvoung): return a unique_ptr.
  static std::unique_ptr<::Ice::TargetLowering> create(Cfg *Func) {
    return makeUnique<TargetARM32>(Func);
  }

  std::unique_ptr<::Ice::Assembler> createAssembler() const override {
    const bool IsNonsfi = SandboxingType == ST_Nonsfi;
    return makeUnique<ARM32::AssemblerARM32>(IsNonsfi);
  }

  void initNodeForLowering(CfgNode *Node) override {
    Computations.forgetProducers();
    Computations.recordProducers(Node);
    Computations.dump(Func);
  }

  void translateOm1() override;
  void translateO2() override;
  bool doBranchOpt(Inst *I, const CfgNode *NextNode) override;

  SizeT getNumRegisters() const override { return RegARM32::Reg_NUM; }
  Variable *getPhysicalRegister(RegNumT RegNum,
                                Type Ty = IceType_void) override;
  const char *getRegName(RegNumT RegNum, Type Ty) const override;
  SmallBitVector getRegisterSet(RegSetMask Include,
                                RegSetMask Exclude) const override;
  const SmallBitVector &
  getRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    switch (RC) {
    default:
      assert(RC < RC_Target);
      return TypeToRegisterSet[RC];
    case RegARM32::RCARM32_QtoS:
      return TypeToRegisterSet[RC];
    }
  }
  const SmallBitVector &
  getAllRegistersForVariable(const Variable *Var) const override {
    RegClass RC = Var->getRegClass();
    assert((RegARM32::RegClassARM32)RC < RegARM32::RCARM32_NUM);
    return TypeToRegisterSetUnfiltered[RC];
  }
  const SmallBitVector &getAliasesForRegister(RegNumT Reg) const override {
    return RegisterAliases[Reg];
  }
  bool hasFramePointer() const override { return UsesFramePointer; }
  void setHasFramePointer() override { UsesFramePointer = true; }
  RegNumT getStackReg() const override { return RegARM32::Reg_sp; }
  RegNumT getFrameReg() const override { return RegARM32::Reg_fp; }
  RegNumT getFrameOrStackReg() const override {
    return UsesFramePointer ? getFrameReg() : getStackReg();
  }
  RegNumT getReservedTmpReg() const { return RegARM32::Reg_ip; }

  size_t typeWidthInBytesOnStack(Type Ty) const override {
    // Round up to the next multiple of 4 bytes. In particular, i1, i8, and i16
    // are rounded up to 4 bytes.
    return (typeWidthInBytes(Ty) + 3) & ~3;
  }
  uint32_t getStackAlignment() const override;
  void reserveFixedAllocaArea(size_t Size, size_t Align) override {
    FixedAllocaSizeBytes = Size;
    assert(llvm::isPowerOf2_32(Align));
    FixedAllocaAlignBytes = Align;
    PrologEmitsFixedAllocas = true;
  }
  int32_t getFrameFixedAllocaOffset() const override {
    return FixedAllocaSizeBytes - (SpillAreaSizeBytes - MaxOutArgsSizeBytes);
  }
  uint32_t maxOutArgsSizeBytes() const override { return MaxOutArgsSizeBytes; }

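  // i64 values do not fit in a single ARM32 GPR, so they are split into a
  // lo/hi Variable64On32 pair and lowered as two 32-bit operations (see,
  // e.g., lowerInt64Arithmetic() and makeI64RegPair() below).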
  bool shouldSplitToVariable64On32(Type Ty) const override {
    return Ty == IceType_i64;
  }

  // TODO(ascull): what size is best for ARM?
  SizeT getMinJumpTableSize() const override { return 3; }
  void emitJumpTable(const Cfg *Func,
                     const InstJumpTable *JumpTable) const override;

  void emitVariable(const Variable *Var) const override;

  void emit(const ConstantUndef *C) const final;
  void emit(const ConstantInteger32 *C) const final;
  void emit(const ConstantInteger64 *C) const final;
  void emit(const ConstantFloat *C) const final;
  void emit(const ConstantDouble *C) const final;
  void emit(const ConstantRelocatable *C) const final;

  void lowerArguments() override;
  void addProlog(CfgNode *Node) override;
  void addEpilog(CfgNode *Node) override;

  Operand *loOperand(Operand *Operand);
  Operand *hiOperand(Operand *Operand);
  void finishArgumentLowering(Variable *Arg, Variable *FramePtr,
                              size_t BasicFrameOffset, size_t *InArgsSizeBytes);

  bool hasCPUFeature(TargetARM32Features::ARM32InstructionSet I) const {
    return CPUFeatures.hasFeature(I);
  }

  enum OperandLegalization {
    Legal_Reg = 1 << 0,  /// physical register, not stack location
    Legal_Flex = 1 << 1, /// A flexible operand2, which can hold rotated small
                         /// immediates, shifted registers, or modified fp imm.
    Legal_Mem = 1 << 2,  /// includes [r0, r1 lsl #2] as well as [sp, #12]
    Legal_Rematerializable = 1 << 3,
    Legal_Default = ~Legal_Rematerializable,
  };
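  // As a reminder, an Operand2 rotated immediate is an 8-bit value rotated
  // right by an even amount; e.g., 0xff00 is encodable as a flexible
  // immediate, while 0x101 is not and must be legalized into a register.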

  using LegalMask = uint32_t;
  Operand *legalizeUndef(Operand *From, RegNumT RegNum = RegNumT());
  Operand *legalize(Operand *From, LegalMask Allowed = Legal_Default,
                    RegNumT RegNum = RegNumT());
  Variable *legalizeToReg(Operand *From, RegNumT RegNum = RegNumT());
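  // A typical use in the lowering code (a sketch): the second source of a
  // binary operation is commonly legalized with
  //   Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  // so that it can end up either in a register or as an Operand2 flexible
  // operand, whichever is cheaper to encode.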

  OperandARM32ShAmtImm *shAmtImm(uint32_t ShAmtImm) const {
    assert(ShAmtImm < 32);
    return OperandARM32ShAmtImm::create(
        Func,
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
  }

  GlobalContext *getCtx() const { return Ctx; }

protected:
  explicit TargetARM32(Cfg *Func);

  void postLower() override;

  enum SafeBoolChain {
    SBC_No,
    SBC_Yes,
  };

  void lowerAlloca(const InstAlloca *Instr) override;
  SafeBoolChain lowerInt1Arithmetic(const InstArithmetic *Instr);
  void lowerInt64Arithmetic(InstArithmetic::OpKind Op, Variable *Dest,
                            Operand *Src0, Operand *Src1);
  void lowerArithmetic(const InstArithmetic *Instr) override;
  void lowerAssign(const InstAssign *Instr) override;
  void lowerBr(const InstBr *Instr) override;
  void lowerCall(const InstCall *Instr) override;
  void lowerCast(const InstCast *Instr) override;
  void lowerExtractElement(const InstExtractElement *Instr) override;

  /// CondWhenTrue is a helper type returned by every method in the lowering
  /// that emits code to set the condition codes.
  class CondWhenTrue {
  public:
    explicit CondWhenTrue(CondARM32::Cond T0,
                          CondARM32::Cond T1 = CondARM32::kNone)
        : WhenTrue0(T0), WhenTrue1(T1) {
      assert(T1 == CondARM32::kNone || T0 != CondARM32::kNone);
      assert(T1 != T0 || T0 == CondARM32::kNone);
    }
    CondARM32::Cond WhenTrue0;
    CondARM32::Cond WhenTrue1;

    /// invert returns a new object with WhenTrue0 and WhenTrue1 inverted.
    CondWhenTrue invert() const {
      switch (WhenTrue0) {
      default:
        if (WhenTrue1 == CondARM32::kNone)
          return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0));
        return CondWhenTrue(InstARM32::getOppositeCondition(WhenTrue0),
                            InstARM32::getOppositeCondition(WhenTrue1));
      case CondARM32::AL:
        return CondWhenTrue(CondARM32::kNone);
      case CondARM32::kNone:
        return CondWhenTrue(CondARM32::AL);
      }
    }
  };

  CondWhenTrue lowerFcmpCond(const InstFcmp *Instr);
  void lowerFcmp(const InstFcmp *Instr) override;
  CondWhenTrue lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition,
                                         Operand *Src0, Operand *Src1);
  CondWhenTrue lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                  Operand *Src1);
  CondWhenTrue lowerIcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                             Operand *Src1);
  CondWhenTrue lowerIcmpCond(const InstIcmp *Instr);
  void lowerIcmp(const InstIcmp *Instr) override;
  /// Emits the basic sequence for load-linked/store-exclusive loops:
  ///
  /// retry:
  ///        ldrex tmp, [Addr]
  ///        StoreValue = Operation(tmp)
  ///        strexCond success, StoreValue, [Addr]
  ///        cmpCond success, #0
  ///        bne retry
  ///
  /// Operation must return the value to be strex'd to Addr; it must not
  /// change the flags if Cond is not AL, and must not emit any instructions
  /// that could end up writing to memory. Operation is also responsible for
  /// the fake-defs needed in the i64 case.
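  ///
  /// For example, an atomic 32-bit add could be lowered with a sketch like
  /// the following (Addr and Val as in the surrounding lowering code):
  ///
  ///   lowerLoadLinkedStoreExclusive(
  ///       IceType_i32, Addr, [this, Val](Variable *Tmp) {
  ///         Variable *Result = makeReg(IceType_i32);
  ///         _add(Result, Tmp, Val);
  ///         return Result;
  ///       });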
  void
  lowerLoadLinkedStoreExclusive(Type Ty, Operand *Addr,
                                std::function<Variable *(Variable *)> Operation,
                                CondARM32::Cond Cond = CondARM32::AL);
  void lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                           Operand *Val);
  void lowerAtomicRMW(Variable *Dest, uint32_t Operation, Operand *Ptr,
                      Operand *Val);
  void lowerBreakpoint(const InstBreakpoint *Instr) override;
  void lowerIntrinsicCall(const InstIntrinsicCall *Instr) override;
  void lowerInsertElement(const InstInsertElement *Instr) override;
  void lowerLoad(const InstLoad *Instr) override;
  void lowerPhi(const InstPhi *Instr) override;
  void lowerRet(const InstRet *Instr) override;
  void lowerSelect(const InstSelect *Instr) override;
  void lowerShuffleVector(const InstShuffleVector *Instr) override;
  void lowerStore(const InstStore *Instr) override;
  void lowerSwitch(const InstSwitch *Instr) override;
  void lowerUnreachable(const InstUnreachable *Instr) override;
  void prelowerPhis() override;
  uint32_t getCallStackArgumentsSizeBytes(const InstCall *Instr) override;
  void genTargetHelperCallFor(Inst *Instr) override;
  void doAddressOptLoad() override;
  void doAddressOptStore() override;
  void randomlyInsertNop(float Probability,
                         RandomNumberGenerator &RNG) override;

  OperandARM32Mem *formMemoryOperand(Operand *Ptr, Type Ty);

  Variable64On32 *makeI64RegPair();
  Variable *makeReg(Type Ty, RegNumT RegNum = RegNumT());
  static Type stackSlotType();
  Variable *copyToReg(Operand *Src, RegNumT RegNum = RegNumT());
  void alignRegisterPow2(Variable *Reg, uint32_t Align,
                         RegNumT TmpRegNum = RegNumT());

  /// Returns a vector in a register with the given constant entries.
  Variable *makeVectorOfZeros(Type Ty, RegNumT RegNum = RegNumT());

  void
  makeRandomRegisterPermutation(llvm::SmallVectorImpl<RegNumT> &Permutation,
                                const SmallBitVector &ExcludeRegisters,
                                uint64_t Salt) const override;

  // If a divide-by-zero check is needed, inserts: test; branch .LSKIP; trap;
  // .LSKIP: <continuation>. If no check is needed, nothing is inserted.
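  // For an i64 divisor, one possible expansion (a sketch) orrs the two halves
  // together to set the flags:
  //   orrs scratch, SrcLo, SrcHi
  //   bne .LSKIP
  //   <trap>
  // .LSKIP: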
  void div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi);
  using ExtInstr = void (TargetARM32::*)(Variable *, Variable *,
                                         CondARM32::Cond);
  using DivInstr = void (TargetARM32::*)(Variable *, Variable *, Variable *,
                                         CondARM32::Cond);
  void lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R, Operand *Src1,
                    ExtInstr ExtFunc, DivInstr DivFunc, bool IsRemainder);

  void lowerCLZ(Variable *Dest, Variable *ValLo, Variable *ValHi);

  // The following are helpers that insert lowered ARM32 instructions with
  // minimal syntactic overhead, so that the lowering code can look as close to
  // assembly as practical.
  void _add(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred);
  }
  void _adds(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Add>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _adc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Adc>(Dest, Src0, Src1, Pred);
  }
  void _and(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32And>(Dest, Src0, Src1, Pred);
  }
  void _asr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Asr>(Dest, Src0, Src1, Pred);
  }
  void _bic(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Bic>(Dest, Src0, Src1, Pred);
  }
  void _br(CfgNode *TargetTrue, CfgNode *TargetFalse,
           CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(TargetTrue, TargetFalse, Condition);
  }
  void _br(CfgNode *Target) { Context.insert<InstARM32Br>(Target); }
  void _br(CfgNode *Target, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Target, Condition);
  }
  void _br(InstARM32Label *Label, CondARM32::Cond Condition) {
    Context.insert<InstARM32Br>(Label, Condition);
  }
  void _cmn(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmn>(Src0, Src1, Pred);
  }
  void _cmp(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Cmp>(Src0, Src1, Pred);
  }
  void _clz(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Clz>(Dest, Src0, Pred);
  }
  void _dmb() { Context.insert<InstARM32Dmb>(); }
  void _eor(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Eor>(Dest, Src0, Src1, Pred);
  }
  /// _ldr, for all your memory to Variable data moves. It handles all types
  /// (integer, floating point, and vectors.) Addr needs to be valid for Dest's
  /// type (e.g., no immediates for vector loads, and no index registers for fp
  /// loads.)
  void _ldr(Variable *Dest, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Ldr>(Dest, Addr, Pred);
  }
  InstARM32Ldrex *_ldrex(Variable *Dest, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    auto *Ldrex = Context.insert<InstARM32Ldrex>(Dest, Addr, Pred);
    if (auto *Dest64 = llvm::dyn_cast<Variable64On32>(Dest)) {
      Context.insert<InstFakeDef>(Dest64->getLo(), Dest);
      Context.insert<InstFakeDef>(Dest64->getHi(), Dest);
    }
    return Ldrex;
  }
  void _lsl(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred);
  }
  void _lsls(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Lsl>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _lsr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Lsr>(Dest, Src0, Src1, Pred);
  }
  void _mla(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mla>(Dest, Src0, Src1, Acc, Pred);
  }
  void _mls(Variable *Dest, Variable *Src0, Variable *Src1, Variable *Acc,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mls>(Dest, Src0, Src1, Acc, Pred);
  }
  /// _mov, for all your Variable to Variable data movement needs. It handles
  /// all types (integer, floating point, and vectors), as well as moves between
  /// Core and VFP registers. This is not a panacea: you must obey the (weird,
  /// confusing, non-uniform) rules for data moves in ARM.
  void _mov(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    // _mov used to be unique in the sense that it would create a temporary
    // automagically if Dest was nullptr. It won't do that anymore, so we keep
    // an assert around just in case there is some untested code path where Dest
    // is nullptr.
    assert(Dest != nullptr);
    assert(!llvm::isa<OperandARM32Mem>(Src0));
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);

    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _mov_redefined(Variable *Dest, Operand *Src0,
                      CondARM32::Cond Pred = CondARM32::AL) {
    auto *Instr = Context.insert<InstARM32Mov>(Dest, Src0, Pred);
    Instr->setDestRedefined();
    if (Instr->isMultiDest()) {
      // If Instr is multi-dest, then Dest must be a Variable64On32. We add a
      // fake-def for Instr.DestHi here.
      assert(llvm::isa<Variable64On32>(Dest));
      Context.insert<InstFakeDef>(Instr->getDestHi());
    }
  }

  void _nop() { Context.insert<InstARM32Nop>(); }

  // Generates a vmov instruction to extract the given index from a vector
  // register.
  void _extractelement(Variable *Dest, Variable *Src0, uint32_t Index,
                       CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Extract>(Dest, Src0, Index, Pred);
  }

  // Generates a vmov instruction to insert a value into the given index of a
  // vector register.
  void _insertelement(Variable *Dest, Variable *Src0, uint32_t Index,
                      CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Insert>(Dest, Src0, Index, Pred);
  }

  // --------------------------------------------------------------------------
  // Begin bool folding machinery.
  //
  // There are three types of boolean lowerings handled by this target:
  //
  // 1) Boolean expressions leading to a boolean Variable definition
  // ---------------------------------------------------------------
  //
  // Whenever an i1 Variable is live out (i.e., its live range extends beyond
  // the defining basic block) we do not fold the operation. We instead
  // materialize (i.e., compute) the variable normally, so that it can be used
  // when needed. We also materialize i1 values that are not single use to
  // avoid code duplication. These expressions are not short circuited.
  //
  // 2) Boolean expressions leading to a select
  // ------------------------------------------
  //
  // These include boolean chains leading to a select instruction, as well as
  // i1 Sexts. These boolean expressions are lowered to:
  //
  // mov T, <false value>
  // CC <- eval(Boolean Expression)
  // movCC T, <true value>
  //
  // For Sexts, <false value> is 0, and <true value> is -1.
  //
  // 3) Boolean expressions leading to a br i1
  // -----------------------------------------
  //
  // These are the boolean chains leading to a branch. These chains are
  // short-circuited, i.e.:
  //
  //   A = or i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jne %T
  //   tst C
  //   jne %T
  //   j %F
  //
  // and
  //
  //   A = and i1 B, C
  //   br i1 A, label %T, label %F
  //
  // becomes
  //
  //   tst B
  //   jeq %F
  //   tst C
  //   jeq %F
  //   j %T
  //
  // Arbitrarily long chains are short circuited, e.g.,
  //
  //   A = or  i1 B, C
  //   D = and i1 A, E
  //   F = and i1 G, H
  //   I = or i1 D, F
  //   br i1 I, label %True, label %False
  //
  // becomes
  //
  // Label[A]:
  //   tst B, 1
  //   bne Label[D]
  //   tst C, 1
  //   beq Label[I]
  // Label[D]:
  //   tst E, 1
  //   bne %True
  // Label[I]:
  //   tst G, 1
  //   beq %False
  //   tst H, 1
  //   beq %False (bne %True)

  /// lowerInt1 materializes Boolean to a Variable.
  SafeBoolChain lowerInt1(Variable *Dest, Operand *Boolean);

  /// lowerInt1ForSelect generates the following instruction sequence:
  ///
  ///   mov T, FalseValue
  ///   CC <- eval(Boolean)
  ///   movCC T, TrueValue
  ///   mov Dest, T
  ///
  /// It is used for lowering select i1, as well as i1 Sext.
  void lowerInt1ForSelect(Variable *Dest, Operand *Boolean, Operand *TrueValue,
                          Operand *FalseValue);

  /// LowerInt1BranchTarget is used by lowerInt1ForBranch. It wraps a CfgNode,
  /// or an InstARM32Label (but never both) so that, during br i1 lowering, we
  /// can create auxiliary labels for short circuiting the condition
  /// evaluation.
  class LowerInt1BranchTarget {
  public:
    explicit LowerInt1BranchTarget(CfgNode *const Target)
        : NodeTarget(Target) {}
    explicit LowerInt1BranchTarget(InstARM32Label *const Target)
        : LabelTarget(Target) {}

    /// createForLabelOrDuplicate will return a new LowerInt1BranchTarget that
    /// is the exact copy of this if Label is nullptr; otherwise, the returned
    /// object will wrap Label instead.
    LowerInt1BranchTarget
    createForLabelOrDuplicate(InstARM32Label *Label) const {
      if (Label != nullptr)
        return LowerInt1BranchTarget(Label);
      if (NodeTarget)
        return LowerInt1BranchTarget(NodeTarget);
      return LowerInt1BranchTarget(LabelTarget);
    }

    CfgNode *const NodeTarget = nullptr;
    InstARM32Label *const LabelTarget = nullptr;
  };

  /// LowerInt1AllowShortCircuit is a helper type used by lowerInt1ForBranch
  /// for determining which types of arithmetic are allowed to be short
  /// circuited. This is useful for lowering
  ///
  ///   t1 = and i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// to
  ///
  ///   tst A, 1
  ///   beq %False
  ///   tst B, 1
  ///   beq %False
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// Without this information, short circuiting would only be able to short
  /// circuit a single high level instruction. For example:
  ///
  ///   t1 = or i1 A, B
  ///   t2 = and i1 t1, C
  ///   br i1 t2, label %True, label %False
  ///
  /// cannot be lowered to
  ///
  ///   tst A, 1
  ///   bne %True
  ///   tst B, 1
  ///   bne %True
  ///   tst C, 1
  ///   beq %True
  ///   b %False
  ///
  /// It needs to be lowered to
  ///
  ///   tst A, 1
  ///   bne Aux
  ///   tst B, 1
  ///   beq %False
  /// Aux:
  ///   tst C, 1
  ///   bne %True
  ///   b %False
  ///
  /// TODO(jpp): evaluate if this kind of short circuiting hurts performance (it
  /// might.)
  enum LowerInt1AllowShortCircuit {
    SC_And = 1,
    SC_Or = 2,
    SC_All = SC_And | SC_Or,
  };

  /// ShortCircuitCondAndLabel wraps the condition codes that should be used
  /// after a lowerInt1ForBranch returns to branch to the
  /// TrueTarget/FalseTarget. If ShortCircuitTarget is not nullptr, then the
  /// called lowerInt1ForBranch created an internal (i.e., short-circuit) label
  /// used for short circuiting.
  class ShortCircuitCondAndLabel {
  public:
    explicit ShortCircuitCondAndLabel(CondWhenTrue &&C,
                                      InstARM32Label *L = nullptr)
        : Cond(std::move(C)), ShortCircuitTarget(L) {}
    const CondWhenTrue Cond;
    InstARM32Label *const ShortCircuitTarget;

    CondWhenTrue assertNoLabelAndReturnCond() const {
      assert(ShortCircuitTarget == nullptr);
      return Cond;
    }
  };

  /// lowerInt1ForBranch expands Boolean, and returns the condition codes that
  /// are to be used for branching to the branch's TrueTarget. It may return a
  /// label that the expansion of Boolean used to short circuit the chain's
  /// evaluation.
  ShortCircuitCondAndLabel
  lowerInt1ForBranch(Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
                     const LowerInt1BranchTarget &TargetFalse,
                     uint32_t ShortCircuitable);

  // _br is a convenience wrapper that emits br instructions to Target.
  void _br(const LowerInt1BranchTarget &BrTarget,
           CondARM32::Cond Cond = CondARM32::AL) {
    assert((BrTarget.NodeTarget == nullptr) !=
           (BrTarget.LabelTarget == nullptr));
    if (BrTarget.NodeTarget != nullptr)
      _br(BrTarget.NodeTarget, Cond);
    else
      _br(BrTarget.LabelTarget, Cond);
  }

  // _br_short_circuit is used when lowering InstArithmetic::And and
  // InstArithmetic::Or and a short circuit branch is needed.
  void _br_short_circuit(const LowerInt1BranchTarget &Target,
                         const CondWhenTrue &Cond) {
    if (Cond.WhenTrue1 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue1);
    }
    if (Cond.WhenTrue0 != CondARM32::kNone) {
      _br(Target, Cond.WhenTrue0);
    }
  }
  // End of bool folding machinery
  // --------------------------------------------------------------------------

  /// The Operand can only be a 16-bit immediate or a ConstantRelocatable (with
  /// an upper16 relocation).
  void _movt(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movt>(Dest, Src0, Pred);
  }
  void _movw(Variable *Dest, Operand *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Movw>(Dest, Src0, Pred);
  }
  void _mul(Variable *Dest, Variable *Src0, Variable *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mul>(Dest, Src0, Src1, Pred);
  }
  void _mvn(Variable *Dest, Operand *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Mvn>(Dest, Src0, Pred);
  }
  void _orr(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred);
  }
  void _orrs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Orr>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _push(const VarList &Sources) { Context.insert<InstARM32Push>(Sources); }
  void _pop(const VarList &Dests) {
    Context.insert<InstARM32Pop>(Dests);
    // Mark dests as modified.
    for (Variable *Dest : Dests)
      Context.insert<InstFakeDef>(Dest);
  }
  void _rbit(Variable *Dest, Variable *Src0,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rbit>(Dest, Src0, Pred);
  }
  void _rev(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rev>(Dest, Src0, Pred);
  }
  void _ret(Variable *LR, Variable *Src0 = nullptr) {
    Context.insert<InstARM32Ret>(LR, Src0);
  }
  void _rscs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsc>(Dest, Src0, Src1, Pred);
  }
  void _rsbs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _rsb(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Rsb>(Dest, Src0, Src1, Pred);
  }
  void _sbc(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred);
  }
  void _sbcs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sbc>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sdiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sdiv>(Dest, Src0, Src1, Pred);
  }
  /// _str, for all your Variable to memory transfers. Addr has the same
  /// restrictions that it does in _ldr.
  void _str(Variable *Value, OperandARM32Mem *Addr,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Str>(Value, Addr, Pred);
  }
  InstARM32Strex *_strex(Variable *Dest, Variable *Value, OperandARM32Mem *Addr,
                         CondARM32::Cond Pred = CondARM32::AL) {
    if (auto *Value64 = llvm::dyn_cast<Variable64On32>(Value)) {
      Context.insert<InstFakeUse>(Value64->getLo());
      Context.insert<InstFakeUse>(Value64->getHi());
    }
    return Context.insert<InstARM32Strex>(Dest, Value, Addr, Pred);
  }
  void _sub(Variable *Dest, Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred);
  }
  void _subs(Variable *Dest, Variable *Src0, Operand *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    constexpr bool SetFlags = true;
    Context.insert<InstARM32Sub>(Dest, Src0, Src1, Pred, SetFlags);
    if (SetFlags) {
      Context.insert<InstFakeUse>(Dest);
    }
  }
  void _sxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Sxt>(Dest, Src0, Pred);
  }
  void _tst(Variable *Src0, Operand *Src1,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Tst>(Src0, Src1, Pred);
  }
  void _trap() { Context.insert<InstARM32Trap>(); }
  void _udiv(Variable *Dest, Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Udiv>(Dest, Src0, Src1, Pred);
  }
  void _umull(Variable *DestLo, Variable *DestHi, Variable *Src0,
              Variable *Src1, CondARM32::Cond Pred = CondARM32::AL) {
    // umull requires DestLo and DestHi to be assigned to different GPRs. The
    // following lines create overlapping liveness ranges for both variables. If
    // either one of them is live, then they are both going to be live, and thus
    // assigned to different registers; if they are both dead, then DCE will
    // kick in and delete the following three instructions.
    Context.insert<InstFakeDef>(DestHi);
    Context.insert<InstARM32Umull>(DestLo, DestHi, Src0, Src1, Pred);
    Context.insert<InstFakeDef>(DestHi, DestLo)->setDestRedefined();
    Context.insert<InstFakeUse>(DestHi);
  }
  void _uxt(Variable *Dest, Variable *Src0,
            CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Uxt>(Dest, Src0, Pred);
  }
  void _vabs(Variable *Dest, Variable *Src,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vabs>(Dest, Src, Pred);
  }
  void _vadd(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vadd>(Dest, Src0, Src1);
  }
  void _vand(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vand>(Dest, Src0, Src1);
  }
  InstARM32Vbsl *_vbsl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vbsl>(Dest, Src0, Src1);
  }
  void _vceq(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vceq>(Dest, Src0, Src1);
  }
  InstARM32Vcge *_vcge(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcge>(Dest, Src0, Src1);
  }
  InstARM32Vcgt *_vcgt(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vcgt>(Dest, Src0, Src1);
  }
  void _vcvt(Variable *Dest, Variable *Src, InstARM32Vcvt::VcvtVariant Variant,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcvt>(Dest, Src, Variant, Pred);
  }
  void _vdiv(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vdiv>(Dest, Src0, Src1);
  }
  void _vcmp(Variable *Src0, Variable *Src1,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, Src1, Pred);
  }
  void _vcmp(Variable *Src0, OperandARM32FlexFpZero *FpZero,
             CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vcmp>(Src0, FpZero, Pred);
  }
  void _vdup(Variable *Dest, Variable *Src, int Idx) {
    Context.insert<InstARM32Vdup>(Dest, Src, Idx);
  }
  void _veor(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Veor>(Dest, Src0, Src1);
  }
  void _vldr1d(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1d>(Dest, Addr, Pred);
  }
  void _vldr1q(Variable *Dest, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vldr1q>(Dest, Addr, Pred);
  }
  void _vmrs(CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vmrs>(Pred);
  }
  void _vmla(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmla>(Dest, Src0, Src1);
  }
  void _vmlap(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmlap>(Dest, Src0, Src1);
  }
  void _vmls(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmls>(Dest, Src0, Src1);
  }
  void _vmovl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovl>(Dest, Src0, Src1);
  }
  void _vmovh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovh>(Dest, Src0, Src1);
  }
  void _vmovhl(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovhl>(Dest, Src0, Src1);
  }
  void _vmovlh(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmovlh>(Dest, Src0, Src1);
  }
  void _vmul(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vmul>(Dest, Src0, Src1);
  }
  void _vmulh(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vmulh>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vmvn(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vmvn>(Dest, Src0, CondARM32::AL);
  }
  void _vneg(Variable *Dest, Variable *Src0) {
    Context.insert<InstARM32Vneg>(Dest, Src0, CondARM32::AL)
        ->setSignType(InstARM32::FS_Signed);
  }
  void _vorr(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vorr>(Dest, Src0, Src1);
  }
  void _vqadd(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqadd>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  void _vqmovn2(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned,
                bool Saturating) {
    Context.insert<InstARM32Vqmovn2>(Dest, Src0, Src1)
        ->setSignType(Saturating ? (Unsigned ? InstARM32::FS_Unsigned
                                             : InstARM32::FS_Signed)
                                 : InstARM32::FS_None);
  }
  void _vqsub(Variable *Dest, Variable *Src0, Variable *Src1, bool Unsigned) {
    Context.insert<InstARM32Vqsub>(Dest, Src0, Src1)
        ->setSignType(Unsigned ? InstARM32::FS_Unsigned : InstARM32::FS_Signed);
  }
  InstARM32Vshl *_vshl(Variable *Dest, Variable *Src0, Variable *Src1) {
    return Context.insert<InstARM32Vshl>(Dest, Src0, Src1);
  }
  void _vshl(Variable *Dest, Variable *Src0, ConstantInteger32 *Src1) {
    Context.insert<InstARM32Vshl>(Dest, Src0, Src1)
        ->setSignType(InstARM32::FS_Unsigned);
  }
  InstARM32Vshr *_vshr(Variable *Dest, Variable *Src0,
                       ConstantInteger32 *Src1) {
    return Context.insert<InstARM32Vshr>(Dest, Src0, Src1);
  }
  void _vsqrt(Variable *Dest, Variable *Src,
              CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vsqrt>(Dest, Src, Pred);
  }
  void _vstr1d(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 32);
  }
  void _vstr1q(Variable *Value, OperandARM32Mem *Addr,
               CondARM32::Cond Pred = CondARM32::AL) {
    Context.insert<InstARM32Vstr1>(Value, Addr, Pred, 64);
  }
  void _vsub(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vsub>(Dest, Src0, Src1);
  }
  void _vzip(Variable *Dest, Variable *Src0, Variable *Src1) {
    Context.insert<InstARM32Vzip>(Dest, Src0, Src1);
  }

  // Iterates over the CFG and determines the maximum number of bytes of
  // outgoing stack arguments. This information is later used during
  // addProlog() to pre-allocate the outargs area.
  // TODO(jpp): This could live in the Parser, if we provided a Target-specific
  // method that the Parser could call.
  void findMaxStackOutArgsSize();

  /// Returns true if the given Offset can be represented in a Load/Store Mem
  /// Operand.
  bool isLegalMemOffset(Type Ty, int32_t Offset) const;

  void postLowerLegalization();

  /// Manages the GotPtr variable, which is used for Nonsfi sandboxing.
  /// @{
  void createGotPtr();
  void insertGotPtrInitPlaceholder();
  VariableDeclaration *createGotRelocation(RelocOffset *AddPcReloc);
  void materializeGotAddr(CfgNode *Node);
  Variable *GotPtr = nullptr;
  // TODO(jpp): use CfgLocalAllocator.
  /// @}

  /// Manages the Gotoff relocations created during the function lowering. A
  /// single Gotoff relocation is created for each global variable used by the
  /// function being lowered.
  /// @{
  // TODO(jpp): if the same global G is used in different functions, then this
  // method will emit one G(gotoff) relocation per function.
  GlobalString createGotoffRelocation(const ConstantRelocatable *CR);
  CfgUnorderedSet<GlobalString> KnownGotoffs;
  /// @}

  /// Loads the constant relocatable Name into Register, then invokes Finish to
  /// complete the relocatable lowering. Finish **must** use PC in its first
  /// emitted instruction, or the relocatable in Register will contain the wrong
  /// value.
  //
  // Lowered sequence:
  //
  // Movw:
  //     movw Register, #:lower16:Name - (End - Movw) - 8 .
  // Movt:
  //     movt Register, #:upper16:Name - (End - Movt) - 8 .
  //     PC = fake-def
  // End:
  //     Finish(PC)
  //
  // The -8 in movw/movt above is to account for the PC value that the first
  // instruction emitted by Finish(PC) will read.
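  // (In ARM mode, reading the PC yields the address of the reading
  // instruction plus 8.)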
  void
  loadNamedConstantRelocatablePIC(GlobalString Name, Variable *Register,
                                  std::function<void(Variable *PC)> Finish);

  /// Sandboxer defines methods for ensuring that "dangerous" operations are
  /// masked during sandboxed code emission. For regular, non-sandboxed code
  /// emission, its methods are simple pass-through methods.
  ///
  /// The Sandboxer also emits BundleLock/BundleUnlock pseudo-instructions
  /// in the constructor/destructor during sandboxed code emission. Therefore,
  /// it is a bad idea to create an object of this type and "keep it around."
  /// The recommended usage is:
  ///
  /// AutoSandboxing(this).<<operation>>(...);
  ///
  /// This usage ensures that no other instructions are inadvertently added to
  /// the bundle.
  class Sandboxer {
    Sandboxer() = delete;
    Sandboxer(const Sandboxer &) = delete;
    Sandboxer &operator=(const Sandboxer &) = delete;

  public:
    explicit Sandboxer(
        TargetARM32 *Target,
        InstBundleLock::Option BundleOption = InstBundleLock::Opt_None);
    ~Sandboxer();

    /// Increments sp:
    ///
    ///   add sp, sp, AddAmount
    ///   bic sp, sp, 0xc0000000
    ///
    /// (for the rationale, see the ARM 32-bit Sandbox Specification.)
    void add_sp(Operand *AddAmount);

    /// Emits code to align sp to the specified alignment:
    ///
    ///   bic/and sp, sp, Alignment
    ///   bic sp, sp, 0xc0000000
    void align_sp(size_t Alignment);

    /// Emits a call instruction. If CallTarget is a Variable, it emits
    ///
    ///   bic CallTarget, CallTarget, 0xc000000f
    ///   bl CallTarget
    ///
    /// Otherwise, it emits
    ///
    ///   bl CallTarget
    ///
    /// Note: in sandboxed code, calls are always emitted at addresses 12
    /// mod 16.
    InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget);

    /// Emits a load:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldr rDest, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a load exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   ldrex rDest, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the load is emitted as:
    ///
    ///   ldrex rDest, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Resets sp to Src:
    ///
    ///   mov sp, Src
    ///   bic sp, sp, 0xc0000000
    void reset_sp(Variable *Src);

    /// Emits code to return from a function:
    ///
    ///   bic lr, lr, 0xc000000f
    ///   bx lr
    void ret(Variable *RetAddr, Variable *RetValue);

    /// Emits a store:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   str rSrc, [rBase, #Offset]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   str rSrc, [rBase, #Offset]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred);

    /// Emits a store exclusive:
    ///
    ///   bic rBase, rBase, 0xc0000000
    ///   strex rDest, rSrc, [rBase]
    ///
    /// Exception: if rBase is r9 or sp, then the store is emitted as:
    ///
    ///   strex rDest, rSrc, [rBase]
    ///
    /// because the NaCl ARM 32-bit Sandbox Specification guarantees they are
    /// always valid.
    void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
               CondARM32::Cond Pred);

    /// Decrements sp:
    ///
    ///   sub sp, sp, SubAmount
    ///   bic sp, sp, 0xc0000000
    void sub_sp(Operand *SubAmount);

  private:
    TargetARM32 *const Target;
    const InstBundleLock::Option BundleOption;
    std::unique_ptr<AutoBundle> Bundler;

    void createAutoBundle();
  };

  class PostLoweringLegalizer {
    PostLoweringLegalizer() = delete;
    PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
    PostLoweringLegalizer &operator=(const PostLoweringLegalizer &) = delete;

  public:
    explicit PostLoweringLegalizer(TargetARM32 *Target)
        : Target(Target), StackOrFrameReg(Target->getPhysicalRegister(
                              Target->getFrameOrStackReg())) {}

    void resetTempBaseIfClobberedBy(const Inst *Instr);

    // Ensures that the TempBase register held by this legalizer (if any) is
    // assigned to IP.
    void assertNoTempOrAssignedToIP() const {
      assert(TempBaseReg == nullptr ||
             TempBaseReg->getRegNum() == Target->getReservedTmpReg());
    }

    // Legalizes Mem. If Mem.Base is a Rematerializable variable, Mem.Offset
    // is fixed up.
    OperandARM32Mem *legalizeMemOperand(OperandARM32Mem *Mem,
                                        bool AllowOffsets = true);

    /// Legalizes Mov if its Source (or Destination) is a spilled Variable, or
    /// if its Source is a Rematerializable variable (this form is used in lieu
    /// of lea, which is not available in ARM.)
    ///
    /// Moves to memory become store instructions, and moves from memory, loads.
    void legalizeMov(InstARM32Mov *Mov);

  private:
    /// Creates a new Base register centered around [Base, +/- Offset].
    Variable *newBaseRegister(Variable *Base, int32_t Offset,
                              RegNumT ScratchRegNum);

    /// Creates a new, legal OperandARM32Mem for accessing Base + Offset.
    /// The returned mem operand is a legal operand for accessing memory that is
    /// of type Ty.
    ///
    /// If [Base, #Offset] is encodable, then the method returns a Mem operand
    /// expressing it. Otherwise,
    ///
    /// if [TempBaseReg, #Offset-TempBaseOffset] is a valid memory operand, the
    /// method will return that. Otherwise,
    ///
    /// a new base register ip=Base+Offset is created, and the method returns a
    /// memory operand expressing [ip, #0].
    OperandARM32Mem *createMemOperand(Type Ty, Variable *Base, int32_t Offset,
                                      bool AllowOffsets = true);
    TargetARM32 *const Target;
    Variable *const StackOrFrameReg;
    Variable *TempBaseReg = nullptr;
    int32_t TempBaseOffset = 0;
  };

  const bool NeedSandboxing;
  TargetARM32Features CPUFeatures;
  bool UsesFramePointer = false;
  bool NeedsStackAlignment = false;
  bool MaybeLeafFunc = true;
  size_t SpillAreaSizeBytes = 0;
  size_t FixedAllocaSizeBytes = 0;
  size_t FixedAllocaAlignBytes = 0;
  bool PrologEmitsFixedAllocas = false;
  uint32_t MaxOutArgsSizeBytes = 0;
  // TODO(jpp): std::array instead of array.
  static SmallBitVector TypeToRegisterSet[RegARM32::RCARM32_NUM];
  static SmallBitVector TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
  static SmallBitVector RegisterAliases[RegARM32::Reg_NUM];
  SmallBitVector RegsUsed;
  VarList PhysicalRegisters[IceType_NUM];
  VarList PreservedGPRs;
  VarList PreservedSRegs;

  /// Helper class that understands the Calling Convention and register
  /// assignments. The first few integer type parameters can use r0-r3,
  /// regardless of their position relative to the floating-point/vector
  /// arguments in the argument list. Floating-point and vector arguments
  /// can use q0-q3 (aka d0-d7, s0-s15). For more information on the topic,
  /// see the Procedure Call Standard for the ARM Architecture (AAPCS).
  ///
  /// Technically, arguments that start in registers but extend beyond the
  /// available registers can be split between the registers and the stack.
  /// However, this typically arises only when passing GPR structs by value,
  /// and PNaCl transforms expand those out.
  ///
  /// At (public) function entry, the stack must be 8-byte aligned.
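  ///
  /// For example, under the AAPCS a call f(i32 a, double b, i64 c) would pass
  /// a in r0, b in d0, and c in the even/odd pair r2/r3 (i64 arguments are
  /// aligned to an even-numbered GPR), leaving r1 unused.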
  class CallingConv {
    CallingConv(const CallingConv &) = delete;
    CallingConv &operator=(const CallingConv &) = delete;

  public:
    CallingConv();
    ~CallingConv() = default;

    /// argInGPR returns true if there is a GPR available for the requested
    /// type, and false otherwise. If it returns true, Reg is set to the
    /// appropriate register number. Note that, when Ty == IceType_i64, Reg will
    /// be an I64 register pair.
    bool argInGPR(Type Ty, RegNumT *Reg);

    /// argInVFP is to floating-point/vector types what argInGPR is for integer
    /// types.
    bool argInVFP(Type Ty, RegNumT *Reg);

  private:
    void discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> *Regs);
    SmallBitVector GPRegsUsed;
    CfgVector<RegNumT> GPRArgs;
    CfgVector<RegNumT> I64Args;

    void discardUnavailableVFPRegs(CfgVector<RegNumT> *Regs);
    SmallBitVector VFPRegsUsed;
    CfgVector<RegNumT> FP32Args;
    CfgVector<RegNumT> FP64Args;
    CfgVector<RegNumT> Vec128Args;
  };

private:
  ENABLE_MAKE_UNIQUE;

  OperandARM32Mem *formAddressingMode(Type Ty, Cfg *Func, const Inst *LdSt,
                                      Operand *Base);

  void postambleCtpop64(const InstCall *Instr);
  void preambleDivRem(const InstCall *Instr);
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPreamble;
  CfgUnorderedMap<Operand *, void (TargetARM32::*)(const InstCall *Instr)>
      ARM32HelpersPostamble;

  class ComputationTracker {
  public:
    ComputationTracker() = default;
    ~ComputationTracker() = default;

    void forgetProducers() { KnownComputations.clear(); }
    void recordProducers(CfgNode *Node);

    const Inst *getProducerOf(const Operand *Opnd) const {
      auto *Var = llvm::dyn_cast<Variable>(Opnd);
      if (Var == nullptr) {
        return nullptr;
      }

      auto Iter = KnownComputations.find(Var->getIndex());
      if (Iter == KnownComputations.end()) {
        return nullptr;
      }

      return Iter->second.Instr;
    }

    void dump(const Cfg *Func) const {
      if (!BuildDefs::dump() || !Func->isVerbose(IceV_Folding))
        return;
      OstreamLocker L(Func->getContext());
      Ostream &Str = Func->getContext()->getStrDump();
      Str << "foldable producer:\n";
      for (const auto &Computation : KnownComputations) {
        Str << "    ";
        Computation.second.Instr->dump(Func);
        Str << "\n";
      }
      Str << "\n";
    }

  private:
    class ComputationEntry {
    public:
      ComputationEntry(Inst *I, Type Ty) : Instr(I), ComputationType(Ty) {}
      Inst *const Instr;
      // Boolean folding is disabled for variables whose live range is multi
      // block. We conservatively initialize IsLiveOut to true, and set it to
      // false once we find the end of the live range for the variable defined
      // by this instruction. If liveness analysis is not performed (e.g., in
      // Om1 mode) IsLiveOut will never be set to false, and folding will be
      // disabled.
      bool IsLiveOut = true;
      int32_t NumUses = 0;
      Type ComputationType;
    };

    // ComputationMap maps a Variable number to a payload identifying which
    // instruction defined it.
    using ComputationMap = CfgUnorderedMap<SizeT, ComputationEntry>;
    ComputationMap KnownComputations;
  };

  ComputationTracker Computations;

  // AllowTemporaryWithNoReg indicates if TargetARM32::makeReg() can be invoked
  // without specifying a physical register. This is needed for creating unbound
  // temporaries during Ice -> ARM lowering, but before register allocation.
  // This is a safeguard ensuring that no unbound temporaries are created during
  // the legalization post-passes.
  bool AllowTemporaryWithNoReg = true;
  // ForbidTemporaryWithoutReg is a RAII class that manages
  // AllowTemporaryWithNoReg.
  class ForbidTemporaryWithoutReg {
    ForbidTemporaryWithoutReg() = delete;
    ForbidTemporaryWithoutReg(const ForbidTemporaryWithoutReg &) = delete;
    ForbidTemporaryWithoutReg &
    operator=(const ForbidTemporaryWithoutReg &) = delete;

  public:
    explicit ForbidTemporaryWithoutReg(TargetARM32 *Target) : Target(Target) {
      Target->AllowTemporaryWithNoReg = false;
    }
    ~ForbidTemporaryWithoutReg() { Target->AllowTemporaryWithNoReg = true; }

  private:
    TargetARM32 *const Target;
  };
};

class TargetDataARM32 final : public TargetDataLowering {
  TargetDataARM32() = delete;
  TargetDataARM32(const TargetDataARM32 &) = delete;
  TargetDataARM32 &operator=(const TargetDataARM32 &) = delete;

public:
  static std::unique_ptr<TargetDataLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetDataLowering>(new TargetDataARM32(Ctx));
  }

  void lowerGlobals(const VariableDeclarationList &Vars,
                    const std::string &SectionSuffix) override;
  void lowerConstants() override;
  void lowerJumpTables() override;

protected:
  explicit TargetDataARM32(GlobalContext *Ctx);

private:
  ~TargetDataARM32() override = default;
};

class TargetHeaderARM32 final : public TargetHeaderLowering {
  TargetHeaderARM32() = delete;
  TargetHeaderARM32(const TargetHeaderARM32 &) = delete;
  TargetHeaderARM32 &operator=(const TargetHeaderARM32 &) = delete;

public:
  static std::unique_ptr<TargetHeaderLowering> create(GlobalContext *Ctx) {
    return std::unique_ptr<TargetHeaderLowering>(new TargetHeaderARM32(Ctx));
  }

  void lower() override;

protected:
  explicit TargetHeaderARM32(GlobalContext *Ctx);

private:
  ~TargetHeaderARM32() = default;

  TargetARM32Features CPUFeatures;
};

} // end of namespace ARM32
} // end of namespace Ice

#endif // SUBZERO_SRC_ICETARGETLOWERINGARM32_H