//===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringARM32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
#include "IceTargetLoweringARM32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstARM32.def"
#include "IceInstARM32.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersARM32.h"
#include "IceTargetLoweringARM32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

#include <algorithm>
#include <array>
#include <utility>

namespace ARM32 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::ARM32::TargetARM32::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::ARM32::TargetDataARM32::create(Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::ARM32::TargetARM32::staticInit(Ctx);
  if (Ice::getFlags().getUseNonsfi()) {
    // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
    // globals. The GOT is an external symbol (i.e., it is not defined in the
    // pexe) so we need to register it as such so that ELF emission won't barf
    // on an "unknown" symbol. The GOT is added to the External symbols list
    // here because staticInit() is invoked in a single-thread context.
    Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable));
  }
}

bool shouldBePooled(const ::Ice::Constant *C) {
  return ::Ice::ARM32::TargetARM32::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::ARM32::TargetARM32::getPointerType();
}

} // end of namespace ARM32

namespace Ice {
namespace ARM32 {

namespace {

/// SizeOf is used to obtain the size of an initializer list as a constexpr
/// expression. This is only needed until our C++ library is updated to
/// C++14, which adds constexpr members to std::initializer_list.
class SizeOf {
  SizeOf(const SizeOf &) = delete;
  SizeOf &operator=(const SizeOf &) = delete;

public:
  constexpr SizeOf() : Size(0) {}
  template <typename... T>
  explicit constexpr SizeOf(T...) : Size(__length<T...>::value) {}
  constexpr SizeT size() const { return Size; }

private:
  template <typename T, typename... U> struct __length {
    static constexpr std::size_t value = 1 + __length<U...>::value;
  };

  template <typename T> struct __length<T> {
    static constexpr std::size_t value = 1;
  };

  const std::size_t Size;
};
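// For illustration: an expression like (SizeOf (a, b, c)).size() evaluates to
// 3 at compile time. The real argument packs come from the alias_init lists
// in the REGARM32_TABLE X-macro below.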

} // end of anonymous namespace

// Defines the RegARM32::Table table with register information.
RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = {
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  {name,      encode,                                                          \
   cc_arg,    scratch,                                                         \
   preserved, stackptr,                                                        \
   frameptr,  isGPR,                                                           \
   isInt,     isI64Pair,                                                       \
   isFP32,    isFP64,                                                          \
   isVec128,  (SizeOf alias_init).size(),                                      \
   alias_init},
    REGARM32_TABLE
#undef X
};

namespace {

// The following table summarizes the logic for lowering the icmp instruction
// for i32 and narrower types. Each icmp condition has a clear mapping to an
// ARM32 conditional move instruction.

const struct TableIcmp32_ {
  CondARM32::Cond Mapping;
} TableIcmp32[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  {CondARM32::C_32},
    ICMPARM32_TABLE
#undef X
};

// The following table summarizes the logic for lowering the icmp instruction
// for the i64 type. Two conditional moves are needed for setting to 1 or 0.
// The operands may need to be swapped, and there is a slight difference for
// signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
const struct TableIcmp64_ {
  bool IsSigned;
  bool Swapped;
  CondARM32::Cond C1, C2;
} TableIcmp64[] = {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  {is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64},
    ICMPARM32_TABLE
#undef X
};

CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
  assert(Cond < llvm::array_lengthof(TableIcmp32));
  return TableIcmp32[Cond].Mapping;
}
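// For example, an equality icmp maps to CondARM32::EQ, and (per the
// ICMPARM32_TABLE entries in IceInstARM32.def) an unsigned less-than maps to
// CondARM32::CC -- the plain ARM condition codes for the compare.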

// In some cases, there are x-macros tables for both high-level and low-level
// instructions/operands that use the same enum key value. The tables are kept
// separate to maintain a proper separation between abstraction layers. There
// is a risk that the tables could get out of sync if enum values are reordered
// or if entries are added or deleted. The following anonymous namespaces use
// static_asserts to ensure everything is kept in sync.

// Validate the enum values in ICMPARM32_TABLE.
namespace {
// Define a temporary set of enum values based on low-level table entries.
enum _icmp_ll_enum {
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  _icmp_ll_##val,
  ICMPARM32_TABLE
#undef X
      _num
};
// Define a set of constants based on high-level table entries.
#define X(tag, reverse, str)                                                   \
  static constexpr int _icmp_hl_##tag = InstIcmp::tag;
ICEINSTICMP_TABLE
#undef X
// Define a set of constants based on low-level table entries, and ensure the
// table entry keys are consistent.
#define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
  static_assert(                                                               \
      _icmp_ll_##val == _icmp_hl_##val,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
ICMPARM32_TABLE
#undef X
// Repeat the static asserts with respect to the high-level table entries in
// case the high-level table has extra entries.
#define X(tag, reverse, str)                                                   \
  static_assert(                                                               \
      _icmp_hl_##tag == _icmp_ll_##tag,                                        \
      "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
ICEINSTICMP_TABLE
#undef X
} // end of anonymous namespace

// Stack alignment
const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
}
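// E.g., applyStackAlignment(20) == 32, while exact multiples of 16 pass
// through unchanged.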

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  // Use natural alignment, except that normally (non-NaCl) ARM only aligns
  // vectors to 8 bytes.
  // TODO(jvoung): Check this ...
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  if (isVectorType(Ty))
    typeAlignInBytes = 8;
  return Utils::applyAlignment(Value, typeAlignInBytes);
}
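// E.g., applyStackAlignmentTy(4, IceType_i64) == 8 (natural 8-byte alignment),
// and a 16-byte vector type also aligns to 8 because of the rule above.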

// Conservatively check if at compile time we know that the operand is
// definitely a non-zero integer.
bool isGuaranteedNonzeroInt(const Operand *Op) {
  if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
    return Const->getValue() != 0;
  }
  return false;
}
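// Note that this returns false not only for a literal 0, but also for any
// operand whose value is unknown at compile time (e.g. a variable): hence
// "conservatively".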

} // end of anonymous namespace

TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
  static_assert(
      (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
          (TargetInstructionSet::ARM32InstructionSet_End -
           TargetInstructionSet::ARM32InstructionSet_Begin),
      "ARM32InstructionSet range different from TargetInstructionSet");
  if (Flags.getTargetInstructionSet() !=
      TargetInstructionSet::BaseInstructionSet) {
    InstructionSet = static_cast<ARM32InstructionSet>(
        (Flags.getTargetInstructionSet() -
         TargetInstructionSet::ARM32InstructionSet_Begin) +
        ARM32InstructionSet::Begin);
  }
}

namespace {
constexpr SizeT NumGPRArgs =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_GPR_TABLE
#undef X
    ;
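// The X-macro above expands to a sequence of unary "+0"/"+1" terms, so each
// NumXXXArgs constant in this block is simply the number of table entries
// whose cc_arg value is positive, i.e. the registers that participate in
// argument passing.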
std::array<RegNumT, NumGPRArgs> GPRArgInitializer;

constexpr SizeT NumI64Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_I64PAIR_TABLE
#undef X
    ;
std::array<RegNumT, NumI64Args> I64ArgInitializer;

constexpr SizeT NumFP32Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_FP32_TABLE
#undef X
    ;
std::array<RegNumT, NumFP32Args> FP32ArgInitializer;

constexpr SizeT NumFP64Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_FP64_TABLE
#undef X
    ;
std::array<RegNumT, NumFP64Args> FP64ArgInitializer;

constexpr SizeT NumVec128Args =
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  +(((cc_arg) > 0) ? 1 : 0)
    REGARM32_VEC128_TABLE
#undef X
    ;
std::array<RegNumT, NumVec128Args> Vec128ArgInitializer;

const char *getRegClassName(RegClass C) {
  auto ClassNum = static_cast<RegARM32::RegClassARM32>(C);
  assert(ClassNum < RegARM32::RCARM32_NUM);
  switch (ClassNum) {
  default:
    assert(C < RC_Target);
    return regClassString(C);
  // Add handling of new register classes below.
  case RegARM32::RCARM32_QtoS:
    return "QtoS";
  }
}

} // end of anonymous namespace

TargetARM32::TargetARM32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
      CPUFeatures(getFlags()) {}

void TargetARM32::staticInit(GlobalContext *Ctx) {
  RegNumT::setLimit(RegARM32::Reg_NUM);
  // Limit this size (or do all bitsets need to be the same width)???
  SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
  SmallBitVector Float32Registers(RegARM32::Reg_NUM);
  SmallBitVector Float64Registers(RegARM32::Reg_NUM);
  SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
  SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
  const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
  for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
    const auto &Entry = RegARM32::RegTable[i];
    IntegerRegisters[i] = Entry.IsInt;
    I64PairRegisters[i] = Entry.IsI64Pair;
    Float32Registers[i] = Entry.IsFP32;
    Float64Registers[i] = Entry.IsFP64;
    VectorRegisters[i] = Entry.IsVec128;
    RegisterAliases[i].resize(RegARM32::Reg_NUM);
    // TODO(eholk): It would be better to store a QtoS flag in the
    // IceRegistersARM32 table than to compare their encodings here.
    QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
    for (int j = 0; j < Entry.NumAliases; ++j) {
      assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
      RegisterAliases[i].set(Entry.Aliases[j]);
    }
    assert(RegisterAliases[i][i]);
    if (Entry.CCArg <= 0) {
      continue;
    }
    const auto RegNum = RegNumT::fromInt(i);
    if (Entry.IsGPR) {
      GPRArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsI64Pair) {
      I64ArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsFP32) {
      FP32ArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsFP64) {
      FP64ArgInitializer[Entry.CCArg - 1] = RegNum;
    } else if (Entry.IsVec128) {
      Vec128ArgInitializer[Entry.CCArg - 1] = RegNum;
    }
  }
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = I64PairRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
  TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;

  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(
      Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
      llvm::array_lengthof(TypeToRegisterSet),
      [](RegNumT RegNum) -> std::string {
        // This function simply removes ", " from the register name.
        std::string Name = RegARM32::getRegName(RegNum);
        constexpr const char RegSeparator[] = ", ";
        constexpr size_t RegSeparatorWidth =
            llvm::array_lengthof(RegSeparator) - 1;
        for (size_t Pos = Name.find(RegSeparator); Pos != std::string::npos;
             Pos = Name.find(RegSeparator)) {
          Name.replace(Pos, RegSeparatorWidth, "");
        }
        return Name;
      },
      getRegClassName);
}

namespace {
void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
  for (Variable *Var : Vars) {
    auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
    if (!Var64) {
      // This is not the variable we are looking for.
      continue;
    }
    // Only allow infinite-weight i64 temporaries to be register allocated.
    assert(!Var64->hasReg() || Var64->mustHaveReg());
    if (!Var64->hasReg()) {
      continue;
    }
    const auto FirstReg =
        RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
    // This assumes little endian.
    Variable *Lo = Var64->getLo();
    Variable *Hi = Var64->getHi();
    assert(Lo->hasReg() == Hi->hasReg());
    if (Lo->hasReg()) {
      continue;
    }
    Lo->setRegNum(FirstReg);
    Lo->setMustHaveReg();
    Hi->setRegNum(RegNumT::fixme(FirstReg + 1));
    Hi->setMustHaveReg();
  }
}
} // end of anonymous namespace

uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
  TargetARM32::CallingConv CC;
  RegNumT DummyReg;
  size_t OutArgsSizeBytes = 0;
  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Call->getArg(i));
    const Type Ty = Arg->getType();
    if (isScalarIntegerType(Ty)) {
      if (CC.argInGPR(Ty, &DummyReg)) {
        continue;
      }
    } else {
      if (CC.argInVFP(Ty, &DummyReg)) {
        continue;
      }
    }

    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  }

  return applyStackAlignment(OutArgsSizeBytes);
}
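// For example, a call taking five i32 arguments passes the first four in
// r0-r3, so only the fifth contributes to OutArgsSizeBytes (4 bytes), which
// the final applyStackAlignment() rounds up to 16.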

void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
  constexpr bool NoTailCall = false;
  constexpr bool IsTargetHelperCall = true;

  switch (Instr->getKind()) {
  default:
    return;
  case Inst::Arithmetic: {
    Variable *Dest = Instr->getDest();
    const Type DestTy = Dest->getType();
    const InstArithmetic::OpKind Op =
        llvm::cast<InstArithmetic>(Instr)->getOp();
    if (isVectorType(DestTy)) {
      switch (Op) {
      default:
        break;
      case InstArithmetic::Fdiv:
      case InstArithmetic::Frem:
      case InstArithmetic::Sdiv:
      case InstArithmetic::Srem:
      case InstArithmetic::Udiv:
      case InstArithmetic::Urem:
        scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
        Instr->setDeleted();
        return;
      }
    }
    switch (DestTy) {
    default:
      return;
    case IceType_i64: {
      // Technically, ARM has its own aeabi routines, but we can use the
      // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
      // the more standard __moddi3 for rem.
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    case IceType_i32:
    case IceType_i16:
    case IceType_i8: {
      const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm);
      InstCast::OpKind CastKind;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32;
        CastKind = InstCast::Zext;
        break;
      case InstArithmetic::Sdiv:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32;
        CastKind = InstCast::Sext;
        break;
      case InstArithmetic::Urem:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32;
        CastKind = InstCast::Zext;
        break;
      case InstArithmetic::Srem:
        HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32;
        CastKind = InstCast::Sext;
        break;
      }
      if (HelperID == RuntimeHelper::H_Num) {
        // HelperID should only ever be undefined when the processor does not
        // have a hardware divider. If any other helpers are ever introduced,
        // the following assert will have to be modified.
        assert(HasHWDiv);
        return;
      }
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      if (DestTy != IceType_i32) {
        // Src0 and Src1 have to be zero- or sign-extended to i32. For Src0,
        // we just insert an InstCast right before the call to the helper.
        Variable *Src0_32 = Func->makeVariable(IceType_i32);
        Context.insert<InstCast>(CastKind, Src0_32, Src0);
        Src0 = Src0_32;

        // For extending Src1, we will just insert an InstCast if Src1 is not a
        // Constant. If it is, then we extend it here, and not during program
        // runtime. This allows preambleDivRem to optimize out the div-by-0
        // check.
        if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
          const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24;
          int32_t NewC = C->getValue();
          if (CastKind == InstCast::Zext) {
            NewC &= ~(0x80000000l >> ShAmt);
          } else {
            NewC = (NewC << ShAmt) >> ShAmt;
          }
          Src1 = Ctx->getConstantInt32(NewC);
        } else {
          Variable *Src1_32 = Func->makeVariable(IceType_i32);
          Context.insert<InstCast>(CastKind, Src1_32, Src1);
          Src1 = Src1_32;
        }
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      assert(Src0->getType() == IceType_i32);
      Call->addArg(Src0);
      assert(Src1->getType() == IceType_i32);
      Call->addArg(Src1);
      Instr->setDeleted();
      return;
    }
    case IceType_f64:
    case IceType_f32: {
      if (Op != InstArithmetic::Frem) {
        return;
      }
      constexpr SizeT MaxArgs = 2;
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
                                : RuntimeHelper::H_frem_f64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Cast: {
    Variable *Dest = Instr->getDest();
    Operand *Src0 = Instr->getSrc(0);
    const Type DestTy = Dest->getType();
    const Type SrcTy = Src0->getType();
    auto *CastInstr = llvm::cast<InstCast>(Instr);
    const InstCast::OpKind CastKind = CastInstr->getCastKind();

    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
    case InstCast::Fptoui: {
      if (DestTy != IceType_i64) {
        return;
      }
      const bool DestIsSigned = CastKind == InstCast::Fptosi;
      const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64
                                    : RuntimeHelper::H_fptoui_f32_i64)
                    : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64
                                    : RuntimeHelper::H_fptoui_f64_i64));
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Sitofp:
    case InstCast::Uitofp: {
      if (SrcTy != IceType_i64) {
        return;
      }
      const bool SourceIsSigned = CastKind == InstCast::Sitofp;
      const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32
                                      : RuntimeHelper::H_uitofp_i64_f32)
                    : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64
                                      : RuntimeHelper::H_uitofp_i64_f64));
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Bitcast: {
      if (DestTy == SrcTy) {
        return;
      }
      Variable *CallDest = Dest;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(SrcTy == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(SrcTy == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(SrcTy == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(SrcTy == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
      Call->addArg(Src0);
      Context.insert(Call);
      // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
      // call result to the appropriate type as necessary.
      if (CallDest->getType() != Dest->getType())
        Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
      Instr->setDeleted();
      return;
    }
    case InstCast::Trunc: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(SrcTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(DestTy) == IceType_i1);
      assert(isVectorIntegerType(SrcTy));
      return;
    }
    case InstCast::Sext:
    case InstCast::Zext: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(DestTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(SrcTy) == IceType_i1);
      assert(isVectorIntegerType(DestTy));
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::IntrinsicCall: {
    Variable *Dest = Instr->getDest();
    auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
    Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
    switch (ID) {
    default:
      return;
    case Intrinsics::Ctpop: {
      Operand *Src0 = IntrinsicCall->getArg(0);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
                                        ? RuntimeHelper::H_call_ctpop_i32
                                        : RuntimeHelper::H_call_ctpop_i64);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      if (Src0->getType() == IceType_i64) {
        ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
      }
      return;
    }
    case Intrinsics::Longjmp: {
      static constexpr SizeT MaxArgs = 2;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memcpy: {
      // In the future, we could potentially emit an inline memcpy/memset, etc.
      // for intrinsic calls w/ a known length.
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memmove: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memset: {
      // The value operand needs to be extended to a stack slot size because the
      // PNaCl ABI requires arguments to be at least 32 bits wide.
      Operand *ValOp = IntrinsicCall->getArg(1);
      assert(ValOp->getType() == IceType_i8);
      Variable *ValExt = Func->makeVariable(stackSlotType());
      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);

      // Technically, ARM has its own __aeabi_memset, but we can use plain
      // memset too. The value and size argument need to be flipped if we ever
      // decide to use __aeabi_memset.
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(ValExt);
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::NaClReadTP: {
      if (SandboxingType == ST_NaCl) {
        return;
      }
      static constexpr SizeT MaxArgs = 0;
      Operand *TargetHelper =
          SandboxingType == ST_Nonsfi
              ? Ctx->getConstantExternSym(
                    Ctx->getGlobalString("__aeabi_read_tp"))
              : Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
      Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
                               IsTargetHelperCall);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Setjmp: {
      static constexpr SizeT MaxArgs = 1;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  }
}

void TargetARM32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  constexpr size_t MinNeededOutArgsBytes = 0;
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
}
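// Computing the maximum over all calls up front lets the prologue reserve the
// out-args area (area 9 in the addProlog() frame layout) once, so lowered
// calls can store their stack arguments at fixed offsets from the stack
// pointer instead of adjusting sp around each call.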

void TargetARM32::createGotPtr() {
  if (SandboxingType != ST_Nonsfi) {
    return;
  }
  GotPtr = Func->makeVariable(IceType_i32);
}

void TargetARM32::insertGotPtrInitPlaceholder() {
  if (SandboxingType != ST_Nonsfi) {
    return;
  }
  assert(GotPtr != nullptr);
  // We add the two placeholder instructions here. The first fakedefs T, an
  // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
  // This is needed because the GotPtr initialization, if needed, will require
  // a register:
  //
  //   movw     reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
  //   movt     reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
  //   add      reg, pc, reg
  //   mov      GotPtr, reg
  //
  // If GotPtr is not used, then both these pseudo-instructions are dce'd.
  Variable *T = makeReg(IceType_i32);
  Context.insert<InstFakeDef>(T);
  Context.insert<InstFakeDef>(GotPtr, T);
}

GlobalString
TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
  GlobalString CRName = CR->getName();
  GlobalString CRGotoffName =
      Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName);
  if (KnownGotoffs.count(CRGotoffName) == 0) {
    constexpr bool SuppressMangling = true;
    auto *Global =
        VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
    Global->setIsConstant(true);
    Global->setName(CRName);
    Func->getGlobalPool()->willNotBeEmitted(Global);

    auto *Gotoff =
        VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
    constexpr auto GotFixup = R_ARM_GOTOFF32;
    Gotoff->setIsConstant(true);
    Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
        Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)},
        GotFixup));
    Gotoff->setName(CRGotoffName);
    Func->addGlobal(Gotoff);
    KnownGotoffs.emplace(CRGotoffName);
  }
  return CRGotoffName;
}

void TargetARM32::materializeGotAddr(CfgNode *Node) {
  if (SandboxingType != ST_Nonsfi) {
    return;
  }

  // First, we try to find the
  //    GotPtr = def T
  // pseudo-instruction that we placed for defining the got ptr. That
  // instruction is not just a place-holder for defining the GotPtr (thus
  // keeping liveness consistent), but it is also located at a point where it
  // is safe to materialize the got addr -- i.e., before loading parameters to
  // registers, but after moving register parameters from their home location.
  InstFakeDef *DefGotPtr = nullptr;
  for (auto &Inst : Node->getInsts()) {
    auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst);
    if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) {
      DefGotPtr = FakeDef;
      break;
    }
  }

  if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) {
    return;
  }

  // The got addr needs to be materialized at the same point where DefGotPtr
  // lives.
  Context.setInsertPoint(instToIterator(DefGotPtr));
  assert(DefGotPtr->getSrcSize() == 1);
  auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0));
  loadNamedConstantRelocatablePIC(Ctx->getGlobalString(GlobalOffsetTable), T,
                                  [this, T](Variable *PC) { _add(T, PC, T); });
  _mov(GotPtr, T);
  DefGotPtr->setDeleted();
}

void TargetARM32::loadNamedConstantRelocatablePIC(
    GlobalString Name, Variable *Register,
    std::function<void(Variable *PC)> Finish) {
  assert(SandboxingType == ST_Nonsfi);
  // We makeReg() here instead of getPhysicalRegister() because the latter ends
  // up creating multi-block temporaries that liveness fails to validate.
  auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);

  auto *AddPcReloc = RelocOffset::create(Ctx);
  AddPcReloc->setSubtract(true);
  auto *AddPcLabel = InstARM32Label::create(Func, this);
  AddPcLabel->setRelocOffset(AddPcReloc);

  auto *MovwReloc = RelocOffset::create(Ctx);
  auto *MovwLabel = InstARM32Label::create(Func, this);
  MovwLabel->setRelocOffset(MovwReloc);

  auto *MovtReloc = RelocOffset::create(Ctx);
  auto *MovtLabel = InstARM32Label::create(Func, this);
  MovtLabel->setRelocOffset(MovtReloc);

  // The EmitStrings for these constant relocatables have hardcoded offsets
  // attached to them. This could be dangerous if, e.g., we ever implemented
  // instruction scheduling, but it is necessary because llvm-mc currently
  // does not support
  //
  //   movw reg, #:lower16:(Symbol - Label - Number)
  //   movt reg, #:upper16:(Symbol - Label - Number)
  //
  // relocations.
  static constexpr RelocOffsetT PcOffset = -8;
  auto *CRLower = Ctx->getConstantSymWithEmitString(
      PcOffset, {MovwReloc, AddPcReloc}, Name, Name + " -16");
  auto *CRUpper = Ctx->getConstantSymWithEmitString(
      PcOffset, {MovtReloc, AddPcReloc}, Name, Name + " -12");

  Context.insert(MovwLabel);
  _movw(Register, CRLower);
  Context.insert(MovtLabel);
  _movt(Register, CRUpper);
  // PC = fake-def to keep liveness consistent.
  Context.insert<InstFakeDef>(PC);
  Context.insert(AddPcLabel);
  Finish(PC);
}

void TargetARM32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with other targets?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  if (SandboxingType == ST_Nonsfi) {
    createGotPtr();
  }
  genTargetHelperCalls();
  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();
  Func->materializeVectorShuffles();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After ARM32 address mode opt");

  if (SandboxingType == ST_Nonsfi) {
    insertGotPtrInitPlaceholder();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After ARM32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial ARM32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;

  copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  ForbidTemporaryWithoutReg _(this);

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetARM32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO(stichnot): share passes with other targets?
  if (SandboxingType == ST_Nonsfi) {
    createGotPtr();
  }

  genTargetHelperCalls();
  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool DontSortAndCombineAllocas = false;
  Func->processAllocas(DontSortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  if (SandboxingType == ST_Nonsfi) {
    insertGotPtrInitPlaceholder();
  }
  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial ARM32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;

  copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
  Func->dump("After regalloc of infinite-weight variables");

  ForbidTemporaryWithoutReg _(this);

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

uint32_t TargetARM32::getStackAlignment() const {
  return ARM32_STACK_ALIGNMENT_BYTES;
}

bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty;
  return RegARM32::getRegName(RegNum);
}

Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
  static const Type DefaultType[] = {
#define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr,   \
          isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init)       \
  (isFP32)                                                                     \
      ? IceType_f32                                                            \
      : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
      REGARM32_TABLE
#undef X
  };

  if (Ty == IceType_void) {
    assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType));
    Ty = DefaultType[RegNum];
  }
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
  assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry.  Otherwise it's possible to get
    // liveness validation errors for saving callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

void TargetARM32::emitJumpTable(const Cfg *Func,
                                const InstJumpTable *JumpTable) const {
  (void)Func;
  (void)JumpTable;
  UnimplementedError(getFlags());
}

void TargetARM32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  if (Var->hasReg()) {
    Str << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
                             ") has no register assigned - function " +
                             Func->getFunctionName());
  }
  assert(!Var->isRematerializable());
  int32_t Offset = Var->getStackOffset();
  auto BaseRegNum = Var->getBaseRegNum();
  if (BaseRegNum.hasNoValue()) {
    BaseRegNum = getFrameOrStackReg();
  }
  const Type VarTy = Var->getType();
  Str << "[" << getRegName(BaseRegNum, VarTy);
  if (Offset != 0) {
    Str << ", #" << Offset;
  }
  Str << "]";
}
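// For example, a spilled i32 at offset -8 from the frame pointer is emitted
// as the memory operand "[fp, #-8]".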

TargetARM32::CallingConv::CallingConv()
    : GPRegsUsed(RegARM32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegARM32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()),
      Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {}

bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    assert(isScalarIntegerType(Ty));
    Source = &GPRArgs;
  } break;
  case IceType_i64: {
    Source = &I64Args;
  } break;
  }

  discardUnavailableGPRsAndTheirAliases(Source);

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we don't Source->pop_back() here. This is intentional. Notice
  // how we mark all of Reg's aliases as Used. So, for the next argument,
  // Source->back() is marked as unavailable, and it is thus implicitly popped
  // from the stack.
  GPRegsUsed |= RegisterAliases[*Reg];
  return true;
}

// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in r0, the second in r2-r3, and the third
// on the stack. To model this behavior, whenever we pop a register from Regs,
// we remove all of its aliases from the pool of available GPRs. This has the
// effect of computing the "closure" of the GPR registers.
void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
    GPRegsUsed |= RegisterAliases[Regs->back()];
    Regs->pop_back();
  }
}

bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    assert(isVectorType(Ty));
    Source = &Vec128Args;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  discardUnavailableVFPRegs(Source);

  if (Source->empty()) {
    VFPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];
  return true;
}

// Arguments in VFP registers are packed (i.e., a later, smaller argument may
// back-fill a register skipped for alignment), so we don't mark the popped
// registers' aliases as unavailable.
void TargetARM32::CallingConv::discardUnavailableVFPRegs(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
    Regs->pop_back();
  }
}
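// For example, for foo(f32, f64, f32) the arguments land in s0, d1, and s1:
// d0 is discarded because it aliases s0, but s1 remains available for the
// third argument because this loop does not close over the aliases of the
// registers it discards.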

void TargetARM32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetARM32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  for (SizeT I = 0, E = Args.size(); I < E; ++I) {
    Variable *Arg = Args[I];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (isScalarIntegerType(Ty)) {
      if (!CC.argInGPR(Ty, &RegNum)) {
        continue;
      }
    } else {
      if (!CC.argInVFP(Ty, &RegNum)) {
        continue;
      }
    }

    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[I] = RegisterArg;
    switch (Ty) {
    default: {
      RegisterArg->setRegNum(RegNum);
    } break;
    case IceType_i64: {
      auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
      RegisterArg64->initHiLo(Func);
      RegisterArg64->getLo()->setRegNum(
          RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum)));
      RegisterArg64->getHi()->setRegNum(
          RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum)));
    } break;
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }
}
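// For example, an i64 argument homed in a GPR pair takes the IceType_i64 case
// above: its lo/hi halves are pinned to the pair's first and second GPRs, and
// the InstAssign then copies the pair into the original (split) Arg.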
1393 
1394 // Helper function for addProlog().
1395 //
1396 // This assumes Arg is an argument passed on the stack. This sets the frame
1397 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1398 // I64 arg that has been split into Lo and Hi components, it calls itself
1399 // recursively on the components, taking care to handle Lo first because of the
1400 // little-endian architecture. Lastly, this function generates an instruction
1401 // to copy Arg into its assigned register if applicable.
finishArgumentLowering(Variable * Arg,Variable * FramePtr,size_t BasicFrameOffset,size_t * InArgsSizeBytes)1402 void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
1403                                          size_t BasicFrameOffset,
1404                                          size_t *InArgsSizeBytes) {
1405   const Type Ty = Arg->getType();
1406   *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
1407 
1408   if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1409     Variable *const Lo = Arg64On32->getLo();
1410     Variable *const Hi = Arg64On32->getHi();
1411     finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1412     finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1413     return;
1414   }
1415   assert(Ty != IceType_i64);
1416 
1417   const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
1418   *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1419 
1420   if (!Arg->hasReg()) {
1421     Arg->setStackOffset(ArgStackOffset);
1422     return;
1423   }
1424 
1425   // If the argument variable has been assigned a register, we need to copy the
1426   // value from the stack slot.
1427   Variable *Parameter = Func->makeVariable(Ty);
1428   Parameter->setMustNotHaveReg();
1429   Parameter->setStackOffset(ArgStackOffset);
1430   _mov(Arg, Parameter);
1431 }
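
// Worked example (illustrative): assuming BasicFrameOffset == 8, an i64 stack
// argument is split and handled Lo first, so its halves land at offsets 8 and
// 12; any following argument is then aligned by applyStackAlignmentTy before
// receiving its own slot.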

Type TargetARM32::stackSlotType() { return IceType_i32; }

void TargetARM32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+ <--- FramePointer (if used)
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas (variable)  |
  // +------------------------+
  // | 8. padding             |
  // +------------------------+
  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes:  area 2
  //  * GlobalsSize:            area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize:    area 5
  //  * SpillAreaSizeBytes:     areas 2 - 6, and 9
  //  * MaxOutArgsSizeBytes:    area 9
  //
  // Determine stack frame offsets for each Variable without a register
  // assignment. This can be done as one variable per stack slot. Or, do
  // coalescing by running the register allocator again with an infinite set of
  // registers (as a side effect, this gives variables a second chance at
  // physical register assignment).
  //
  // A middle ground approach is to leverage sparsity and allocate one block of
  // space on the frame for globals (variables with multi-block lifetime), and
  // one block to share for locals (single-block lifetime).
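  //
  // As a concrete (hypothetical) instance: saving {r4, lr} makes area 1 eight
  // bytes; with a 12-byte globals area and a 16-byte locals area, the padding
  // areas are sized so each area meets its alignment, and the out args area
  // sits below them all, adjacent to the SP.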

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = SmallBitVector(CalleeSaves.size());
  VarList SortedSpilledVariables;
  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to the largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
    static constexpr bool AssignStackSlot = false;
    static constexpr bool DontAssignStackSlot = !AssignStackSlot;
    if (llvm::isa<Variable64On32>(Var)) {
      return DontAssignStackSlot;
    }
    return AssignStackSlot;
  };

  // Compute the list of spilled variables and bounds for GlobalsSize, etc.
  getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
                        &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
                        &LocalsSlotsAlignmentBytes, TargetVarHook);
  uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
  SpillAreaSizeBytes += GlobalsSize;

  // Add push instructions for preserved registers. On ARM, "push" can push a
  // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
  // callee-saved float/vector registers.
  //
  // The "vpush" instruction can handle a whole list of float/vector registers,
  // but it only handles contiguous sequences of registers by specifying the
  // start and the length.
  PreservedGPRs.reserve(CalleeSaves.size());
  PreservedSRegs.reserve(CalleeSaves.size());

  // Consider FP and LR as callee-save / used as needed.
  if (UsesFramePointer) {
    if (RegsUsed[RegARM32::Reg_fp]) {
      llvm::report_fatal_error("Frame pointer has been used.");
    }
    CalleeSaves[RegARM32::Reg_fp] = true;
    RegsUsed[RegARM32::Reg_fp] = true;
  }
  if (!MaybeLeafFunc) {
    CalleeSaves[RegARM32::Reg_lr] = true;
    RegsUsed[RegARM32::Reg_lr] = true;
  }

  // Make two passes over the used registers. The first pass records all the
  // used registers -- and their aliases. Then, we figure out which GPRs and
  // VFP S registers should be saved. We don't bother saving D/Q registers
  // because their uses are recorded as S reg uses.
  SmallBitVector ToPreserve(RegARM32::Reg_NUM);
  for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
    if (NeedSandboxing && i == RegARM32::Reg_r9) {
      // r9 is never updated in sandboxed code.
      continue;
    }
    if (CalleeSaves[i] && RegsUsed[i]) {
      ToPreserve |= RegisterAliases[i];
    }
  }

  uint32_t NumCallee = 0;
  size_t PreservedRegsSizeBytes = 0;

  // RegClasses is a tuple of
  //
  // <First Register in Class, Last Register in Class, Vector of Save
  //  Registers>
  //
  // We use this tuple to figure out which registers we should push/pop during
  // prolog/epilog.
  using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
  const RegClassType RegClasses[] = {
      RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
                   &PreservedGPRs),
      RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
                   &PreservedSRegs)};
  for (const auto &RegClass : RegClasses) {
    const uint32_t FirstRegInClass = std::get<0>(RegClass);
    const uint32_t LastRegInClass = std::get<1>(RegClass);
    VarList *const PreservedRegsInClass = std::get<2>(RegClass);
    for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
      if (!ToPreserve[Reg]) {
        continue;
      }
      ++NumCallee;
      Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
      PreservedRegsSizeBytes +=
          typeWidthInBytesOnStack(PhysicalRegister->getType());
      PreservedRegsInClass->push_back(PhysicalRegister);
    }
  }

  Ctx->statsUpdateRegistersSaved(NumCallee);
  if (!PreservedSRegs.empty())
    _push(PreservedSRegs);
  if (!PreservedGPRs.empty())
    _push(PreservedGPRs);

  // Generate "mov FP, SP" if needed.
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(FP, SP);
    // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
    Context.insert<InstFakeUse>(FP);
  }

  // Align the variables area. SpillAreaPaddingBytes is the size of the region
  // after the preserved registers and before the spill areas.
  // LocalsSlotsPaddingBytes is the amount of padding between the globals and
  // locals area if they are separate.
  assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
  assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
  uint32_t SpillAreaPaddingBytes = 0;
  uint32_t LocalsSlotsPaddingBytes = 0;
  alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
                       GlobalsSize, LocalsSlotsAlignmentBytes,
                       &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
  SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
  uint32_t GlobalsAndSubsequentPaddingSize =
      GlobalsSize + LocalsSlotsPaddingBytes;

  // Add the out args space to the stack, and align SP if necessary.
  if (!NeedsStackAlignment) {
    SpillAreaSizeBytes += MaxOutArgsSizeBytes;
  } else {
    uint32_t StackOffset = PreservedRegsSizeBytes;
    uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
    StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
    SpillAreaSizeBytes = StackSize - StackOffset;
  }

  // Combine fixed alloca with SpillAreaSize.
  SpillAreaSizeBytes += FixedAllocaSizeBytes;

  // Generate "sub sp, SpillAreaSizeBytes"
  if (SpillAreaSizeBytes) {
    // Use the scratch register if needed to legalize the immediate.
    Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                  Legal_Reg | Legal_Flex, getReservedTmpReg());
    Sandboxer(this).sub_sp(SubAmount);
    if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
      Sandboxer(this).align_sp(FixedAllocaAlignBytes);
    }
  }

  Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);

  // Fill in stack offsets for stack args, and copy args into registers for
  // those that were register-allocated. Args are pushed right to left, so
  // Arg[0] is closest to the stack/frame pointer.
  Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
  size_t BasicFrameOffset = PreservedRegsSizeBytes;
  if (!UsesFramePointer)
    BasicFrameOffset += SpillAreaSizeBytes;

  materializeGotAddr(Node);

  const VarList &Args = Func->getArgs();
  size_t InArgsSizeBytes = 0;
  TargetARM32::CallingConv CC;
  for (Variable *Arg : Args) {
    RegNumT DummyReg;
    const Type Ty = Arg->getType();

    // Skip arguments passed in registers.
    if (isScalarIntegerType(Ty)) {
      if (CC.argInGPR(Ty, &DummyReg)) {
        continue;
      }
    } else {
      if (CC.argInVFP(Ty, &DummyReg)) {
        continue;
      }
    }
    finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes);
  }

  // Fill in stack offsets for locals.
  assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
                      SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
                      UsesFramePointer);
  this->HasComputedFrame = true;

  if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();

    Str << "Stack layout:\n";
    uint32_t SPAdjustmentPaddingSize =
        SpillAreaSizeBytes - LocalsSpillAreaSize -
        GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
        MaxOutArgsSizeBytes;
    Str << " in-args = " << InArgsSizeBytes << " bytes\n"
        << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
        << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
        << " globals spill area = " << GlobalsSize << " bytes\n"
        << " globals-locals spill areas intermediate padding = "
        << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
        << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
        << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";

    Str << "Stack details:\n"
        << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
        << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
        << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
        << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
        << " bytes\n"
        << " is FP based = " << UsesFramePointer << "\n";
  }
}

void TargetARM32::addEpilog(CfgNode *Node) {
  InstList &Insts = Node->getInsts();
  InstList::reverse_iterator RI, E;
  for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
    if (llvm::isa<InstARM32Ret>(*RI))
      break;
  }
  if (RI == E)
    return;

  // Convert the reverse_iterator position into its corresponding (forward)
  // iterator position.
  InstList::iterator InsertPoint = reverseToForwardIterator(RI);
  --InsertPoint;
  Context.init(Node);
  Context.setInsertPoint(InsertPoint);

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (UsesFramePointer) {
    Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
    // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
    // use of SP before the assignment of SP=FP keeps previous SP adjustments
    // from being dead-code eliminated.
    Context.insert<InstFakeUse>(SP);
    Sandboxer(this).reset_sp(FP);
  } else {
    // add SP, SpillAreaSizeBytes
    if (SpillAreaSizeBytes) {
      // Use the scratch register if needed to legalize the immediate.
      Operand *AddAmount =
          legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                   Legal_Reg | Legal_Flex, getReservedTmpReg());
      Sandboxer(this).add_sp(AddAmount);
    }
  }

  if (!PreservedGPRs.empty())
    _pop(PreservedGPRs);
  if (!PreservedSRegs.empty())
    _pop(PreservedSRegs);

  if (!getFlags().getUseSandboxing())
    return;

  // Change the original ret instruction into a sandboxed return sequence.
  //
  // bundle_lock
  // bic lr, #0xc000000f
  // bx lr
  // bundle_unlock
  //
  // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
  // restrict to the lower 1GB as well.
  Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
  Variable *RetValue = nullptr;
  if (RI->getSrcSize())
    RetValue = llvm::cast<Variable>(RI->getSrc(0));

  Sandboxer(this).ret(LR, RetValue);

  RI->setDeleted();
}

bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
  constexpr bool ZeroExt = false;
  return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
}

Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
    Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
  // Legalize will likely need a movw/movt combination, but if the top bits are
  // all 0 from negating the offset and subtracting, we could use that instead.
  const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
  Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
  if (ShouldSub) {
    Operand *OffsetVal =
        Target->legalize(Target->Ctx->getConstantInt32(-Offset),
                         Legal_Reg | Legal_Flex, ScratchRegNum);
    Target->_sub(ScratchReg, Base, OffsetVal);
  } else {
    Operand *OffsetVal =
        Target->legalize(Target->Ctx->getConstantInt32(Offset),
                         Legal_Reg | Legal_Flex, ScratchRegNum);
    Target->_add(ScratchReg, Base, OffsetVal);
  }

  if (ScratchRegNum == Target->getReservedTmpReg()) {
    const bool BaseIsStackOrFramePtr =
        Base->getRegNum() == Target->getFrameOrStackReg();
    // There is currently no code path that would violate this assertion, so we
    // leave it here in case one is ever introduced. This is not a fatal error
    // (thus the use of assert() and not llvm::report_fatal_error) as the
    // program compiled by Subzero will still work correctly.
    assert(BaseIsStackOrFramePtr);
    // Side-effect: updates TempBaseReg to reflect the new Temporary.
    if (BaseIsStackOrFramePtr) {
      TempBaseReg = ScratchReg;
      TempBaseOffset = Offset;
    } else {
      TempBaseReg = nullptr;
      TempBaseOffset = 0;
    }
  }

  return ScratchReg;
}
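
// For instance (a sketch of what newBaseRegister emits): with Base == sp and
// Offset == -8, ShouldSub holds and the result is "sub ip, sp, #8" (assuming
// the reserved temporary is ip); a large offset like 0x12345 instead becomes
// a movw/movt into ip followed by "add ip, sp, ip".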

OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
    Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
  assert(!Base->isRematerializable());
  if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
    return OperandARM32Mem::create(
        Target->Func, Ty, Base,
        llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
        OperandARM32Mem::Offset);
  }

  if (!AllowOffsets || TempBaseReg == nullptr) {
    newBaseRegister(Base, Offset, Target->getReservedTmpReg());
  }

  int32_t OffsetDiff = Offset - TempBaseOffset;
  assert(AllowOffsets || OffsetDiff == 0);

  if (!Target->isLegalMemOffset(Ty, OffsetDiff)) {
    newBaseRegister(Base, Offset, Target->getReservedTmpReg());
    OffsetDiff = 0;
  }

  assert(!TempBaseReg->isRematerializable());
  return OperandARM32Mem::create(
      Target->Func, Ty, TempBaseReg,
      llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)),
      OperandARM32Mem::Offset);
}
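
// Example of the reuse createMemOperand enables (illustrative): for two i32
// stack accesses at offsets 4096 and 4100, the first offset exceeds the imm12
// range, so a temporary base holding sp+4096 is created and the access becomes
// [ip, #0]; the second access then reuses it as [ip, #4] instead of
// materializing a second base.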

void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy(
    const Inst *Instr) {
  bool ClobbersTempBase = false;
  if (TempBaseReg != nullptr) {
    Variable *Dest = Instr->getDest();
    if (llvm::isa<InstARM32Call>(Instr)) {
      // The following assertion is an invariant, so we remove it from the if
      // test. If the invariant is ever broken/invalidated/changed, remember
      // to add it back to the if condition.
      assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg());
      // The linker may need to clobber IP if the call is too far from PC.
      // Thus, we assume IP will be overwritten.
      ClobbersTempBase = true;
    } else if (Dest != nullptr &&
               Dest->getRegNum() == TempBaseReg->getRegNum()) {
      // Register redefinition.
      ClobbersTempBase = true;
    }
  }

  if (ClobbersTempBase) {
    TempBaseReg = nullptr;
    TempBaseOffset = 0;
  }
}

void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
  Variable *Dest = MovInstr->getDest();
  assert(Dest != nullptr);
  Type DestTy = Dest->getType();
  assert(DestTy != IceType_i64);

  Operand *Src = MovInstr->getSrc(0);
  Type SrcTy = Src->getType();
  (void)SrcTy;
  assert(SrcTy != IceType_i64);

  if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
    return;

  bool Legalized = false;
  if (!Dest->hasReg()) {
    auto *SrcR = llvm::cast<Variable>(Src);
    assert(SrcR->hasReg());
    assert(!SrcR->isRematerializable());
    const int32_t Offset = Dest->getStackOffset();
    // This is a _mov(Mem(), Variable), i.e., a store.
    TargetARM32::Sandboxer(Target).str(
        SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
        MovInstr->getPredicate());
    // _str() does not have a Dest, so we add a fake-def(Dest).
    Target->Context.insert<InstFakeDef>(Dest);
    Legalized = true;
  } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
    if (Var->isRematerializable()) {
      // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).

      // ExtraOffset is only needed for frame-pointer based frames as we have
      // to account for spill storage.
      const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg())
                                      ? Target->getFrameFixedAllocaOffset()
                                      : 0;

      const int32_t Offset = Var->getStackOffset() + ExtraOffset;
      Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
      Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
      Target->_mov(Dest, T);
      Legalized = true;
    } else {
      if (!Var->hasReg()) {
        // This is a _mov(Variable, Mem()), i.e., a load.
        const int32_t Offset = Var->getStackOffset();
        TargetARM32::Sandboxer(Target).ldr(
            Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
            MovInstr->getPredicate());
        Legalized = true;
      }
    }
  }

  if (Legalized) {
    if (MovInstr->isDestRedefined()) {
      Target->_set_dest_redefined();
    }
    MovInstr->setDeleted();
  }
}

// ARM32 address modes:
//  ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
//                    [reg +/- reg << shamt5]
//  ld/st f[32|64]  : [reg], [reg +/- imm8] , [pc +/- imm8]
//  ld/st vectors   : [reg]
//
// For now, we don't handle address modes with Relocatables.
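//
// For example, "ldr r0, [sp, #4095]" is encodable (imm12), while #4096 is not
// and requires a temporary base register; f64 accesses are tighter still,
// since vldr only encodes offsets that are multiples of 4 up to +/-1020.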
namespace {
// MemTraits contains per-type valid address mode information.
#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \
          ubits, rraddr, shaddr)                                               \
  static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
ICETYPEARM32_TABLE
#undef X

static const struct {
  int32_t ValidImmMask;
  bool CanHaveImm;
  bool CanHaveIndex;
  bool CanHaveShiftedIndex;
} MemTraits[] = {
#define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits,  \
          ubits, rraddr, shaddr)                                               \
  {                                                                            \
      (1 << ubits) - 1,                                                        \
      (ubits) > 0,                                                             \
      rraddr,                                                                  \
      shaddr,                                                                  \
  },
    ICETYPEARM32_TABLE
#undef X
};
static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
} // end of anonymous namespace

OperandARM32Mem *
TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
                                                       bool AllowOffsets) {
  assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable());
  assert(Mem->isRegReg() || Target->isLegalMemOffset(
                                Mem->getType(), Mem->getOffset()->getValue()));

  bool Legalized = false;
  Variable *Base = Mem->getBase();
  int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue();
  if (Base->isRematerializable()) {
    const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg())
                                    ? Target->getFrameFixedAllocaOffset()
                                    : 0;
    Offset += Base->getStackOffset() + ExtraOffset;
    Base = Target->getPhysicalRegister(Base->getRegNum());
    assert(!Base->isRematerializable());
    Legalized = true;
  }

  if (!Legalized && !Target->NeedSandboxing) {
    return nullptr;
  }

  if (!Mem->isRegReg()) {
    return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
  }

  if (Target->NeedSandboxing) {
    llvm::report_fatal_error("Reg-Reg address mode is not allowed.");
  }

  assert(MemTraits[Mem->getType()].CanHaveIndex);

  if (Offset != 0) {
    if (TempBaseReg == nullptr) {
      Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
    } else {
      uint32_t Imm8, Rotate;
      const int32_t OffsetDiff = Offset - TempBaseOffset;
      if (OffsetDiff == 0) {
        Base = TempBaseReg;
      } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) {
        auto *OffsetDiffF = OperandARM32FlexImm::create(
            Target->Func, IceType_i32, Imm8, Rotate);
        Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF);
        TempBaseOffset += OffsetDiff;
        Base = TempBaseReg;
      } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) {
        auto *OffsetDiffF = OperandARM32FlexImm::create(
            Target->Func, IceType_i32, Imm8, Rotate);
        Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF);
        TempBaseOffset += OffsetDiff;
        Base = TempBaseReg;
      } else {
        Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
      }
    }
  }

  return OperandARM32Mem::create(Target->Func, Mem->getType(), Base,
                                 Mem->getIndex(), Mem->getShiftOp(),
                                 Mem->getShiftAmt(), Mem->getAddrMode());
}

void TargetARM32::postLowerLegalization() {
  // If a stack variable's frame offset doesn't fit, convert from:
  //   ldr X, OFF[SP]
  // to:
  //   movw/movt TMP, OFF_PART
  //   add TMP, TMP, SP
  //   ldr X, OFF_MORE[TMP]
  //
  // This is safe because we have reserved TMP, and add for ARM does not
  // clobber the flags register.
  Func->dump("Before postLowerLegalization");
  assert(hasComputedFrame());
  // Do a fairly naive greedy clustering for now. Pick the first stack slot
  // that's out of bounds and make a new base reg using the architecture's temp
  // register. If that works for the next slot, then great. Otherwise, create a
  // new base register, clobbering the previous base register. Never share a
  // base reg across different basic blocks. This isn't ideal if local and
  // multi-block variables are far apart and their references are interspersed.
  // It may help to be more coordinated about assigning stack slot numbers, and
  // it may help to assign smaller offsets to higher-weight variables so that
  // they don't depend on this legalization.
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    // One legalizer per basic block, otherwise we would share the Temporary
    // Base Register between basic blocks.
    PostLoweringLegalizer Legalizer(this);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());

      // Check if the previous TempBaseReg is clobbered, and reset if needed.
      Legalizer.resetTempBaseIfClobberedBy(CurInstr);

      if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
        Legalizer.legalizeMov(MovInstr);
      } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
          Sandboxer(this).ldr(CurInstr->getDest(), LegalMem,
                              LdrInstr->getPredicate());
          CurInstr->setDeleted();
        }
      } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
        constexpr bool DisallowOffsetsBecauseLdrex = false;
        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
                DisallowOffsetsBecauseLdrex)) {
          Sandboxer(this).ldrex(CurInstr->getDest(), LegalMem,
                                LdrexInstr->getPredicate());
          CurInstr->setDeleted();
        }
      } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
          Sandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)),
                              LegalMem, StrInstr->getPredicate());
          CurInstr->setDeleted();
        }
      } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
        constexpr bool DisallowOffsetsBecauseStrex = false;
        if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
                DisallowOffsetsBecauseStrex)) {
          Sandboxer(this).strex(CurInstr->getDest(),
                                llvm::cast<Variable>(CurInstr->getSrc(0)),
                                LegalMem, StrexInstr->getPredicate());
          CurInstr->setDeleted();
        }
      }

      // Sanity-check: the Legalizer will either have no Temp, or it will be
      // bound to IP.
      Legalizer.assertNoTempOrAssignedToIP();
    }
  }
}

Operand *TargetARM32::loOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
    return Var64On32->getLo();
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
    return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects (pre/post
    // increment) in case of duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    if (Mem->isRegReg()) {
      Variable *IndexR = legalizeToReg(Mem->getIndex());
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR,
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
                                     Mem->getOffset(), Mem->getAddrMode());
    }
  }
  llvm::report_fatal_error("Unsupported operand type");
  return nullptr;
}

Operand *TargetARM32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
    return Var64On32->getHi();
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects in case of
    // duplication.
    assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
           Mem->getAddrMode() == OperandARM32Mem::NegOffset);
    const Type SplitType = IceType_i32;
    if (Mem->isRegReg()) {
      // We have to make a temp variable T, and add 4 to either Base or Index.
      // The Index may be shifted, so adding 4 can mean something else. Thus,
      // prefer T := Base + 4, and use T as the new Base.
      Variable *Base = Mem->getBase();
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      Variable *BaseR = legalizeToReg(NewBase);
      Variable *IndexR = legalizeToReg(Mem->getIndex());
      return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
                                     Mem->getShiftOp(), Mem->getShiftAmt(),
                                     Mem->getAddrMode());
    } else {
      Variable *Base = Mem->getBase();
      ConstantInteger32 *Offset = Mem->getOffset();
      assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
      int32_t NextOffsetVal = Offset->getValue() + 4;
      constexpr bool ZeroExt = false;
      if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) {
        // We have to make a temp variable and add 4 to either Base or Offset.
        // If we add 4 to Offset, this will convert a non-RegReg addressing
        // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
        // RegReg addressing modes, prefer adding to base and replacing
        // instead. Thus we leave the old offset alone.
        Constant *_4 = Ctx->getConstantInt32(4);
        Variable *NewBase = Func->makeVariable(Base->getType());
        lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
                                               NewBase, Base, _4));
        Base = NewBase;
      } else {
        Offset =
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
      }
      Variable *BaseR = legalizeToReg(Base);
      return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
                                     Mem->getAddrMode());
    }
  }
  llvm::report_fatal_error("Unsupported operand type");
  return nullptr;
}
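
// Example (illustrative) of the splitting rules above: for the i64 constant
// 0x1122334455667788, loOperand() yields 0x55667788 and hiOperand() yields
// 0x11223344; for an [r0, #8] memory operand, the halves become [r0, #8] and
// [r0, #12] (with a new base if the bumped offset is not encodable).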

SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
                                           RegSetMask Exclude) const {
  SmallBitVector Registers(RegARM32::Reg_NUM);

  for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
    const auto &Entry = RegARM32::RegTable[i];
    if (Entry.Scratch && (Include & RegSet_CallerSave))
      Registers[i] = true;
    if (Entry.Preserved && (Include & RegSet_CalleeSave))
      Registers[i] = true;
    if (Entry.StackPtr && (Include & RegSet_StackPointer))
      Registers[i] = true;
    if (Entry.FramePtr && (Include & RegSet_FramePointer))
      Registers[i] = true;
    if (Entry.Scratch && (Exclude & RegSet_CallerSave))
      Registers[i] = false;
    if (Entry.Preserved && (Exclude & RegSet_CalleeSave))
      Registers[i] = false;
    if (Entry.StackPtr && (Exclude & RegSet_StackPointer))
      Registers[i] = false;
    if (Entry.FramePtr && (Exclude & RegSet_FramePointer))
      Registers[i] = false;
  }

  return Registers;
}

void TargetARM32::lowerAlloca(const InstAlloca *Instr) {
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));

  const uint32_t Alignment =
      std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
  const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
  const bool OptM1 = Func->getOptLevel() == Opt_m1;
  const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
  const bool UseFramePointer =
      hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

  if (UseFramePointer)
    setHasFramePointer();

  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  if (OverAligned) {
    Sandboxer(this).align_sp(Alignment);
  }

  Variable *Dest = Instr->getDest();
  Operand *TotalSize = Instr->getSizeInBytes();

  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    const uint32_t Value =
        Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
    // Constant size alloca.
    if (!UseFramePointer) {
      // If we don't need a Frame Pointer, this alloca has a known offset to
      // the stack pointer. We don't need to adjust the stack pointer, nor
      // assign any value to Dest, as Dest is rematerializable.
      assert(Dest->isRematerializable());
      FixedAllocaSizeBytes += Value;
      Context.insert<InstFakeDef>(Dest);
      return;
    }

    // If a frame pointer is required, then we need to store the alloca'd
    // result in Dest.
    Operand *SubAmountRF =
        legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
    Sandboxer(this).sub_sp(SubAmountRF);
  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
    Variable *T = makeReg(IceType_i32);
    _mov(T, TotalSize);
    Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
    _add(T, T, AddAmount);
    alignRegisterPow2(T, Alignment);
    Sandboxer(this).sub_sp(T);
  }

  // Add back a few bytes to SP to account for the out args area.
  Variable *T = SP;
  if (MaxOutArgsSizeBytes != 0) {
    T = makeReg(getPointerType());
    Operand *OutArgsSizeRF = legalize(
        Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
    _add(T, SP, OutArgsSizeRF);
  }

  _mov(Dest, T);
}
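
// Sizing sketch for lowerAlloca (illustrative): an "alloca 20, align 32" whose
// alignment exceeds ARM32_STACK_ALIGNMENT_BYTES first aligns SP to 32 bytes
// and then drops it by applyAlignment(20, 32) == 32; UseFramePointer is forced
// so the result remains addressable across later SP adjustments.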

void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
  if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
    return;
  Variable *SrcLoReg = legalizeToReg(SrcLo);
  switch (Ty) {
  default:
    llvm_unreachable(
        ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str());
  case IceType_i8:
  case IceType_i16: {
    Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
    Variable *T = makeReg(IceType_i32);
    _lsls(T, SrcLoReg, ShAmtImm);
    Context.insert<InstFakeUse>(T);
  } break;
  case IceType_i32: {
    _tst(SrcLoReg, SrcLoReg);
    break;
  }
  case IceType_i64: {
    Variable *T = makeReg(IceType_i32);
    _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
    // T isn't going to be used, but we need the side-effect of setting flags
    // from this operation.
    Context.insert<InstFakeUse>(T);
  }
  }
  auto *Label = InstARM32Label::create(Func, this);
  _br(Label, CondARM32::NE);
  _trap();
  Context.insert(Label);
}
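
// The emitted check is roughly (sketch, i32 case):
//   tst  rSrc, rSrc
//   bne  .Lnonzero
//   <trap>
// .Lnonzero:
// For i8/i16, the lsls shifts the narrow value into the top bits so the Z
// flag reflects only the valid bits; for i64, the orrs of lo|hi sets Z only
// when both halves are zero.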

void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
                               Operand *Src1, ExtInstr ExtFunc,
                               DivInstr DivFunc, bool IsRemainder) {
  div0Check(Dest->getType(), Src1, nullptr);
  Variable *Src1R = legalizeToReg(Src1);
  Variable *T0R = Src0R;
  Variable *T1R = Src1R;
  if (Dest->getType() != IceType_i32) {
    T0R = makeReg(IceType_i32);
    (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
    T1R = makeReg(IceType_i32);
    (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
  }
  if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
    (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
    if (IsRemainder) {
      Variable *T2 = makeReg(IceType_i32);
      _mls(T2, T, T1R, T0R);
      T = T2;
    }
    _mov(Dest, T);
  } else {
    llvm::report_fatal_error("div should have already been turned into a call");
  }
}
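
// With hardware division, the lowering above boils down to (illustrative):
//   sdiv t, n, d      @ or udiv, depending on ExtFunc/DivFunc
//   mls  r, t, d, n   @ remainder only: r = n - (n / d) * d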

TargetARM32::SafeBoolChain
TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) {
  Variable *Dest = Instr->getDest();
  assert(Dest->getType() == IceType_i1);

  // So folding didn't work for Instr. Not a problem: We just need to
  // materialize the Sources, and perform the operation. We create regular
  // Variables (and not infinite-weight ones) because this call might recurse a
  // lot, and we might end up with tons of infinite weight temporaries.
  assert(Instr->getSrcSize() == 2);
  Variable *Src0 = Func->makeVariable(IceType_i1);
  SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0));

  Operand *Src1 = Instr->getSrc(1);
  SafeBoolChain Src1Safe = SBC_Yes;

  if (!llvm::isa<Constant>(Src1)) {
    Variable *Src1V = Func->makeVariable(IceType_i1);
    Src1Safe = lowerInt1(Src1V, Src1);
    Src1 = Src1V;
  }

  Variable *T = makeReg(IceType_i1);
  Src0 = legalizeToReg(Src0);
  Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
  switch (Instr->getOp()) {
  default:
    // If this Unreachable is ever executed, add the offending operation to
    // the list of valid consumers.
    llvm::report_fatal_error("Unhandled i1 Op");
  case InstArithmetic::And:
    _and(T, Src0, Src1RF);
    break;
  case InstArithmetic::Or:
    _orr(T, Src0, Src1RF);
    break;
  case InstArithmetic::Xor:
    _eor(T, Src0, Src1RF);
    break;
  }
  _mov(Dest, T);
  return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
}

namespace {
// NumericOperands is used during arithmetic/icmp lowering for constant
// folding. It holds the two source operands, and maintains some state as to
// whether one of them is a constant. If one of the operands is a constant,
// then it will be stored as the operation's second source, with a bit
// indicating whether the operands were swapped.
//
// The class is split into a base class with operand type-independent methods,
// and a derived, templated class, for each type of operand we want to fold
// constants for:
//
// NumericOperandsBase --> NumericOperands<ConstantFloat>
//                     --> NumericOperands<ConstantDouble>
//                     --> NumericOperands<ConstantInteger32>
//
// NumericOperands<ConstantInteger32> also exposes helper methods for emitting
// inverted/negated immediates.
class NumericOperandsBase {
  NumericOperandsBase() = delete;
  NumericOperandsBase(const NumericOperandsBase &) = delete;
  NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;

public:
  NumericOperandsBase(Operand *S0, Operand *S1)
      : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
        Swapped(Src0 == S1 && S0 != S1) {
    assert(Src0 != nullptr);
    assert(Src1 != nullptr);
    assert(Src0 != Src1 || S0 == S1);
  }

  bool hasConstOperand() const {
    return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
  }

  bool swappedOperands() const { return Swapped; }

  Variable *src0R(TargetARM32 *Target) const {
    return legalizeToReg(Target, Src0);
  }

  Variable *unswappedSrc0R(TargetARM32 *Target) const {
    return legalizeToReg(Target, Swapped ? Src1 : Src0);
  }

  Operand *src1RF(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target, Src1);
  }

  Variable *unswappedSrc1R(TargetARM32 *Target) const {
    return legalizeToReg(Target, Swapped ? Src0 : Src1);
  }

  Operand *src1() const { return Src1; }

protected:
  Operand *const Src0;
  Operand *const Src1;
  const bool Swapped;

  static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
    return Target->legalizeToReg(Src);
  }

  static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
    return Target->legalize(Src,
                            TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
  }

private:
  static Operand *NonConstOperand(Operand *S0, Operand *S1) {
    if (!llvm::isa<Constant>(S0))
      return S0;
    if (!llvm::isa<Constant>(S1))
      return S1;
    if (llvm::isa<ConstantRelocatable>(S1) &&
        !llvm::isa<ConstantRelocatable>(S0))
      return S1;
    return S0;
  }

  static Operand *ConstOperand(Operand *S0, Operand *S1) {
    if (!llvm::isa<Constant>(S0))
      return S1;
    if (!llvm::isa<Constant>(S1))
      return S0;
    if (llvm::isa<ConstantRelocatable>(S1) &&
        !llvm::isa<ConstantRelocatable>(S0))
      return S0;
    return S1;
  }
};

template <typename C> class NumericOperands : public NumericOperandsBase {
  NumericOperands() = delete;
  NumericOperands(const NumericOperands &) = delete;
  NumericOperands &operator=(const NumericOperands &) = delete;

public:
  NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
    assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
  }

  typename C::PrimType getConstantValue() const {
    return llvm::cast<C>(Src1)->getValue();
  }
};

using FloatOperands = NumericOperands<ConstantFloat>;
using DoubleOperands = NumericOperands<ConstantDouble>;

class Int32Operands : public NumericOperands<ConstantInteger32> {
  Int32Operands() = delete;
  Int32Operands(const Int32Operands &) = delete;
  Int32Operands &operator=(const Int32Operands &) = delete;

public:
  Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}

  Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const {
    if (!swappedOperands() && hasConstOperand()) {
      return Target->shAmtImm(getConstantValue() & 0x1F);
    }
    return legalizeToReg(Target, Swapped ? Src0 : Src1);
  }

  bool isSrc1ImmediateZero() const {
    if (!swappedOperands() && hasConstOperand()) {
      return getConstantValue() == 0;
    }
    return false;
  }

  bool immediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
  }

  bool negatedImmediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(
        -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
  }

  Operand *negatedSrc1F(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target,
                               Target->getCtx()->getConstantInt32(
                                   -static_cast<int32_t>(getConstantValue())));
  }

  bool invertedImmediateIsFlexEncodable() const {
    uint32_t Rotate, Imm8;
    return OperandARM32FlexImm::canHoldImm(
        ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
  }

  Operand *invertedSrc1F(TargetARM32 *Target) const {
    return legalizeToRegOrFlex(Target,
                               Target->getCtx()->getConstantInt32(
                                   ~static_cast<uint32_t>(getConstantValue())));
  }
};
} // end of anonymous namespace
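
// A quick feel for the flex-immediate helpers above (illustrative):
// 0xFF000000 is directly encodable (0xFF rotated right by 8), while
// 0xFFFFFF00 is not; however, ~0xFFFFFF00 == 0xFF and -0xFFFFFF00 == 0x100
// both are, which is what lets an and/add be rewritten as bic/sub with the
// inverted or negated immediate instead of materializing the constant.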
2559 
preambleDivRem(const InstCall * Instr)2560 void TargetARM32::preambleDivRem(const InstCall *Instr) {
2561   Operand *Src1 = Instr->getArg(1);
2562 
2563   switch (Src1->getType()) {
2564   default:
2565     llvm::report_fatal_error("Invalid type for idiv.");
2566   case IceType_i64: {
2567     if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2568       if (C->getValue() == 0) {
2569         _trap();
2570         return;
2571       }
2572     }
2573     div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1));
2574     return;
2575   }
2576   case IceType_i32: {
2577     // Src0 and Src1 have already been appropriately extended to an i32, so we
2578     // don't check for i8 and i16.
2579     if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2580       if (C->getValue() == 0) {
2581         _trap();
2582         return;
2583       }
2584     }
2585     div0Check(IceType_i32, Src1, nullptr);
2586     return;
2587   }
2588   }
2589 }
2590 
lowerInt64Arithmetic(InstArithmetic::OpKind Op,Variable * Dest,Operand * Src0,Operand * Src1)2591 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
2592                                        Variable *Dest, Operand *Src0,
2593                                        Operand *Src1) {
2594   Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2595   Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
2596   assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
2597   assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
2598 
2599   auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2600   auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2601   Variable *T_Lo = makeReg(DestLo->getType());
2602   Variable *T_Hi = makeReg(DestHi->getType());
2603 
2604   switch (Op) {
2605   case InstArithmetic::_num:
2606     llvm::report_fatal_error("Unknown arithmetic operator");
2607     return;
2608   case InstArithmetic::Add: {
2609     Variable *Src0LoR = SrcsLo.src0R(this);
2610     Operand *Src1LoRF = SrcsLo.src1RF(this);
2611     Variable *Src0HiR = SrcsHi.src0R(this);
2612     Operand *Src1HiRF = SrcsHi.src1RF(this);
2613     _adds(T_Lo, Src0LoR, Src1LoRF);
2614     _mov(DestLo, T_Lo);
2615     _adc(T_Hi, Src0HiR, Src1HiRF);
2616     _mov(DestHi, T_Hi);
2617     return;
2618   }
2619   case InstArithmetic::And: {
2620     Variable *Src0LoR = SrcsLo.src0R(this);
2621     Operand *Src1LoRF = SrcsLo.src1RF(this);
2622     Variable *Src0HiR = SrcsHi.src0R(this);
2623     Operand *Src1HiRF = SrcsHi.src1RF(this);
2624     _and(T_Lo, Src0LoR, Src1LoRF);
2625     _mov(DestLo, T_Lo);
2626     _and(T_Hi, Src0HiR, Src1HiRF);
2627     _mov(DestHi, T_Hi);
2628     return;
2629   }
2630   case InstArithmetic::Or: {
2631     Variable *Src0LoR = SrcsLo.src0R(this);
2632     Operand *Src1LoRF = SrcsLo.src1RF(this);
2633     Variable *Src0HiR = SrcsHi.src0R(this);
2634     Operand *Src1HiRF = SrcsHi.src1RF(this);
2635     _orr(T_Lo, Src0LoR, Src1LoRF);
2636     _mov(DestLo, T_Lo);
2637     _orr(T_Hi, Src0HiR, Src1HiRF);
2638     _mov(DestHi, T_Hi);
2639     return;
2640   }
2641   case InstArithmetic::Xor: {
2642     Variable *Src0LoR = SrcsLo.src0R(this);
2643     Operand *Src1LoRF = SrcsLo.src1RF(this);
2644     Variable *Src0HiR = SrcsHi.src0R(this);
2645     Operand *Src1HiRF = SrcsHi.src1RF(this);
2646     _eor(T_Lo, Src0LoR, Src1LoRF);
2647     _mov(DestLo, T_Lo);
2648     _eor(T_Hi, Src0HiR, Src1HiRF);
2649     _mov(DestHi, T_Hi);
2650     return;
2651   }
2652   case InstArithmetic::Sub: {
2653     Variable *Src0LoR = SrcsLo.src0R(this);
2654     Operand *Src1LoRF = SrcsLo.src1RF(this);
2655     Variable *Src0HiR = SrcsHi.src0R(this);
2656     Operand *Src1HiRF = SrcsHi.src1RF(this);
2657     if (SrcsLo.swappedOperands()) {
2658       _rsbs(T_Lo, Src0LoR, Src1LoRF);
2659       _mov(DestLo, T_Lo);
2660       _rsc(T_Hi, Src0HiR, Src1HiRF);
2661       _mov(DestHi, T_Hi);
2662     } else {
2663       _subs(T_Lo, Src0LoR, Src1LoRF);
2664       _mov(DestLo, T_Lo);
2665       _sbc(T_Hi, Src0HiR, Src1HiRF);
2666       _mov(DestHi, T_Hi);
2667     }
2668     return;
2669   }
2670   case InstArithmetic::Mul: {
2671     // GCC 4.8 does:
2672     // a=b*c ==>
2673     //   t_acc =(mul) (b.lo * c.hi)
2674     //   t_acc =(mla) (c.lo * b.hi) + t_acc
2675     //   t.hi,t.lo =(umull) b.lo * c.lo
2676     //   t.hi += t_acc
2677     //   a.lo = t.lo
2678     //   a.hi = t.hi
2679     //
2680     // LLVM does:
2681     //   t.hi,t.lo =(umull) b.lo * c.lo
2682     //   t.hi =(mla) (b.lo * c.hi) + t.hi
2683     //   t.hi =(mla) (b.hi * c.lo) + t.hi
2684     //   a.lo = t.lo
2685     //   a.hi = t.hi
2686     //
2687     // LLVM's lowering has fewer instructions, but more register pressure:
2688     // t.lo is live from beginning to end, while GCC delays the two-dest
2689     // instruction till the end, and kills c.hi immediately.
2690     Variable *T_Acc = makeReg(IceType_i32);
2691     Variable *T_Acc1 = makeReg(IceType_i32);
2692     Variable *T_Hi1 = makeReg(IceType_i32);
2693     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2694     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2695     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2696     Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
2697     _mul(T_Acc, Src0RLo, Src1RHi);
2698     _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
2699     _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
2700     _add(T_Hi, T_Hi1, T_Acc1);
2701     _mov(DestLo, T_Lo);
2702     _mov(DestHi, T_Hi);
2703     return;
2704   }
2705   case InstArithmetic::Shl: {
2706     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2707       Variable *Src0RLo = SrcsLo.src0R(this);
2708       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2709       const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
2710       if (ShAmtImm == 0) {
2711         _mov(DestLo, Src0RLo);
2712         _mov(DestHi, SrcsHi.src0R(this));
2713         return;
2714       }
2715 
2716       if (ShAmtImm >= 32) {
2717         if (ShAmtImm == 32) {
2718           _mov(DestHi, Src0RLo);
2719         } else {
2720           Operand *ShAmtOp = shAmtImm(ShAmtImm - 32);
2721           _lsl(T_Hi, Src0RLo, ShAmtOp);
2722           _mov(DestHi, T_Hi);
2723         }
2724 
2725         Operand *_0 =
2726             legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2727         _mov(T_Lo, _0);
2728         _mov(DestLo, T_Lo);
2729         return;
2730       }
2731 
2732       Variable *Src0RHi = SrcsHi.src0R(this);
2733       Operand *ShAmtOp = shAmtImm(ShAmtImm);
2734       Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm);
2735       _lsl(T_Hi, Src0RHi, ShAmtOp);
2736       _orr(T_Hi, T_Hi,
2737            OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
2738                                        OperandARM32::LSR, ComplShAmtOp));
2739       _mov(DestHi, T_Hi);
2740 
2741       _lsl(T_Lo, Src0RLo, ShAmtOp);
2742       _mov(DestLo, T_Lo);
2743       return;
2744     }
2745 
2746     // a=b<<c ==>
2747     // pnacl-llc does:
2748     // mov     t_b.lo, b.lo
2749     // mov     t_b.hi, b.hi
2750     // mov     t_c.lo, c.lo
2751     // rsb     T0, t_c.lo, #32
2752     // lsr     T1, t_b.lo, T0
2753     // orr     t_a.hi, T1, t_b.hi, lsl t_c.lo
2754     // sub     T2, t_c.lo, #32
2755     // cmp     T2, #0
2756     // lslge   t_a.hi, t_b.lo, T2
2757     // lsl     t_a.lo, t_b.lo, t_c.lo
2758     // mov     a.lo, t_a.lo
2759     // mov     a.hi, t_a.hi
2760     //
2761     // GCC 4.8 does:
2762     // sub t_c1, c.lo, #32
2763     // lsl t_hi, b.hi, c.lo
2764     // orr t_hi, t_hi, b.lo, lsl t_c1
2765     // rsb t_c2, c.lo, #32
2766     // orr t_hi, t_hi, b.lo, lsr t_c2
2767     // lsl t_lo, b.lo, c.lo
2768     // a.lo = t_lo
2769     // a.hi = t_hi
2770     //
2771     // These are incompatible; therefore, we mimic pnacl-llc.
2772     // This could be strength-reduced for constant shifts, but we don't do that
2773     // for now.
2774     // Given the sub/rsb T_C, C.lo, #32, one of the T_C values is negative. ARM
2775     // register shifts use only the low 8 bits of the shift register, and any
2776     // amount of 32 or more shifts out all the bits, so a negative T_C acts as 32.
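    // For example (assuming b.lo = 1, b.hi = 0, c.lo = 40): T0 = 32 - 40 = -8,
    // so the lsr/orr contribution is fully shifted out; T2 = 40 - 32 = 8 >= 0,
    // so the predicated lsl sets t_a.hi = b.lo << 8 = 0x100, while t_a.lo =
    // b.lo << 40 shifts out to 0; that is, 1 << 40 == 0x0000010000000000.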
2777     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2778     Operand *_0 =
2779         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2780     Variable *T0 = makeReg(IceType_i32);
2781     Variable *T1 = makeReg(IceType_i32);
2782     Variable *T2 = makeReg(IceType_i32);
2783     Variable *TA_Hi = makeReg(IceType_i32);
2784     Variable *TA_Lo = makeReg(IceType_i32);
2785     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2786     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2787     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2788     _rsb(T0, Src1RLo, _32);
2789     _lsr(T1, Src0RLo, T0);
2790     _orr(TA_Hi, T1,
2791          OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2792                                      OperandARM32::LSL, Src1RLo));
2793     _sub(T2, Src1RLo, _32);
2794     _cmp(T2, _0);
2795     _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
2796     _set_dest_redefined();
2797     _lsl(TA_Lo, Src0RLo, Src1RLo);
2798     _mov(DestLo, TA_Lo);
2799     _mov(DestHi, TA_Hi);
2800     return;
2801   }
2802   case InstArithmetic::Lshr:
2803   case InstArithmetic::Ashr: {
2804     const bool ASR = Op == InstArithmetic::Ashr;
2805     if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2806       Variable *Src0RHi = SrcsHi.src0R(this);
2807       // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2808       const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F;
2809       if (ShAmt == 0) {
2810         _mov(DestHi, Src0RHi);
2811         _mov(DestLo, SrcsLo.src0R(this));
2812         return;
2813       }
2814 
2815       if (ShAmt >= 32) {
2816         if (ShAmt == 32) {
2817           _mov(DestLo, Src0RHi);
2818         } else {
2819           Operand *ShAmtImm = shAmtImm(ShAmt - 32);
2820           if (ASR) {
2821             _asr(T_Lo, Src0RHi, ShAmtImm);
2822           } else {
2823             _lsr(T_Lo, Src0RHi, ShAmtImm);
2824           }
2825           _mov(DestLo, T_Lo);
2826         }
2827 
2828         if (ASR) {
2829           Operand *_31 = shAmtImm(31);
2830           _asr(T_Hi, Src0RHi, _31);
2831         } else {
2832           Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
2833                                  Legal_Reg | Legal_Flex);
2834           _mov(T_Hi, _0);
2835         }
2836         _mov(DestHi, T_Hi);
2837         return;
2838       }
2839 
2840       Variable *Src0RLo = SrcsLo.src0R(this);
2841       Operand *ShAmtImm = shAmtImm(ShAmt);
2842       Operand *ComplShAmtImm = shAmtImm(32 - ShAmt);
2843       _lsr(T_Lo, Src0RLo, ShAmtImm);
2844       _orr(T_Lo, T_Lo,
2845            OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2846                                        OperandARM32::LSL, ComplShAmtImm));
2847       _mov(DestLo, T_Lo);
2848 
2849       if (ASR) {
2850         _asr(T_Hi, Src0RHi, ShAmtImm);
2851       } else {
2852         _lsr(T_Hi, Src0RHi, ShAmtImm);
2853       }
2854       _mov(DestHi, T_Hi);
2855       return;
2856     }
2857 
2858     // a=b>>c
2859     // pnacl-llc does:
2860     // mov        t_b.lo, b.lo
2861     // mov        t_b.hi, b.hi
2862     // mov        t_c.lo, c.lo
2863     // lsr        T0, t_b.lo, t_c.lo
2864     // rsb        T1, t_c.lo, #32
2865     // orr        t_a.lo, T0, t_b.hi, lsl T1
2866     // sub        T2, t_c.lo, #32
2867     // cmp        T2, #0
2868     // [al]srge   t_a.lo, t_b.hi, T2
2869     // [al]sr     t_a.hi, t_b.hi, t_c.lo
2870     // mov        a.lo, t_a.lo
2871     // mov        a.hi, t_a.hi
2872     //
2873     // GCC 4.8 does (lsr):
2874     // rsb        t_c1, c.lo, #32
2875     // lsr        t_lo, b.lo, c.lo
2876     // orr        t_lo, t_lo, b.hi, lsl t_c1
2877     // sub        t_c2, c.lo, #32
2878     // orr        t_lo, t_lo, b.hi, lsr t_c2
2879     // lsr        t_hi, b.hi, c.lo
2880     // mov        a.lo, t_lo
2881     // mov        a.hi, t_hi
2882     //
2883     // These are incompatible; therefore, we mimic pnacl-llc.
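    // For example (assuming b.hi = 0x100, b.lo = 0, c.lo = 40, i.e.,
    // b = 1 << 40 and a logical shift): T0 = b.lo >> 40 = 0; T1 = 32 - 40 = -8,
    // so the orr contribution is shifted out; T2 = 40 - 32 = 8 >= 0, so the
    // predicated shift yields t_a.lo = b.hi >> 8 = 1 and t_a.hi = b.hi >> 40
    // = 0, matching (1 << 40) >> 40 == 1.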
2884     Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2885     Operand *_0 =
2886         legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2887     Variable *T0 = makeReg(IceType_i32);
2888     Variable *T1 = makeReg(IceType_i32);
2889     Variable *T2 = makeReg(IceType_i32);
2890     Variable *TA_Lo = makeReg(IceType_i32);
2891     Variable *TA_Hi = makeReg(IceType_i32);
2892     Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2893     Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2894     Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2895     _lsr(T0, Src0RLo, Src1RLo);
2896     _rsb(T1, Src1RLo, _32);
2897     _orr(TA_Lo, T0,
2898          OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2899                                      OperandARM32::LSL, T1));
2900     _sub(T2, Src1RLo, _32);
2901     _cmp(T2, _0);
2902     if (ASR) {
2903       _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2904       _set_dest_redefined();
2905       _asr(TA_Hi, Src0RHi, Src1RLo);
2906     } else {
2907       _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2908       _set_dest_redefined();
2909       _lsr(TA_Hi, Src0RHi, Src1RLo);
2910     }
2911     _mov(DestLo, TA_Lo);
2912     _mov(DestHi, TA_Hi);
2913     return;
2914   }
2915   case InstArithmetic::Fadd:
2916   case InstArithmetic::Fsub:
2917   case InstArithmetic::Fmul:
2918   case InstArithmetic::Fdiv:
2919   case InstArithmetic::Frem:
2920     llvm::report_fatal_error("FP instruction with i64 type");
2921     return;
2922   case InstArithmetic::Udiv:
2923   case InstArithmetic::Sdiv:
2924   case InstArithmetic::Urem:
2925   case InstArithmetic::Srem:
2926     llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
2927                              "should have already been handled before");
2928     return;
2929   }
2930 }
2931 
2932 namespace {
2933 // StrengthReduction is a namespace with the strength reduction machinery. The
2934 // entry point is the StrengthReduction::tryToOptimize function. It returns true
2935 // if the optimization can be performed, and false otherwise.
2936 //
2937 // If the optimization can be performed, tryToOptimize sets its NumOperations
2938 // parameter to the number of shifts that are needed to perform the
2939 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
2940 // tuples that describe how to materialize the multiplication.
2941 //
2942 // The algorithm finds contiguous runs of 1s in the multiplication source, and
2943 // uses one or two shifts to materialize each run. A sequence of 1s, e.g.,
2944 //
2945 //                  M           N
2946 //   ...00000000000011111...111110000000...
2947 //
2948 // is materializable with (1 << (M + 1)) - (1 << N):
2949 //
2950 //   ...00000000000100000...000000000000...      [1 << (M + 1)]
2951 //   ...00000000000000000...000010000000... (-)  [1 << N]
2952 //   --------------------------------------
2953 //   ...00000000000011111...111110000000...
2954 //
2955 // A single set bit is materialized with just a left shift.
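// A concrete instance: 28 = 0b11100 is a run of 1s with M = 4 and N = 2, so
// x * 28 can be materialized as (x << 5) - (x << 2), i.e., 32*x - 4*x.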
2956 namespace StrengthReduction {
2957 enum AggregationOperation {
2958   AO_Invalid,
2959   AO_Add,
2960   AO_Sub,
2961 };
2962 
2963 // AggregationElement is a glorified <ShAmt, AddOrSub> tuple.
2964 class AggregationElement {
2965   AggregationElement(const AggregationElement &) = delete;
2966 
2967 public:
2968   AggregationElement() = default;
2969   AggregationElement &operator=(const AggregationElement &) = default;
2970   AggregationElement(AggregationOperation Op, uint32_t ShAmt)
2971       : Op(Op), ShAmt(ShAmt) {}
2972 
2973   Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
2974     assert(OpR->mustHaveReg());
2975     if (ShAmt == 0) {
2976       return OpR;
2977     }
2978     return OperandARM32FlexReg::create(
2979         Func, IceType_i32, OpR, OperandARM32::LSL,
2980         OperandARM32ShAmtImm::create(
2981             Func, llvm::cast<ConstantInteger32>(
2982                       Func->getContext()->getConstantInt32(ShAmt))));
2983   }
2984 
2985   bool aggregateWithAdd() const {
2986     switch (Op) {
2987     case AO_Invalid:
2988       llvm::report_fatal_error("Invalid Strength Reduction Operations.");
2989     case AO_Add:
2990       return true;
2991     case AO_Sub:
2992       return false;
2993     }
2994     llvm_unreachable("(silence g++ warning)");
2995   }
2996 
2997   uint32_t shAmt() const { return ShAmt; }
2998 
2999 private:
3000   AggregationOperation Op = AO_Invalid;
3001   uint32_t ShAmt;
3002 };
3003 
3004 // [RangeStart, RangeEnd] is a range of 1s in Src; RangeStart is the high bit.
3005 template <std::size_t N>
3006 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
3007                    std::array<AggregationElement, N> *Operations) {
3008   assert(*NumOperations < N);
3009   if (RangeStart == RangeEnd) {
3010     // Single bit set:
3011     // Src           : 0...00010...
3012     // RangeStart    :        ^
3013     // RangeEnd      :        ^
3014     // NegSrc        : 0...00001...
3015     (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
3016     ++(*NumOperations);
3017     return true;
3018   }
3019 
3020   // Sequence of 1s (two operations required):
3021   // Src           : 0...00011...110...
3022   // RangeStart    :        ^
3023   // RangeEnd      :              ^
3024   // NegSrc        : 0...00000...001...
3025   if (*NumOperations + 1 >= N) {
3026     return false;
3027   }
3028   (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
3029   ++(*NumOperations);
3030   (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
3031   ++(*NumOperations);
3032   return true;
3033 }
3034 
3035 // tryToOptimize scans Src looking for sequences of 1s (including the unitary bit
3036 // 1 surrounded by zeroes).
3037 template <std::size_t N>
3038 bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
3039                    std::array<AggregationElement, N> *Operations) {
3040   constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
3041   uint32_t NegSrc = ~Src;
3042 
3043   *NumOperations = 0;
3044   while (Src != 0 && *NumOperations < N) {
3045     // Each step of the algorithm:
3046     //   * finds L, the last bit set in Src;
3047     //   * clears all the upper bits in NegSrc up to bit L;
3048     //   * finds nL, the last bit set in NegSrc;
3049     //   * clears all the upper bits in Src up to bit nL;
3050     //
3051     // If L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
3052     // of 1s starting at L and ending at nL + 1 was found.
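    // For example, Src = 20 = 0b10100: the first iteration finds L = 4 and
    // nL = 3, so bit 4 is a unitary 1 (recording <Add, 4>) and Src is cleared
    // down to 0b100; the second iteration finds L = 2, nL = 1 and records
    // <Add, 2>. A multiplication x * 20 therefore becomes (x << 4) + (x << 2).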
3053     const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
3054     const uint32_t NegSrcClearMask =
3055         (SrcLastBitSet == 0) ? 0
3056                              : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
3057     NegSrc &= NegSrcClearMask;
3058     if (NegSrc == 0) {
3059       if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
3060         return true;
3061       }
3062       return false;
3063     }
3064     const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
3065     assert(NegSrcLastBitSet < SrcLastBitSet);
3066     const uint32_t SrcClearMask =
3067         (NegSrcLastBitSet == 0)
3068             ? 0
3069             : (0xFFFFFFFFu) >> (SrcSizeBits - NegSrcLastBitSet);
3070     Src &= SrcClearMask;
3071     if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
3072                        Operations)) {
3073       return false;
3074     }
3075   }
3076 
3077   return Src == 0;
3078 }
3079 } // end of namespace StrengthReduction
3080 } // end of anonymous namespace
3081 
3082 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
3083   Variable *Dest = Instr->getDest();
3084 
3085   if (Dest->isRematerializable()) {
3086     Context.insert<InstFakeDef>(Dest);
3087     return;
3088   }
3089 
3090   Type DestTy = Dest->getType();
3091   if (DestTy == IceType_i1) {
3092     lowerInt1Arithmetic(Instr);
3093     return;
3094   }
3095 
3096   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3097   Operand *Src1 = legalizeUndef(Instr->getSrc(1));
3098   if (DestTy == IceType_i64) {
3099     lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
3100     return;
3101   }
3102 
3103   if (isVectorType(DestTy)) {
3104     switch (Instr->getOp()) {
3105     default:
3106       UnimplementedLoweringError(this, Instr);
3107       return;
3108     // Explicitly allow vector instructions we have implemented/enabled.
3109     case InstArithmetic::Add:
3110     case InstArithmetic::And:
3111     case InstArithmetic::Ashr:
3112     case InstArithmetic::Fadd:
3113     case InstArithmetic::Fmul:
3114     case InstArithmetic::Fsub:
3115     case InstArithmetic::Lshr:
3116     case InstArithmetic::Mul:
3117     case InstArithmetic::Or:
3118     case InstArithmetic::Shl:
3119     case InstArithmetic::Sub:
3120     case InstArithmetic::Xor:
3121       break;
3122     }
3123   }
3124 
3125   Variable *T = makeReg(DestTy);
3126 
3127   // * Handle div/rem separately. They require a non-legalized Src1 so that we
3128   // can inspect whether Src1 is a non-zero constant. Once legalized, that is
3129   // harder to determine (the constant may have been moved to a register).
3130   // * Handle floating point arithmetic separately: it requires Src1 to be
3131   // legalized to a register.
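  // For example, given x / 3, the divisor is a provably non-zero constant, so
  // the runtime division-by-zero check can presumably be elided; that is the
  // motivation for inspecting the raw (non-legalized) Src1.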
3132   switch (Instr->getOp()) {
3133   default:
3134     break;
3135   case InstArithmetic::Udiv: {
3136     constexpr bool NotRemainder = false;
3137     Variable *Src0R = legalizeToReg(Src0);
3138     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3139                  NotRemainder);
3140     return;
3141   }
3142   case InstArithmetic::Sdiv: {
3143     constexpr bool NotRemainder = false;
3144     Variable *Src0R = legalizeToReg(Src0);
3145     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3146                  NotRemainder);
3147     return;
3148   }
3149   case InstArithmetic::Urem: {
3150     constexpr bool IsRemainder = true;
3151     Variable *Src0R = legalizeToReg(Src0);
3152     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3153                  IsRemainder);
3154     return;
3155   }
3156   case InstArithmetic::Srem: {
3157     constexpr bool IsRemainder = true;
3158     Variable *Src0R = legalizeToReg(Src0);
3159     lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3160                  IsRemainder);
3161     return;
3162   }
3163   case InstArithmetic::Frem: {
3164     if (!isScalarFloatingType(DestTy)) {
3165       llvm::report_fatal_error("Unexpected type when lowering frem.");
3166     }
3167     llvm::report_fatal_error("Frem should have already been lowered.");
3168   }
3169   case InstArithmetic::Fadd: {
3170     Variable *Src0R = legalizeToReg(Src0);
3171     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3172       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3173       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3174       _vmla(Src0R, Src1R, Src2R);
3175       _mov(Dest, Src0R);
3176       return;
3177     }
3178 
3179     Variable *Src1R = legalizeToReg(Src1);
3180     _vadd(T, Src0R, Src1R);
3181     _mov(Dest, T);
3182     return;
3183   }
3184   case InstArithmetic::Fsub: {
3185     Variable *Src0R = legalizeToReg(Src0);
3186     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3187       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3188       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3189       _vmls(Src0R, Src1R, Src2R);
3190       _mov(Dest, Src0R);
3191       return;
3192     }
3193     Variable *Src1R = legalizeToReg(Src1);
3194     _vsub(T, Src0R, Src1R);
3195     _mov(Dest, T);
3196     return;
3197   }
3198   case InstArithmetic::Fmul: {
3199     Variable *Src0R = legalizeToReg(Src0);
3200     Variable *Src1R = legalizeToReg(Src1);
3201     _vmul(T, Src0R, Src1R);
3202     _mov(Dest, T);
3203     return;
3204   }
3205   case InstArithmetic::Fdiv: {
3206     Variable *Src0R = legalizeToReg(Src0);
3207     Variable *Src1R = legalizeToReg(Src1);
3208     _vdiv(T, Src0R, Src1R);
3209     _mov(Dest, T);
3210     return;
3211   }
3212   }
3213 
3214   // Handle everything else here.
3215   Int32Operands Srcs(Src0, Src1);
3216   switch (Instr->getOp()) {
3217   case InstArithmetic::_num:
3218     llvm::report_fatal_error("Unknown arithmetic operator");
3219     return;
3220   case InstArithmetic::Add: {
3221     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3222       assert(!isVectorType(DestTy));
3223       Variable *Src0R = legalizeToReg(Src0);
3224       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3225       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3226       _mla(T, Src1R, Src2R, Src0R);
3227       _mov(Dest, T);
3228       return;
3229     }
3230 
3231     if (Srcs.hasConstOperand()) {
3232       if (!Srcs.immediateIsFlexEncodable() &&
3233           Srcs.negatedImmediateIsFlexEncodable()) {
3234         assert(!isVectorType(DestTy));
3235         Variable *Src0R = Srcs.src0R(this);
3236         Operand *Src1F = Srcs.negatedSrc1F(this);
3237         if (!Srcs.swappedOperands()) {
3238           _sub(T, Src0R, Src1F);
3239         } else {
3240           _rsb(T, Src0R, Src1F);
3241         }
3242         _mov(Dest, T);
3243         return;
3244       }
3245     }
3246     Variable *Src0R = Srcs.src0R(this);
3247     if (isVectorType(DestTy)) {
3248       Variable *Src1R = legalizeToReg(Src1);
3249       _vadd(T, Src0R, Src1R);
3250     } else {
3251       Operand *Src1RF = Srcs.src1RF(this);
3252       _add(T, Src0R, Src1RF);
3253     }
3254     _mov(Dest, T);
3255     return;
3256   }
3257   case InstArithmetic::And: {
3258     if (Srcs.hasConstOperand()) {
3259       if (!Srcs.immediateIsFlexEncodable() &&
3260           Srcs.invertedImmediateIsFlexEncodable()) {
3261         Variable *Src0R = Srcs.src0R(this);
3262         Operand *Src1F = Srcs.invertedSrc1F(this);
3263         _bic(T, Src0R, Src1F);
3264         _mov(Dest, T);
3265         return;
3266       }
3267     }
3268     assert(isIntegerType(DestTy));
3269     Variable *Src0R = Srcs.src0R(this);
3270     if (isVectorType(DestTy)) {
3271       Variable *Src1R = legalizeToReg(Src1);
3272       _vand(T, Src0R, Src1R);
3273     } else {
3274       Operand *Src1RF = Srcs.src1RF(this);
3275       _and(T, Src0R, Src1RF);
3276     }
3277     _mov(Dest, T);
3278     return;
3279   }
3280   case InstArithmetic::Or: {
3281     Variable *Src0R = Srcs.src0R(this);
3282     assert(isIntegerType(DestTy));
3283     if (isVectorType(DestTy)) {
3284       Variable *Src1R = legalizeToReg(Src1);
3285       _vorr(T, Src0R, Src1R);
3286     } else {
3287       Operand *Src1RF = Srcs.src1RF(this);
3288       _orr(T, Src0R, Src1RF);
3289     }
3290     _mov(Dest, T);
3291     return;
3292   }
3293   case InstArithmetic::Xor: {
3294     Variable *Src0R = Srcs.src0R(this);
3295     assert(isIntegerType(DestTy));
3296     if (isVectorType(DestTy)) {
3297       Variable *Src1R = legalizeToReg(Src1);
3298       _veor(T, Src0R, Src1R);
3299     } else {
3300       Operand *Src1RF = Srcs.src1RF(this);
3301       _eor(T, Src0R, Src1RF);
3302     }
3303     _mov(Dest, T);
3304     return;
3305   }
3306   case InstArithmetic::Sub: {
3307     if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3308       assert(!isVectorType(DestTy));
3309       Variable *Src0R = legalizeToReg(Src0);
3310       Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3311       Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3312       _mls(T, Src1R, Src2R, Src0R);
3313       _mov(Dest, T);
3314       return;
3315     }
3316 
3317     if (Srcs.hasConstOperand()) {
3318       assert(!isVectorType(DestTy));
3319       if (Srcs.immediateIsFlexEncodable()) {
3320         Variable *Src0R = Srcs.src0R(this);
3321         Operand *Src1RF = Srcs.src1RF(this);
3322         if (Srcs.swappedOperands()) {
3323           _rsb(T, Src0R, Src1RF);
3324         } else {
3325           _sub(T, Src0R, Src1RF);
3326         }
3327         _mov(Dest, T);
3328         return;
3329       }
3330       if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
3331         Variable *Src0R = Srcs.src0R(this);
3332         Operand *Src1F = Srcs.negatedSrc1F(this);
3333         _add(T, Src0R, Src1F);
3334         _mov(Dest, T);
3335         return;
3336       }
3337     }
3338     Variable *Src0R = Srcs.unswappedSrc0R(this);
3339     Variable *Src1R = Srcs.unswappedSrc1R(this);
3340     if (isVectorType(DestTy)) {
3341       _vsub(T, Src0R, Src1R);
3342     } else {
3343       _sub(T, Src0R, Src1R);
3344     }
3345     _mov(Dest, T);
3346     return;
3347   }
3348   case InstArithmetic::Mul: {
3349     const bool OptM1 = Func->getOptLevel() == Opt_m1;
3350     if (!OptM1 && Srcs.hasConstOperand()) {
3351       constexpr std::size_t MaxShifts = 4;
3352       std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
3353       SizeT NumOperations;
3354       int32_t Const = Srcs.getConstantValue();
3355       const bool Invert = Const < 0;
3356       const bool MultiplyByZero = Const == 0;
3357       Operand *_0 =
3358           legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex);
3359 
3360       if (MultiplyByZero) {
3361         _mov(T, _0);
3362         _mov(Dest, T);
3363         return;
3364       }
3365 
3366       if (Invert) {
3367         Const = -Const;
3368       }
3369 
3370       if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
3371         assert(NumOperations >= 1);
3372         Variable *Src0R = Srcs.src0R(this);
3373         int32_t Start;
3374         int32_t End;
3375         if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
3376           // Multiplication by a power of 2 (NumOperations == 1); or
3377           // multiplication by an even number that is not a power of 2.
3378           Start = 1;
3379           End = NumOperations;
3380           assert(Shifts[0].aggregateWithAdd());
3381           _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
3382         } else {
3383           // Multiplication by an odd number. Put the free barrel shifter to
3384           // good use.
3385           Start = 0;
3386           End = NumOperations - 2;
3387           const StrengthReduction::AggregationElement &Last =
3388               Shifts[NumOperations - 1];
3389           const StrengthReduction::AggregationElement &SecondToLast =
3390               Shifts[NumOperations - 2];
3391           if (!Last.aggregateWithAdd()) {
3392             assert(SecondToLast.aggregateWithAdd());
3393             _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3394           } else if (!SecondToLast.aggregateWithAdd()) {
3395             assert(Last.aggregateWithAdd());
3396             _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3397           } else {
3398             _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3399           }
3400         }
3401 
3402         // Odd numbers :   S                                 E   I   I
3403         //               +---+---+---+---+---+---+ ... +---+---+---+---+
3404         //     Shifts  = |   |   |   |   |   |   | ... |   |   |   |   |
3405         //               +---+---+---+---+---+---+ ... +---+---+---+---+
3406         // Even numbers:   I   S                                     E
3407         //
3408         // S: Start; E: End; I: Init
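        // For example (assuming Const = 21 = 0b10101), tryToOptimize yields
        // <Add, 4>, <Add, 2>, <Add, 0>. The last shAmt is 0, so this odd-number
        // path folds the final two elements into the initial instruction,
        //   add T, Src0R, Src0R, lsl #2   @ T = 5*x
        // and the loop then applies the remaining <Add, 4> element,
        //   add T, T, Src0R, lsl #4       @ T = 21*x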
3409         for (int32_t I = Start; I < End; ++I) {
3410           const StrengthReduction::AggregationElement &Current = Shifts[I];
3411           Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
3412           if (Current.aggregateWithAdd()) {
3413             _add(T, T, SrcF);
3414           } else {
3415             _sub(T, T, SrcF);
3416           }
3417         }
3418 
3419         if (Invert) {
3420           // T = 0 - T.
3421           _rsb(T, T, _0);
3422         }
3423 
3424         _mov(Dest, T);
3425         return;
3426       }
3427     }
3428     Variable *Src0R = Srcs.unswappedSrc0R(this);
3429     Variable *Src1R = Srcs.unswappedSrc1R(this);
3430     if (isVectorType(DestTy)) {
3431       _vmul(T, Src0R, Src1R);
3432     } else {
3433       _mul(T, Src0R, Src1R);
3434     }
3435     _mov(Dest, T);
3436     return;
3437   }
3438   case InstArithmetic::Shl: {
3439     Variable *Src0R = Srcs.unswappedSrc0R(this);
3440     if (!isVectorType(T->getType())) {
3441       if (Srcs.isSrc1ImmediateZero()) {
3442         _mov(T, Src0R);
3443       } else {
3444         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3445         _lsl(T, Src0R, Src1R);
3446       }
3447     } else {
3448       if (Srcs.hasConstOperand()) {
3449         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3450         _vshl(T, Src0R, ShAmt);
3451       } else {
3452         auto *Src1R = Srcs.unswappedSrc1R(this);
3453         _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
3454       }
3455     }
3456     _mov(Dest, T);
3457     return;
3458   }
3459   case InstArithmetic::Lshr: {
3460     Variable *Src0R = Srcs.unswappedSrc0R(this);
3461     if (!isVectorType(T->getType())) {
3462       if (DestTy != IceType_i32) {
3463         _uxt(Src0R, Src0R);
3464       }
3465       if (Srcs.isSrc1ImmediateZero()) {
3466         _mov(T, Src0R);
3467       } else {
3468         Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3469         _lsr(T, Src0R, Src1R);
3470       }
3471     } else {
3472       if (Srcs.hasConstOperand()) {
3473         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3474         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Unsigned);
3475       } else {
3476         auto *Src1R = Srcs.unswappedSrc1R(this);
3477         auto *Src1RNeg = makeReg(Src1R->getType());
3478         _vneg(Src1RNeg, Src1R);
3479         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
3480       }
3481     }
3482     _mov(Dest, T);
3483     return;
3484   }
3485   case InstArithmetic::Ashr: {
3486     Variable *Src0R = Srcs.unswappedSrc0R(this);
3487     if (!isVectorType(T->getType())) {
3488       if (DestTy != IceType_i32) {
3489         _sxt(Src0R, Src0R);
3490       }
3491       if (Srcs.isSrc1ImmediateZero()) {
3492         _mov(T, Src0R);
3493       } else {
3494         _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
3495       }
3496     } else {
3497       if (Srcs.hasConstOperand()) {
3498         ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3499         _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Signed);
3500       } else {
3501         auto *Src1R = Srcs.unswappedSrc1R(this);
3502         auto *Src1RNeg = makeReg(Src1R->getType());
3503         _vneg(Src1RNeg, Src1R);
3504         _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
3505       }
3506     }
3507     _mov(Dest, T);
3508     return;
3509   }
3510   case InstArithmetic::Udiv:
3511   case InstArithmetic::Sdiv:
3512   case InstArithmetic::Urem:
3513   case InstArithmetic::Srem:
3514     llvm::report_fatal_error(
3515         "Integer div/rem should have been handled earlier.");
3516     return;
3517   case InstArithmetic::Fadd:
3518   case InstArithmetic::Fsub:
3519   case InstArithmetic::Fmul:
3520   case InstArithmetic::Fdiv:
3521   case InstArithmetic::Frem:
3522     llvm::report_fatal_error(
3523         "Floating point arith should have been handled earlier.");
3524     return;
3525   }
3526 }
3527 
3528 void TargetARM32::lowerAssign(const InstAssign *Instr) {
3529   Variable *Dest = Instr->getDest();
3530 
3531   if (Dest->isRematerializable()) {
3532     Context.insert<InstFakeDef>(Dest);
3533     return;
3534   }
3535 
3536   Operand *Src0 = Instr->getSrc(0);
3537   assert(Dest->getType() == Src0->getType());
3538   if (Dest->getType() == IceType_i64) {
3539     Src0 = legalizeUndef(Src0);
3540 
3541     Variable *T_Lo = makeReg(IceType_i32);
3542     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3543     Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
3544     _mov(T_Lo, Src0Lo);
3545     _mov(DestLo, T_Lo);
3546 
3547     Variable *T_Hi = makeReg(IceType_i32);
3548     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3549     Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
3550     _mov(T_Hi, Src0Hi);
3551     _mov(DestHi, T_Hi);
3552 
3553     return;
3554   }
3555 
3556   Operand *NewSrc;
3557   if (Dest->hasReg()) {
3558     // If Dest already has a physical register, then legalize the Src operand
3559     // into a Variable with the same register assignment. This especially
3560     // helps allow the use of Flex operands.
3561     NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
3562   } else {
3563     // Dest could be a stack operand. Since we could potentially need to do a
3564     // Store (and store can only have Register operands), legalize this to a
3565     // register.
3566     NewSrc = legalize(Src0, Legal_Reg);
3567   }
3568 
3569   if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
3570     NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
3571   }
3572   _mov(Dest, NewSrc);
3573 }
3574 
3575 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3576     Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3577     const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3578   InstARM32Label *NewShortCircuitLabel = nullptr;
3579   Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3580 
3581   const Inst *Producer = Computations.getProducerOf(Boolean);
3582 
3583   if (Producer == nullptr) {
3584     // No producer, no problem: just emit code to perform (Boolean & 1) and
3585     // set the flags register. The branch should be taken if the resulting
3586     // flags indicate a non-zero result.
3587     _tst(legalizeToReg(Boolean), _1);
3588     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3589   }
3590 
3591   switch (Producer->getKind()) {
3592   default:
3593     llvm::report_fatal_error("Unexpected producer.");
3594   case Inst::Icmp: {
3595     return ShortCircuitCondAndLabel(
3596         lowerIcmpCond(llvm::cast<InstIcmp>(Producer)));
3597   } break;
3598   case Inst::Fcmp: {
3599     return ShortCircuitCondAndLabel(
3600         lowerFcmpCond(llvm::cast<InstFcmp>(Producer)));
3601   } break;
3602   case Inst::Cast: {
3603     const auto *CastProducer = llvm::cast<InstCast>(Producer);
3604     assert(CastProducer->getCastKind() == InstCast::Trunc);
3605     Operand *Src = CastProducer->getSrc(0);
3606     if (Src->getType() == IceType_i64)
3607       Src = loOperand(Src);
3608     _tst(legalizeToReg(Src), _1);
3609     return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3610   } break;
3611   case Inst::Arithmetic: {
3612     const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
3613     switch (ArithProducer->getOp()) {
3614     default:
3615       llvm::report_fatal_error("Unhandled Arithmetic Producer.");
3616     case InstArithmetic::And: {
3617       if (!(ShortCircuitable & SC_And)) {
3618         NewShortCircuitLabel = InstARM32Label::create(Func, this);
3619       }
3620 
3621       LowerInt1BranchTarget NewTarget =
3622           TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel);
3623 
3624       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3625           Producer->getSrc(0), TargetTrue, NewTarget, SC_And);
3626       const CondWhenTrue &Cond = CondAndLabel.Cond;
3627 
3628       _br_short_circuit(NewTarget, Cond.invert());
3629 
3630       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3631       if (ShortCircuitLabel != nullptr)
3632         Context.insert(ShortCircuitLabel);
3633 
3634       return ShortCircuitCondAndLabel(
3635           lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All)
3636               .assertNoLabelAndReturnCond(),
3637           NewShortCircuitLabel);
3638     } break;
3639     case InstArithmetic::Or: {
3640       if (!(ShortCircuitable & SC_Or)) {
3641         NewShortCircuitLabel = InstARM32Label::create(Func, this);
3642       }
3643 
3644       LowerInt1BranchTarget NewTarget =
3645           TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel);
3646 
3647       ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3648           Producer->getSrc(0), NewTarget, TargetFalse, SC_Or);
3649       const CondWhenTrue &Cond = CondAndLabel.Cond;
3650 
3651       _br_short_circuit(NewTarget, Cond);
3652 
3653       InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3654       if (ShortCircuitLabel != nullptr)
3655         Context.insert(ShortCircuitLabel);
3656 
3657       return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1),
3658                                                          NewTarget, TargetFalse,
3659                                                          SC_All)
3660                                           .assertNoLabelAndReturnCond(),
3661                                       NewShortCircuitLabel);
3662     } break;
3663     }
3664   }
3665   }
3666 }
3667 
3668 void TargetARM32::lowerBr(const InstBr *Instr) {
3669   if (Instr->isUnconditional()) {
3670     _br(Instr->getTargetUnconditional());
3671     return;
3672   }
3673 
3674   CfgNode *TargetTrue = Instr->getTargetTrue();
3675   CfgNode *TargetFalse = Instr->getTargetFalse();
3676   ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3677       Instr->getCondition(), LowerInt1BranchTarget(TargetTrue),
3678       LowerInt1BranchTarget(TargetFalse), SC_All);
3679   assert(CondAndLabel.ShortCircuitTarget == nullptr);
3680 
3681   const CondWhenTrue &Cond = CondAndLabel.Cond;
3682   if (Cond.WhenTrue1 != CondARM32::kNone) {
3683     assert(Cond.WhenTrue0 != CondARM32::AL);
3684     _br(TargetTrue, Cond.WhenTrue1);
3685   }
3686 
3687   switch (Cond.WhenTrue0) {
3688   default:
3689     _br(TargetTrue, TargetFalse, Cond.WhenTrue0);
3690     break;
3691   case CondARM32::kNone:
3692     _br(TargetFalse);
3693     break;
3694   case CondARM32::AL:
3695     _br(TargetTrue);
3696     break;
3697   }
3698 }
3699 
3700 void TargetARM32::lowerCall(const InstCall *Instr) {
3701   Operand *CallTarget = Instr->getCallTarget();
3702   if (Instr->isTargetHelperCall()) {
3703     auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
3704     if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
3705       (this->*TargetHelperPreamble->second)(Instr);
3706     }
3707   }
3708   MaybeLeafFunc = false;
3709   NeedsStackAlignment = true;
3710 
3711   // Assign arguments to registers and stack. Also reserve stack.
3712   TargetARM32::CallingConv CC;
3713   // Pair of Arg Operand -> GPR number assignments.
3714   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs;
3715   llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs;
3716   // Pair of Arg Operand -> stack offset.
3717   llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3718   size_t ParameterAreaSizeBytes = 0;
3719 
3720   // Classify each argument operand according to the location where the
3721   // argument is passed.
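  // As a sketch (assuming the AAPCS-style convention modeled by
  // TargetARM32::CallingConv): for a call f(i32 a, i64 b, float c), a would
  // land in r0, b in the even-aligned pair r2/r3 (split below into its lo and
  // hi GPRs), and c in a VFP register such as s0; anything that does not fit
  // in registers falls through to the StackArgs bookkeeping.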
3722   for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3723     Operand *Arg = legalizeUndef(Instr->getArg(i));
3724     const Type Ty = Arg->getType();
3725     bool InReg = false;
3726     RegNumT Reg;
3727     if (isScalarIntegerType(Ty)) {
3728       InReg = CC.argInGPR(Ty, &Reg);
3729     } else {
3730       InReg = CC.argInVFP(Ty, &Reg);
3731     }
3732 
3733     if (!InReg) {
3734       ParameterAreaSizeBytes =
3735           applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3736       StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3737       ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3738       continue;
3739     }
3740 
3741     if (Ty == IceType_i64) {
3742       Operand *Lo = loOperand(Arg);
3743       Operand *Hi = hiOperand(Arg);
3744       GPRArgs.push_back(std::make_pair(
3745           Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg))));
3746       GPRArgs.push_back(std::make_pair(
3747           Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg))));
3748     } else if (isScalarIntegerType(Ty)) {
3749       GPRArgs.push_back(std::make_pair(Arg, Reg));
3750     } else {
3751       FPArgs.push_back(std::make_pair(Arg, Reg));
3752     }
3753   }
3754 
3755   // Adjust the parameter area so that the stack is aligned. It is assumed that
3756   // the stack is already aligned at the start of the calling sequence.
3757   ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3758 
3759   if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
3760     llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
3761   }
3762 
3763   // Copy arguments that are passed on the stack to the appropriate stack
3764   // locations.
3765   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3766   for (auto &StackArg : StackArgs) {
3767     ConstantInteger32 *Loc =
3768         llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3769     Type Ty = StackArg.first->getType();
3770     OperandARM32Mem *Addr;
3771     constexpr bool SignExt = false;
3772     if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3773       Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
3774     } else {
3775       Variable *NewBase = Func->makeVariable(SP->getType());
3776       lowerArithmetic(
3777           InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3778       Addr = formMemoryOperand(NewBase, Ty);
3779     }
3780     lowerStore(InstStore::create(Func, StackArg.first, Addr));
3781   }
3782 
3783   // Generate the call instruction. Assign its result to a temporary with high
3784   // register allocation weight.
3785   Variable *Dest = Instr->getDest();
3786   // ReturnReg doubles as ReturnRegLo as necessary.
3787   Variable *ReturnReg = nullptr;
3788   Variable *ReturnRegHi = nullptr;
3789   if (Dest) {
3790     switch (Dest->getType()) {
3791     case IceType_NUM:
3792       llvm::report_fatal_error("Invalid Call dest type");
3793       break;
3794     case IceType_void:
3795       break;
3796     case IceType_i1:
3797       assert(Computations.getProducerOf(Dest) == nullptr);
3798     // Fall-through intended.
3799     case IceType_i8:
3800     case IceType_i16:
3801     case IceType_i32:
3802       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3803       break;
3804     case IceType_i64:
3805       ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3806       ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3807       break;
3808     case IceType_f32:
3809       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
3810       break;
3811     case IceType_f64:
3812       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
3813       break;
3814     case IceType_v4i1:
3815     case IceType_v8i1:
3816     case IceType_v16i1:
3817     case IceType_v16i8:
3818     case IceType_v8i16:
3819     case IceType_v4i32:
3820     case IceType_v4f32:
3821       ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
3822       break;
3823     }
3824   }
3825 
3826   // Allow ConstantRelocatable to be left alone as a direct call, but force
3827   // other constants like ConstantInteger32 to be in a register and make it an
3828   // indirect call.
3829   if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3830     CallTarget = legalize(CallTarget, Legal_Reg);
3831   }
3832 
3833   // Copy arguments to be passed in registers to the appropriate registers.
3834   CfgVector<Variable *> RegArgs;
3835   for (auto &FPArg : FPArgs) {
3836     RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3837   }
3838   for (auto &GPRArg : GPRArgs) {
3839     RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3840   }
3841 
3842   // Generate a FakeUse of register arguments so that they do not get
3843   // dead-code eliminated as a result of the FakeKill of scratch registers
3844   // after the call. These fake-uses need to be placed here to prevent the
3845   // argument registers from being used during the legalizeToReg() calls above.
3846   for (auto *RegArg : RegArgs) {
3847     Context.insert<InstFakeUse>(RegArg);
3848   }
3849 
3850   InstARM32Call *NewCall =
3851       Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget);
3852 
3853   if (ReturnRegHi)
3854     Context.insert<InstFakeDef>(ReturnRegHi);
3855 
3856   // Insert a register-kill pseudo instruction.
3857   Context.insert<InstFakeKill>(NewCall);
3858 
3859   // Generate a FakeUse to keep the call live if necessary.
3860   if (Instr->hasSideEffects() && ReturnReg) {
3861     Context.insert<InstFakeUse>(ReturnReg);
3862   }
3863 
3864   if (Dest != nullptr) {
3865     // Assign the result of the call to Dest.
3866     if (ReturnReg != nullptr) {
3867       if (ReturnRegHi) {
3868         auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3869         Variable *DestLo = Dest64On32->getLo();
3870         Variable *DestHi = Dest64On32->getHi();
3871         _mov(DestLo, ReturnReg);
3872         _mov(DestHi, ReturnRegHi);
3873       } else {
3874         if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
3875           _mov(Dest, ReturnReg);
3876         } else {
3877           assert(isIntegerType(Dest->getType()) &&
3878                  typeWidthInBytes(Dest->getType()) <= 4);
3879           _mov(Dest, ReturnReg);
3880         }
3881       }
3882     }
3883   }
3884 
3885   if (Instr->isTargetHelperCall()) {
3886     auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
3887     if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
3888       (this->*TargetHelpersPostamble->second)(Instr);
3889     }
3890   }
3891 }
3892 
3893 namespace {
3894 void configureBitcastTemporary(Variable64On32 *Var) {
3895   Var->setMustNotHaveReg();
3896   Var->getHi()->setMustHaveReg();
3897   Var->getLo()->setMustHaveReg();
3898 }
3899 } // end of anonymous namespace
3900 
3901 void TargetARM32::lowerCast(const InstCast *Instr) {
3902   InstCast::OpKind CastKind = Instr->getCastKind();
3903   Variable *Dest = Instr->getDest();
3904   const Type DestTy = Dest->getType();
3905   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3906   switch (CastKind) {
3907   default:
3908     Func->setError("Cast type not supported");
3909     return;
3910   case InstCast::Sext: {
3911     if (isVectorType(DestTy)) {
3912       Variable *T0 = makeReg(DestTy);
3913       Variable *T1 = makeReg(DestTy);
3914       ConstantInteger32 *ShAmt = nullptr;
3915       switch (DestTy) {
3916       default:
3917         llvm::report_fatal_error("Unexpected type in vector sext.");
3918       case IceType_v16i8:
3919         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7));
3920         break;
3921       case IceType_v8i16:
3922         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15));
3923         break;
3924       case IceType_v4i32:
3925         ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31));
3926         break;
3927       }
3928       auto *Src0R = legalizeToReg(Src0);
3929       _vshl(T0, Src0R, ShAmt);
3930       _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed);
3931       _mov(Dest, T1);
3932     } else if (DestTy == IceType_i64) {
3933       // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
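      // For example, sign-extending the i8 value 0x80 (-128) to i64 yields
      // dst.lo = 0xFFFFFF80 and dst.hi = 0xFFFFFFFF.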
3934       Constant *ShiftAmt = Ctx->getConstantInt32(31);
3935       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3936       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3937       Variable *T_Lo = makeReg(DestLo->getType());
3938       if (Src0->getType() == IceType_i32) {
3939         Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3940         _mov(T_Lo, Src0RF);
3941       } else if (Src0->getType() != IceType_i1) {
3942         Variable *Src0R = legalizeToReg(Src0);
3943         _sxt(T_Lo, Src0R);
3944       } else {
3945         Operand *_0 = Ctx->getConstantZero(IceType_i32);
3946         Operand *_m1 = Ctx->getConstantInt32(-1);
3947         lowerInt1ForSelect(T_Lo, Src0, _m1, _0);
3948       }
3949       _mov(DestLo, T_Lo);
3950       Variable *T_Hi = makeReg(DestHi->getType());
3951       if (Src0->getType() != IceType_i1) {
3952         _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
3953                                                OperandARM32::ASR, ShiftAmt));
3954       } else {
3955         // For i1, the asr instruction is already done above.
3956         _mov(T_Hi, T_Lo);
3957       }
3958       _mov(DestHi, T_Hi);
3959     } else if (Src0->getType() != IceType_i1) {
3960       // t1 = sxt src; dst = t1
3961       Variable *Src0R = legalizeToReg(Src0);
3962       Variable *T = makeReg(DestTy);
3963       _sxt(T, Src0R);
3964       _mov(Dest, T);
3965     } else {
3966       Constant *_0 = Ctx->getConstantZero(IceType_i32);
3967       Operand *_m1 = Ctx->getConstantInt(DestTy, -1);
3968       Variable *T = makeReg(DestTy);
3969       lowerInt1ForSelect(T, Src0, _m1, _0);
3970       _mov(Dest, T);
3971     }
3972     break;
3973   }
3974   case InstCast::Zext: {
3975     if (isVectorType(DestTy)) {
3976       auto *Mask = makeReg(DestTy);
3977       auto *_1 = Ctx->getConstantInt32(1);
3978       auto *T = makeReg(DestTy);
3979       auto *Src0R = legalizeToReg(Src0);
3980       _mov(Mask, _1);
3981       _vand(T, Src0R, Mask);
3982       _mov(Dest, T);
3983     } else if (DestTy == IceType_i64) {
3984       // t1=uxtb src; dst.lo=t1; dst.hi=0
3985       Operand *_0 =
3986           legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
3987       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3988       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3989       Variable *T_Lo = makeReg(DestLo->getType());
3990 
3991       switch (Src0->getType()) {
3992       default: {
3993         assert(Src0->getType() != IceType_i64);
3994         _uxt(T_Lo, legalizeToReg(Src0));
3995       } break;
3996       case IceType_i32: {
3997         _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex));
3998       } break;
3999       case IceType_i1: {
4000         SafeBoolChain Safe = lowerInt1(T_Lo, Src0);
4001         if (Safe == SBC_No) {
4002           Operand *_1 =
4003               legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
4004           _and(T_Lo, T_Lo, _1);
4005         }
4006       } break;
4007       }
4008 
4009       _mov(DestLo, T_Lo);
4010 
4011       Variable *T_Hi = makeReg(DestLo->getType());
4012       _mov(T_Hi, _0);
4013       _mov(DestHi, T_Hi);
4014     } else if (Src0->getType() == IceType_i1) {
4015       Variable *T = makeReg(DestTy);
4016 
4017       SafeBoolChain Safe = lowerInt1(T, Src0);
4018       if (Safe == SBC_No) {
4019         Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
4020         _and(T, T, _1);
4021       }
4022 
4023       _mov(Dest, T);
4024     } else {
4025       // t1 = uxt src; dst = t1
4026       Variable *Src0R = legalizeToReg(Src0);
4027       Variable *T = makeReg(DestTy);
4028       _uxt(T, Src0R);
4029       _mov(Dest, T);
4030     }
4031     break;
4032   }
4033   case InstCast::Trunc: {
4034     if (isVectorType(DestTy)) {
4035       auto *T = makeReg(DestTy);
4036       auto *Src0R = legalizeToReg(Src0);
4037       _mov(T, Src0R);
4038       _mov(Dest, T);
4039     } else {
4040       if (Src0->getType() == IceType_i64)
4041         Src0 = loOperand(Src0);
4042       Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
4043       // t1 = trunc Src0RF; Dest = t1
4044       Variable *T = makeReg(DestTy);
4045       _mov(T, Src0RF);
4046       if (DestTy == IceType_i1)
4047         _and(T, T, Ctx->getConstantInt1(1));
4048       _mov(Dest, T);
4049     }
4050     break;
4051   }
4052   case InstCast::Fptrunc:
4053   case InstCast::Fpext: {
4054     // fptrunc: dest.f32 = fptrunc src0.fp64
4055     // fpext: dest.f64 = fpext src0.fp32
4056     const bool IsTrunc = CastKind == InstCast::Fptrunc;
4057     assert(!isVectorType(DestTy));
4058     assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64));
4059     assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
4060     Variable *Src0R = legalizeToReg(Src0);
4061     Variable *T = makeReg(DestTy);
4062     _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
4063     _mov(Dest, T);
4064     break;
4065   }
4066   case InstCast::Fptosi:
4067   case InstCast::Fptoui: {
4068     const bool DestIsSigned = CastKind == InstCast::Fptosi;
4069     Variable *Src0R = legalizeToReg(Src0);
4070 
4071     if (isVectorType(DestTy)) {
4072       assert(typeElementType(Src0->getType()) == IceType_f32);
4073       auto *T = makeReg(DestTy);
4074       _vcvt(T, Src0R,
4075             DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui);
4076       _mov(Dest, T);
4077       break;
4078     }
4079 
4080     const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
4081     if (llvm::isa<Variable64On32>(Dest)) {
4082       llvm::report_fatal_error("fp-to-i64 should have been pre-lowered.");
4083     }
4084     // fptosi:
4085     //     t1.fp = vcvt src0.fp
4086     //     t2.i32 = vmov t1.fp
4087     //     dest.int = conv t2.i32     @ Truncates the result if needed.
4088     // fptoui:
4089     //     t1.fp = vcvt src0.fp
4090     //     t2.u32 = vmov t1.fp
4091     //     dest.uint = conv t2.u32    @ Truncates the result if needed.
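    // For example, fptosi on an f32 holding -2.7 produces t2.i32 = -2, since
    // vcvt to integer rounds toward zero by default.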
4092     Variable *T_fp = makeReg(IceType_f32);
4093     const InstARM32Vcvt::VcvtVariant Conversion =
4094         Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
4095                   : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
4096     _vcvt(T_fp, Src0R, Conversion);
4097     Variable *T = makeReg(IceType_i32);
4098     _mov(T, T_fp);
4099     if (DestTy != IceType_i32) {
4100       Variable *T_1 = makeReg(DestTy);
4101       lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
4102       T = T_1;
4103     }
4104     _mov(Dest, T);
4105     break;
4106   }
4107   case InstCast::Sitofp:
4108   case InstCast::Uitofp: {
4109     const bool SourceIsSigned = CastKind == InstCast::Sitofp;
4110 
4111     if (isVectorType(DestTy)) {
4112       assert(typeElementType(DestTy) == IceType_f32);
4113       auto *T = makeReg(DestTy);
4114       Variable *Src0R = legalizeToReg(Src0);
4115       _vcvt(T, Src0R,
4116             SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s);
4117       _mov(Dest, T);
4118       break;
4119     }
4120 
4121     const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
4122     if (Src0->getType() == IceType_i64) {
4123       llvm::report_fatal_error("i64-to-fp should have been pre-lowered.");
4124     }
4125     // sitofp:
4126     //     t1.i32 = sext src.int    @ sign-extends src0 if needed.
4127     //     t2.fp32 = vmov t1.i32
4128     //     t3.fp = vcvt.{fp}.s32    @ fp is either f32 or f64
4129     // uitofp:
4130     //     t1.i32 = zext src.int    @ zero-extends src0 if needed.
4131     //     t2.fp32 = vmov t1.i32
4132     //     t3.fp = vcvt.{fp}.u32    @ fp is either f32 or f64
4133     if (Src0->getType() != IceType_i32) {
4134       Variable *Src0R_32 = makeReg(IceType_i32);
4135       lowerCast(InstCast::create(
4136           Func, SourceIsSigned ? InstCast::Sext : InstCast::Zext, Src0R_32,
4137           Src0));
4138       Src0 = Src0R_32;
4139     }
4140     Variable *Src0R = legalizeToReg(Src0);
4141     Variable *Src0R_f32 = makeReg(IceType_f32);
4142     _mov(Src0R_f32, Src0R);
4143     Src0R = Src0R_f32;
4144     Variable *T = makeReg(DestTy);
4145     const InstARM32Vcvt::VcvtVariant Conversion =
4146         DestIsF32
4147             ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
4148             : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
4149     _vcvt(T, Src0R, Conversion);
4150     _mov(Dest, T);
4151     break;
4152   }
4153   case InstCast::Bitcast: {
4154     Operand *Src0 = Instr->getSrc(0);
4155     if (DestTy == Src0->getType()) {
4156       auto *Assign = InstAssign::create(Func, Dest, Src0);
4157       lowerAssign(Assign);
4158       return;
4159     }
4160     switch (DestTy) {
4161     case IceType_NUM:
4162     case IceType_void:
4163       llvm::report_fatal_error("Unexpected bitcast.");
4164     case IceType_i1:
4165       UnimplementedLoweringError(this, Instr);
4166       break;
4167     case IceType_i8:
4168       assert(Src0->getType() == IceType_v8i1);
4169       llvm::report_fatal_error(
4170           "v8i1 to i8 conversion should have been prelowered.");
4171       break;
4172     case IceType_i16:
4173       assert(Src0->getType() == IceType_v16i1);
4174       llvm::report_fatal_error(
4175           "v16i1 to i16 conversion should have been prelowered.");
4176       break;
    case IceType_i32:
    case IceType_f32: {
      Variable *Src0R = legalizeToReg(Src0);
      Variable *T = makeReg(DestTy);
      _mov(T, Src0R);
      lowerAssign(InstAssign::create(Func, Dest, T));
      break;
    }
    case IceType_i64: {
      // t0, t1 <- src0
      // dest[31..0]  = t0
      // dest[63..32] = t1
      assert(Src0->getType() == IceType_f64);
      auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
      T->initHiLo(Func);
      configureBitcastTemporary(T);
      Variable *Src0R = legalizeToReg(Src0);
      _mov(T, Src0R);
      Context.insert<InstFakeUse>(T->getHi());
      Context.insert<InstFakeUse>(T->getLo());
      lowerAssign(InstAssign::create(Func, Dest, T));
      break;
    }
    case IceType_f64: {
      // T0 <- lo(src)
      // T1 <- hi(src)
      // vmov T2, T0, T1
      // Dest <- T2
      assert(Src0->getType() == IceType_i64);
      Variable *T = makeReg(DestTy);
      auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
      Src64->initHiLo(Func);
      configureBitcastTemporary(Src64);
      lowerAssign(InstAssign::create(Func, Src64, Src0));
      _mov(T, Src64);
      lowerAssign(InstAssign::create(Func, Dest, T));
      break;
    }
    case IceType_v8i1:
      assert(Src0->getType() == IceType_i8);
      llvm::report_fatal_error(
          "i8 to v8i1 conversion should have been prelowered.");
      break;
    case IceType_v16i1:
      assert(Src0->getType() == IceType_i16);
      llvm::report_fatal_error(
          "i16 to v16i1 conversion should have been prelowered.");
      break;
    case IceType_v4i1:
    case IceType_v8i16:
    case IceType_v16i8:
    case IceType_v4f32:
    case IceType_v4i32: {
      assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType()));
      assert(isVectorType(DestTy) == isVectorType(Src0->getType()));
      Variable *T = makeReg(DestTy);
      _mov(T, Src0);
      _mov(Dest, T);
      break;
    }
    }
    break;
  }
  }
}

void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
  Variable *Dest = Instr->getDest();
  Type DestTy = Dest->getType();

  Variable *Src0 = legalizeToReg(Instr->getSrc(0));
  Operand *Src1 = Instr->getSrc(1);

  if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
    const uint32_t Index = Imm->getValue();
    Variable *T = makeReg(DestTy);
    Variable *TSrc0 = makeReg(Src0->getType());

    if (isFloatingType(DestTy)) {
      // Floating-point extracts go through the S register file, and only
      // q0-q7 alias S registers, so the source must be constrained to that
      // register class.
      TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
    }

    _mov(TSrc0, Src0);
    _extractelement(T, TSrc0, Index);
    _mov(Dest, T);
    return;
  }
  assert(false && "extractelement requires a constant index");
}

namespace {
// Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
// (and naming).
enum {
#define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val,
  FCMPARM32_TABLE
#undef X
      _fcmp_ll_NUM
};

enum {
#define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
  ICEINSTFCMP_TABLE
#undef X
      _fcmp_hl_NUM
};

static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM,
              "Inconsistency between high-level and low-level fcmp tags.");
#define X(tag, str)                                                            \
  static_assert(                                                               \
      (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag,                    \
      "Inconsistency between high-level and low-level fcmp tag " #tag);
ICEINSTFCMP_TABLE
#undef X

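// TableFcmp maps each high-level fcmp condition to the ARM condition code(s)
// under which the comparison holds after a vcmp/vmrs sequence. When CC1 is
// not kNone, the comparison is true if either CC0 or CC1 holds.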
struct {
  CondARM32::Cond CC0;
  CondARM32::Cond CC1;
} TableFcmp[] = {
#define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \
  {CondARM32::CC0, CondARM32::CC1},
    FCMPARM32_TABLE
#undef X
};

bool isFloatingPointZero(const Operand *Src) {
  if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
    return Utils::isPositiveZero(F32->getValue());
  }

  if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
    return Utils::isPositiveZero(F64->getValue());
  }

  return false;
}
} // end of anonymous namespace

TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
  InstFcmp::FCond Condition = Instr->getCondition();
  switch (Condition) {
  case InstFcmp::False:
    return CondWhenTrue(CondARM32::kNone);
  case InstFcmp::True:
    return CondWhenTrue(CondARM32::AL);
  default: {
    Variable *Src0R = legalizeToReg(Instr->getSrc(0));
    Operand *Src1 = Instr->getSrc(1);
    if (isFloatingPointZero(Src1)) {
      _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
    } else {
      _vcmp(Src0R, legalizeToReg(Src1));
    }
    _vmrs();
    assert(Condition < llvm::array_lengthof(TableFcmp));
    return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
  }
  }
}

void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();

  if (isVectorType(DestTy)) {
    if (Instr->getCondition() == InstFcmp::False) {
      constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
      auto *T = makeReg(SafeTypeForMovingConstant);
      _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)));
      _mov(Dest, T);
      return;
    }

    if (Instr->getCondition() == InstFcmp::True) {
      constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
      auto *T = makeReg(SafeTypeForMovingConstant);
      _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1)));
      _mov(Dest, T);
      return;
    }

    Variable *T0;
    Variable *T1;
    bool Negate = false;
    auto *Src0 = legalizeToReg(Instr->getSrc(0));
    auto *Src1 = legalizeToReg(Instr->getSrc(1));

    switch (Instr->getCondition()) {
    default:
      llvm::report_fatal_error("Unhandled fp comparison.");
#define _Vcnone(Tptr, S0, S1)                                                  \
  do {                                                                         \
    *(Tptr) = nullptr;                                                         \
  } while (0)
#define _Vceq(Tptr, S0, S1)                                                    \
  do {                                                                         \
    *(Tptr) = makeReg(DestTy);                                                 \
    _vceq(*(Tptr), S0, S1);                                                    \
  } while (0)
#define _Vcge(Tptr, S0, S1)                                                    \
  do {                                                                         \
    *(Tptr) = makeReg(DestTy);                                                 \
    _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \
  } while (0)
#define _Vcgt(Tptr, S0, S1)                                                    \
  do {                                                                         \
    *(Tptr) = makeReg(DestTy);                                                 \
    _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed);                 \
  } while (0)
#define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V)                           \
  case InstFcmp::val: {                                                        \
    _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1);             \
    _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0);             \
    Negate = NEG_V;                                                            \
  } break;
      FCMPARM32_TABLE
#undef X
#undef _Vcgt
#undef _Vcge
#undef _Vceq
#undef _Vcnone
    }
    assert(T0 != nullptr);
    Variable *T = T0;
    if (T1 != nullptr) {
      T = makeReg(DestTy);
      _vorr(T, T0, T1);
    }

    if (Negate) {
      auto *TNeg = makeReg(DestTy);
      _vmvn(TNeg, T);
      T = TNeg;
    }

    _mov(Dest, T);
    return;
  }

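  // Scalar path: materialize 0, then conditionally rewrite it to 1 under the
  // condition code(s) for which the comparison holds.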
  Variable *T = makeReg(IceType_i1);
  Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
  Operand *_0 =
      legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);

  CondWhenTrue Cond = lowerFcmpCond(Instr);

  bool RedefineT = false;
  if (Cond.WhenTrue0 != CondARM32::AL) {
    _mov(T, _0);
    RedefineT = true;
  }

  if (Cond.WhenTrue0 == CondARM32::kNone) {
    _mov(Dest, T);
    return;
  }

  if (RedefineT) {
    _mov_redefined(T, _1, Cond.WhenTrue0);
  } else {
    _mov(T, _1, Cond.WhenTrue0);
  }

  if (Cond.WhenTrue1 != CondARM32::kNone) {
    _mov_redefined(T, _1, Cond.WhenTrue1);
  }

  _mov(Dest, T);
}

TargetARM32::CondWhenTrue
TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                Operand *Src1) {
  assert(Condition < llvm::array_lengthof(TableIcmp64));

  Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
  Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
  assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
  assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());

  if (SrcsLo.hasConstOperand()) {
    const uint32_t ValueLo = SrcsLo.getConstantValue();
    const uint32_t ValueHi = SrcsHi.getConstantValue();
    const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
    if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
        Value == 0) {
      Variable *T = makeReg(IceType_i32);
      Variable *Src0LoR = SrcsLo.src0R(this);
      Variable *Src0HiR = SrcsHi.src0R(this);
      _orrs(T, Src0LoR, Src0HiR);
      Context.insert<InstFakeUse>(T);
      return CondWhenTrue(TableIcmp64[Condition].C1);
    }

    Variable *Src0RLo = SrcsLo.src0R(this);
    Variable *Src0RHi = SrcsHi.src0R(this);
    Operand *Src1RFLo = SrcsLo.src1RF(this);
    Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);

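    // If the table's expected operand order and the actual (possibly swapped)
    // order disagree, compare with reverse-subtract so the constant can stay
    // in the flexible-operand position.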
    const bool UseRsb =
        TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands();

    if (UseRsb) {
      if (TableIcmp64[Condition].IsSigned) {
        Variable *T = makeReg(IceType_i32);
        _rsbs(T, Src0RLo, Src1RFLo);
        Context.insert<InstFakeUse>(T);

        T = makeReg(IceType_i32);
        _rscs(T, Src0RHi, Src1RFHi);
        // We need to add a FakeUse here because liveness gets mad at us (Def
        // without Use.) Note that flag-setting instructions are considered to
        // have side effects and, therefore, are not DCE'ed.
        Context.insert<InstFakeUse>(T);
      } else {
        Variable *T = makeReg(IceType_i32);
        _rsbs(T, Src0RHi, Src1RFHi);
        Context.insert<InstFakeUse>(T);

        T = makeReg(IceType_i32);
        _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
        Context.insert<InstFakeUse>(T);
      }
    } else {
      if (TableIcmp64[Condition].IsSigned) {
        _cmp(Src0RLo, Src1RFLo);
        Variable *T = makeReg(IceType_i32);
        _sbcs(T, Src0RHi, Src1RFHi);
        Context.insert<InstFakeUse>(T);
      } else {
        _cmp(Src0RHi, Src1RFHi);
        _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
      }
    }

    return CondWhenTrue(TableIcmp64[Condition].C1);
  }

  Variable *Src0RLo, *Src0RHi;
  Operand *Src1RFLo, *Src1RFHi;
  if (TableIcmp64[Condition].Swapped) {
    Src0RLo = legalizeToReg(loOperand(Src1));
    Src0RHi = legalizeToReg(hiOperand(Src1));
    Src1RFLo = legalizeToReg(loOperand(Src0));
    Src1RFHi = legalizeToReg(hiOperand(Src0));
  } else {
    Src0RLo = legalizeToReg(loOperand(Src0));
    Src0RHi = legalizeToReg(hiOperand(Src0));
    Src1RFLo = legalizeToReg(loOperand(Src1));
    Src1RFHi = legalizeToReg(hiOperand(Src1));
  }

  // a=icmp cond, b, c ==>
  // GCC does:
  //   cmp      b.hi, c.hi     or  cmp      b.lo, c.lo
  //   cmp.eq   b.lo, c.lo         sbcs t1, b.hi, c.hi
  //   mov.<C1> t, #1              mov.<C1> t, #1
  //   mov.<C2> t, #0              mov.<C2> t, #0
  //   mov      a, t               mov      a, t
  // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
  // is used for signed compares. In some cases, b and c need to be swapped as
  // well.
  //
  // LLVM does:
  // for EQ and NE:
  //   eor  t1, b.hi, c.hi
  //   eor  t2, b.lo, c.lo
  //   orrs t, t1, t2
  //   mov.<C> t, #1
  //   mov  a, t
  //
  // that's nice in that it's just as short but has fewer dependencies for
  // better ILP at the cost of more registers.
  //
  // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
  // unconditional mov #0, two cmps, two conditional mov #1, and one
  // conditional reg mov. That has few dependencies for good ILP, but is a
  // longer sequence.
  //
  // So, we are going with the GCC version since it's usually better (except
  // perhaps for eq/ne). We could revisit special-casing eq/ne later.
  if (TableIcmp64[Condition].IsSigned) {
    Variable *ScratchReg = makeReg(IceType_i32);
    _cmp(Src0RLo, Src1RFLo);
    _sbcs(ScratchReg, Src0RHi, Src1RFHi);
    // ScratchReg isn't going to be used, but we need the side-effect of
    // setting flags from this operation.
    Context.insert<InstFakeUse>(ScratchReg);
  } else {
    _cmp(Src0RHi, Src1RFHi);
    _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
  }
  return CondWhenTrue(TableIcmp64[Condition].C1);
}

TargetARM32::CondWhenTrue
TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                Operand *Src1) {
  Int32Operands Srcs(Src0, Src1);
  if (!Srcs.hasConstOperand()) {
    Variable *Src0R = Srcs.src0R(this);
    Operand *Src1RF = Srcs.src1RF(this);
    _cmp(Src0R, Src1RF);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  Variable *Src0R = Srcs.src0R(this);
  const int32_t Value = Srcs.getConstantValue();
  if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
    _tst(Src0R, Src0R);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

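  // cmn (compare negative) sets flags on Src0R + Src1F, i.e. it compares
  // Src0R against the negated immediate; use it when only the negated
  // immediate has a flexible-operand encoding.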
  if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
      Srcs.negatedImmediateIsFlexEncodable()) {
    Operand *Src1F = Srcs.negatedSrc1F(this);
    _cmn(Src0R, Src1F);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  Operand *Src1RF = Srcs.src1RF(this);
  if (!Srcs.swappedOperands()) {
    _cmp(Src0R, Src1RF);
  } else {
    Variable *T = makeReg(IceType_i32);
    _rsbs(T, Src0R, Src1RF);
    Context.insert<InstFakeUse>(T);
  }
  return CondWhenTrue(getIcmp32Mapping(Condition));
}

TargetARM32::CondWhenTrue
TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
                                       Operand *Src1) {
  Int32Operands Srcs(Src0, Src1);
  const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShAmt >= 0);

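  // i1/i8/i16 compares are implemented by shifting both operands into the top
  // bits of a 32-bit register and comparing there; see the discussion in
  // lowerIcmpCond below.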
  if (!Srcs.hasConstOperand()) {
    Variable *Src0R = makeReg(IceType_i32);
    Operand *ShAmtImm = shAmtImm(ShAmt);
    _lsl(Src0R, legalizeToReg(Src0), ShAmtImm);

    Variable *Src1R = legalizeToReg(Src1);
    auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R,
                                              OperandARM32::LSL, ShAmtImm);
    _cmp(Src0R, Src1F);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  const int32_t Value = Srcs.getConstantValue();
  if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
    Operand *ShAmtImm = shAmtImm(ShAmt);
    Variable *T = makeReg(IceType_i32);
    _lsls(T, Srcs.src0R(this), ShAmtImm);
    Context.insert<InstFakeUse>(T);
    return CondWhenTrue(getIcmp32Mapping(Condition));
  }

  Variable *ConstR = makeReg(IceType_i32);
  _mov(ConstR,
       legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
  Operand *NonConstF = OperandARM32FlexReg::create(
      Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
      Ctx->getConstantInt32(ShAmt));

  if (Srcs.swappedOperands()) {
    _cmp(ConstR, NonConstF);
  } else {
    Variable *T = makeReg(IceType_i32);
    _rsbs(T, ConstR, NonConstF);
    Context.insert<InstFakeUse>(T);
  }
  return CondWhenTrue(getIcmp32Mapping(Condition));
}

TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
  return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
                       Instr->getSrc(1));
}

TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
                                                     Operand *Src0,
                                                     Operand *Src1) {
  Src0 = legalizeUndef(Src0);
  Src1 = legalizeUndef(Src1);

  // a=icmp cond b, c ==>
  // GCC does:
  //   <u/s>xtb tb, b
  //   <u/s>xtb tc, c
  //   cmp      tb, tc
  //   mov.C1   t, #0
  //   mov.C2   t, #1
  //   mov      a, t
  // where the unsigned/sign extension is not needed for 32-bit. They also have
  // special cases for EQ and NE. E.g., for NE:
  //   <extend to tb, tc>
  //   subs     t, tb, tc
  //   movne    t, #1
  //   mov      a, t
  //
  // LLVM does:
  //   lsl     tb, b, #<N>
  //   mov     t, #0
  //   cmp     tb, c, lsl #<N>
  //   mov.<C> t, #1
  //   mov     a, t
  //
  // the left shift is by 0, 16, or 24, which allows the comparison to focus on
  // the digits that actually matter (for 16-bit or 8-bit signed/unsigned). For
  // the unsigned case, for some reason LLVM does something similar to GCC and
  // emits a uxtb first. It's not clear to me why that special-casing is needed.
  //
  // We'll go with the LLVM way for now, since it's shorter and has just as few
  // dependencies.
  switch (Src0->getType()) {
  default:
    llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
    return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
  case IceType_i32:
    return lowerInt32IcmpCond(Condition, Src0, Src1);
  case IceType_i64:
    return lowerInt64IcmpCond(Condition, Src0, Src1);
  }
}

void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();

  if (isVectorType(DestTy)) {
    auto *T = makeReg(DestTy);
    auto *Src0 = legalizeToReg(Instr->getSrc(0));
    auto *Src1 = legalizeToReg(Instr->getSrc(1));
    const Type SrcTy = Src0->getType();

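    // Vectors of i1 carry only bit 0 of each lane. Shift that bit into the
    // lane's sign bit first: a true lane becomes the minimum signed value and
    // a false lane stays 0, which orders correctly for both signed compares
    // (where true is -1) and unsigned compares (nonzero > 0).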
    bool NeedsShl = false;
    Type NewTypeAfterShl;
    SizeT ShAmt;
    switch (SrcTy) {
    default:
      break;
    case IceType_v16i1:
      NeedsShl = true;
      NewTypeAfterShl = IceType_v16i8;
      ShAmt = 7;
      break;
    case IceType_v8i1:
      NeedsShl = true;
      NewTypeAfterShl = IceType_v8i16;
      ShAmt = 15;
      break;
    case IceType_v4i1:
      NeedsShl = true;
      NewTypeAfterShl = IceType_v4i32;
      ShAmt = 31;
      break;
    }

    if (NeedsShl) {
      auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt));
      auto *Src0T = makeReg(NewTypeAfterShl);
      auto *Src0Shl = makeReg(NewTypeAfterShl);
      _mov(Src0T, Src0);
      _vshl(Src0Shl, Src0T, Imm);
      Src0 = Src0Shl;

      auto *Src1T = makeReg(NewTypeAfterShl);
      auto *Src1Shl = makeReg(NewTypeAfterShl);
      _mov(Src1T, Src1);
      _vshl(Src1Shl, Src1T, Imm);
      Src1 = Src1Shl;
    }

4758 
4759     switch (Instr->getCondition()) {
4760     default:
4761       llvm::report_fatal_error("Unhandled integer comparison.");
4762 #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1)
4763 #define _Vcge(T, S0, S1, Signed)                                               \
4764   _vcge(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed                  \
4765                                        : InstARM32::FS_Unsigned)
4766 #define _Vcgt(T, S0, S1, Signed)                                               \
4767   _vcgt(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed                  \
4768                                        : InstARM32::FS_Unsigned)
4769 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V)    \
4770   case InstIcmp::val: {                                                        \
4771     _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed);      \
4772     if (NEG_V) {                                                               \
4773       auto *TInv = makeReg(DestTy);                                            \
4774       _vmvn(TInv, T);                                                          \
4775       T = TInv;                                                                \
4776     }                                                                          \
4777   } break;
4778       ICMPARM32_TABLE
4779 #undef X
4780 #undef _Vcgt
4781 #undef _Vcge
4782 #undef _Vceq
4783     }
4784     _mov(Dest, T);
4785     return;
4786   }
4787 
4788   Operand *_0 =
4789       legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4790   Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4791   Variable *T = makeReg(IceType_i1);
4792 
4793   _mov(T, _0);
4794   CondWhenTrue Cond = lowerIcmpCond(Instr);
4795   _mov_redefined(T, _1, Cond.WhenTrue0);
4796   _mov(Dest, T);
4797 
4798   assert(Cond.WhenTrue1 == CondARM32::kNone);
4799 
4800   return;
4801 }
4802 
void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
  Variable *Dest = Instr->getDest();
  Type DestTy = Dest->getType();

  Variable *Src0 = legalizeToReg(Instr->getSrc(0));
  Variable *Src1 = legalizeToReg(Instr->getSrc(1));
  Operand *Src2 = Instr->getSrc(2);

  if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
    const uint32_t Index = Imm->getValue();
    Variable *T = makeReg(DestTy);

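    // As in lowerExtractElement: float insert/extract goes through the S
    // register file, and only q0-q7 alias S registers, so constrain T.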
    if (isFloatingType(DestTy)) {
      T->setRegClass(RegARM32::RCARM32_QtoS);
    }

    _mov(T, Src0);
    _insertelement(T, Src1, Index);
    _set_dest_redefined();
    _mov(Dest, T);
    return;
  }
  assert(false && "insertelement requires a constant index");
}

namespace {
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}
} // end of anonymous namespace

void TargetARM32::lowerLoadLinkedStoreExclusive(
    Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
    CondARM32::Cond Cond) {

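  // Emits the canonical load-linked/store-conditional retry loop:
  //
  // retry:
  //     ldrex tmp, [addr]
  //     tmp2 = Operation(tmp)
  //     strex success, tmp2, [addr]    @ predicated on Cond
  //     cmp success, #0
  //     bne retry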
  auto *Retry = Context.insert<InstARM32Label>(this);

  { // scoping for loop highlighting.
    Variable *Success = makeReg(IceType_i32);
    Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
    auto *_0 = Ctx->getConstantZero(IceType_i32);

    Context.insert<InstFakeDef>(Tmp);
    Context.insert<InstFakeUse>(Tmp);
    Variable *AddrR = legalizeToReg(Addr);
    _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
    auto *StoreValue = Operation(Tmp);
    assert(StoreValue->mustHaveReg());
    // strex requires Dest to be a register other than Value or Addr. This
    // restriction is cleanly represented by adding an "early" definition of
    // Dest (or a later use of all the sources.)
    Context.insert<InstFakeDef>(Success);
    if (Cond != CondARM32::AL) {
      _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex),
                     InstARM32::getOppositeCondition(Cond));
    }
    _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond)
        ->setDestRedefined();
    _cmp(Success, _0);
  }

  _br(Retry, CondARM32::NE);
}

namespace {
InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
                                Variable *Src0, Operand *Src1) {
  InstArithmetic::OpKind Oper;
  switch (Operation) {
  default:
    llvm::report_fatal_error("Unknown AtomicRMW operation");
  case Intrinsics::AtomicExchange:
    llvm::report_fatal_error("Can't handle Atomic xchg operation");
  case Intrinsics::AtomicAdd:
    Oper = InstArithmetic::Add;
    break;
  case Intrinsics::AtomicAnd:
    Oper = InstArithmetic::And;
    break;
  case Intrinsics::AtomicSub:
    Oper = InstArithmetic::Sub;
    break;
  case Intrinsics::AtomicOr:
    Oper = InstArithmetic::Or;
    break;
  case Intrinsics::AtomicXor:
    Oper = InstArithmetic::Xor;
    break;
  }
  return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
}
} // end of anonymous namespace

void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
                                 Operand *Addr, Operand *Val) {
  // retry:
  //     ldrex tmp, [addr]
  //     mov contents, tmp
  //     op result, contents, Val
  //     strex success, result, [addr]
  //     cmp success, 0
  //     jne retry
  //     fake-use(addr, operand)  @ prevents undesirable clobbering.
  //     mov dest, contents
  auto DestTy = Dest->getType();

  if (DestTy == IceType_i64) {
    lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
    return;
  }

  Operand *ValRF = nullptr;
  if (llvm::isa<ConstantInteger32>(Val)) {
    ValRF = Val;
  } else {
    ValRF = legalizeToReg(Val);
  }
  auto *ContentsR = makeReg(DestTy);
  auto *ResultR = makeReg(DestTy);

  _dmb();
  lowerLoadLinkedStoreExclusive(
      DestTy, Addr,
      [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
        lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
        if (Operation == Intrinsics::AtomicExchange) {
          lowerAssign(InstAssign::create(Func, ResultR, ValRF));
        } else {
          lowerArithmetic(
              createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
        }
        return ResultR;
      });
  _dmb();
  if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
    Context.insert<InstFakeUse>(ValR);
  }
  // Can't dce ContentsR.
  Context.insert<InstFakeUse>(ContentsR);
  lowerAssign(InstAssign::create(Func, Dest, ContentsR));
}

void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
                                      Operand *Addr, Operand *Val) {
  assert(Dest->getType() == IceType_i64);

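  // ldrexd/strexd transfer an even/odd pair of consecutive core registers, so
  // the 64-bit result must live in such a register pair.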
  auto *ResultR = makeI64RegPair();

  Context.insert<InstFakeDef>(ResultR);

  Operand *ValRF = nullptr;
  if (llvm::isa<ConstantInteger64>(Val)) {
    ValRF = Val;
  } else {
    auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
    ValR64->initHiLo(Func);
    ValR64->setMustNotHaveReg();
    ValR64->getLo()->setMustHaveReg();
    ValR64->getHi()->setMustHaveReg();
    lowerAssign(InstAssign::create(Func, ValR64, Val));
    ValRF = ValR64;
  }

  auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
  ContentsR->initHiLo(Func);
  ContentsR->setMustNotHaveReg();
  ContentsR->getLo()->setMustHaveReg();
  ContentsR->getHi()->setMustHaveReg();

  _dmb();
  lowerLoadLinkedStoreExclusive(
      IceType_i64, Addr,
      [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
        lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
        Context.insert<InstFakeUse>(Tmp);
        if (Operation == Intrinsics::AtomicExchange) {
          lowerAssign(InstAssign::create(Func, ResultR, ValRF));
        } else {
          lowerArithmetic(
              createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
        }
        Context.insert<InstFakeUse>(ResultR->getHi());
        Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
            ->setDestRedefined();
        return ResultR;
      });
  _dmb();
  if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
    Context.insert<InstFakeUse>(ValR64->getLo());
    Context.insert<InstFakeUse>(ValR64->getHi());
  }
  lowerAssign(InstAssign::create(Func, Dest, ContentsR));
}

void TargetARM32::postambleCtpop64(const InstCall *Instr) {
  Operand *Arg0 = Instr->getArg(0);
  if (isInt32Asserting32Or64(Arg0->getType())) {
    return;
  }
  // The popcount helpers always return 32-bit values, while the intrinsic's
  // signature matches some 64-bit platform's native instructions and expects
  // to fill a 64-bit reg. Thus, clear the upper bits of the dest just in case
  // the user doesn't do that in the IR or doesn't toss the bits via truncate.
  auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest()));
  Variable *T = makeReg(IceType_i32);
  Operand *_0 =
      legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
  _mov(T, _0);
  _mov(DestHi, T);
}

void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
  Variable *Dest = Instr->getDest();
  Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
  Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
  switch (ID) {
  case Intrinsics::AtomicFence:
  case Intrinsics::AtomicFenceAll:
    assert(Dest == nullptr);
    _dmb();
    return;
  case Intrinsics::AtomicIsLockFree: {
    Operand *ByteSize = Instr->getArg(0);
    auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
    if (CI == nullptr) {
      // The PNaCl ABI requires the byte size to be a compile-time constant.
      Func->setError("AtomicIsLockFree byte size should be compile-time const");
      return;
    }
    static constexpr int32_t NotLockFree = 0;
    static constexpr int32_t LockFree = 1;
    int32_t Result = NotLockFree;
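    // ARMv7 provides exclusive accessors for all of these widths (ldrexb,
    // ldrexh, ldrex, ldrexd), so 1-, 2-, 4-, and 8-byte accesses are
    // lock-free.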
    switch (CI->getValue()) {
    case 1:
    case 2:
    case 4:
    case 8:
      Result = LockFree;
      break;
    }
    _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
    return;
  }
  case Intrinsics::AtomicLoad: {
    assert(isScalarIntegerType(DestTy));
    // We require the memory address to be naturally aligned; given that, a
    // normal load is atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(1)))) {
      Func->setError("Unexpected memory ordering for AtomicLoad");
      return;
    }
    Variable *T;

    if (DestTy == IceType_i64) {
      // ldrex is the only ARM instruction that is guaranteed to load a 64-bit
      // integer atomically. Everything else works with a regular ldr.
      T = makeI64RegPair();
      _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
    } else {
      T = makeReg(DestTy);
      _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
    }
    _dmb();
    lowerAssign(InstAssign::create(Func, Dest, T));
    // Add a fake use of T to ensure the atomic load is not removed if Dest is
    // unused.
    Context.insert<InstFakeUse>(T);
    return;
  }
    // We require the memory address to be naturally aligned; given that, a
    // normal store is atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(2)))) {
      Func->setError("Unexpected memory ordering for AtomicStore");
      return;
    }

    auto *Value = Instr->getArg(0);
    if (Value->getType() == IceType_i64) {
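      // On ARM a 64-bit store is only single-copy atomic when done through
      // strexd, so emit a ldrex/strex loop that discards the loaded value and
      // stores ValueR.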
      auto *ValueR = makeI64RegPair();
      Context.insert<InstFakeDef>(ValueR);
      lowerAssign(InstAssign::create(Func, ValueR, Value));
      _dmb();
      lowerLoadLinkedStoreExclusive(
          IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
            // The following fake-use prevents the ldrex instruction from being
            // dead code eliminated.
            Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
            Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
            Context.insert<InstFakeUse>(Tmp);
            return ValueR;
          });
      Context.insert<InstFakeUse>(ValueR);
      _dmb();
      return;
    }

    auto *ValueR = legalizeToReg(Instr->getArg(0));
    const auto ValueTy = ValueR->getType();
    assert(isScalarIntegerType(ValueTy));
    auto *Addr = legalizeToReg(Instr->getArg(1));

    // Non-i64 stores are atomic as long as the address is naturally aligned.
    // This is PNaCl, so addresses are aligned.
    _dmb();
    _str(ValueR, formMemoryOperand(Addr, ValueTy));
    _dmb();
    return;
  }
  case Intrinsics::AtomicCmpxchg: {
    // retry:
    //     ldrex tmp, [addr]
    //     cmp tmp, expected
    //     mov expected, tmp
    //     strexeq success, new, [addr]
    //     cmpeq success, #0
    //     bne retry
    //     mov dest, expected
    assert(isScalarIntegerType(DestTy));
    // We require the memory address to be naturally aligned; given that, the
    // exclusive load/store accesses below are atomic.
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)),
            getConstantMemoryOrder(Instr->getArg(4)))) {
      Func->setError("Unexpected memory ordering for AtomicCmpxchg");
      return;
    }

    if (DestTy == IceType_i64) {
      Variable *LoadedValue = nullptr;

      auto *New = makeI64RegPair();
      Context.insert<InstFakeDef>(New);
      lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));

      auto *Expected = makeI64RegPair();
      Context.insert<InstFakeDef>(Expected);
      lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));

      _dmb();
      lowerLoadLinkedStoreExclusive(
          DestTy, Instr->getArg(0),
          [this, Expected, New, &LoadedValue](Variable *Tmp) {
            auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
            auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
            auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
            auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
            _cmp(TmpLoR, ExpectedLoR);
            _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
            LoadedValue = Tmp;
            return New;
          },
          CondARM32::EQ);
      _dmb();

      Context.insert<InstFakeUse>(LoadedValue);
      lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
      // The fake-use Expected prevents the assignments to Expected (above)
      // from being removed if Dest is not used.
      Context.insert<InstFakeUse>(Expected);
      // New needs to be alive here, or its live range will end in the
      // strex instruction.
      Context.insert<InstFakeUse>(New);
      return;
    }

    auto *New = legalizeToReg(Instr->getArg(2));
    auto *Expected = legalizeToReg(Instr->getArg(1));
    Variable *LoadedValue = nullptr;

    _dmb();
    lowerLoadLinkedStoreExclusive(
        DestTy, Instr->getArg(0),
        [this, Expected, New, &LoadedValue](Variable *Tmp) {
          lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
          LoadedValue = Tmp;
          return New;
        },
        CondARM32::EQ);
    _dmb();

    lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
    Context.insert<InstFakeUse>(Expected);
    Context.insert<InstFakeUse>(New);
    return;
  }
  case Intrinsics::AtomicRMW: {
    if (!Intrinsics::isMemoryOrderValid(
            ID, getConstantMemoryOrder(Instr->getArg(3)))) {
      Func->setError("Unexpected memory ordering for AtomicRMW");
      return;
    }
    lowerAtomicRMW(
        Dest,
        static_cast<uint32_t>(
            llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
        Instr->getArg(1), Instr->getArg(2));
    return;
  }
  case Intrinsics::Bswap: {
    Operand *Val = Instr->getArg(0);
    Type Ty = Val->getType();
    if (Ty == IceType_i64) {
      Val = legalizeUndef(Val);
      Variable *Val_Lo = legalizeToReg(loOperand(Val));
      Variable *Val_Hi = legalizeToReg(hiOperand(Val));
      Variable *T_Lo = makeReg(IceType_i32);
      Variable *T_Hi = makeReg(IceType_i32);
      auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
      auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
      _rev(T_Lo, Val_Lo);
      _rev(T_Hi, Val_Hi);
      _mov(DestLo, T_Hi);
      _mov(DestHi, T_Lo);
    } else {
      assert(Ty == IceType_i32 || Ty == IceType_i16);
      Variable *ValR = legalizeToReg(Val);
      Variable *T = makeReg(Ty);
      _rev(T, ValR);
      if (Val->getType() == IceType_i16) {
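        // rev reverses all four bytes, which leaves a 16-bit value in the top
        // half-word; shift it back down.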
        Operand *_16 = shAmtImm(16);
        _lsr(T, T, _16);
      }
      _mov(Dest, T);
    }
    return;
  }
  case Intrinsics::Ctpop: {
    llvm::report_fatal_error("Ctpop should have been prelowered.");
  }
  case Intrinsics::Ctlz: {
    // The "is zero undef" parameter is ignored and we always return a
    // well-defined value.
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToReg(loOperand(Val));
      ValHiR = legalizeToReg(hiOperand(Val));
    } else {
      ValLoR = legalizeToReg(Val);
    }
    lowerCLZ(Dest, ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Cttz: {
    // Essentially like Clz, but reverse the bits first.
    Operand *Val = Instr->getArg(0);
    Variable *ValLoR;
    Variable *ValHiR = nullptr;
    if (Val->getType() == IceType_i64) {
      Val = legalizeUndef(Val);
      ValLoR = legalizeToReg(loOperand(Val));
      ValHiR = legalizeToReg(hiOperand(Val));
      Variable *TLo = makeReg(IceType_i32);
      Variable *THi = makeReg(IceType_i32);
      _rbit(TLo, ValLoR);
      _rbit(THi, ValHiR);
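      // Bit-reversing a 64-bit value also swaps its words, so the reversed
      // high word becomes the new low word and vice versa.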
      ValLoR = THi;
      ValHiR = TLo;
    } else {
      ValLoR = legalizeToReg(Val);
      Variable *T = makeReg(IceType_i32);
      _rbit(T, ValLoR);
      ValLoR = T;
    }
    lowerCLZ(Dest, ValLoR, ValHiR);
    return;
  }
  case Intrinsics::Fabs: {
    Variable *T = makeReg(DestTy);
    _vabs(T, legalizeToReg(Instr->getArg(0)));
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Longjmp: {
    llvm::report_fatal_error("longjmp should have been prelowered.");
  }
  case Intrinsics::Memcpy: {
    llvm::report_fatal_error("memcpy should have been prelowered.");
  }
  case Intrinsics::Memmove: {
    llvm::report_fatal_error("memmove should have been prelowered.");
  }
  case Intrinsics::Memset: {
    llvm::report_fatal_error("memset should have been prelowered.");
  }
  case Intrinsics::NaClReadTP: {
    if (SandboxingType != ST_NaCl) {
      llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
    }
    Variable *TP = legalizeToReg(OperandARM32Mem::create(
        Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
        llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
    _mov(Dest, TP);
    return;
  }
  case Intrinsics::Setjmp: {
    llvm::report_fatal_error("setjmp should have been prelowered.");
  }
  case Intrinsics::Sqrt: {
    assert(isScalarFloatingType(Dest->getType()) ||
           getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
    Variable *Src = legalizeToReg(Instr->getArg(0));
    Variable *T = makeReg(DestTy);
    _vsqrt(T, Src);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Stacksave: {
    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
    _mov(Dest, SP);
    return;
  }
  case Intrinsics::Stackrestore: {
    Variable *Val = legalizeToReg(Instr->getArg(0));
    Sandboxer(this).reset_sp(Val);
    return;
  }
  case Intrinsics::Trap:
    _trap();
    return;
  case Intrinsics::AddSaturateSigned:
  case Intrinsics::AddSaturateUnsigned: {
    bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
    Variable *Src0 = legalizeToReg(Instr->getArg(0));
    Variable *Src1 = legalizeToReg(Instr->getArg(1));
    Variable *T = makeReg(DestTy);
    _vqadd(T, Src0, Src1, Unsigned);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::LoadSubVector: {
    assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
           "LoadSubVector second argument must be a constant");
    Variable *Dest = Instr->getDest();
    Type Ty = Dest->getType();
    auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
    Operand *Addr = Instr->getArg(0);
    OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
    doMockBoundsCheck(Src);

    if (Dest->isRematerializable()) {
      Context.insert<InstFakeDef>(Dest);
      return;
    }

    auto *T = makeReg(Ty);
    switch (SubVectorSize->getValue()) {
    case 4:
      _vldr1d(T, Src);
      break;
    case 8:
      _vldr1q(T, Src);
      break;
    default:
      Func->setError("Unexpected size for LoadSubVector");
      return;
    }
    _mov(Dest, T);
    return;
  }
  case Intrinsics::StoreSubVector: {
    assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
           "StoreSubVector third argument must be a constant");
    auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
    Variable *Value = legalizeToReg(Instr->getArg(0));
    Operand *Addr = Instr->getArg(1);
    OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
    doMockBoundsCheck(NewAddr);

    Value = legalizeToReg(Value);

    switch (SubVectorSize->getValue()) {
    case 4:
      _vstr1d(Value, NewAddr);
      break;
    case 8:
      _vstr1q(Value, NewAddr);
      break;
    default:
      Func->setError("Unexpected size for StoreSubVector");
      return;
    }
    return;
  }
  case Intrinsics::MultiplyAddPairs: {
    Variable *Src0 = legalizeToReg(Instr->getArg(0));
    Variable *Src1 = legalizeToReg(Instr->getArg(1));
    Variable *T = makeReg(DestTy);
    _vmlap(T, Src0, Src1);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::MultiplyHighSigned:
  case Intrinsics::MultiplyHighUnsigned: {
    bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
    Variable *Src0 = legalizeToReg(Instr->getArg(0));
    Variable *Src1 = legalizeToReg(Instr->getArg(1));
    Variable *T = makeReg(DestTy);
    _vmulh(T, Src0, Src1, Unsigned);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::Nearbyint: {
    UnimplementedLoweringError(this, Instr);
    return;
  }
  case Intrinsics::Round: {
    UnimplementedLoweringError(this, Instr);
    return;
  }
  case Intrinsics::SignMask: {
    UnimplementedLoweringError(this, Instr);
    return;
  }
  case Intrinsics::SubtractSaturateSigned:
  case Intrinsics::SubtractSaturateUnsigned: {
    bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
    Variable *Src0 = legalizeToReg(Instr->getArg(0));
    Variable *Src1 = legalizeToReg(Instr->getArg(1));
    Variable *T = makeReg(DestTy);
    _vqsub(T, Src0, Src1, Unsigned);
    _mov(Dest, T);
    return;
  }
  case Intrinsics::VectorPackSigned:
  case Intrinsics::VectorPackUnsigned: {
    bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
    bool Saturating = true;
    Variable *Src0 = legalizeToReg(Instr->getArg(0));
    Variable *Src1 = legalizeToReg(Instr->getArg(1));
    Variable *T = makeReg(DestTy);
    _vqmovn2(T, Src0, Src1, Unsigned, Saturating);
    _mov(Dest, T);
    return;
  }
  default: // UnknownIntrinsic
    Func->setError("Unexpected intrinsic");
    return;
  }
  return;
}

void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
  Type Ty = Dest->getType();
  assert(Ty == IceType_i32 || Ty == IceType_i64);
  Variable *T = makeReg(IceType_i32);
  _clz(T, ValLoR);
  if (Ty == IceType_i64) {
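    // 64-bit clz: result = (hi != 0) ? clz(hi) : 32 + clz(lo). Compute
    // 32 + clz(lo) unconditionally, then overwrite it with clz(hi) when the
    // high word is non-zero.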
    auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
    auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
    Operand *Zero =
        legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
    Operand *ThirtyTwo =
        legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
    _cmp(ValHiR, Zero);
    Variable *T2 = makeReg(IceType_i32);
    _add(T2, T, ThirtyTwo);
    _clz(T2, ValHiR, CondARM32::NE);
    // T2 is actually a source as well when the predicate is not AL (since it
    // may leave T2 alone). We use _set_dest_redefined to prolong the liveness
    // of T2 as if it was used as a source.
    _set_dest_redefined();
    _mov(DestLo, T2);
    Variable *T3 = makeReg(Zero->getType());
    _mov(T3, Zero);
    _mov(DestHi, T3);
    return;
  }
  _mov(Dest, T);
  return;
}

void TargetARM32::lowerLoad(const InstLoad *Load) {
  // A Load instruction can be treated the same as an Assign instruction, after
  // the source operand is transformed into an OperandARM32Mem operand.
  Type Ty = Load->getDest()->getType();
  Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
  Variable *DestLoad = Load->getDest();

  // TODO(jvoung): handle folding opportunities. Sign and zero extension can
  // be folded into a load.
  auto *Assign = InstAssign::create(Func, DestLoad, Src0);
  lowerAssign(Assign);
}

namespace {
void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
                    const Variable *OffsetReg, int16_t OffsetRegShAmt,
                    const Inst *Reason) {
  if (!BuildDefs::dump())
    return;
  if (!Func->isVerbose(IceV_AddrOpt))
    return;
  OstreamLocker _(Func->getContext());
  Ostream &Str = Func->getContext()->getStrDump();
  Str << "Instruction: ";
  Reason->dumpDecorated(Func);
  Str << "  results in Base=";
  if (Base)
    Base->dump(Func);
  else
    Str << "<null>";
  Str << ", OffsetReg=";
  if (OffsetReg)
    OffsetReg->dump(Func);
  else
    Str << "<null>";
  Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n";
}

bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
                 int32_t *Offset, const Inst **Reason) {
  // Var originates from Var=SrcVar ==> set Var:=SrcVar
  if (*Var == nullptr)
    return false;
  const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
  if (!VarAssign)
    return false;
  assert(!VMetadata->isMultiDef(*Var));
  if (!llvm::isa<InstAssign>(VarAssign))
    return false;

  Operand *SrcOp = VarAssign->getSrc(0);
  bool Optimized = false;
  if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
    if (!VMetadata->isMultiDef(SrcVar) ||
        // TODO: ensure SrcVar stays single-BB
        false) {
      Optimized = true;
      *Var = SrcVar;
    }
  } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
    // Var originates from Var=Const ==> drop the variable and fold the
    // constant into the offset.
    const int32_t MoreOffset = Const->getValue();
    if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
      return false;
    *Var = nullptr;
    *Offset += MoreOffset;
    Optimized = true;
  }

  if (Optimized) {
    *Reason = VarAssign;
  }

  return Optimized;
}

bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
  if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
    switch (Arith->getOp()) {
    default:
      return false;
    case InstArithmetic::Add:
    case InstArithmetic::Sub:
      *Kind = Arith->getOp();
      return true;
    }
  }
  return false;
}

bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base,
                            Variable **OffsetReg, int32_t OffsetRegShamt,
                            const Inst **Reason) {
  // OffsetReg==nullptr && Base is Base=Var1+Var2 ==>
  //   set Base=Var1, OffsetReg=Var2, Shift=0
  if (*Base == nullptr)
    return false;
  if (*OffsetReg != nullptr)
    return false;
  (void)OffsetRegShamt;
  assert(OffsetRegShamt == 0);
  const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
  if (BaseInst == nullptr)
    return false;
  assert(!VMetadata->isMultiDef(*Base));
  if (BaseInst->getSrcSize() < 2)
    return false;
  auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0));
  if (!Var1)
    return false;
  if (VMetadata->isMultiDef(Var1))
    return false;
  auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1));
  if (!Var2)
    return false;
  if (VMetadata->isMultiDef(Var2))
    return false;
  InstArithmetic::OpKind Kind;
  if (!isAddOrSub(BaseInst, &Kind) ||
      // A subtracted index can't be expressed as [Base, OffsetReg], so only
      // fold Add here.
      Kind != InstArithmetic::Add ||
      // TODO: ensure Var1 and Var2 stay single-BB
      false)
    return false;
  *Base = Var1;
  *OffsetReg = Var2;
  // OffsetRegShamt is already 0.
  *Reason = BaseInst;
  return true;
}

bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata,
                           Variable **OffsetReg, OperandARM32::ShiftKind *Kind,
                           int32_t *OffsetRegShamt, const Inst **Reason) {
  // OffsetReg is OffsetReg=Var*Const && Const=2^N && N+Shift<=31 ==>
  //   OffsetReg=Var, Shift+=N (LSL)
  // OffsetReg is OffsetReg=Var u/ Const && Const=2^N && N+Shift<=31 ==>
  //   OffsetReg=Var, Shift+=N (LSR)
  // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=31 ==>
  //   OffsetReg=Var, Shift+=Const (LSL)
  // OffsetReg is OffsetReg=Var>>Const && Const+Shift<=31 ==>
  //   OffsetReg=Var, Shift+=Const (LSR or ASR)
  OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift;
  if (*OffsetReg == nullptr)
    return false;
  auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg);
  if (IndexInst == nullptr)
    return false;
  assert(!VMetadata->isMultiDef(*OffsetReg));
  if (IndexInst->getSrcSize() < 2)
    return false;
  auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst);
  if (ArithInst == nullptr)
    return false;
  auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0));
  if (Var == nullptr)
    return false;
  auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
  if (Const == nullptr) {
    assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0)));
    return false;
  }
  if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
    return false;

  uint32_t NewShamt = -1;
  switch (ArithInst->getOp()) {
  default:
    return false;
  case InstArithmetic::Shl: {
    NewShiftKind = OperandARM32::LSL;
    NewShamt = Const->getValue();
    if (NewShamt > 31)
      return false;
  } break;
  case InstArithmetic::Lshr: {
    NewShiftKind = OperandARM32::LSR;
    NewShamt = Const->getValue();
    if (NewShamt > 31)
      return false;
  } break;
  case InstArithmetic::Ashr: {
    NewShiftKind = OperandARM32::ASR;
    NewShamt = Const->getValue();
    if (NewShamt > 31)
      return false;
  } break;
  case InstArithmetic::Udiv:
  case InstArithmetic::Mul: {
    const uint32_t UnsignedConst = Const->getValue();
    NewShamt = llvm::findFirstSet(UnsignedConst);
    if (NewShamt != llvm::findLastSet(UnsignedConst)) {
      // First bit set is not the same as the last bit set, so Const is not
      // a power of 2.
      return false;
    }
    NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv
                       ? OperandARM32::LSR
                       : OperandARM32::LSL;
  } break;
  }
5679   // Allowed "transitions":
5680   //   kNoShift -> * iff NewShamt < 31
5681   //   LSL -> LSL    iff NewShamt + OffsetRegShamt < 31
5682   //   LSR -> LSR    iff NewShamt + OffsetRegShamt < 31
5683   //   ASR -> ASR    iff NewShamt + OffsetRegShamt < 31
5684   if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) {
5685     return false;
5686   }
5687   const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt;
5688   if (NewOffsetRegShamt > 31)
5689     return false;
5690   *OffsetReg = Var;
5691   *OffsetRegShamt = NewOffsetRegShamt;
5692   *Kind = NewShiftKind;
5693   *Reason = IndexInst;
5694   return true;
5695 }
5696 
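// Hedged example of the shift folding above (not in the source): if OffsetReg
// is defined by
//   idx = v << 2        (or, equivalently, idx = v * 4)
// then [base, idx] is rewritten as [base, v, LSL #2], i.e.
//   ldr r0, [r1, r2, lsl #2]   ; loads from r1 + (r2 << 2)
// A udiv by a power of two folds the same way, using LSR instead.
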
bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
                     int32_t *Offset, const Inst **Reason) {
  // Base is Base=Var+Const || Base is Base=Const+Var ==>
  //   set Base=Var, Offset+=Const
  // Base is Base=Var-Const ==>
  //   set Base=Var, Offset-=Const
  if (*Base == nullptr)
    return false;
  const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
  if (BaseInst == nullptr) {
    return false;
  }
  assert(!VMetadata->isMultiDef(*Base));

  auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
  if (ArithInst == nullptr)
    return false;
  InstArithmetic::OpKind Kind;
  if (!isAddOrSub(ArithInst, &Kind))
    return false;
  bool IsAdd = Kind == InstArithmetic::Add;
  Operand *Src0 = ArithInst->getSrc(0);
  Operand *Src1 = ArithInst->getSrc(1);
  auto *Var0 = llvm::dyn_cast<Variable>(Src0);
  auto *Var1 = llvm::dyn_cast<Variable>(Src1);
  auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
  auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
  Variable *NewBase = nullptr;
  int32_t NewOffset = *Offset;

  if (Var0 == nullptr && Const0 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src0));
    return false;
  }

  if (Var1 == nullptr && Const1 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src1));
    return false;
  }

  if (Var0 && Var1)
    // TODO(jpp): merge base/index splitting into here.
    return false;
  if (!IsAdd && Var1)
    return false;
  if (Var0)
    NewBase = Var0;
  else if (Var1)
    NewBase = Var1;
  // Compute the updated constant offset.
  if (Const0) {
    int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }
  if (Const1) {
    int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }

  // Update the computed address parameters once we are sure the optimization
  // is valid.
  *Base = NewBase;
  *Offset = NewOffset;
  *Reason = BaseInst;
  return true;
}
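
// Example (illustrative only): with
//   t = p + 8
//   x = load t
// matchOffsetBase() rewrites the address as Base=p, Offset=8, so the load
// can use the immediate-offset form ldr r0, [r1, #8], provided the final
// offset passes the isLegalMemOffset() check in formAddressingMode() below.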
} // end of anonymous namespace

OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
                                                 const Inst *LdSt,
                                                 Operand *Base) {
  assert(Base != nullptr);
  int32_t OffsetImm = 0;
  Variable *OffsetReg = nullptr;
  int32_t OffsetRegShamt = 0;
  OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift;

  Func->resetCurrentNode();
  if (Func->isVerbose(IceV_AddrOpt)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "\nAddress mode formation:\t";
    LdSt->dumpDecorated(Func);
  }

  if (isVectorType(Ty))
    // Vector loads and stores do not allow offsets, and only support the
    // "[reg]" addressing mode (the other supported modes are write-back.)
    return nullptr;

  auto *BaseVar = llvm::dyn_cast<Variable>(Base);
  if (BaseVar == nullptr)
    return nullptr;

  (void)MemTraitsSize;
  assert(Ty < MemTraitsSize);
  auto *TypeTraits = &MemTraits[Ty];
  const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex;
  const bool CanHaveShiftedIndex =
      !NeedSandboxing && TypeTraits->CanHaveShiftedIndex;
  const bool CanHaveImm = TypeTraits->CanHaveImm;
  const int32_t ValidImmMask = TypeTraits->ValidImmMask;
  (void)ValidImmMask;
  assert(!CanHaveImm || ValidImmMask >= 0);

  const VariablesMetadata *VMetadata = Func->getVMetadata();
  const Inst *Reason = nullptr;

  do {
    if (Reason != nullptr) {
      dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt,
                     Reason);
      Reason = nullptr;
    }

    if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }

    if (CanHaveIndex &&
        matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) {
      continue;
    }

    if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg,
                                               OffsetRegShamt, &Reason)) {
      continue;
    }

    if (CanHaveShiftedIndex) {
      if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind,
                                &OffsetRegShamt, &Reason)) {
        continue;
      }

      if ((OffsetRegShamt == 0) &&
          matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind,
                                &OffsetRegShamt, &Reason)) {
        std::swap(BaseVar, OffsetReg);
        continue;
      }
    }

    if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }
  } while (Reason);

  if (BaseVar == nullptr) {
    // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have
    // to legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}].
    // Instead of a zeroed BaseReg, we initialize it with OffsetImm:
    //
    // [OffsetReg{, LSL Shamt}{, #OffsetImm}] ->
    //     mov BaseReg, #OffsetImm
    //     use of [BaseReg, OffsetReg{, LSL Shamt}]
    //
    const Type PointerType = getPointerType();
    BaseVar = makeReg(PointerType);
    Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
    OffsetImm = 0;
  } else if (OffsetImm != 0) {
    // ARM ldr/str instructions have limited-range immediates. The formation
    // loop above materialized an immediate carelessly, so we ensure the
    // generated offset is sane.
    const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
    const InstArithmetic::OpKind Op =
        OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;

    if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) ||
        OffsetReg != nullptr) {
      if (OffsetReg == nullptr) {
        // We formed a [Base, #const] addressing mode which is not encodable
        // in ARM. There is little point in forming an address mode now if we
        // don't have an offset register. Effectively, we would end up with
        // something like
        //
        // [Base, #const] -> add T, Base, #const
        //                   use of [T]
        //
        // which is exactly what we already have. So we just bite the bullet
        // here and don't form any address mode.
        return nullptr;
      }
      // We formed [Base, Offset {, LSL Amnt}, #const]. Oops. Legalize it to
      //
      // [Base, Offset, {LSL amount}, #const] ->
      //      add T, Base, #const
      //      use of [T, Offset {, LSL amount}]
      const Type PointerType = getPointerType();
      Variable *T = makeReg(PointerType);
      Context.insert<InstArithmetic>(Op, T, BaseVar,
                                     Ctx->getConstantInt32(PositiveOffset));
      BaseVar = T;
      OffsetImm = 0;
    }
  }

  assert(BaseVar != nullptr);
  assert(OffsetImm == 0 || OffsetReg == nullptr);
  assert(OffsetReg == nullptr || CanHaveIndex);
  assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
                       : (ValidImmMask & OffsetImm) == OffsetImm);

  if (OffsetReg != nullptr) {
    Variable *OffsetR = makeReg(getPointerType());
    Context.insert<InstAssign>(OffsetR, OffsetReg);
    return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
                                   OffsetRegShamt);
  }

  return OperandARM32Mem::create(
      Func, Ty, BaseVar,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
}

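// A sketch of how the matchers compose (hypothetical input, not from the
// source): for
//   a = base + 12
//   i = v << 1
//   t = a + i
//   x = load t
// the loop above iterates to BaseVar=base, OffsetReg=v, ShiftKind=LSL,
// OffsetRegShamt=1, OffsetImm=12; the immediate is then folded into an
// explicit add, because ARM has no [reg, reg, LSL #n, #imm] form.
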
void TargetARM32::doAddressOptLoad() {
  Inst *Instr = iteratorToInst(Context.getCur());
  assert(llvm::isa<InstLoad>(Instr));
  Variable *Dest = Instr->getDest();
  Operand *Addr = Instr->getSrc(0);
  if (OperandARM32Mem *Mem =
          formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
    Instr->setDeleted();
    Context.insert<InstLoad>(Dest, Mem);
  }
}

void TargetARM32::randomlyInsertNop(float Probability,
                                    RandomNumberGenerator &RNG) {
  RandomNumberGeneratorWrapper RNGW(RNG);
  if (RNGW.getTrueWithProbability(Probability)) {
    _nop();
  }
}

void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) {
  Func->setError("Phi found in regular instruction list");
}

void TargetARM32::lowerRet(const InstRet *Instr) {
  Variable *Reg = nullptr;
  if (Instr->hasRetValue()) {
    Operand *Src0 = Instr->getRetValue();
    Type Ty = Src0->getType();
    if (Ty == IceType_i64) {
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
      Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
      Reg = R0;
      Context.insert<InstFakeUse>(R1);
    } else if (Ty == IceType_f32) {
      Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
      Reg = S0;
    } else if (Ty == IceType_f64) {
      Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
      Reg = D0;
    } else if (isVectorType(Src0->getType())) {
      Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
      Reg = Q0;
    } else {
      Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
      Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
      _mov(Reg, Src0F, CondARM32::AL);
    }
  }
  // Add a ret instruction even if sandboxing is enabled, because addEpilog
  // explicitly looks for a ret instruction as a marker for where to insert the
  // frame removal instructions. addEpilog is responsible for restoring the
  // "lr" register as needed prior to this ret instruction.
  _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);

  // Add a fake use of sp to make sure sp stays alive for the entire function.
  // Otherwise post-call sp adjustments get dead-code eliminated.
  // TODO: Are there more places where the fake use should be inserted? E.g.
  // "void f(int n){while(1) g(n);}" may not have a ret instruction.
  Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
  Context.insert<InstFakeUse>(SP);
}

void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) {
  auto *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();

  auto *T = makeReg(DestTy);
  auto *Src0 = Instr->getSrc(0);
  auto *Src1 = Instr->getSrc(1);
  const SizeT NumElements = typeNumElements(DestTy);
  const Type ElementType = typeElementType(DestTy);

  bool Replicate = true;
  for (SizeT I = 1; Replicate && I < Instr->getNumIndexes(); ++I) {
    if (Instr->getIndexValue(I) != Instr->getIndexValue(0)) {
      Replicate = false;
    }
  }

  if (Replicate) {
    Variable *Src0Var = legalizeToReg(Src0);
    _vdup(T, Src0Var, Instr->getIndexValue(0));
    _mov(Dest, T);
    return;
  }

  switch (DestTy) {
  case IceType_v8i1:
  case IceType_v8i16: {
    static constexpr SizeT ExpectedNumElements = 8;
    assert(ExpectedNumElements == Instr->getNumIndexes());
    (void)ExpectedNumElements;

    if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3)) {
      Variable *Src0R = legalizeToReg(Src0);
      _vzip(T, Src0R, Src0R);
      _mov(Dest, T);
      return;
    }

    if (Instr->indexesAre(0, 8, 1, 9, 2, 10, 3, 11)) {
      Variable *Src0R = legalizeToReg(Src0);
      Variable *Src1R = legalizeToReg(Src1);
      _vzip(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }

    if (Instr->indexesAre(0, 2, 4, 6, 0, 2, 4, 6)) {
      Variable *Src0R = legalizeToReg(Src0);
      _vqmovn2(T, Src0R, Src0R, false, false);
      _mov(Dest, T);
      return;
    }
  } break;
  case IceType_v16i1:
  case IceType_v16i8: {
    static constexpr SizeT ExpectedNumElements = 16;
    assert(ExpectedNumElements == Instr->getNumIndexes());
    (void)ExpectedNumElements;

    if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)) {
      Variable *Src0R = legalizeToReg(Src0);
      _vzip(T, Src0R, Src0R);
      _mov(Dest, T);
      return;
    }

    if (Instr->indexesAre(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7,
                          23)) {
      Variable *Src0R = legalizeToReg(Src0);
      Variable *Src1R = legalizeToReg(Src1);
      _vzip(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }
  } break;
  case IceType_v4i1:
  case IceType_v4i32:
  case IceType_v4f32: {
    static constexpr SizeT ExpectedNumElements = 4;
    assert(ExpectedNumElements == Instr->getNumIndexes());
    (void)ExpectedNumElements;

    if (Instr->indexesAre(0, 0, 1, 1)) {
      Variable *Src0R = legalizeToReg(Src0);
      _vzip(T, Src0R, Src0R);
      _mov(Dest, T);
      return;
    }

    if (Instr->indexesAre(0, 4, 1, 5)) {
      Variable *Src0R = legalizeToReg(Src0);
      Variable *Src1R = legalizeToReg(Src1);
      _vzip(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }

    if (Instr->indexesAre(0, 1, 4, 5)) {
      Variable *Src0R = legalizeToReg(Src0);
      Variable *Src1R = legalizeToReg(Src1);
      _vmovlh(T, Src0R, Src1R);
      _mov(Dest, T);
      return;
    }

    if (Instr->indexesAre(2, 3, 2, 3)) {
      Variable *Src0R = legalizeToReg(Src0);
      _vmovhl(T, Src0R, Src0R);
      _mov(Dest, T);
      return;
    }

    if (Instr->indexesAre(2, 3, 6, 7)) {
      Variable *Src0R = legalizeToReg(Src0);
      Variable *Src1R = legalizeToReg(Src1);
      _vmovhl(T, Src1R, Src0R);
      _mov(Dest, T);
      return;
    }
  } break;
  default:
    // TODO(jpp): figure out how to properly lower this without scalarization.
    break;
  }

  // Unoptimized shuffle. Perform a series of inserts and extracts.
  Context.insert<InstFakeDef>(T);
  for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
    auto *Index = Instr->getIndex(I);
    const SizeT Elem = Index->getValue();
    auto *ExtElmt = makeReg(ElementType);
    if (Elem < NumElements) {
      lowerExtractElement(
          InstExtractElement::create(Func, ExtElmt, Src0, Index));
    } else {
      lowerExtractElement(InstExtractElement::create(
          Func, ExtElmt, Src1,
          Ctx->getConstantInt32(Index->getValue() - NumElements)));
    }
    auto *NewT = makeReg(DestTy);
    lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
                                                 Ctx->getConstantInt32(I)));
    T = NewT;
  }
  _mov(Dest, T);
}

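// NEON background for the pattern matching above (informal note): vzip
// interleaves two vectors lane by lane, so vzip(a, a) duplicates each lane
// pairwise and matches index patterns like (0, 0, 1, 1, ...), while
// vzip(a, b) matches (0, N, 1, N+1, ...). vmovlh/vmovhl copy 64-bit halves
// between vectors, covering patterns such as (0, 1, 4, 5) and (2, 3, 6, 7).
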
void TargetARM32::lowerSelect(const InstSelect *Instr) {
  Variable *Dest = Instr->getDest();
  Type DestTy = Dest->getType();
  Operand *SrcT = Instr->getTrueOperand();
  Operand *SrcF = Instr->getFalseOperand();
  Operand *Condition = Instr->getCondition();

  if (!isVectorType(DestTy)) {
    lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT),
                       legalizeUndef(SrcF));
    return;
  }

  Type TType = DestTy;
  switch (DestTy) {
  default:
    llvm::report_fatal_error("Unexpected type for vector select.");
  case IceType_v4i1:
    TType = IceType_v4i32;
    break;
  case IceType_v8i1:
    TType = IceType_v8i16;
    break;
  case IceType_v16i1:
    TType = IceType_v16i8;
    break;
  case IceType_v4f32:
    TType = IceType_v4i32;
    break;
  case IceType_v4i32:
  case IceType_v8i16:
  case IceType_v16i8:
    break;
  }
  auto *T = makeReg(TType);
  lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
  auto *SrcTR = legalizeToReg(SrcT);
  auto *SrcFR = legalizeToReg(SrcF);
  _vbsl(T, SrcTR, SrcFR)->setDestRedefined();
  _mov(Dest, T);
}

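// Why the vector path above works (informal note): sign-extending the i1
// condition vector yields lanes that are either all-ones or all-zero, and
// NEON's vbsl computes, per bit, (T & SrcT) | (~T & SrcF), so each lane of
// the result selects the true or false operand wholesale:
//   T = sext(Condition)       ; lanes become 0xFF..F or 0x00..0
//   T = vbsl(T, SrcT, SrcF)   ; bitwise select on the mask
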
void TargetARM32::lowerStore(const InstStore *Instr) {
  Operand *Value = Instr->getData();
  Operand *Addr = Instr->getAddr();
  OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
  Type Ty = NewAddr->getType();

  if (Ty == IceType_i64) {
    Value = legalizeUndef(Value);
    Variable *ValueHi = legalizeToReg(hiOperand(Value));
    Variable *ValueLo = legalizeToReg(loOperand(Value));
    _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
    _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
  } else {
    Variable *ValueR = legalizeToReg(Value);
    _str(ValueR, NewAddr);
  }
}

void TargetARM32::doAddressOptStore() {
  Inst *Instr = iteratorToInst(Context.getCur());
  assert(llvm::isa<InstStore>(Instr));
  Operand *Src = Instr->getSrc(0);
  Operand *Addr = Instr->getSrc(1);
  if (OperandARM32Mem *Mem =
          formAddressingMode(Src->getType(), Func, Instr, Addr)) {
    Instr->setDeleted();
    Context.insert<InstStore>(Src, Mem);
  }
}

void TargetARM32::lowerSwitch(const InstSwitch *Instr) {
  // This implements the most naive possible lowering.
  // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
  Operand *Src0 = Instr->getComparison();
  SizeT NumCases = Instr->getNumCases();
  if (Src0->getType() == IceType_i64) {
    Src0 = legalizeUndef(Src0);
    Variable *Src0Lo = legalizeToReg(loOperand(Src0));
    Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
      ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
      ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
      _cmp(Src0Lo, ValueLo);
      _cmp(Src0Hi, ValueHi, CondARM32::EQ);
      _br(Instr->getLabel(I), CondARM32::EQ);
    }
    _br(Instr->getLabelDefault());
    return;
  }

  Variable *Src0Var = legalizeToReg(Src0);
  // If Src0 is not an i32, we left shift it -- see the icmp lowering for the
  // reason.
  assert(Src0Var->mustHaveReg());
  const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
  assert(ShiftAmt < 32);
  if (ShiftAmt > 0) {
    Operand *ShAmtImm = shAmtImm(ShiftAmt);
    Variable *T = makeReg(IceType_i32);
    _lsl(T, Src0Var, ShAmtImm);
    Src0Var = T;
  }

  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt);
    Value = legalize(Value, Legal_Reg | Legal_Flex);
    _cmp(Src0Var, Value);
    _br(Instr->getLabel(I), CondARM32::EQ);
  }
  _br(Instr->getLabelDefault());
}

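// Example of the narrow-type shift trick above (illustrative, not from the
// source): for a switch on an i16 value with case 3, the comparison happens
// in the top bits of the register:
//   lsl r0, r0, #16
//   cmp r0, #0x30000      ; 3 << 16
// so any garbage in the upper bits of the original i16 cannot affect the
// result, mirroring the icmp lowering.
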
void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) {
  UnimplementedLoweringError(this, Instr);
}

void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
  _trap();
}

namespace {
// Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables
// and fp constants need access to the GOT address.
bool operandNeedsGot(const Operand *Opnd) {
  if (llvm::isa<ConstantRelocatable>(Opnd)) {
    return true;
  }

  if (llvm::isa<ConstantFloat>(Opnd)) {
    uint32_t _;
    return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_);
  }

  const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd);
  if (F64 != nullptr) {
    uint32_t _;
    return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) &&
           !isFloatingPointZero(F64);
  }

  return false;
}

// Returns whether Phi needs the GOT address (which it does if any of its
// operands needs the GOT address.)
bool phiNeedsGot(const InstPhi *Phi) {
  if (Phi->isDeleted()) {
    return false;
  }

  for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
    if (operandNeedsGot(Phi->getSrc(I))) {
      return true;
    }
  }

  return false;
}

// Returns whether **any** phi in Node needs the GOT address.
bool anyPhiInNodeNeedsGot(CfgNode *Node) {
  for (auto &Inst : Node->getPhis()) {
    if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) {
      return true;
    }
  }
  return false;
}

} // end of anonymous namespace

void TargetARM32::prelowerPhis() {
  CfgNode *Node = Context.getNode();

  if (SandboxingType == ST_Nonsfi) {
    assert(GotPtr != nullptr);
    if (anyPhiInNodeNeedsGot(Node)) {
      // If any phi instruction needs the GOT address, we place a
      //   fake-use GotPtr
      // in Node to prevent the GotPtr's initialization from being dead code
      // eliminated.
      Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr));
    }
  }

  PhiLowering::prelowerPhis32Bit(this, Node, Func);
}

Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
  Variable *Reg = makeReg(Ty, RegNum);
  Context.insert<InstFakeDef>(Reg);
  assert(isVectorType(Ty));
  _veor(Reg, Reg, Reg);
  return Reg;
}

// Helper for legalize() to emit the right code to lower an operand to a
// register of the appropriate type.
Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) {
  Type Ty = Src->getType();
  Variable *Reg = makeReg(Ty, RegNum);
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) {
    _ldr(Reg, Mem);
  } else {
    _mov(Reg, Src);
  }
  return Reg;
}

// TODO(jpp): remove unneeded else clauses in legalize.
Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
                               RegNumT RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register. Legal_Flex converts registers to the
  // right type OperandARM32FlexReg as needed.
  assert(Allowed & Legal_Reg);

  // Copied ipsis literis from TargetX86Base<Machine>.
  if (RegNum.hasNoValue()) {
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
          !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  // Go through the various types of operands: OperandARM32Mem,
  // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
  // the type of the operand is not legal (e.g., OperandARM32Mem and
  // !Legal_Mem), we can always copy to a register.
  if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
    // Before doing anything with a Mem operand, we need to ensure that the
    // Base and Index components are in physical registers.
    Variable *Base = Mem->getBase();
    Variable *Index = Mem->getIndex();
    ConstantInteger32 *Offset = Mem->getOffset();
    assert(Index == nullptr || Offset == nullptr);
    Variable *RegBase = nullptr;
    Variable *RegIndex = nullptr;
    assert(Base);
    RegBase = llvm::cast<Variable>(
        legalize(Base, Legal_Reg | Legal_Rematerializable));
    assert(Ty < MemTraitsSize);
    if (Index) {
      assert(Offset == nullptr);
      assert(MemTraits[Ty].CanHaveIndex);
      RegIndex = legalizeToReg(Index);
    }
    if (Offset && Offset->getValue() != 0) {
      assert(Index == nullptr);
      static constexpr bool ZeroExt = false;
      assert(MemTraits[Ty].CanHaveImm);
      if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
        llvm::report_fatal_error("Invalid memory offset.");
      }
    }

    // Create a new operand if there was a change.
    if (Base != RegBase || Index != RegIndex) {
      // There is only a reg +/- reg or reg + imm form.
      // Figure out which to re-create.
      if (RegIndex) {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
                                      Mem->getShiftOp(), Mem->getShiftAmt(),
                                      Mem->getAddrMode());
      } else {
        Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
                                      Mem->getAddrMode());
      }
    }
    if (Allowed & Legal_Mem) {
      From = Mem;
    } else {
      Variable *Reg = makeReg(Ty, RegNum);
      _ldr(Reg, Mem);
      From = Reg;
    }
    return From;
  }

  if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
    if (!(Allowed & Legal_Flex)) {
      if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
        if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
          From = FlexReg->getReg();
          // Fall through and let From be checked as a Variable below, where it
          // may or may not need a register.
        } else {
          return copyToReg(Flex, RegNum);
        }
      } else {
        return copyToReg(Flex, RegNum);
      }
    } else {
      return From;
    }
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    // There should be no constants of vector type (other than undef).
    assert(!isVectorType(Ty));
    if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      uint32_t RotateAmt;
      uint32_t Immed_8;
      uint32_t Value = static_cast<uint32_t>(C32->getValue());
      if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
        // The immediate can be encoded as a Flex immediate. We may return the
        // Flex operand if the caller has Allow'ed it.
        auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
        const bool CanBeFlex = Allowed & Legal_Flex;
        if (CanBeFlex)
          return OpF;
        return copyToReg(OpF, RegNum);
      } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
                                                 &Immed_8)) {
        // Even though the immediate can't be encoded as a Flex operand, its
        // inverted bit pattern can, thus we use ARM's mvn to load the 32-bit
        // constant with a single instruction.
        auto *InvOpF =
            OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
        Variable *Reg = makeReg(Ty, RegNum);
        _mvn(Reg, InvOpF);
        return Reg;
      } else {
        // Do a movw/movt to a register.
        Variable *Reg = makeReg(Ty, RegNum);
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        _movw(Reg,
              UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
        if (UpperBits != 0) {
          _movt(Reg, Ctx->getConstantInt32(UpperBits));
        }
        return Reg;
      }
    } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      Variable *Reg = makeReg(Ty, RegNum);
      if (SandboxingType != ST_Nonsfi) {
        _movw(Reg, C);
        _movt(Reg, C);
      } else {
        auto *GotAddr = legalizeToReg(GotPtr);
        GlobalString CGotoffName = createGotoffRelocation(C);
        loadNamedConstantRelocatablePIC(
            CGotoffName, Reg, [this, Reg](Variable *PC) {
              _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg));
            });
        _add(Reg, GotAddr, Reg);
      }
      return Reg;
    } else {
      assert(isScalarFloatingType(Ty));
      uint32_t ModifiedImm;
      if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
        Variable *T = makeReg(Ty, RegNum);
        _mov(T,
             OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
        return T;
      }

      if (Ty == IceType_f64 && isFloatingPointZero(From)) {
        // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
        // because ARM does not have a veor instruction with S registers.
        Variable *T = makeReg(IceType_f64, RegNum);
        Context.insert<InstFakeDef>(T);
        _veor(T, T, T);
        return T;
      }

      // Load floats/doubles from the literal pool.
      auto *CFrom = llvm::cast<Constant>(From);
      assert(CFrom->getShouldBePooled());
      Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
      Variable *BaseReg = nullptr;
      if (SandboxingType == ST_Nonsfi) {
        // vldr does not support the [base, index] addressing mode, so we need
        // to legalize Offset to a register. Otherwise, we could simply
        //   vldr dest, [got, reg(Offset)]
        BaseReg = legalizeToReg(Offset);
      } else {
        BaseReg = makeReg(getPointerType());
        _movw(BaseReg, Offset);
        _movt(BaseReg, Offset);
      }
      From = formMemoryOperand(BaseReg, Ty);
      return copyToReg(From, RegNum);
    }
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    if (Var->isRematerializable()) {
      if (Allowed & Legal_Rematerializable) {
        return From;
      }

      Variable *T = makeReg(Var->getType(), RegNum);
      _mov(T, Var);
      return T;
    }
    // Check if the variable is guaranteed a physical register. This can happen
    // either when the variable is pre-colored or when it is assigned infinite
    // weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum.hasValue() && (RegNum != Var->getRegNum()))) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }

  llvm::report_fatal_error("Unhandled operand kind in legalize()");
  return From;
}

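// Encoding note for the integer-constant paths above (paraphrasing the ARM
// reference manual; not part of the original comments): a flexible
// ("modified") immediate is an 8-bit value rotated right by an even amount.
// For example, 0xAB00 is encodable (0xAB ror 24) and goes out as a flex
// operand; 0xFFFFFF00 is not, but its complement 0xFF is, so it takes the
// mvn path; 0x12345678 fits neither pattern and becomes
//   movw reg, #0x5678
//   movt reg, #0x1234
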
/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) {
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register results
    // in less predictable code.
    //
    // If in the future the implementation is changed to lower undef values to
    // uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value, then
    // the result should be split and the lo and hi components will need to go
    // in uninitialized registers.
    if (isVectorType(Ty))
      return makeVectorOfZeros(Ty, RegNum);
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
  auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
  // Address mode optimization may have already created an OperandARM32Mem,
  // in which case no further transformation is needed.
  if (Mem) {
    return llvm::cast<OperandARM32Mem>(legalize(Mem));
  }
  // If we didn't do address mode optimization, then we only have a
  // base/offset to work with. ARM always requires a base register, so
  // just use that to hold the operand.
  auto *Base = llvm::cast<Variable>(
      legalize(Operand, Legal_Reg | Legal_Rematerializable));
  return OperandARM32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
}

Variable64On32 *TargetARM32::makeI64RegPair() {
  Variable64On32 *Reg =
      llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
  Reg->setMustHaveReg();
  Reg->initHiLo(Func);
  Reg->getLo()->setMustNotHaveReg();
  Reg->getHi()->setMustNotHaveReg();
  return Reg;
}

Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) {
  // There aren't any 64-bit integer registers for ARM32.
  assert(Type != IceType_i64);
  assert(AllowTemporaryWithNoReg || RegNum.hasValue());
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum.hasValue())
    Reg->setRegNum(RegNum);
  else
    Reg->setMustHaveReg();
  return Reg;
}

void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
                                    RegNumT TmpRegNum) {
  assert(llvm::isPowerOf2_32(Align));
  uint32_t RotateAmt;
  uint32_t Immed_8;
  Operand *Mask;
  // Use AND or BIC to mask off the bits, depending on which immediate fits (if
  // it fits at all). Assume Align is usually small, in which case BIC works
  // better. Thus, this rounds down to the alignment.
  if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
    Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
                    TmpRegNum);
    _bic(Reg, Reg, Mask);
  } else {
    Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
                    TmpRegNum);
    _and(Reg, Reg, Mask);
  }
}

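// Worked example (illustrative): aligning a register down to 16 bytes emits
//   bic r0, r0, #15
// because Align - 1 == 15 is a valid flexible immediate; bic clears the low
// four bits, which is the same as and-ing with -16 (0xFFFFFFF0).
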
void TargetARM32::postLower() {
  if (Func->getOptLevel() == Opt_m1)
    return;
  markRedefinitions();
  Context.availabilityUpdate();
}

void TargetARM32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  (void)Salt;
  UnimplementedError(getFlags());
}

void TargetARM32::emit(const ConstantInteger32 *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << "#" << C->getValue();
}

void TargetARM32::emit(const ConstantInteger64 *) const {
  llvm::report_fatal_error("Not expecting to emit 64-bit integers");
}

void TargetARM32::emit(const ConstantFloat *C) const {
  (void)C;
  UnimplementedError(getFlags());
}

void TargetARM32::emit(const ConstantDouble *C) const {
  (void)C;
  UnimplementedError(getFlags());
}

void TargetARM32::emit(const ConstantUndef *) const {
  llvm::report_fatal_error("undef value encountered by emitter.");
}

void TargetARM32::emit(const ConstantRelocatable *C) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  Str << "#";
  emitWithoutPrefix(C);
}

void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
                                     Operand *TrueValue, Operand *FalseValue) {
  Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);

  assert(Boolean->getType() == IceType_i1);

  bool NeedsAnd1 = false;
  if (TrueValue->getType() == IceType_i1) {
    assert(FalseValue->getType() == IceType_i1);

    Variable *TrueValueV = Func->makeVariable(IceType_i1);
    SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue);
    TrueValue = TrueValueV;

    Variable *FalseValueV = Func->makeVariable(IceType_i1);
    SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue);
    FalseValue = FalseValueV;

    NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No;
  }

  Variable *DestLo = (Dest->getType() == IceType_i64)
                         ? llvm::cast<Variable>(loOperand(Dest))
                         : Dest;
  Variable *DestHi = (Dest->getType() == IceType_i64)
                         ? llvm::cast<Variable>(hiOperand(Dest))
                         : nullptr;
  Operand *FalseValueLo = (FalseValue->getType() == IceType_i64)
                              ? loOperand(FalseValue)
                              : FalseValue;
  Operand *FalseValueHi =
      (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr;

  Operand *TrueValueLo =
      (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue;
  Operand *TrueValueHi =
      (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr;

  Variable *T_Lo = makeReg(DestLo->getType());
  Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType());

  _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
  if (DestHi) {
    _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
  }

  CondWhenTrue Cond(CondARM32::kNone);
  // FlagsWereSet is used to determine whether Boolean was folded or not. If
  // not, add an explicit _tst instruction below.
  bool FlagsWereSet = false;
  if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
    switch (Producer->getKind()) {
    default:
      llvm::report_fatal_error("Unexpected producer.");
    case Inst::Icmp: {
      Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
      FlagsWereSet = true;
    } break;
    case Inst::Fcmp: {
      Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
      FlagsWereSet = true;
    } break;
    case Inst::Cast: {
      const auto *CastProducer = llvm::cast<InstCast>(Producer);
      assert(CastProducer->getCastKind() == InstCast::Trunc);
      Boolean = CastProducer->getSrc(0);
      // No flags were set, so a _tst(Src, 1) will be emitted below. Don't
      // bother legalizing Src to a Reg because it will be legalized before
      // emitting the tst instruction.
      FlagsWereSet = false;
    } break;
    case Inst::Arithmetic: {
      // This is a special case: we eagerly assumed Producer could be folded,
      // but in reality, it can't. No reason to panic: we just lower it using
      // the regular lowerArithmetic helper.
      const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
      lowerArithmetic(ArithProducer);
      Boolean = ArithProducer->getDest();
      // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
      // bother legalizing Dest to a Reg because it will be legalized before
      // emitting the tst instruction.
      FlagsWereSet = false;
    } break;
    }
  }

  if (!FlagsWereSet) {
    // No flags have been set, so emit a tst Boolean, 1.
    Variable *Src = legalizeToReg(Boolean);
    _tst(Src, _1);
    Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero.
  }

  if (Cond.WhenTrue0 == CondARM32::kNone) {
    assert(Cond.WhenTrue1 == CondARM32::kNone);
  } else {
    _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
                   Cond.WhenTrue0);
    if (DestHi) {
      _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
                     Cond.WhenTrue0);
    }
  }

  if (Cond.WhenTrue1 != CondARM32::kNone) {
    _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
                   Cond.WhenTrue1);
    if (DestHi) {
      _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
                     Cond.WhenTrue1);
    }
  }

  if (NeedsAnd1) {
    // We lowered something that is unsafe (i.e., can't provably be zero or
    // one). Truncate the result.
    _and(T_Lo, T_Lo, _1);
  }

  _mov(DestLo, T_Lo);
  if (DestHi) {
    _mov(DestHi, T_Hi);
  }
}

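// Shape of the code produced above for a scalar select (illustrative):
//   mov   t, <false value>
//   cmp   ...                  ; or "tst boolean, #1" when nothing was folded
//   movNE t, <true value>      ; predicated on the computed condition
//   mov   dest, t
// i64 selects run the same predicated-mov sequence once per 32-bit half.
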
TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
                                                  Operand *Boolean) {
  assert(Boolean->getType() == IceType_i1);
  Variable *T = makeReg(IceType_i1);
  Operand *_0 =
      legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
  Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);

  SafeBoolChain Safe = SBC_Yes;
  if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
    switch (Producer->getKind()) {
    default:
      llvm::report_fatal_error("Unexpected producer.");
    case Inst::Icmp: {
      _mov(T, _0);
      CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
      assert(Cond.WhenTrue0 != CondARM32::AL);
      assert(Cond.WhenTrue0 != CondARM32::kNone);
      assert(Cond.WhenTrue1 == CondARM32::kNone);
      _mov_redefined(T, _1, Cond.WhenTrue0);
    } break;
    case Inst::Fcmp: {
      _mov(T, _0);
      Inst *MovZero = Context.getLastInserted();
      CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
      if (Cond.WhenTrue0 == CondARM32::AL) {
        assert(Cond.WhenTrue1 == CondARM32::kNone);
        MovZero->setDeleted();
        _mov(T, _1);
      } else if (Cond.WhenTrue0 != CondARM32::kNone) {
        _mov_redefined(T, _1, Cond.WhenTrue0);
      }
      if (Cond.WhenTrue1 != CondARM32::kNone) {
        assert(Cond.WhenTrue0 != CondARM32::kNone);
        assert(Cond.WhenTrue0 != CondARM32::AL);
        _mov_redefined(T, _1, Cond.WhenTrue1);
      }
    } break;
    case Inst::Cast: {
      const auto *CastProducer = llvm::cast<InstCast>(Producer);
      assert(CastProducer->getCastKind() == InstCast::Trunc);
      Operand *Src = CastProducer->getSrc(0);
      if (Src->getType() == IceType_i64)
        Src = loOperand(Src);
      _mov(T, legalize(Src, Legal_Reg | Legal_Flex));
      Safe = SBC_No;
    } break;
    case Inst::Arithmetic: {
      const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
      Safe = lowerInt1Arithmetic(ArithProducer);
      _mov(T, ArithProducer->getDest());
    } break;
    }
  } else {
    _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
  }

  _mov(Dest, T);
  return Safe;
}

namespace {
namespace BoolFolding {
bool shouldTrackProducer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Icmp:
  case Inst::Fcmp:
    return true;
  case Inst::Cast: {
    switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
    default:
      return false;
    case InstCast::Trunc:
      return true;
    }
  }
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::And:
    case InstArithmetic::Or:
      return true;
    }
  }
  }
}

bool isValidConsumer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Br:
    return true;
  case Inst::Select:
    return !isVectorType(Instr.getDest()->getType());
  case Inst::Cast: {
    switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
    default:
      return false;
    case InstCast::Sext:
      return !isVectorType(Instr.getDest()->getType());
    case InstCast::Zext:
      return !isVectorType(Instr.getDest()->getType());
    }
  }
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::And:
      return !isVectorType(Instr.getDest()->getType());
    case InstArithmetic::Or:
      return !isVectorType(Instr.getDest()->getType());
    }
  }
  }
}
} // end of namespace BoolFolding

namespace FpFolding {
bool shouldTrackProducer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Fmul:
      return true;
    }
  }
  }
}

bool isValidConsumer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Fadd:
    case InstArithmetic::Fsub:
      return true;
    }
  }
  }
}
} // end of namespace FpFolding

namespace IntFolding {
bool shouldTrackProducer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Mul:
      return true;
    }
  }
  }
}

bool isValidConsumer(const Inst &Instr) {
  switch (Instr.getKind()) {
  default:
    return false;
  case Inst::Arithmetic: {
    switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
    default:
      return false;
    case InstArithmetic::Add:
    case InstArithmetic::Sub:
      return true;
    }
  }
  }
}
} // end of namespace IntFolding
} // end of anonymous namespace

void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (!Instr.isDeleted() // only consider non-deleted instructions; and
        && Dest            // only instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    if (!Instr.isDeleted() // only consider non-deleted instructions; and
        && Dest            // only instructions with an actual dest var; and
        && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
        && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, Dest->getType()));
    }
    if (!Instr.isDeleted() // only consider non-deleted instructions; and
        && Dest            // only instructions with an actual dest var; and
        && Dest->getType() == IceType_i32            // i32 only dest vars; and
        && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i32));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      case IceType_i32:
        if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      case IceType_f32:
      case IceType_f64:
        if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}

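// The Sandboxer helpers below emit Native Client software-fault-isolation
// sequences: loads, stores, indirect calls, returns, and SP updates are
// wrapped in an instruction bundle, with a BIC masking the address (or new SP
// value) so it stays inside the sandbox. The indirect-branch mask additionally
// clears the low four bits, forcing the target to a 16-byte bundle boundary.
// When sandboxing is disabled each helper degenerates to the plain
// instruction. Typical use is a one-liner (illustrative; assumes the header
// declares InstBundleLock::Opt_None as the default BundleOption):
//
//   Sandboxer(this).str(Src, Mem, Pred);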
TargetARM32::Sandboxer::Sandboxer(TargetARM32 *Target,
                                  InstBundleLock::Option BundleOption)
    : Target(Target), BundleOption(BundleOption) {}

TargetARM32::Sandboxer::~Sandboxer() {}

namespace {
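// The masks below are encoded as ARM "flexible" (modified) immediates: the
// materialized constant is Imm8 rotated right by 2 * RotateAmt. Working the
// arithmetic: 0xFC ror 4 == 0xC000000F (the indirect-branch mask) and
// 0x0C ror 4 == 0xC0000000 (the memory-access mask).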
OperandARM32FlexImm *indirectBranchBicMask(Cfg *Func) {
  constexpr uint32_t Imm8 = 0xFC; // 0xC000000F
  constexpr uint32_t RotateAmt = 2;
  return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
}

OperandARM32FlexImm *memOpBicMask(Cfg *Func) {
  constexpr uint32_t Imm8 = 0x0C; // 0xC0000000
  constexpr uint32_t RotateAmt = 2;
  return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
}

static bool baseNeedsBic(Variable *Base) {
  return Base->getRegNum() != RegARM32::Reg_r9 &&
         Base->getRegNum() != RegARM32::Reg_sp;
}
} // end of anonymous namespace

void TargetARM32::Sandboxer::createAutoBundle() {
  Bundler = makeUnique<AutoBundle>(Target, BundleOption);
}

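// Every helper that modifies SP (add_sp, align_sp, reset_sp, sub_sp) follows
// the same pattern: when sandboxing is enabled, the update and a BIC with the
// memory-op mask are emitted inside a single bundle, so SP holds a
// sandbox-legal address at every bundle boundary.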
void TargetARM32::Sandboxer::add_sp(Operand *AddAmount) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->_add(SP, SP, AddAmount);
    return;
  }
  createAutoBundle();
  Target->_add(SP, SP, AddAmount);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

void TargetARM32::Sandboxer::align_sp(size_t Alignment) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->alignRegisterPow2(SP, Alignment);
    return;
  }
  createAutoBundle();
  Target->alignRegisterPow2(SP, Alignment);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

InstARM32Call *TargetARM32::Sandboxer::bl(Variable *ReturnReg,
                                          Operand *CallTarget) {
  if (Target->NeedSandboxing) {
    createAutoBundle();
    if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
      Target->_bic(CallTargetR, CallTargetR,
                   indirectBranchBicMask(Target->Func));
    }
  }
  return Target->Context.insert<InstARM32Call>(ReturnReg, CallTarget);
}

void TargetARM32::Sandboxer::ldr(Variable *Dest, OperandARM32Mem *Mem,
                                 CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_ldr(Dest, Mem, Pred);
}

void TargetARM32::Sandboxer::ldrex(Variable *Dest, OperandARM32Mem *Mem,
                                   CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_ldrex(Dest, Mem, Pred);
}

void TargetARM32::Sandboxer::reset_sp(Variable *Src) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->_mov_redefined(SP, Src);
    return;
  }
  createAutoBundle();
  Target->_mov_redefined(SP, Src);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

void TargetARM32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
  if (Target->NeedSandboxing) {
    createAutoBundle();
    Target->_bic(RetAddr, RetAddr, indirectBranchBicMask(Target->Func));
  }
  Target->_ret(RetAddr, RetValue);
}

void TargetARM32::Sandboxer::str(Variable *Src, OperandARM32Mem *Mem,
                                 CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_str(Src, Mem, Pred);
}

void TargetARM32::Sandboxer::strex(Variable *Dest, Variable *Src,
                                   OperandARM32Mem *Mem, CondARM32::Cond Pred) {
  Variable *MemBase = Mem->getBase();
  if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
    createAutoBundle();
    assert(!Mem->isRegReg());
    Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
  }
  Target->_strex(Dest, Src, Mem, Pred);
}

void TargetARM32::Sandboxer::sub_sp(Operand *SubAmount) {
  Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
  if (!Target->NeedSandboxing) {
    Target->_sub(SP, SP, SubAmount);
    return;
  }
  createAutoBundle();
  Target->_sub(SP, SP, SubAmount);
  Target->_bic(SP, SP, memOpBicMask(Target->Func));
}

TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}

void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
                                   const std::string &SectionSuffix) {
  const bool IsPIC = getFlags().getUseNonsfi();
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix,
                             IsPIC);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    for (const VariableDeclaration *Var : Vars) {
      if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
        emitGlobal(*Var, SectionSuffix);
      }
    }
  } break;
  }
}

namespace {
template <typename T> struct ConstantPoolEmitterTraits;

static_assert(sizeof(uint64_t) == 8,
              "uint64_t is supposed to be 8 bytes wide.");

// TODO(jpp): implement the following when implementing constant randomization:
//  * template <> struct ConstantPoolEmitterTraits<uint8_t>
//  * template <> struct ConstantPoolEmitterTraits<uint16_t>
//  * template <> struct ConstantPoolEmitterTraits<uint32_t>
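// Each specialization supplies the assembler directive (AsmTag), a name for
// comments (TypeName), and a bit-level conversion to uint64_t, so that
// emitConstant() and emitConstantPool() below can be written once as
// templates over the element type.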
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
    return static_cast<uint64_t>(IntValue);
  }
};
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";

template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  static const char AsmTag[];
  static const char TypeName[];
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    return Utils::bitCopy<uint64_t>(Value);
  }
};
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";

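// emitConstant() prints one pool entry as a label followed by the raw bits of
// the value. For a pooled f32 with value 1.0 the output looks roughly like
// (label name illustrative):
//
//   .L$f32$0:
//           .long   0x3f800000      /* f32 1 */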
template <typename T>
void emitConstant(
    Ostream &Str,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
  using Traits = ConstantPoolEmitterTraits<T>;
  Str << Const->getLabelName();
  Str << ":\n\t" << Traits::AsmTag << "\t0x";
  T Value = Const->getValue();
  Str.write_hex(Traits::bitcastToUint64(Value));
  Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
}

template <typename T> void emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump()) {
    return;
  }

  using Traits = ConstantPoolEmitterTraits<T>;
  static constexpr size_t MinimumAlignment = 4;
  SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
  assert((Align % 4) == 0 && "Constants should be aligned");
  Ostream &Str = Ctx->getStrEmit();
  ConstantList Pool = Ctx->getConstantPool(Traits::IceType);

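  // The "aM" flags mark the section SHF_ALLOC | SHF_MERGE with the given
  // entity size, which lets the linker deduplicate identical constants.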
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
      << "\n"
      << "\t.align\t" << Align << "\n";

  if (getFlags().getReorderPooledConstants()) {
    // TODO(jpp): add constant pooling.
    UnimplementedError(getFlags());
  }

  for (Constant *C : Pool) {
    if (!C->getShouldBePooled()) {
      continue;
    }

    emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
  }
}
} // end of anonymous namespace

void TargetDataARM32::lowerConstants() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf: {
    ELFObjectWriter *Writer = Ctx->getObjectWriter();
    Writer->writeConstantPool<ConstantFloat>(IceType_f32);
    Writer->writeConstantPool<ConstantDouble>(IceType_f64);
  } break;
  case FT_Asm:
  case FT_Iasm: {
    OstreamLocker _(Ctx);
    emitConstantPool<float>(Ctx);
    emitConstantPool<double>(Ctx);
    break;
  }
  }
}

void TargetDataARM32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
  switch (getFlags().getOutFileType()) {
  case FT_Elf:
    if (!Ctx->getJumpTables().empty()) {
      llvm::report_fatal_error("ARM32 does not support jump tables yet.");
    }
    break;
  case FT_Asm:
    // Already emitted from Cfg
    break;
  case FT_Iasm: {
    // TODO(kschimpf): Fill this in when we get more information.
    break;
  }
  }
}

TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {}

void TargetHeaderARM32::lower() {
  OstreamLocker _(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << ".syntax unified\n";
  // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
  // "Addenda to, and Errata in the ABI for the ARM architecture"
  // http://infocenter.arm.com
  //                  /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
  //
  // Tag_conformance should be emitted first in a file-scope sub-subsection
  // of the first public subsection of the attributes.
  Str << ".eabi_attribute 67, \"2.09\"      @ Tag_conformance\n";
  // Chromebooks are at least A15, but target A9 for broader compatibility.
  // For some reason, the LLVM ARM asm parser has the .cpu directive override
  // the mattr specified on the command line, so to test hwdiv we need to set
  // the .cpu directive higher (we can't just rely on --mattr=...).
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".cpu    cortex-a15\n";
  } else {
    Str << ".cpu    cortex-a9\n";
  }
  Str << ".eabi_attribute 6, 10   @ Tag_CPU_arch: ARMv7\n"
      << ".eabi_attribute 7, 65   @ Tag_CPU_arch_profile: App profile\n";
  Str << ".eabi_attribute 8, 1    @ Tag_ARM_ISA_use: Yes\n"
      << ".eabi_attribute 9, 2    @ Tag_THUMB_ISA_use: Thumb-2\n";
  Str << ".fpu    neon\n"
      << ".eabi_attribute 17, 1   @ Tag_ABI_PCS_GOT_use: permit directly\n"
      << ".eabi_attribute 20, 1   @ Tag_ABI_FP_denormal\n"
      << ".eabi_attribute 21, 1   @ Tag_ABI_FP_exceptions\n"
      << ".eabi_attribute 23, 3   @ Tag_ABI_FP_number_model: IEEE 754\n"
      << ".eabi_attribute 34, 1   @ Tag_CPU_unaligned_access\n"
      << ".eabi_attribute 24, 1   @ Tag_ABI_align_needed: 8-byte\n"
      << ".eabi_attribute 25, 1   @ Tag_ABI_align_preserved: 8-byte\n"
      << ".eabi_attribute 28, 1   @ Tag_ABI_VFP_args\n"
      << ".eabi_attribute 36, 1   @ Tag_FP_HP_extension\n"
      << ".eabi_attribute 38, 1   @ Tag_ABI_FP_16bit_format\n"
      << ".eabi_attribute 42, 1   @ Tag_MPextension_use\n"
      << ".eabi_attribute 68, 1   @ Tag_Virtualization_use\n";
  if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
    Str << ".eabi_attribute 44, 2   @ Tag_DIV_use\n";
  }
  // Technically R9 is used for TLS with Sandboxing, and we reserve it.
  // However, for compatibility with current NaCl LLVM, don't claim that.
  Str << ".eabi_attribute 14, 3   @ Tag_ABI_PCS_R9_use: Not used\n";
}

SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];

} // end of namespace ARM32
} // end of namespace Ice