1 //===- subzero/src/IceTargetLoweringARM32.cpp - ARM32 lowering ------------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief Implements the TargetLoweringARM32 class, which consists almost
12 /// entirely of the lowering sequence for each high-level instruction.
13 ///
14 //===----------------------------------------------------------------------===//
15 #include "IceTargetLoweringARM32.h"
16
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstARM32.def"
24 #include "IceInstARM32.h"
25 #include "IceInstVarIter.h"
26 #include "IceLiveness.h"
27 #include "IceOperand.h"
28 #include "IcePhiLoweringImpl.h"
29 #include "IceRegistersARM32.h"
30 #include "IceTargetLoweringARM32.def"
31 #include "IceUtils.h"
32 #include "llvm/Support/MathExtras.h"
33
34 #include <algorithm>
35 #include <array>
36 #include <utility>
37
38 namespace ARM32 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
40 return ::Ice::ARM32::TargetARM32::create(Func);
41 }
42
43 std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
45 return ::Ice::ARM32::TargetDataARM32::create(Ctx);
46 }
47
48 std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
50 return ::Ice::ARM32::TargetHeaderARM32::create(Ctx);
51 }
52
void staticInit(::Ice::GlobalContext *Ctx) {
54 ::Ice::ARM32::TargetARM32::staticInit(Ctx);
55 if (Ice::getFlags().getUseNonsfi()) {
56 // In nonsfi, we need to reference the _GLOBAL_OFFSET_TABLE_ for accessing
57 // globals. The GOT is an external symbol (i.e., it is not defined in the
58 // pexe) so we need to register it as such so that ELF emission won't barf
59 // on an "unknown" symbol. The GOT is added to the External symbols list
60 // here because staticInit() is invoked in a single-thread context.
61 Ctx->getConstantExternSym(Ctx->getGlobalString(::Ice::GlobalOffsetTable));
62 }
63 }
64
bool shouldBePooled(const ::Ice::Constant *C) {
66 return ::Ice::ARM32::TargetARM32::shouldBePooled(C);
67 }
68
::Ice::Type getPointerType() {
70 return ::Ice::ARM32::TargetARM32::getPointerType();
71 }
72
73 } // end of namespace ARM32
74
75 namespace Ice {
76 namespace ARM32 {
77
78 namespace {
79
/// SizeOf is used to obtain the size of an initializer list as a constexpr
/// expression. This is only needed until our C++ library is updated to
/// C++14, which adds constexpr members to std::initializer_list.
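/// A minimal usage sketch (hypothetical values): SizeOf(1, 2, 3).size() is a
/// compile-time constant equal to 3; the register table below relies on this
/// to count the entries in each alias_init initializer list.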
83 class SizeOf {
84 SizeOf(const SizeOf &) = delete;
85 SizeOf &operator=(const SizeOf &) = delete;
86
87 public:
constexpr SizeOf() : Size(0) {}
template <typename... T>
explicit constexpr SizeOf(T...) : Size(__length<T...>::value) {}
constexpr SizeT size() const { return Size; }
92
93 private:
94 template <typename T, typename... U> struct __length {
95 static constexpr std::size_t value = 1 + __length<U...>::value;
96 };
97
98 template <typename T> struct __length<T> {
99 static constexpr std::size_t value = 1;
100 };
101
102 const std::size_t Size;
103 };
104
105 } // end of anonymous namespace
106
// Defines the RegARM32::RegTable table with register information.
108 RegARM32::RegTableType RegARM32::RegTable[RegARM32::Reg_NUM] = {
109 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
110 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
111 {name, encode, \
112 cc_arg, scratch, \
113 preserved, stackptr, \
114 frameptr, isGPR, \
115 isInt, isI64Pair, \
116 isFP32, isFP64, \
117 isVec128, (SizeOf alias_init).size(), \
118 alias_init},
119 REGARM32_TABLE
120 #undef X
121 };
122
123 namespace {
124
125 // The following table summarizes the logic for lowering the icmp instruction
126 // for i32 and narrower types. Each icmp condition has a clear mapping to an
127 // ARM32 conditional move instruction.
128
129 const struct TableIcmp32_ {
130 CondARM32::Cond Mapping;
131 } TableIcmp32[] = {
132 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
133 {CondARM32::C_32},
134 ICMPARM32_TABLE
135 #undef X
136 };
137
138 // The following table summarizes the logic for lowering the icmp instruction
139 // for the i64 type. Two conditional moves are needed for setting to 1 or 0.
140 // The operands may need to be swapped, and there is a slight difference for
141 // signed vs unsigned (comparing hi vs lo first, and using cmp vs sbc).
142 const struct TableIcmp64_ {
143 bool IsSigned;
144 bool Swapped;
145 CondARM32::Cond C1, C2;
146 } TableIcmp64[] = {
147 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
148 {is_signed, swapped64, CondARM32::C1_64, CondARM32::C2_64},
149 ICMPARM32_TABLE
150 #undef X
151 };
152
CondARM32::Cond getIcmp32Mapping(InstIcmp::ICond Cond) {
154 assert(Cond < llvm::array_lengthof(TableIcmp32));
155 return TableIcmp32[Cond].Mapping;
156 }
157
158 // In some cases, there are x-macros tables for both high-level and low-level
159 // instructions/operands that use the same enum key value. The tables are kept
160 // separate to maintain a proper separation between abstraction layers. There
161 // is a risk that the tables could get out of sync if enum values are reordered
162 // or if entries are added or deleted. The following anonymous namespaces use
163 // static_asserts to ensure everything is kept in sync.
164
165 // Validate the enum values in ICMPARM32_TABLE.
166 namespace {
167 // Define a temporary set of enum values based on low-level table entries.
168 enum _icmp_ll_enum {
169 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
170 _icmp_ll_##val,
171 ICMPARM32_TABLE
172 #undef X
173 _num
174 };
175 // Define a set of constants based on high-level table entries.
176 #define X(tag, reverse, str) \
177 static constexpr int _icmp_hl_##tag = InstIcmp::tag;
178 ICEINSTICMP_TABLE
179 #undef X
180 // Define a set of constants based on low-level table entries, and ensure the
181 // table entry keys are consistent.
182 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
183 static_assert( \
184 _icmp_ll_##val == _icmp_hl_##val, \
185 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #val);
186 ICMPARM32_TABLE
187 #undef X
188 // Repeat the static asserts with respect to the high-level table entries in
189 // case the high-level table has extra entries.
190 #define X(tag, reverse, str) \
191 static_assert( \
192 _icmp_hl_##tag == _icmp_ll_##tag, \
193 "Inconsistency between ICMPARM32_TABLE and ICEINSTICMP_TABLE: " #tag);
194 ICEINSTICMP_TABLE
195 #undef X
196 } // end of anonymous namespace
197
198 // Stack alignment
199 const uint32_t ARM32_STACK_ALIGNMENT_BYTES = 16;
200
201 // Value is in bytes. Return Value adjusted to the next highest multiple of the
202 // stack alignment.
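// For example, with the 16-byte stack alignment, a Value of 20 is rounded up
// to 32, and a Value of 16 is returned unchanged.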
uint32_t applyStackAlignment(uint32_t Value) {
204 return Utils::applyAlignment(Value, ARM32_STACK_ALIGNMENT_BYTES);
205 }
206
207 // Value is in bytes. Return Value adjusted to the next highest multiple of the
208 // stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
210 // Use natural alignment, except that normally (non-NaCl) ARM only aligns
211 // vectors to 8 bytes.
212 // TODO(jvoung): Check this ...
213 size_t typeAlignInBytes = typeWidthInBytes(Ty);
214 if (isVectorType(Ty))
215 typeAlignInBytes = 8;
216 return Utils::applyAlignment(Value, typeAlignInBytes);
217 }
218
219 // Conservatively check if at compile time we know that the operand is
220 // definitely a non-zero integer.
bool isGuaranteedNonzeroInt(const Operand *Op) {
222 if (auto *Const = llvm::dyn_cast_or_null<ConstantInteger32>(Op)) {
223 return Const->getValue() != 0;
224 }
225 return false;
226 }
227
228 } // end of anonymous namespace
229
TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
231 static_assert(
232 (ARM32InstructionSet::End - ARM32InstructionSet::Begin) ==
233 (TargetInstructionSet::ARM32InstructionSet_End -
234 TargetInstructionSet::ARM32InstructionSet_Begin),
235 "ARM32InstructionSet range different from TargetInstructionSet");
236 if (Flags.getTargetInstructionSet() !=
237 TargetInstructionSet::BaseInstructionSet) {
238 InstructionSet = static_cast<ARM32InstructionSet>(
239 (Flags.getTargetInstructionSet() -
240 TargetInstructionSet::ARM32InstructionSet_Begin) +
241 ARM32InstructionSet::Begin);
242 }
243 }
244
245 namespace {
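// Each of the following constants counts how many registers of a class are
// used for argument passing. The X-macro expands to a sequence of unary
// "+1"/"+0" terms (one per table entry, "+1" when cc_arg is positive), which
// the compiler folds into a single constexpr value.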
246 constexpr SizeT NumGPRArgs =
247 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
248 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
249 +(((cc_arg) > 0) ? 1 : 0)
250 REGARM32_GPR_TABLE
251 #undef X
252 ;
253 std::array<RegNumT, NumGPRArgs> GPRArgInitializer;
254
255 constexpr SizeT NumI64Args =
256 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
257 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
258 +(((cc_arg) > 0) ? 1 : 0)
259 REGARM32_I64PAIR_TABLE
260 #undef X
261 ;
262 std::array<RegNumT, NumI64Args> I64ArgInitializer;
263
264 constexpr SizeT NumFP32Args =
265 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
266 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
267 +(((cc_arg) > 0) ? 1 : 0)
268 REGARM32_FP32_TABLE
269 #undef X
270 ;
271 std::array<RegNumT, NumFP32Args> FP32ArgInitializer;
272
273 constexpr SizeT NumFP64Args =
274 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
275 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
276 +(((cc_arg) > 0) ? 1 : 0)
277 REGARM32_FP64_TABLE
278 #undef X
279 ;
280 std::array<RegNumT, NumFP64Args> FP64ArgInitializer;
281
282 constexpr SizeT NumVec128Args =
283 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
284 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
+(((cc_arg) > 0) ? 1 : 0)
286 REGARM32_VEC128_TABLE
287 #undef X
288 ;
289 std::array<RegNumT, NumVec128Args> Vec128ArgInitializer;
290
const char *getRegClassName(RegClass C) {
292 auto ClassNum = static_cast<RegARM32::RegClassARM32>(C);
293 assert(ClassNum < RegARM32::RCARM32_NUM);
294 switch (ClassNum) {
295 default:
296 assert(C < RC_Target);
297 return regClassString(C);
298 // Add handling of new register classes below.
299 case RegARM32::RCARM32_QtoS:
300 return "QtoS";
301 }
302 }
303
304 } // end of anonymous namespace
305
TargetARM32::TargetARM32(Cfg *Func)
307 : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl),
308 CPUFeatures(getFlags()) {}
309
void TargetARM32::staticInit(GlobalContext *Ctx) {
311 RegNumT::setLimit(RegARM32::Reg_NUM);
// TODO: Limit this size, or do all bitsets need to be the same width?
313 SmallBitVector IntegerRegisters(RegARM32::Reg_NUM);
314 SmallBitVector I64PairRegisters(RegARM32::Reg_NUM);
315 SmallBitVector Float32Registers(RegARM32::Reg_NUM);
316 SmallBitVector Float64Registers(RegARM32::Reg_NUM);
317 SmallBitVector VectorRegisters(RegARM32::Reg_NUM);
318 SmallBitVector QtoSRegisters(RegARM32::Reg_NUM);
319 SmallBitVector InvalidRegisters(RegARM32::Reg_NUM);
320 const unsigned EncodedReg_q8 = RegARM32::RegTable[RegARM32::Reg_q8].Encoding;
321 for (int i = 0; i < RegARM32::Reg_NUM; ++i) {
322 const auto &Entry = RegARM32::RegTable[i];
323 IntegerRegisters[i] = Entry.IsInt;
324 I64PairRegisters[i] = Entry.IsI64Pair;
325 Float32Registers[i] = Entry.IsFP32;
326 Float64Registers[i] = Entry.IsFP64;
327 VectorRegisters[i] = Entry.IsVec128;
328 RegisterAliases[i].resize(RegARM32::Reg_NUM);
329 // TODO(eholk): It would be better to store a QtoS flag in the
330 // IceRegistersARM32 table than to compare their encodings here.
331 QtoSRegisters[i] = Entry.IsVec128 && Entry.Encoding < EncodedReg_q8;
332 for (int j = 0; j < Entry.NumAliases; ++j) {
333 assert(i == j || !RegisterAliases[i][Entry.Aliases[j]]);
334 RegisterAliases[i].set(Entry.Aliases[j]);
335 }
336 assert(RegisterAliases[i][i]);
337 if (Entry.CCArg <= 0) {
338 continue;
339 }
340 const auto RegNum = RegNumT::fromInt(i);
341 if (Entry.IsGPR) {
342 GPRArgInitializer[Entry.CCArg - 1] = RegNum;
343 } else if (Entry.IsI64Pair) {
344 I64ArgInitializer[Entry.CCArg - 1] = RegNum;
345 } else if (Entry.IsFP32) {
346 FP32ArgInitializer[Entry.CCArg - 1] = RegNum;
347 } else if (Entry.IsFP64) {
348 FP64ArgInitializer[Entry.CCArg - 1] = RegNum;
349 } else if (Entry.IsVec128) {
350 Vec128ArgInitializer[Entry.CCArg - 1] = RegNum;
351 }
352 }
353 TypeToRegisterSet[IceType_void] = InvalidRegisters;
354 TypeToRegisterSet[IceType_i1] = IntegerRegisters;
355 TypeToRegisterSet[IceType_i8] = IntegerRegisters;
356 TypeToRegisterSet[IceType_i16] = IntegerRegisters;
357 TypeToRegisterSet[IceType_i32] = IntegerRegisters;
358 TypeToRegisterSet[IceType_i64] = I64PairRegisters;
359 TypeToRegisterSet[IceType_f32] = Float32Registers;
360 TypeToRegisterSet[IceType_f64] = Float64Registers;
361 TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
362 TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
363 TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
364 TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
365 TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
366 TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
367 TypeToRegisterSet[IceType_v4f32] = VectorRegisters;
368 TypeToRegisterSet[RegARM32::RCARM32_QtoS] = QtoSRegisters;
369
370 for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
371 TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];
372
373 filterTypeToRegisterSet(
374 Ctx, RegARM32::Reg_NUM, TypeToRegisterSet,
375 llvm::array_lengthof(TypeToRegisterSet),
376 [](RegNumT RegNum) -> std::string {
377 // This function simply removes ", " from the
378 // register name.
379 std::string Name = RegARM32::getRegName(RegNum);
380 constexpr const char RegSeparator[] = ", ";
381 constexpr size_t RegSeparatorWidth =
382 llvm::array_lengthof(RegSeparator) - 1;
383 for (size_t Pos = Name.find(RegSeparator); Pos != std::string::npos;
384 Pos = Name.find(RegSeparator)) {
385 Name.replace(Pos, RegSeparatorWidth, "");
386 }
387 return Name;
388 },
389 getRegClassName);
390 }
391
392 namespace {
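// After register allocation, propagate the register pair assigned to each
// infinite-weight Variable64On32 to its Lo/Hi halves: Lo receives the first
// GPR of the pair and Hi receives the next one.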
void copyRegAllocFromInfWeightVariable64On32(const VarList &Vars) {
394 for (Variable *Var : Vars) {
395 auto *Var64 = llvm::dyn_cast<Variable64On32>(Var);
396 if (!Var64) {
397 // This is not the variable we are looking for.
398 continue;
399 }
// Only allow infinite-weight i64 temporaries to be register allocated.
401 assert(!Var64->hasReg() || Var64->mustHaveReg());
402 if (!Var64->hasReg()) {
403 continue;
404 }
405 const auto FirstReg =
406 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Var->getRegNum()));
407 // This assumes little endian.
408 Variable *Lo = Var64->getLo();
409 Variable *Hi = Var64->getHi();
410 assert(Lo->hasReg() == Hi->hasReg());
411 if (Lo->hasReg()) {
412 continue;
413 }
414 Lo->setRegNum(FirstReg);
415 Lo->setMustHaveReg();
416 Hi->setRegNum(RegNumT::fixme(FirstReg + 1));
417 Hi->setMustHaveReg();
418 }
419 }
420 } // end of anonymous namespace
421
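// Conservatively computes the stack space (in bytes) needed for the outgoing
// arguments of Call. Arguments that fit in registers per the calling
// convention contribute nothing; the rest are aligned per type and summed.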
uint32_t TargetARM32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
423 TargetARM32::CallingConv CC;
424 RegNumT DummyReg;
425 size_t OutArgsSizeBytes = 0;
426 for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
427 Operand *Arg = legalizeUndef(Call->getArg(i));
428 const Type Ty = Arg->getType();
429 if (isScalarIntegerType(Ty)) {
430 if (CC.argInGPR(Ty, &DummyReg)) {
431 continue;
432 }
433 } else {
434 if (CC.argInVFP(Ty, &DummyReg)) {
435 continue;
436 }
437 }
438
439 OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
440 OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
441 }
442
443 return applyStackAlignment(OutArgsSizeBytes);
444 }
445
void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
447 constexpr bool NoTailCall = false;
448 constexpr bool IsTargetHelperCall = true;
449
450 switch (Instr->getKind()) {
451 default:
452 return;
453 case Inst::Arithmetic: {
454 Variable *Dest = Instr->getDest();
455 const Type DestTy = Dest->getType();
456 const InstArithmetic::OpKind Op =
457 llvm::cast<InstArithmetic>(Instr)->getOp();
458 if (isVectorType(DestTy)) {
459 switch (Op) {
460 default:
461 break;
462 case InstArithmetic::Fdiv:
463 case InstArithmetic::Frem:
464 case InstArithmetic::Sdiv:
465 case InstArithmetic::Srem:
466 case InstArithmetic::Udiv:
467 case InstArithmetic::Urem:
468 scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
469 Instr->setDeleted();
470 return;
471 }
472 }
473 switch (DestTy) {
474 default:
475 return;
476 case IceType_i64: {
477 // Technically, ARM has its own aeabi routines, but we can use the
478 // non-aeabi routine as well. LLVM uses __aeabi_ldivmod for div, but uses
479 // the more standard __moddi3 for rem.
480 RuntimeHelper HelperID = RuntimeHelper::H_Num;
481 switch (Op) {
482 default:
483 return;
484 case InstArithmetic::Udiv:
485 HelperID = RuntimeHelper::H_udiv_i64;
486 break;
487 case InstArithmetic::Sdiv:
488 HelperID = RuntimeHelper::H_sdiv_i64;
489 break;
490 case InstArithmetic::Urem:
491 HelperID = RuntimeHelper::H_urem_i64;
492 break;
493 case InstArithmetic::Srem:
494 HelperID = RuntimeHelper::H_srem_i64;
495 break;
496 }
497 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
498 ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
499 constexpr SizeT MaxArgs = 2;
500 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
501 NoTailCall, IsTargetHelperCall);
502 Call->addArg(Instr->getSrc(0));
503 Call->addArg(Instr->getSrc(1));
504 Instr->setDeleted();
505 return;
506 }
507 case IceType_i32:
508 case IceType_i16:
509 case IceType_i8: {
510 const bool HasHWDiv = hasCPUFeature(TargetARM32Features::HWDivArm);
511 InstCast::OpKind CastKind;
512 RuntimeHelper HelperID = RuntimeHelper::H_Num;
513 switch (Op) {
514 default:
515 return;
516 case InstArithmetic::Udiv:
517 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_udiv_i32;
518 CastKind = InstCast::Zext;
519 break;
520 case InstArithmetic::Sdiv:
521 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_sdiv_i32;
522 CastKind = InstCast::Sext;
523 break;
524 case InstArithmetic::Urem:
525 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_urem_i32;
526 CastKind = InstCast::Zext;
527 break;
528 case InstArithmetic::Srem:
529 HelperID = HasHWDiv ? RuntimeHelper::H_Num : RuntimeHelper::H_srem_i32;
530 CastKind = InstCast::Sext;
531 break;
532 }
533 if (HelperID == RuntimeHelper::H_Num) {
534 // HelperID should only ever be undefined when the processor does not
535 // have a hardware divider. If any other helpers are ever introduced,
536 // the following assert will have to be modified.
537 assert(HasHWDiv);
538 return;
539 }
540 Operand *Src0 = Instr->getSrc(0);
541 Operand *Src1 = Instr->getSrc(1);
542 if (DestTy != IceType_i32) {
// Src0 and Src1 have to be zero- or sign-extended to i32. For Src0, we just
// insert an InstCast right before the call to the helper.
545 Variable *Src0_32 = Func->makeVariable(IceType_i32);
546 Context.insert<InstCast>(CastKind, Src0_32, Src0);
547 Src0 = Src0_32;
548
549 // For extending Src1, we will just insert an InstCast if Src1 is not a
550 // Constant. If it is, then we extend it here, and not during program
551 // runtime. This allows preambleDivRem to optimize-out the div-by-0
552 // check.
553 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
554 const int32_t ShAmt = (DestTy == IceType_i16) ? 16 : 24;
555 int32_t NewC = C->getValue();
556 if (CastKind == InstCast::Zext) {
557 NewC &= ~(0x80000000l >> ShAmt);
558 } else {
559 NewC = (NewC << ShAmt) >> ShAmt;
560 }
561 Src1 = Ctx->getConstantInt32(NewC);
562 } else {
563 Variable *Src1_32 = Func->makeVariable(IceType_i32);
564 Context.insert<InstCast>(CastKind, Src1_32, Src1);
565 Src1 = Src1_32;
566 }
567 }
568 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
569 ARM32HelpersPreamble[TargetHelper] = &TargetARM32::preambleDivRem;
570 constexpr SizeT MaxArgs = 2;
571 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
572 NoTailCall, IsTargetHelperCall);
573 assert(Src0->getType() == IceType_i32);
574 Call->addArg(Src0);
575 assert(Src1->getType() == IceType_i32);
576 Call->addArg(Src1);
577 Instr->setDeleted();
578 return;
579 }
580 case IceType_f64:
581 case IceType_f32: {
582 if (Op != InstArithmetic::Frem) {
583 return;
584 }
585 constexpr SizeT MaxArgs = 2;
586 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
587 DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
588 : RuntimeHelper::H_frem_f64);
589 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
590 NoTailCall, IsTargetHelperCall);
591 Call->addArg(Instr->getSrc(0));
592 Call->addArg(Instr->getSrc(1));
593 Instr->setDeleted();
594 return;
595 }
596 }
597 llvm::report_fatal_error("Control flow should never have reached here.");
598 }
599 case Inst::Cast: {
600 Variable *Dest = Instr->getDest();
601 Operand *Src0 = Instr->getSrc(0);
602 const Type DestTy = Dest->getType();
603 const Type SrcTy = Src0->getType();
604 auto *CastInstr = llvm::cast<InstCast>(Instr);
605 const InstCast::OpKind CastKind = CastInstr->getCastKind();
606
607 switch (CastKind) {
608 default:
609 return;
610 case InstCast::Fptosi:
611 case InstCast::Fptoui: {
612 if (DestTy != IceType_i64) {
613 return;
614 }
615 const bool DestIsSigned = CastKind == InstCast::Fptosi;
616 const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
617 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
618 Src0IsF32 ? (DestIsSigned ? RuntimeHelper::H_fptosi_f32_i64
619 : RuntimeHelper::H_fptoui_f32_i64)
620 : (DestIsSigned ? RuntimeHelper::H_fptosi_f64_i64
621 : RuntimeHelper::H_fptoui_f64_i64));
622 static constexpr SizeT MaxArgs = 1;
623 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
624 NoTailCall, IsTargetHelperCall);
625 Call->addArg(Src0);
626 Instr->setDeleted();
627 return;
628 }
629 case InstCast::Sitofp:
630 case InstCast::Uitofp: {
631 if (SrcTy != IceType_i64) {
632 return;
633 }
634 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
635 const bool DestIsF32 = isFloat32Asserting32Or64(Dest->getType());
636 Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
637 DestIsF32 ? (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f32
638 : RuntimeHelper::H_uitofp_i64_f32)
639 : (SourceIsSigned ? RuntimeHelper::H_sitofp_i64_f64
640 : RuntimeHelper::H_uitofp_i64_f64));
641 static constexpr SizeT MaxArgs = 1;
642 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
643 NoTailCall, IsTargetHelperCall);
644 Call->addArg(Src0);
645 Instr->setDeleted();
646 return;
647 }
648 case InstCast::Bitcast: {
649 if (DestTy == SrcTy) {
650 return;
651 }
652 Variable *CallDest = Dest;
653 RuntimeHelper HelperID = RuntimeHelper::H_Num;
654 switch (DestTy) {
655 default:
656 return;
657 case IceType_i8:
658 assert(SrcTy == IceType_v8i1);
659 HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
660 CallDest = Func->makeVariable(IceType_i32);
661 break;
662 case IceType_i16:
663 assert(SrcTy == IceType_v16i1);
664 HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
665 CallDest = Func->makeVariable(IceType_i32);
666 break;
667 case IceType_v8i1: {
668 assert(SrcTy == IceType_i8);
669 HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
670 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
671 // Arguments to functions are required to be at least 32 bits wide.
672 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
673 Src0 = Src0AsI32;
674 } break;
675 case IceType_v16i1: {
676 assert(SrcTy == IceType_i16);
677 HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
678 Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
679 // Arguments to functions are required to be at least 32 bits wide.
680 Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
681 Src0 = Src0AsI32;
682 } break;
683 }
684 constexpr SizeT MaxSrcs = 1;
685 InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
686 Call->addArg(Src0);
687 Context.insert(Call);
688 // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
689 // call result to the appropriate type as necessary.
690 if (CallDest->getType() != Dest->getType())
691 Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
692 Instr->setDeleted();
693 return;
694 }
695 case InstCast::Trunc: {
696 if (DestTy == SrcTy) {
697 return;
698 }
699 if (!isVectorType(SrcTy)) {
700 return;
701 }
702 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
703 assert(typeElementType(DestTy) == IceType_i1);
704 assert(isVectorIntegerType(SrcTy));
705 return;
706 }
707 case InstCast::Sext:
708 case InstCast::Zext: {
709 if (DestTy == SrcTy) {
710 return;
711 }
712 if (!isVectorType(DestTy)) {
713 return;
714 }
715 assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
716 assert(typeElementType(SrcTy) == IceType_i1);
717 assert(isVectorIntegerType(DestTy));
718 return;
719 }
720 }
721 llvm::report_fatal_error("Control flow should never have reached here.");
722 }
723 case Inst::IntrinsicCall: {
724 Variable *Dest = Instr->getDest();
725 auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
726 Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
727 switch (ID) {
728 default:
729 return;
730 case Intrinsics::Ctpop: {
731 Operand *Src0 = IntrinsicCall->getArg(0);
732 Operand *TargetHelper =
733 Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
734 ? RuntimeHelper::H_call_ctpop_i32
735 : RuntimeHelper::H_call_ctpop_i64);
736 static constexpr SizeT MaxArgs = 1;
737 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
738 NoTailCall, IsTargetHelperCall);
739 Call->addArg(Src0);
740 Instr->setDeleted();
741 if (Src0->getType() == IceType_i64) {
742 ARM32HelpersPostamble[TargetHelper] = &TargetARM32::postambleCtpop64;
743 }
744 return;
745 }
746 case Intrinsics::Longjmp: {
747 static constexpr SizeT MaxArgs = 2;
748 static constexpr Variable *NoDest = nullptr;
749 Operand *TargetHelper =
750 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
751 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
752 NoTailCall, IsTargetHelperCall);
753 Call->addArg(IntrinsicCall->getArg(0));
754 Call->addArg(IntrinsicCall->getArg(1));
755 Instr->setDeleted();
756 return;
757 }
758 case Intrinsics::Memcpy: {
759 // In the future, we could potentially emit an inline memcpy/memset, etc.
760 // for intrinsic calls w/ a known length.
761 static constexpr SizeT MaxArgs = 3;
762 static constexpr Variable *NoDest = nullptr;
763 Operand *TargetHelper =
764 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
765 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
766 NoTailCall, IsTargetHelperCall);
767 Call->addArg(IntrinsicCall->getArg(0));
768 Call->addArg(IntrinsicCall->getArg(1));
769 Call->addArg(IntrinsicCall->getArg(2));
770 Instr->setDeleted();
771 return;
772 }
773 case Intrinsics::Memmove: {
774 static constexpr SizeT MaxArgs = 3;
775 static constexpr Variable *NoDest = nullptr;
776 Operand *TargetHelper =
777 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
778 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
779 NoTailCall, IsTargetHelperCall);
780 Call->addArg(IntrinsicCall->getArg(0));
781 Call->addArg(IntrinsicCall->getArg(1));
782 Call->addArg(IntrinsicCall->getArg(2));
783 Instr->setDeleted();
784 return;
785 }
786 case Intrinsics::Memset: {
787 // The value operand needs to be extended to a stack slot size because the
788 // PNaCl ABI requires arguments to be at least 32 bits wide.
789 Operand *ValOp = IntrinsicCall->getArg(1);
790 assert(ValOp->getType() == IceType_i8);
791 Variable *ValExt = Func->makeVariable(stackSlotType());
792 Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
793
794 // Technically, ARM has its own __aeabi_memset, but we can use plain
795 // memset too. The value and size argument need to be flipped if we ever
796 // decide to use __aeabi_memset.
797 static constexpr SizeT MaxArgs = 3;
798 static constexpr Variable *NoDest = nullptr;
799 Operand *TargetHelper =
800 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
801 auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
802 NoTailCall, IsTargetHelperCall);
803 Call->addArg(IntrinsicCall->getArg(0));
804 Call->addArg(ValExt);
805 Call->addArg(IntrinsicCall->getArg(2));
806 Instr->setDeleted();
807 return;
808 }
809 case Intrinsics::NaClReadTP: {
810 if (SandboxingType == ST_NaCl) {
811 return;
812 }
813 static constexpr SizeT MaxArgs = 0;
814 Operand *TargetHelper =
815 SandboxingType == ST_Nonsfi
816 ? Ctx->getConstantExternSym(
817 Ctx->getGlobalString("__aeabi_read_tp"))
818 : Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
819 Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
820 IsTargetHelperCall);
821 Instr->setDeleted();
822 return;
823 }
824 case Intrinsics::Setjmp: {
825 static constexpr SizeT MaxArgs = 1;
826 Operand *TargetHelper =
827 Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
828 auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
829 NoTailCall, IsTargetHelperCall);
830 Call->addArg(IntrinsicCall->getArg(0));
831 Instr->setDeleted();
832 return;
833 }
834 }
835 llvm::report_fatal_error("Control flow should never have reached here.");
836 }
837 }
838 }
839
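// Scans every call in the function to compute MaxOutArgsSizeBytes, the
// largest out-args area any single call needs. The prolog reserves this space
// once so that individual call sites do not have to adjust sp.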
void TargetARM32::findMaxStackOutArgsSize() {
841 // MinNeededOutArgsBytes should be updated if the Target ever creates a
842 // high-level InstCall that requires more stack bytes.
843 constexpr size_t MinNeededOutArgsBytes = 0;
844 MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
845 for (CfgNode *Node : Func->getNodes()) {
846 Context.init(Node);
847 while (!Context.atEnd()) {
848 PostIncrLoweringContext PostIncrement(Context);
849 Inst *CurInstr = iteratorToInst(Context.getCur());
850 if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
851 SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
852 MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
853 }
854 }
855 }
856 }
857
void TargetARM32::createGotPtr() {
859 if (SandboxingType != ST_Nonsfi) {
860 return;
861 }
862 GotPtr = Func->makeVariable(IceType_i32);
863 }
864
void TargetARM32::insertGotPtrInitPlaceholder() {
866 if (SandboxingType != ST_Nonsfi) {
867 return;
868 }
869 assert(GotPtr != nullptr);
870 // We add the two placeholder instructions here. The first fakedefs T, an
871 // infinite-weight temporary, while the second fakedefs the GotPtr "using" T.
872 // This is needed because the GotPtr initialization, if needed, will require
873 // a register:
874 //
875 // movw reg, _GLOBAL_OFFSET_TABLE_ - 16 - .
876 // movt reg, _GLOBAL_OFFSET_TABLE_ - 12 - .
877 // add reg, pc, reg
878 // mov GotPtr, reg
879 //
880 // If GotPtr is not used, then both these pseudo-instructions are dce'd.
881 Variable *T = makeReg(IceType_i32);
882 Context.insert<InstFakeDef>(T);
883 Context.insert<InstFakeDef>(GotPtr, T);
884 }
885
886 GlobalString
TargetARM32::createGotoffRelocation(const ConstantRelocatable *CR) {
888 GlobalString CRName = CR->getName();
889 GlobalString CRGotoffName =
890 Ctx->getGlobalString("GOTOFF$" + Func->getFunctionName() + "$" + CRName);
891 if (KnownGotoffs.count(CRGotoffName) == 0) {
892 constexpr bool SuppressMangling = true;
893 auto *Global =
894 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
895 Global->setIsConstant(true);
896 Global->setName(CRName);
897 Func->getGlobalPool()->willNotBeEmitted(Global);
898
899 auto *Gotoff =
900 VariableDeclaration::create(Func->getGlobalPool(), SuppressMangling);
901 constexpr auto GotFixup = R_ARM_GOTOFF32;
902 Gotoff->setIsConstant(true);
903 Gotoff->addInitializer(VariableDeclaration::RelocInitializer::create(
904 Func->getGlobalPool(), Global, {RelocOffset::create(Ctx, 0)},
905 GotFixup));
906 Gotoff->setName(CRGotoffName);
907 Func->addGlobal(Gotoff);
908 KnownGotoffs.emplace(CRGotoffName);
909 }
910 return CRGotoffName;
911 }
912
void TargetARM32::materializeGotAddr(CfgNode *Node) {
914 if (SandboxingType != ST_Nonsfi) {
915 return;
916 }
917
918 // At first, we try to find the
919 // GotPtr = def T
920 // pseudo-instruction that we placed for defining the got ptr. That
921 // instruction is not just a place-holder for defining the GotPtr (thus
922 // keeping liveness consistent), but it is also located at a point where it is
923 // safe to materialize the got addr -- i.e., before loading parameters to
924 // registers, but after moving register parameters from their home location.
925 InstFakeDef *DefGotPtr = nullptr;
926 for (auto &Inst : Node->getInsts()) {
927 auto *FakeDef = llvm::dyn_cast<InstFakeDef>(&Inst);
928 if (FakeDef != nullptr && FakeDef->getDest() == GotPtr) {
929 DefGotPtr = FakeDef;
930 break;
931 }
932 }
933
934 if (DefGotPtr == nullptr || DefGotPtr->isDeleted()) {
935 return;
936 }
937
938 // The got addr needs to be materialized at the same point where DefGotPtr
939 // lives.
940 Context.setInsertPoint(instToIterator(DefGotPtr));
941 assert(DefGotPtr->getSrcSize() == 1);
942 auto *T = llvm::cast<Variable>(DefGotPtr->getSrc(0));
943 loadNamedConstantRelocatablePIC(Ctx->getGlobalString(GlobalOffsetTable), T,
944 [this, T](Variable *PC) { _add(T, PC, T); });
945 _mov(GotPtr, T);
946 DefGotPtr->setDeleted();
947 }
948
void TargetARM32::loadNamedConstantRelocatablePIC(
950 GlobalString Name, Variable *Register,
951 std::function<void(Variable *PC)> Finish) {
952 assert(SandboxingType == ST_Nonsfi);
// We makeReg() here instead of getPhysicalRegister() because the latter ends
// up creating multi-block temporaries that liveness fails to validate.
955 auto *PC = makeReg(IceType_i32, RegARM32::Reg_pc);
956
957 auto *AddPcReloc = RelocOffset::create(Ctx);
958 AddPcReloc->setSubtract(true);
959 auto *AddPcLabel = InstARM32Label::create(Func, this);
960 AddPcLabel->setRelocOffset(AddPcReloc);
961
962 auto *MovwReloc = RelocOffset::create(Ctx);
963 auto *MovwLabel = InstARM32Label::create(Func, this);
964 MovwLabel->setRelocOffset(MovwReloc);
965
966 auto *MovtReloc = RelocOffset::create(Ctx);
967 auto *MovtLabel = InstARM32Label::create(Func, this);
968 MovtLabel->setRelocOffset(MovtReloc);
969
// The EmitStrings for these constant relocatables have hardcoded offsets
// attached to them. This could be dangerous if, e.g., we ever implemented
// instruction scheduling, but llvm-mc currently does not support
973 //
974 // movw reg, #:lower16:(Symbol - Label - Number)
975 // movt reg, #:upper16:(Symbol - Label - Number)
976 //
977 // relocations.
978 static constexpr RelocOffsetT PcOffset = -8;
979 auto *CRLower = Ctx->getConstantSymWithEmitString(
980 PcOffset, {MovwReloc, AddPcReloc}, Name, Name + " -16");
981 auto *CRUpper = Ctx->getConstantSymWithEmitString(
982 PcOffset, {MovtReloc, AddPcReloc}, Name, Name + " -12");
983
984 Context.insert(MovwLabel);
985 _movw(Register, CRLower);
986 Context.insert(MovtLabel);
987 _movt(Register, CRUpper);
988 // PC = fake-def to keep liveness consistent.
989 Context.insert<InstFakeDef>(PC);
990 Context.insert(AddPcLabel);
991 Finish(PC);
992 }
993
void TargetARM32::translateO2() {
995 TimerMarker T(TimerStack::TT_O2, Func);
996
997 // TODO(stichnot): share passes with other targets?
998 // https://code.google.com/p/nativeclient/issues/detail?id=4094
999 if (SandboxingType == ST_Nonsfi) {
1000 createGotPtr();
1001 }
1002 genTargetHelperCalls();
1003 findMaxStackOutArgsSize();
1004
// Merge Alloca instructions, and lay out the stack.
1006 static constexpr bool SortAndCombineAllocas = true;
1007 Func->processAllocas(SortAndCombineAllocas);
1008 Func->dump("After Alloca processing");
1009
1010 if (!getFlags().getEnablePhiEdgeSplit()) {
1011 // Lower Phi instructions.
1012 Func->placePhiLoads();
1013 if (Func->hasError())
1014 return;
1015 Func->placePhiStores();
1016 if (Func->hasError())
1017 return;
1018 Func->deletePhis();
1019 if (Func->hasError())
1020 return;
1021 Func->dump("After Phi lowering");
1022 }
1023
1024 // Address mode optimization.
1025 Func->getVMetadata()->init(VMK_SingleDefs);
1026 Func->doAddressOpt();
1027 Func->materializeVectorShuffles();
1028
1029 // Argument lowering
1030 Func->doArgLowering();
1031
1032 // Target lowering. This requires liveness analysis for some parts of the
1033 // lowering decisions, such as compare/branch fusing. If non-lightweight
1034 // liveness analysis is used, the instructions need to be renumbered first.
1035 // TODO: This renumbering should only be necessary if we're actually
1036 // calculating live intervals, which we only do for register allocation.
1037 Func->renumberInstructions();
1038 if (Func->hasError())
1039 return;
1040
1041 // TODO: It should be sufficient to use the fastest liveness calculation,
1042 // i.e. livenessLightweight(). However, for some reason that slows down the
1043 // rest of the translation. Investigate.
1044 Func->liveness(Liveness_Basic);
1045 if (Func->hasError())
1046 return;
1047 Func->dump("After ARM32 address mode opt");
1048
1049 if (SandboxingType == ST_Nonsfi) {
1050 insertGotPtrInitPlaceholder();
1051 }
1052 Func->genCode();
1053 if (Func->hasError())
1054 return;
1055 Func->dump("After ARM32 codegen");
1056
1057 // Register allocation. This requires instruction renumbering and full
1058 // liveness analysis.
1059 Func->renumberInstructions();
1060 if (Func->hasError())
1061 return;
1062 Func->liveness(Liveness_Intervals);
1063 if (Func->hasError())
1064 return;
1065 // The post-codegen dump is done here, after liveness analysis and associated
1066 // cleanup, to make the dump cleaner and more useful.
1067 Func->dump("After initial ARM32 codegen");
1068 // Validate the live range computations. The expensive validation call is
1069 // deliberately only made when assertions are enabled.
1070 assert(Func->validateLiveness());
1071 Func->getVMetadata()->init(VMK_All);
1072 regAlloc(RAK_Global);
1073 if (Func->hasError())
1074 return;
1075
1076 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1077 Func->dump("After linear scan regalloc");
1078
1079 if (getFlags().getEnablePhiEdgeSplit()) {
1080 Func->advancedPhiLowering();
1081 Func->dump("After advanced Phi lowering");
1082 }
1083
1084 ForbidTemporaryWithoutReg _(this);
1085
1086 // Stack frame mapping.
1087 Func->genFrame();
1088 if (Func->hasError())
1089 return;
1090 Func->dump("After stack frame mapping");
1091
1092 postLowerLegalization();
1093 if (Func->hasError())
1094 return;
1095 Func->dump("After postLowerLegalization");
1096
1097 Func->contractEmptyNodes();
1098 Func->reorderNodes();
1099
1100 // Branch optimization. This needs to be done just before code emission. In
1101 // particular, no transformations that insert or reorder CfgNodes should be
1102 // done after branch optimization. We go ahead and do it before nop insertion
1103 // to reduce the amount of work needed for searching for opportunities.
1104 Func->doBranchOpt();
1105 Func->dump("After branch optimization");
1106
1107 // Nop insertion
1108 if (getFlags().getShouldDoNopInsertion()) {
1109 Func->doNopInsertion();
1110 }
1111 }
1112
void TargetARM32::translateOm1() {
1114 TimerMarker T(TimerStack::TT_Om1, Func);
1115
1116 // TODO(stichnot): share passes with other targets?
1117 if (SandboxingType == ST_Nonsfi) {
1118 createGotPtr();
1119 }
1120
1121 genTargetHelperCalls();
1122 findMaxStackOutArgsSize();
1123
1124 // Do not merge Alloca instructions, and lay out the stack.
1125 static constexpr bool DontSortAndCombineAllocas = false;
1126 Func->processAllocas(DontSortAndCombineAllocas);
1127 Func->dump("After Alloca processing");
1128
1129 Func->placePhiLoads();
1130 if (Func->hasError())
1131 return;
1132 Func->placePhiStores();
1133 if (Func->hasError())
1134 return;
1135 Func->deletePhis();
1136 if (Func->hasError())
1137 return;
1138 Func->dump("After Phi lowering");
1139
1140 Func->doArgLowering();
1141
1142 if (SandboxingType == ST_Nonsfi) {
1143 insertGotPtrInitPlaceholder();
1144 }
1145 Func->genCode();
1146 if (Func->hasError())
1147 return;
1148 Func->dump("After initial ARM32 codegen");
1149
1150 regAlloc(RAK_InfOnly);
1151 if (Func->hasError())
1152 return;
1153
1154 copyRegAllocFromInfWeightVariable64On32(Func->getVariables());
1155 Func->dump("After regalloc of infinite-weight variables");
1156
1157 ForbidTemporaryWithoutReg _(this);
1158
1159 Func->genFrame();
1160 if (Func->hasError())
1161 return;
1162 Func->dump("After stack frame mapping");
1163
1164 postLowerLegalization();
1165 if (Func->hasError())
1166 return;
1167 Func->dump("After postLowerLegalization");
1168
1169 // Nop insertion
1170 if (getFlags().getShouldDoNopInsertion()) {
1171 Func->doNopInsertion();
1172 }
1173 }
1174
uint32_t TargetARM32::getStackAlignment() const {
1176 return ARM32_STACK_ALIGNMENT_BYTES;
1177 }
1178
bool TargetARM32::doBranchOpt(Inst *I, const CfgNode *NextNode) {
1180 if (auto *Br = llvm::dyn_cast<InstARM32Br>(I)) {
1181 return Br->optimizeBranch(NextNode);
1182 }
1183 return false;
1184 }
1185
const char *TargetARM32::getRegName(RegNumT RegNum, Type Ty) const {
1187 (void)Ty;
1188 return RegARM32::getRegName(RegNum);
1189 }
1190
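// Returns (creating on first use) the Variable that represents physical
// register RegNum with type Ty. When Ty is IceType_void, a default type is
// chosen from the register class: f32 for S registers, f64 for D registers,
// v4i32 for Q registers, and i32 otherwise.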
Variable *TargetARM32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1192 static const Type DefaultType[] = {
1193 #define X(val, encode, name, cc_arg, scratch, preserved, stackptr, frameptr, \
1194 isGPR, isInt, isI64Pair, isFP32, isFP64, isVec128, alias_init) \
1195 (isFP32) \
1196 ? IceType_f32 \
1197 : ((isFP64) ? IceType_f64 : ((isVec128 ? IceType_v4i32 : IceType_i32))),
1198 REGARM32_TABLE
1199 #undef X
1200 };
1201
1202 if (Ty == IceType_void) {
1203 assert(unsigned(RegNum) < llvm::array_lengthof(DefaultType));
1204 Ty = DefaultType[RegNum];
1205 }
1206 if (PhysicalRegisters[Ty].empty())
1207 PhysicalRegisters[Ty].resize(RegARM32::Reg_NUM);
1208 assert(unsigned(RegNum) < PhysicalRegisters[Ty].size());
1209 Variable *Reg = PhysicalRegisters[Ty][RegNum];
1210 if (Reg == nullptr) {
1211 Reg = Func->makeVariable(Ty);
1212 Reg->setRegNum(RegNum);
1213 PhysicalRegisters[Ty][RegNum] = Reg;
1214 // Specially mark a named physical register as an "argument" so that it is
1215 // considered live upon function entry. Otherwise it's possible to get
1216 // liveness validation errors for saving callee-save registers.
1217 Func->addImplicitArg(Reg);
1218 // Don't bother tracking the live range of a named physical register.
1219 Reg->setIgnoreLiveness();
1220 }
1221 return Reg;
1222 }
1223
void TargetARM32::emitJumpTable(const Cfg *Func,
1225 const InstJumpTable *JumpTable) const {
1226 (void)Func;
1227 (void)JumpTable;
1228 UnimplementedError(getFlags());
1229 }
1230
void TargetARM32::emitVariable(const Variable *Var) const {
1232 if (!BuildDefs::dump())
1233 return;
1234 Ostream &Str = Ctx->getStrEmit();
1235 if (Var->hasReg()) {
1236 Str << getRegName(Var->getRegNum(), Var->getType());
1237 return;
1238 }
1239 if (Var->mustHaveReg()) {
1240 llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1241 ") has no register assigned - function " +
1242 Func->getFunctionName());
1243 }
1244 assert(!Var->isRematerializable());
1245 int32_t Offset = Var->getStackOffset();
1246 auto BaseRegNum = Var->getBaseRegNum();
1247 if (BaseRegNum.hasNoValue()) {
1248 BaseRegNum = getFrameOrStackReg();
1249 }
1250 const Type VarTy = Var->getType();
1251 Str << "[" << getRegName(BaseRegNum, VarTy);
1252 if (Offset != 0) {
1253 Str << ", #" << Offset;
1254 }
1255 Str << "]";
1256 }
1257
TargetARM32::CallingConv::CallingConv()
1259 : GPRegsUsed(RegARM32::Reg_NUM),
1260 GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
1261 I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
1262 VFPRegsUsed(RegARM32::Reg_NUM),
1263 FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
1264 FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()),
1265 Vec128Args(Vec128ArgInitializer.rbegin(), Vec128ArgInitializer.rend()) {}
1266
bool TargetARM32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
1268 CfgVector<RegNumT> *Source;
1269
1270 switch (Ty) {
1271 default: {
1272 assert(isScalarIntegerType(Ty));
1273 Source = &GPRArgs;
1274 } break;
1275 case IceType_i64: {
1276 Source = &I64Args;
1277 } break;
1278 }
1279
1280 discardUnavailableGPRsAndTheirAliases(Source);
1281
1282 if (Source->empty()) {
1283 GPRegsUsed.set();
1284 return false;
1285 }
1286
1287 *Reg = Source->back();
1288 // Note that we don't Source->pop_back() here. This is intentional. Notice how
1289 // we mark all of Reg's aliases as Used. So, for the next argument,
1290 // Source->back() is marked as unavailable, and it is thus implicitly popped
1291 // from the stack.
1292 GPRegsUsed |= RegisterAliases[*Reg];
1293 return true;
1294 }
1295
// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
1297 // i32) will have the first argument in r0, the second in r1-r2, and the third
1298 // on the stack. To model this behavior, whenever we pop a register from Regs,
1299 // we remove all of its aliases from the pool of available GPRs. This has the
1300 // effect of computing the "closure" on the GPR registers.
void TargetARM32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1302 CfgVector<RegNumT> *Regs) {
1303 while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1304 GPRegsUsed |= RegisterAliases[Regs->back()];
1305 Regs->pop_back();
1306 }
1307 }
1308
bool TargetARM32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
1310 CfgVector<RegNumT> *Source;
1311
1312 switch (Ty) {
1313 default: {
1314 assert(isVectorType(Ty));
1315 Source = &Vec128Args;
1316 } break;
1317 case IceType_f32: {
1318 Source = &FP32Args;
1319 } break;
1320 case IceType_f64: {
1321 Source = &FP64Args;
1322 } break;
1323 }
1324
1325 discardUnavailableVFPRegs(Source);
1326
1327 if (Source->empty()) {
1328 VFPRegsUsed.set();
1329 return false;
1330 }
1331
1332 *Reg = Source->back();
1333 VFPRegsUsed |= RegisterAliases[*Reg];
1334 return true;
1335 }
1336
1337 // Arguments in VFP registers are not packed, so we don't mark the popped
1338 // registers' aliases as unavailable.
void TargetARM32::CallingConv::discardUnavailableVFPRegs(
1340 CfgVector<RegNumT> *Regs) {
1341 while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1342 Regs->pop_back();
1343 }
1344 }
1345
void TargetARM32::lowerArguments() {
1347 VarList &Args = Func->getArgs();
1348 TargetARM32::CallingConv CC;
1349
1350 // For each register argument, replace Arg in the argument list with the home
1351 // register. Then generate an instruction in the prolog to copy the home
1352 // register to the assigned location of Arg.
1353 Context.init(Func->getEntryNode());
1354 Context.setInsertPoint(Context.getCur());
1355
1356 for (SizeT I = 0, E = Args.size(); I < E; ++I) {
1357 Variable *Arg = Args[I];
1358 Type Ty = Arg->getType();
1359 RegNumT RegNum;
1360 if (isScalarIntegerType(Ty)) {
1361 if (!CC.argInGPR(Ty, &RegNum)) {
1362 continue;
1363 }
1364 } else {
1365 if (!CC.argInVFP(Ty, &RegNum)) {
1366 continue;
1367 }
1368 }
1369
1370 Variable *RegisterArg = Func->makeVariable(Ty);
1371 if (BuildDefs::dump()) {
1372 RegisterArg->setName(Func, "home_reg:" + Arg->getName());
1373 }
1374 RegisterArg->setIsArg();
1375 Arg->setIsArg(false);
1376 Args[I] = RegisterArg;
1377 switch (Ty) {
1378 default: {
1379 RegisterArg->setRegNum(RegNum);
1380 } break;
1381 case IceType_i64: {
1382 auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
1383 RegisterArg64->initHiLo(Func);
1384 RegisterArg64->getLo()->setRegNum(
1385 RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(RegNum)));
1386 RegisterArg64->getHi()->setRegNum(
1387 RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(RegNum)));
1388 } break;
1389 }
1390 Context.insert<InstAssign>(Arg, RegisterArg);
1391 }
1392 }
1393
1394 // Helper function for addProlog().
1395 //
1396 // This assumes Arg is an argument passed on the stack. This sets the frame
1397 // offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
1398 // I64 arg that has been split into Lo and Hi components, it calls itself
1399 // recursively on the components, taking care to handle Lo first because of the
1400 // little-endian architecture. Lastly, this function generates an instruction
1401 // to copy Arg into its assigned register if applicable.
void TargetARM32::finishArgumentLowering(Variable *Arg, Variable *FramePtr,
1403 size_t BasicFrameOffset,
1404 size_t *InArgsSizeBytes) {
1405 const Type Ty = Arg->getType();
1406 *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);
1407
1408 if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
1409 Variable *const Lo = Arg64On32->getLo();
1410 Variable *const Hi = Arg64On32->getHi();
1411 finishArgumentLowering(Lo, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1412 finishArgumentLowering(Hi, FramePtr, BasicFrameOffset, InArgsSizeBytes);
1413 return;
1414 }
1415 assert(Ty != IceType_i64);
1416
1417 const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
1418 *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);
1419
1420 if (!Arg->hasReg()) {
1421 Arg->setStackOffset(ArgStackOffset);
1422 return;
1423 }
1424
1425 // If the argument variable has been assigned a register, we need to copy the
1426 // value from the stack slot.
1427 Variable *Parameter = Func->makeVariable(Ty);
1428 Parameter->setMustNotHaveReg();
1429 Parameter->setStackOffset(ArgStackOffset);
1430 _mov(Arg, Parameter);
1431 }
1432
Type TargetARM32::stackSlotType() { return IceType_i32; }
1434
void TargetARM32::addProlog(CfgNode *Node) {
1436 // Stack frame layout:
1437 //
1438 // +------------------------+
1439 // | 1. preserved registers |
1440 // +------------------------+
1441 // | 2. padding |
1442 // +------------------------+ <--- FramePointer (if used)
1443 // | 3. global spill area |
1444 // +------------------------+
1445 // | 4. padding |
1446 // +------------------------+
1447 // | 5. local spill area |
1448 // +------------------------+
1449 // | 6. padding |
1450 // +------------------------+
1451 // | 7. allocas (variable) |
1452 // +------------------------+
1453 // | 8. padding |
1454 // +------------------------+
1455 // | 9. out args |
1456 // +------------------------+ <--- StackPointer
1457 //
1458 // The following variables record the size in bytes of the given areas:
1459 // * PreservedRegsSizeBytes: area 1
1460 // * SpillAreaPaddingBytes: area 2
1461 // * GlobalsSize: area 3
1462 // * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1463 // * LocalsSpillAreaSize: area 5
1464 // * SpillAreaSizeBytes: areas 2 - 6, and 9
1465 // * MaxOutArgsSizeBytes: area 9
1466 //
1467 // Determine stack frame offsets for each Variable without a register
1468 // assignment. This can be done as one variable per stack slot. Or, do
1469 // coalescing by running the register allocator again with an infinite set of
1470 // registers (as a side effect, this gives variables a second chance at
1471 // physical register assignment).
1472 //
1473 // A middle ground approach is to leverage sparsity and allocate one block of
1474 // space on the frame for globals (variables with multi-block lifetime), and
1475 // one block to share for locals (single-block lifetime).
1476
1477 Context.init(Node);
1478 Context.setInsertPoint(Context.getCur());
1479
1480 SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1481 RegsUsed = SmallBitVector(CalleeSaves.size());
1482 VarList SortedSpilledVariables;
1483 size_t GlobalsSize = 0;
1484 // If there is a separate locals area, this represents that area. Otherwise
1485 // it counts any variable not counted by GlobalsSize.
1486 SpillAreaSizeBytes = 0;
1487 // If there is a separate locals area, this specifies the alignment for it.
1488 uint32_t LocalsSlotsAlignmentBytes = 0;
// The entire spill locations area gets aligned to the largest natural
// alignment of the variables that have a spill slot.
1491 uint32_t SpillAreaAlignmentBytes = 0;
1492 // For now, we don't have target-specific variables that need special
1493 // treatment (no stack-slot-linked SpillVariable type).
1494 std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1495 static constexpr bool AssignStackSlot = false;
1496 static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1497 if (llvm::isa<Variable64On32>(Var)) {
1498 return DontAssignStackSlot;
1499 }
1500 return AssignStackSlot;
1501 };
1502
1503 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1504 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1505 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1506 &LocalsSlotsAlignmentBytes, TargetVarHook);
1507 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1508 SpillAreaSizeBytes += GlobalsSize;
1509
1510 // Add push instructions for preserved registers. On ARM, "push" can push a
1511 // whole list of GPRs via a bitmask (0-15). Unlike x86, ARM also has
1512 // callee-saved float/vector registers.
1513 //
1514 // The "vpush" instruction can handle a whole list of float/vector registers,
1515 // but it only handles contiguous sequences of registers by specifying the
1516 // start and the length.
1517 PreservedGPRs.reserve(CalleeSaves.size());
1518 PreservedSRegs.reserve(CalleeSaves.size());
1519
1520 // Consider FP and LR as callee-save / used as needed.
1521 if (UsesFramePointer) {
1522 if (RegsUsed[RegARM32::Reg_fp]) {
1523 llvm::report_fatal_error("Frame pointer has been used.");
1524 }
1525 CalleeSaves[RegARM32::Reg_fp] = true;
1526 RegsUsed[RegARM32::Reg_fp] = true;
1527 }
1528 if (!MaybeLeafFunc) {
1529 CalleeSaves[RegARM32::Reg_lr] = true;
1530 RegsUsed[RegARM32::Reg_lr] = true;
1531 }
1532
1533 // Make two passes over the used registers. The first pass records all the
1534 // used registers -- and their aliases. Then, we figure out which GPRs and
1535 // VFP S registers should be saved. We don't bother saving D/Q registers
1536 // because their uses are recorded as S regs uses.
1537 SmallBitVector ToPreserve(RegARM32::Reg_NUM);
1538 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1539 if (NeedSandboxing && i == RegARM32::Reg_r9) {
1540 // r9 is never updated in sandboxed code.
1541 continue;
1542 }
1543 if (CalleeSaves[i] && RegsUsed[i]) {
1544 ToPreserve |= RegisterAliases[i];
1545 }
1546 }
1547
1548 uint32_t NumCallee = 0;
1549 size_t PreservedRegsSizeBytes = 0;
1550
1551 // RegClasses is a tuple of
1552 //
1553 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1554 //
1555 // We use this tuple to figure out which register we should push/pop during
1556 // prolog/epilog.
1557 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1558 const RegClassType RegClasses[] = {
1559 RegClassType(RegARM32::Reg_GPR_First, RegARM32::Reg_GPR_Last,
1560 &PreservedGPRs),
1561 RegClassType(RegARM32::Reg_SREG_First, RegARM32::Reg_SREG_Last,
1562 &PreservedSRegs)};
1563 for (const auto &RegClass : RegClasses) {
1564 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1565 const uint32_t LastRegInClass = std::get<1>(RegClass);
1566 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1567 for (uint32_t Reg = FirstRegInClass; Reg <= LastRegInClass; ++Reg) {
1568 if (!ToPreserve[Reg]) {
1569 continue;
1570 }
1571 ++NumCallee;
1572 Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1573 PreservedRegsSizeBytes +=
1574 typeWidthInBytesOnStack(PhysicalRegister->getType());
1575 PreservedRegsInClass->push_back(PhysicalRegister);
1576 }
1577 }
1578
1579 Ctx->statsUpdateRegistersSaved(NumCallee);
1580 if (!PreservedSRegs.empty())
1581 _push(PreservedSRegs);
1582 if (!PreservedGPRs.empty())
1583 _push(PreservedGPRs);
1584
1585 // Generate "mov FP, SP" if needed.
1586 if (UsesFramePointer) {
1587 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1588 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1589 _mov(FP, SP);
1590 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1591 Context.insert<InstFakeUse>(FP);
1592 }
1593
1594 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1595 // after the preserved registers and before the spill areas.
1596 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1597 // locals area if they are separate.
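  // Roughly, the frame built here is laid out as follows (higher addresses
  // first; the exact padding amounts are computed by the code below):
  //
  //   [ incoming stack arguments ]
  //   [ preserved GPRs / S regs  ]
  //   [ spill area padding       ]
  //   [ globals spill area       ]
  //   [ globals/locals padding   ]
  //   [ locals spill area        ]
  //   [ SP alignment padding     ]
  //   [ fixed allocas            ]
  //   [ outgoing args area       ]  <- SP after the "sub sp, ..." below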
1598 assert(SpillAreaAlignmentBytes <= ARM32_STACK_ALIGNMENT_BYTES);
1599 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1600 uint32_t SpillAreaPaddingBytes = 0;
1601 uint32_t LocalsSlotsPaddingBytes = 0;
1602 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1603 GlobalsSize, LocalsSlotsAlignmentBytes,
1604 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1605 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1606 uint32_t GlobalsAndSubsequentPaddingSize =
1607 GlobalsSize + LocalsSlotsPaddingBytes;
1608
1609   // Add the out args space to the stack, and align SP if necessary.
1610 if (!NeedsStackAlignment) {
1611 SpillAreaSizeBytes += MaxOutArgsSizeBytes;
1612 } else {
1613 uint32_t StackOffset = PreservedRegsSizeBytes;
1614 uint32_t StackSize = applyStackAlignment(StackOffset + SpillAreaSizeBytes);
1615 StackSize = applyStackAlignment(StackSize + MaxOutArgsSizeBytes);
1616 SpillAreaSizeBytes = StackSize - StackOffset;
1617 }
1618
1619 // Combine fixed alloca with SpillAreaSize.
1620 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1621
1622 // Generate "sub sp, SpillAreaSizeBytes"
1623 if (SpillAreaSizeBytes) {
1624 // Use the scratch register if needed to legalize the immediate.
1625 Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1626 Legal_Reg | Legal_Flex, getReservedTmpReg());
1627 Sandboxer(this).sub_sp(SubAmount);
1628 if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
1629 Sandboxer(this).align_sp(FixedAllocaAlignBytes);
1630 }
1631 }
1632
1633 Ctx->statsUpdateFrameBytes(SpillAreaSizeBytes);
1634
1635 // Fill in stack offsets for stack args, and copy args into registers for
1636 // those that were register-allocated. Args are pushed right to left, so
1637 // Arg[0] is closest to the stack/frame pointer.
1638 Variable *FramePtr = getPhysicalRegister(getFrameOrStackReg());
1639 size_t BasicFrameOffset = PreservedRegsSizeBytes;
1640 if (!UsesFramePointer)
1641 BasicFrameOffset += SpillAreaSizeBytes;
1642
1643 materializeGotAddr(Node);
1644
1645 const VarList &Args = Func->getArgs();
1646 size_t InArgsSizeBytes = 0;
1647 TargetARM32::CallingConv CC;
1648 for (Variable *Arg : Args) {
1649 RegNumT DummyReg;
1650 const Type Ty = Arg->getType();
1651
1652 // Skip arguments passed in registers.
1653 if (isScalarIntegerType(Ty)) {
1654 if (CC.argInGPR(Ty, &DummyReg)) {
1655 continue;
1656 }
1657 } else {
1658 if (CC.argInVFP(Ty, &DummyReg)) {
1659 continue;
1660 }
1661 }
1662 finishArgumentLowering(Arg, FramePtr, BasicFrameOffset, &InArgsSizeBytes);
1663 }
1664
1665 // Fill in stack offsets for locals.
1666 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1667 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize,
1668 UsesFramePointer);
1669 this->HasComputedFrame = true;
1670
1671 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1672 OstreamLocker _(Func->getContext());
1673 Ostream &Str = Func->getContext()->getStrDump();
1674
1675 Str << "Stack layout:\n";
1676 uint32_t SPAdjustmentPaddingSize =
1677 SpillAreaSizeBytes - LocalsSpillAreaSize -
1678 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1679 MaxOutArgsSizeBytes;
1680 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1681 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1682 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1683 << " globals spill area = " << GlobalsSize << " bytes\n"
1684 << " globals-locals spill areas intermediate padding = "
1685 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1686 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1687 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1688
1689 Str << "Stack details:\n"
1690 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1691 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1692 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1693 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1694 << " bytes\n"
1695 << " is FP based = " << UsesFramePointer << "\n";
1696 }
1697 }
1698
1699 void TargetARM32::addEpilog(CfgNode *Node) {
1700 InstList &Insts = Node->getInsts();
1701 InstList::reverse_iterator RI, E;
1702 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1703 if (llvm::isa<InstARM32Ret>(*RI))
1704 break;
1705 }
1706 if (RI == E)
1707 return;
1708
1709 // Convert the reverse_iterator position into its corresponding (forward)
1710 // iterator position.
1711 InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1712 --InsertPoint;
1713 Context.init(Node);
1714 Context.setInsertPoint(InsertPoint);
1715
1716 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
1717 if (UsesFramePointer) {
1718 Variable *FP = getPhysicalRegister(RegARM32::Reg_fp);
1719 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1720 // use of SP before the assignment of SP=FP keeps previous SP adjustments
1721 // from being dead-code eliminated.
1722 Context.insert<InstFakeUse>(SP);
1723 Sandboxer(this).reset_sp(FP);
1724 } else {
1725 // add SP, SpillAreaSizeBytes
1726 if (SpillAreaSizeBytes) {
1727 // Use the scratch register if needed to legalize the immediate.
1728 Operand *AddAmount =
1729 legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
1730 Legal_Reg | Legal_Flex, getReservedTmpReg());
1731 Sandboxer(this).add_sp(AddAmount);
1732 }
1733 }
1734
1735 if (!PreservedGPRs.empty())
1736 _pop(PreservedGPRs);
1737 if (!PreservedSRegs.empty())
1738 _pop(PreservedSRegs);
1739
1740 if (!getFlags().getUseSandboxing())
1741 return;
1742
1743 // Change the original ret instruction into a sandboxed return sequence.
1744 //
1745 // bundle_lock
1746 // bic lr, #0xc000000f
1747 // bx lr
1748 // bundle_unlock
1749 //
1750   // This isn't just aligning to getBundleAlignLog2Bytes(); the return address
1751   // also needs to be restricted to the lower 1GB.
1752 Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
1753 Variable *RetValue = nullptr;
1754 if (RI->getSrcSize())
1755 RetValue = llvm::cast<Variable>(RI->getSrc(0));
1756
1757 Sandboxer(this).ret(LR, RetValue);
1758
1759 RI->setDeleted();
1760 }
1761
1762 bool TargetARM32::isLegalMemOffset(Type Ty, int32_t Offset) const {
1763 constexpr bool ZeroExt = false;
1764 return OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset);
1765 }
1766
1767 Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(
1768 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1769 // Legalize will likely need a movw/movt combination, but if the top bits are
1770 // all 0 from negating the offset and subtracting, we could use that instead.
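  // For example, Offset == -8 gives -Offset == 8, whose top 16 bits are clear,
  // so the constant can be encoded cheaply (here even as a flex immediate) and
  // we emit a single sub from Base instead of a movw/movt pair plus an add.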
1771 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1772 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1773 if (ShouldSub) {
1774 Operand *OffsetVal =
1775 Target->legalize(Target->Ctx->getConstantInt32(-Offset),
1776 Legal_Reg | Legal_Flex, ScratchRegNum);
1777 Target->_sub(ScratchReg, Base, OffsetVal);
1778 } else {
1779 Operand *OffsetVal =
1780 Target->legalize(Target->Ctx->getConstantInt32(Offset),
1781 Legal_Reg | Legal_Flex, ScratchRegNum);
1782 Target->_add(ScratchReg, Base, OffsetVal);
1783 }
1784
1785 if (ScratchRegNum == Target->getReservedTmpReg()) {
1786 const bool BaseIsStackOrFramePtr =
1787 Base->getRegNum() == Target->getFrameOrStackReg();
1788 // There is currently no code path that would trigger this assertion, so we
1789 // leave this assertion here in case it is ever violated. This is not a
1790 // fatal error (thus the use of assert() and not llvm::report_fatal_error)
1791 // as the program compiled by subzero will still work correctly.
1792 assert(BaseIsStackOrFramePtr);
1793 // Side-effect: updates TempBase to reflect the new Temporary.
1794 if (BaseIsStackOrFramePtr) {
1795 TempBaseReg = ScratchReg;
1796 TempBaseOffset = Offset;
1797 } else {
1798 TempBaseReg = nullptr;
1799 TempBaseOffset = 0;
1800 }
1801 }
1802
1803 return ScratchReg;
1804 }
1805
1806 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
1807 Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
1808 assert(!Base->isRematerializable());
1809 if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
1810 return OperandARM32Mem::create(
1811 Target->Func, Ty, Base,
1812 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
1813 OperandARM32Mem::Offset);
1814 }
1815
1816 if (!AllowOffsets || TempBaseReg == nullptr) {
1817 newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1818 }
1819
1820 int32_t OffsetDiff = Offset - TempBaseOffset;
1821 assert(AllowOffsets || OffsetDiff == 0);
1822
1823 if (!Target->isLegalMemOffset(Ty, OffsetDiff)) {
1824 newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1825 OffsetDiff = 0;
1826 }
1827
1828 assert(!TempBaseReg->isRematerializable());
1829 return OperandARM32Mem::create(
1830 Target->Func, Ty, TempBaseReg,
1831 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(OffsetDiff)),
1832 OperandARM32Mem::Offset);
1833 }
1834
1835 void TargetARM32::PostLoweringLegalizer::resetTempBaseIfClobberedBy(
1836 const Inst *Instr) {
1837 bool ClobbersTempBase = false;
1838 if (TempBaseReg != nullptr) {
1839 Variable *Dest = Instr->getDest();
1840 if (llvm::isa<InstARM32Call>(Instr)) {
1841 // The following assertion is an invariant, so we remove it from the if
1842 // test. If the invariant is ever broken/invalidated/changed, remember
1843 // to add it back to the if condition.
1844 assert(TempBaseReg->getRegNum() == Target->getReservedTmpReg());
1845 // The linker may need to clobber IP if the call is too far from PC. Thus,
1846 // we assume IP will be overwritten.
1847 ClobbersTempBase = true;
1848 } else if (Dest != nullptr &&
1849 Dest->getRegNum() == TempBaseReg->getRegNum()) {
1850 // Register redefinition.
1851 ClobbersTempBase = true;
1852 }
1853 }
1854
1855 if (ClobbersTempBase) {
1856 TempBaseReg = nullptr;
1857 TempBaseOffset = 0;
1858 }
1859 }
1860
1861 void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
1862 Variable *Dest = MovInstr->getDest();
1863 assert(Dest != nullptr);
1864 Type DestTy = Dest->getType();
1865 assert(DestTy != IceType_i64);
1866
1867 Operand *Src = MovInstr->getSrc(0);
1868 Type SrcTy = Src->getType();
1869 (void)SrcTy;
1870 assert(SrcTy != IceType_i64);
1871
1872 if (MovInstr->isMultiDest() || MovInstr->isMultiSource())
1873 return;
1874
1875 bool Legalized = false;
1876 if (!Dest->hasReg()) {
1877 auto *SrcR = llvm::cast<Variable>(Src);
1878 assert(SrcR->hasReg());
1879 assert(!SrcR->isRematerializable());
1880 const int32_t Offset = Dest->getStackOffset();
1881 // This is a _mov(Mem(), Variable), i.e., a store.
1882 TargetARM32::Sandboxer(Target).str(
1883 SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
1884 MovInstr->getPredicate());
1885 // _str() does not have a Dest, so we add a fake-def(Dest).
1886 Target->Context.insert<InstFakeDef>(Dest);
1887 Legalized = true;
1888 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
1889 if (Var->isRematerializable()) {
1890 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
1891
1892 // ExtraOffset is only needed for frame-pointer based frames as we have
1893 // to account for spill storage.
1894 const int32_t ExtraOffset = (Var->getRegNum() == Target->getFrameReg())
1895 ? Target->getFrameFixedAllocaOffset()
1896 : 0;
1897
1898 const int32_t Offset = Var->getStackOffset() + ExtraOffset;
1899 Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
1900 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
1901 Target->_mov(Dest, T);
1902 Legalized = true;
1903 } else {
1904 if (!Var->hasReg()) {
1905 // This is a _mov(Variable, Mem()), i.e., a load.
1906 const int32_t Offset = Var->getStackOffset();
1907 TargetARM32::Sandboxer(Target).ldr(
1908 Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
1909 MovInstr->getPredicate());
1910 Legalized = true;
1911 }
1912 }
1913 }
1914
1915 if (Legalized) {
1916 if (MovInstr->isDestRedefined()) {
1917 Target->_set_dest_redefined();
1918 }
1919 MovInstr->setDeleted();
1920 }
1921 }
1922
1923 // ARM32 address modes:
1924 // ld/st i[8|16|32]: [reg], [reg +/- imm12], [pc +/- imm12],
1925 // [reg +/- reg << shamt5]
1926 // ld/st f[32|64] : [reg], [reg +/- imm8] , [pc +/- imm8]
1927 // ld/st vectors : [reg]
1928 //
1929 // For now, we don't handle address modes with Relocatables.
1930 namespace {
1931 // MemTraits contains per-type valid address mode information.
1932 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
1933 ubits, rraddr, shaddr) \
1934 static_assert(!(shaddr) || rraddr, "Check ICETYPEARM32_TABLE::" #tag);
1935 ICETYPEARM32_TABLE
1936 #undef X
1937
1938 static const struct {
1939 int32_t ValidImmMask;
1940 bool CanHaveImm;
1941 bool CanHaveIndex;
1942 bool CanHaveShiftedIndex;
1943 } MemTraits[] = {
1944 #define X(tag, elementty, int_width, fp_width, uvec_width, svec_width, sbits, \
1945 ubits, rraddr, shaddr) \
1946 { \
1947 (1 << ubits) - 1, \
1948 (ubits) > 0, \
1949 rraddr, \
1950 shaddr, \
1951 },
1952 ICETYPEARM32_TABLE
1953 #undef X
1954 };
1955 static constexpr SizeT MemTraitsSize = llvm::array_lengthof(MemTraits);
1956 } // end of anonymous namespace
1957
1958 OperandARM32Mem *
1959 TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
1960 bool AllowOffsets) {
1961 assert(!Mem->isRegReg() || !Mem->getIndex()->isRematerializable());
1962 assert(Mem->isRegReg() || Target->isLegalMemOffset(
1963 Mem->getType(), Mem->getOffset()->getValue()));
1964
1965 bool Legalized = false;
1966 Variable *Base = Mem->getBase();
1967 int32_t Offset = Mem->isRegReg() ? 0 : Mem->getOffset()->getValue();
1968 if (Base->isRematerializable()) {
1969 const int32_t ExtraOffset = (Base->getRegNum() == Target->getFrameReg())
1970 ? Target->getFrameFixedAllocaOffset()
1971 : 0;
1972 Offset += Base->getStackOffset() + ExtraOffset;
1973 Base = Target->getPhysicalRegister(Base->getRegNum());
1974 assert(!Base->isRematerializable());
1975 Legalized = true;
1976 }
1977
1978 if (!Legalized && !Target->NeedSandboxing) {
1979 return nullptr;
1980 }
1981
1982 if (!Mem->isRegReg()) {
1983 return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
1984 }
1985
1986 if (Target->NeedSandboxing) {
1987 llvm::report_fatal_error("Reg-Reg address mode is not allowed.");
1988 }
1989
1990 assert(MemTraits[Mem->getType()].CanHaveIndex);
1991
1992 if (Offset != 0) {
1993 if (TempBaseReg == nullptr) {
1994 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
1995 } else {
1996 uint32_t Imm8, Rotate;
1997 const int32_t OffsetDiff = Offset - TempBaseOffset;
1998 if (OffsetDiff == 0) {
1999 Base = TempBaseReg;
2000 } else if (OperandARM32FlexImm::canHoldImm(OffsetDiff, &Rotate, &Imm8)) {
2001 auto *OffsetDiffF = OperandARM32FlexImm::create(
2002 Target->Func, IceType_i32, Imm8, Rotate);
2003 Target->_add(TempBaseReg, TempBaseReg, OffsetDiffF);
2004 TempBaseOffset += OffsetDiff;
2005 Base = TempBaseReg;
2006 } else if (OperandARM32FlexImm::canHoldImm(-OffsetDiff, &Rotate, &Imm8)) {
2007 auto *OffsetDiffF = OperandARM32FlexImm::create(
2008 Target->Func, IceType_i32, Imm8, Rotate);
2009 Target->_sub(TempBaseReg, TempBaseReg, OffsetDiffF);
2010 TempBaseOffset += OffsetDiff;
2011 Base = TempBaseReg;
2012 } else {
2013 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2014 }
2015 }
2016 }
2017
2018 return OperandARM32Mem::create(Target->Func, Mem->getType(), Base,
2019 Mem->getIndex(), Mem->getShiftOp(),
2020 Mem->getShiftAmt(), Mem->getAddrMode());
2021 }
2022
2023 void TargetARM32::postLowerLegalization() {
2024 // If a stack variable's frame offset doesn't fit, convert from:
2025 // ldr X, OFF[SP]
2026 // to:
2027 // movw/movt TMP, OFF_PART
2028 // add TMP, TMP, SP
2029 // ldr X, OFF_MORE[TMP]
2030 //
2031 // This is safe because we have reserved TMP, and add for ARM does not
2032 // clobber the flags register.
2033 Func->dump("Before postLowerLegalization");
2034 assert(hasComputedFrame());
2035 // Do a fairly naive greedy clustering for now. Pick the first stack slot
2036 // that's out of bounds and make a new base reg using the architecture's temp
2037 // register. If that works for the next slot, then great. Otherwise, create a
2038 // new base register, clobbering the previous base register. Never share a
2039 // base reg across different basic blocks. This isn't ideal if local and
2040 // multi-block variables are far apart and their references are interspersed.
2041   // It may help to be more coordinated about assigning stack slot numbers and
2042   // may help to assign smaller offsets to higher-weight variables so that they
2043 // don't depend on this legalization.
2044 for (CfgNode *Node : Func->getNodes()) {
2045 Context.init(Node);
2046 // One legalizer per basic block, otherwise we would share the Temporary
2047 // Base Register between basic blocks.
2048 PostLoweringLegalizer Legalizer(this);
2049 while (!Context.atEnd()) {
2050 PostIncrLoweringContext PostIncrement(Context);
2051 Inst *CurInstr = iteratorToInst(Context.getCur());
2052
2053 // Check if the previous TempBaseReg is clobbered, and reset if needed.
2054 Legalizer.resetTempBaseIfClobberedBy(CurInstr);
2055
2056 if (auto *MovInstr = llvm::dyn_cast<InstARM32Mov>(CurInstr)) {
2057 Legalizer.legalizeMov(MovInstr);
2058 } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
2059 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2060 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
2061 Sandboxer(this).ldr(CurInstr->getDest(), LegalMem,
2062 LdrInstr->getPredicate());
2063 CurInstr->setDeleted();
2064 }
2065 } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
2066 constexpr bool DisallowOffsetsBecauseLdrex = false;
2067 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2068 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
2069 DisallowOffsetsBecauseLdrex)) {
2070 Sandboxer(this).ldrex(CurInstr->getDest(), LegalMem,
2071 LdrexInstr->getPredicate());
2072 CurInstr->setDeleted();
2073 }
2074 } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
2075 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2076 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
2077 Sandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)),
2078 LegalMem, StrInstr->getPredicate());
2079 CurInstr->setDeleted();
2080 }
2081 } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
2082 constexpr bool DisallowOffsetsBecauseStrex = false;
2083 if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
2084 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
2085 DisallowOffsetsBecauseStrex)) {
2086 Sandboxer(this).strex(CurInstr->getDest(),
2087 llvm::cast<Variable>(CurInstr->getSrc(0)),
2088 LegalMem, StrexInstr->getPredicate());
2089 CurInstr->setDeleted();
2090 }
2091 }
2092
2093 // Sanity-check: the Legalizer will either have no Temp, or it will be
2094 // bound to IP.
2095 Legalizer.assertNoTempOrAssignedToIP();
2096 }
2097 }
2098 }
2099
2100 Operand *TargetARM32::loOperand(Operand *Operand) {
2101 assert(Operand->getType() == IceType_i64);
2102 if (Operand->getType() != IceType_i64)
2103 return Operand;
2104 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2105 return Var64On32->getLo();
2106 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand))
2107 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2108 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2109 // Conservatively disallow memory operands with side-effects (pre/post
2110 // increment) in case of duplication.
2111 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2112 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2113 if (Mem->isRegReg()) {
2114 Variable *IndexR = legalizeToReg(Mem->getIndex());
2115 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(), IndexR,
2116 Mem->getShiftOp(), Mem->getShiftAmt(),
2117 Mem->getAddrMode());
2118 } else {
2119 return OperandARM32Mem::create(Func, IceType_i32, Mem->getBase(),
2120 Mem->getOffset(), Mem->getAddrMode());
2121 }
2122 }
2123 llvm::report_fatal_error("Unsupported operand type");
2124 return nullptr;
2125 }
2126
2127 Operand *TargetARM32::hiOperand(Operand *Operand) {
2128 assert(Operand->getType() == IceType_i64);
2129 if (Operand->getType() != IceType_i64)
2130 return Operand;
2131 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2132 return Var64On32->getHi();
2133 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2134 return Ctx->getConstantInt32(
2135 static_cast<uint32_t>(Const->getValue() >> 32));
2136 }
2137 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand)) {
2138 // Conservatively disallow memory operands with side-effects in case of
2139 // duplication.
2140 assert(Mem->getAddrMode() == OperandARM32Mem::Offset ||
2141 Mem->getAddrMode() == OperandARM32Mem::NegOffset);
2142 const Type SplitType = IceType_i32;
2143 if (Mem->isRegReg()) {
2144 // We have to make a temp variable T, and add 4 to either Base or Index.
2145 // The Index may be shifted, so adding 4 can mean something else. Thus,
2146 // prefer T := Base + 4, and use T as the new Base.
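      // For example, for [r0, r1, lsl #2] we form t := r0 + 4 and return
      // [t, r1, lsl #2] as the memory operand for the high word.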
2147 Variable *Base = Mem->getBase();
2148 Constant *Four = Ctx->getConstantInt32(4);
2149 Variable *NewBase = Func->makeVariable(Base->getType());
2150 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2151 Base, Four));
2152 Variable *BaseR = legalizeToReg(NewBase);
2153 Variable *IndexR = legalizeToReg(Mem->getIndex());
2154 return OperandARM32Mem::create(Func, SplitType, BaseR, IndexR,
2155 Mem->getShiftOp(), Mem->getShiftAmt(),
2156 Mem->getAddrMode());
2157 } else {
2158 Variable *Base = Mem->getBase();
2159 ConstantInteger32 *Offset = Mem->getOffset();
2160 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2161 int32_t NextOffsetVal = Offset->getValue() + 4;
2162 constexpr bool ZeroExt = false;
2163 if (!OperandARM32Mem::canHoldOffset(SplitType, ZeroExt, NextOffsetVal)) {
2164 // We have to make a temp variable and add 4 to either Base or Offset.
2165 // If we add 4 to Offset, this will convert a non-RegReg addressing
2166 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2167 // RegReg addressing modes, prefer adding to base and replacing
2168 // instead. Thus we leave the old offset alone.
2169 Constant *_4 = Ctx->getConstantInt32(4);
2170 Variable *NewBase = Func->makeVariable(Base->getType());
2171 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add,
2172 NewBase, Base, _4));
2173 Base = NewBase;
2174 } else {
2175 Offset =
2176 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2177 }
2178 Variable *BaseR = legalizeToReg(Base);
2179 return OperandARM32Mem::create(Func, SplitType, BaseR, Offset,
2180 Mem->getAddrMode());
2181 }
2182 }
2183 llvm::report_fatal_error("Unsupported operand type");
2184 return nullptr;
2185 }
2186
2187 SmallBitVector TargetARM32::getRegisterSet(RegSetMask Include,
2188 RegSetMask Exclude) const {
2189 SmallBitVector Registers(RegARM32::Reg_NUM);
2190
2191 for (uint32_t i = 0; i < RegARM32::Reg_NUM; ++i) {
2192 const auto &Entry = RegARM32::RegTable[i];
2193 if (Entry.Scratch && (Include & RegSet_CallerSave))
2194 Registers[i] = true;
2195 if (Entry.Preserved && (Include & RegSet_CalleeSave))
2196 Registers[i] = true;
2197 if (Entry.StackPtr && (Include & RegSet_StackPointer))
2198 Registers[i] = true;
2199 if (Entry.FramePtr && (Include & RegSet_FramePointer))
2200 Registers[i] = true;
2201 if (Entry.Scratch && (Exclude & RegSet_CallerSave))
2202 Registers[i] = false;
2203 if (Entry.Preserved && (Exclude & RegSet_CalleeSave))
2204 Registers[i] = false;
2205 if (Entry.StackPtr && (Exclude & RegSet_StackPointer))
2206 Registers[i] = false;
2207 if (Entry.FramePtr && (Exclude & RegSet_FramePointer))
2208 Registers[i] = false;
2209 }
2210
2211 return Registers;
2212 }
2213
2214 void TargetARM32::lowerAlloca(const InstAlloca *Instr) {
2215 // Conservatively require the stack to be aligned. Some stack adjustment
2216 // operations implemented below assume that the stack is aligned before the
2217 // alloca. All the alloca code ensures that the stack alignment is preserved
2218 // after the alloca. The stack alignment restriction can be relaxed in some
2219 // cases.
2220 NeedsStackAlignment = true;
2221
2222 // For default align=0, set it to the real value 1, to avoid any
2223 // bit-manipulation problems below.
2224 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2225
2226 // LLVM enforces power of 2 alignment.
2227 assert(llvm::isPowerOf2_32(AlignmentParam));
2228 assert(llvm::isPowerOf2_32(ARM32_STACK_ALIGNMENT_BYTES));
2229
2230 const uint32_t Alignment =
2231 std::max(AlignmentParam, ARM32_STACK_ALIGNMENT_BYTES);
2232 const bool OverAligned = Alignment > ARM32_STACK_ALIGNMENT_BYTES;
2233 const bool OptM1 = Func->getOptLevel() == Opt_m1;
2234 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2235 const bool UseFramePointer =
2236 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2237
2238 if (UseFramePointer)
2239 setHasFramePointer();
2240
2241 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
2242 if (OverAligned) {
2243 Sandboxer(this).align_sp(Alignment);
2244 }
2245
2246 Variable *Dest = Instr->getDest();
2247 Operand *TotalSize = Instr->getSizeInBytes();
2248
2249 if (const auto *ConstantTotalSize =
2250 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2251 const uint32_t Value =
2252 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2253 // Constant size alloca.
2254 if (!UseFramePointer) {
2255 // If we don't need a Frame Pointer, this alloca has a known offset to the
2256       // stack pointer. We don't need to adjust the stack pointer, nor assign
2257       // any value to Dest, as Dest is rematerializable.
2258 assert(Dest->isRematerializable());
2259 FixedAllocaSizeBytes += Value;
2260 Context.insert<InstFakeDef>(Dest);
2261 return;
2262 }
2263
2264 // If a frame pointer is required, then we need to store the alloca'd result
2265 // in Dest.
2266 Operand *SubAmountRF =
2267 legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
2268 Sandboxer(this).sub_sp(SubAmountRF);
2269 } else {
2270 // Non-constant sizes need to be adjusted to the next highest multiple of
2271 // the required alignment at runtime.
2272 TotalSize = legalize(TotalSize, Legal_Reg | Legal_Flex);
2273 Variable *T = makeReg(IceType_i32);
2274 _mov(T, TotalSize);
2275 Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
2276 _add(T, T, AddAmount);
2277 alignRegisterPow2(T, Alignment);
2278 Sandboxer(this).sub_sp(T);
2279 }
2280
2281   // Add the out args size back to SP so Dest points just above that area.
2282 Variable *T = SP;
2283 if (MaxOutArgsSizeBytes != 0) {
2284 T = makeReg(getPointerType());
2285 Operand *OutArgsSizeRF = legalize(
2286 Ctx->getConstantInt32(MaxOutArgsSizeBytes), Legal_Reg | Legal_Flex);
2287 _add(T, SP, OutArgsSizeRF);
2288 }
2289
2290 _mov(Dest, T);
2291 }
2292
2293 void TargetARM32::div0Check(Type Ty, Operand *SrcLo, Operand *SrcHi) {
2294 if (isGuaranteedNonzeroInt(SrcLo) || isGuaranteedNonzeroInt(SrcHi))
2295 return;
2296 Variable *SrcLoReg = legalizeToReg(SrcLo);
2297 switch (Ty) {
2298 default:
2299 llvm_unreachable(
2300 ("Unexpected type in div0Check: " + typeStdString(Ty)).c_str());
2301 case IceType_i8:
2302 case IceType_i16: {
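    // Shift the i8/i16 value into the high bits so that only its own low bits
    // determine the Z flag; T itself is otherwise unused (the FakeUse below
    // keeps the flag-setting shift from being eliminated).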
2303 Operand *ShAmtImm = shAmtImm(32 - getScalarIntBitWidth(Ty));
2304 Variable *T = makeReg(IceType_i32);
2305 _lsls(T, SrcLoReg, ShAmtImm);
2306 Context.insert<InstFakeUse>(T);
2307 } break;
2308 case IceType_i32: {
2309 _tst(SrcLoReg, SrcLoReg);
2310 break;
2311 }
2312 case IceType_i64: {
2313 Variable *T = makeReg(IceType_i32);
2314 _orrs(T, SrcLoReg, legalize(SrcHi, Legal_Reg | Legal_Flex));
2315 // T isn't going to be used, but we need the side-effect of setting flags
2316 // from this operation.
2317 Context.insert<InstFakeUse>(T);
2318 }
2319 }
2320 auto *Label = InstARM32Label::create(Func, this);
2321 _br(Label, CondARM32::NE);
2322 _trap();
2323 Context.insert(Label);
2324 }
2325
2326 void TargetARM32::lowerIDivRem(Variable *Dest, Variable *T, Variable *Src0R,
2327 Operand *Src1, ExtInstr ExtFunc,
2328 DivInstr DivFunc, bool IsRemainder) {
2329 div0Check(Dest->getType(), Src1, nullptr);
2330 Variable *Src1R = legalizeToReg(Src1);
2331 Variable *T0R = Src0R;
2332 Variable *T1R = Src1R;
2333 if (Dest->getType() != IceType_i32) {
2334 T0R = makeReg(IceType_i32);
2335 (this->*ExtFunc)(T0R, Src0R, CondARM32::AL);
2336 T1R = makeReg(IceType_i32);
2337 (this->*ExtFunc)(T1R, Src1R, CondARM32::AL);
2338 }
2339 if (hasCPUFeature(TargetARM32Features::HWDivArm)) {
2340 (this->*DivFunc)(T, T0R, T1R, CondARM32::AL);
2341 if (IsRemainder) {
2342 Variable *T2 = makeReg(IceType_i32);
2343 _mls(T2, T, T1R, T0R);
2344 T = T2;
2345 }
2346 _mov(Dest, T);
2347 } else {
2348 llvm::report_fatal_error("div should have already been turned into a call");
2349 }
2350 }
2351
2352 TargetARM32::SafeBoolChain
2353 TargetARM32::lowerInt1Arithmetic(const InstArithmetic *Instr) {
2354 Variable *Dest = Instr->getDest();
2355 assert(Dest->getType() == IceType_i1);
2356
2357 // So folding didn't work for Instr. Not a problem: We just need to
2358 // materialize the Sources, and perform the operation. We create regular
2359 // Variables (and not infinite-weight ones) because this call might recurse a
2360 // lot, and we might end up with tons of infinite weight temporaries.
2361 assert(Instr->getSrcSize() == 2);
2362 Variable *Src0 = Func->makeVariable(IceType_i1);
2363 SafeBoolChain Src0Safe = lowerInt1(Src0, Instr->getSrc(0));
2364
2365 Operand *Src1 = Instr->getSrc(1);
2366 SafeBoolChain Src1Safe = SBC_Yes;
2367
2368 if (!llvm::isa<Constant>(Src1)) {
2369 Variable *Src1V = Func->makeVariable(IceType_i1);
2370 Src1Safe = lowerInt1(Src1V, Src1);
2371 Src1 = Src1V;
2372 }
2373
2374 Variable *T = makeReg(IceType_i1);
2375 Src0 = legalizeToReg(Src0);
2376 Operand *Src1RF = legalize(Src1, Legal_Reg | Legal_Flex);
2377 switch (Instr->getOp()) {
2378 default:
2379 // If this Unreachable is ever executed, add the offending operation to
2380 // the list of valid consumers.
2381 llvm::report_fatal_error("Unhandled i1 Op");
2382 case InstArithmetic::And:
2383 _and(T, Src0, Src1RF);
2384 break;
2385 case InstArithmetic::Or:
2386 _orr(T, Src0, Src1RF);
2387 break;
2388 case InstArithmetic::Xor:
2389 _eor(T, Src0, Src1RF);
2390 break;
2391 }
2392 _mov(Dest, T);
2393 return Src0Safe == SBC_Yes && Src1Safe == SBC_Yes ? SBC_Yes : SBC_No;
2394 }
2395
2396 namespace {
2397 // NumericOperands is used during arithmetic/icmp lowering for constant folding.
2398 // It holds the two source operands, and maintains some state as to whether one
2399 // of them is a constant. If one of the operands is a constant, then it will
2400 // be stored as the operation's second source, with a bit indicating whether the
2401 // operands were swapped.
2402 //
2403 // The class is split into a base class with operand type-independent methods,
2404 // and a derived, templated class, for each type of operand we want to fold
2405 // constants for:
2406 //
2407 // NumericOperandsBase --> NumericOperands<ConstantFloat>
2408 // --> NumericOperands<ConstantDouble>
2409 // --> NumericOperands<ConstantInt32>
2410 //
2411 // NumericOperands<ConstantInt32> also exposes helper methods for emitting
2412 // inverted/negated immediates.
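// For example, for "a = b + 4" the constant 4 ends up as Src1 with
// Swapped == false; for "a = 4 - b" the operands are exchanged, so b becomes
// Src0, 4 becomes Src1, and Swapped == true.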
2413 class NumericOperandsBase {
2414 NumericOperandsBase() = delete;
2415 NumericOperandsBase(const NumericOperandsBase &) = delete;
2416 NumericOperandsBase &operator=(const NumericOperandsBase &) = delete;
2417
2418 public:
2419   NumericOperandsBase(Operand *S0, Operand *S1)
2420 : Src0(NonConstOperand(S0, S1)), Src1(ConstOperand(S0, S1)),
2421 Swapped(Src0 == S1 && S0 != S1) {
2422 assert(Src0 != nullptr);
2423 assert(Src1 != nullptr);
2424 assert(Src0 != Src1 || S0 == S1);
2425 }
2426
2427   bool hasConstOperand() const {
2428 return llvm::isa<Constant>(Src1) && !llvm::isa<ConstantRelocatable>(Src1);
2429 }
2430
2431   bool swappedOperands() const { return Swapped; }
2432
2433   Variable *src0R(TargetARM32 *Target) const {
2434 return legalizeToReg(Target, Src0);
2435 }
2436
2437   Variable *unswappedSrc0R(TargetARM32 *Target) const {
2438 return legalizeToReg(Target, Swapped ? Src1 : Src0);
2439 }
2440
2441   Operand *src1RF(TargetARM32 *Target) const {
2442 return legalizeToRegOrFlex(Target, Src1);
2443 }
2444
2445   Variable *unswappedSrc1R(TargetARM32 *Target) const {
2446 return legalizeToReg(Target, Swapped ? Src0 : Src1);
2447 }
2448
2449   Operand *src1() const { return Src1; }
2450
2451 protected:
2452 Operand *const Src0;
2453 Operand *const Src1;
2454 const bool Swapped;
2455
2456   static Variable *legalizeToReg(TargetARM32 *Target, Operand *Src) {
2457 return Target->legalizeToReg(Src);
2458 }
2459
2460   static Operand *legalizeToRegOrFlex(TargetARM32 *Target, Operand *Src) {
2461 return Target->legalize(Src,
2462 TargetARM32::Legal_Reg | TargetARM32::Legal_Flex);
2463 }
2464
2465 private:
2466   static Operand *NonConstOperand(Operand *S0, Operand *S1) {
2467 if (!llvm::isa<Constant>(S0))
2468 return S0;
2469 if (!llvm::isa<Constant>(S1))
2470 return S1;
2471 if (llvm::isa<ConstantRelocatable>(S1) &&
2472 !llvm::isa<ConstantRelocatable>(S0))
2473 return S1;
2474 return S0;
2475 }
2476
2477   static Operand *ConstOperand(Operand *S0, Operand *S1) {
2478 if (!llvm::isa<Constant>(S0))
2479 return S1;
2480 if (!llvm::isa<Constant>(S1))
2481 return S0;
2482 if (llvm::isa<ConstantRelocatable>(S1) &&
2483 !llvm::isa<ConstantRelocatable>(S0))
2484 return S0;
2485 return S1;
2486 }
2487 };
2488
2489 template <typename C> class NumericOperands : public NumericOperandsBase {
2490 NumericOperands() = delete;
2491 NumericOperands(const NumericOperands &) = delete;
2492 NumericOperands &operator=(const NumericOperands &) = delete;
2493
2494 public:
2495   NumericOperands(Operand *S0, Operand *S1) : NumericOperandsBase(S0, S1) {
2496 assert(!hasConstOperand() || llvm::isa<C>(this->Src1));
2497 }
2498
2499   typename C::PrimType getConstantValue() const {
2500 return llvm::cast<C>(Src1)->getValue();
2501 }
2502 };
2503
2504 using FloatOperands = NumericOperands<ConstantFloat>;
2505 using DoubleOperands = NumericOperands<ConstantDouble>;
2506
2507 class Int32Operands : public NumericOperands<ConstantInteger32> {
2508 Int32Operands() = delete;
2509 Int32Operands(const Int32Operands &) = delete;
2510 Int32Operands &operator=(const Int32Operands &) = delete;
2511
2512 public:
2513   Int32Operands(Operand *S0, Operand *S1) : NumericOperands(S0, S1) {}
2514
2515   Operand *unswappedSrc1RShAmtImm(TargetARM32 *Target) const {
2516 if (!swappedOperands() && hasConstOperand()) {
2517 return Target->shAmtImm(getConstantValue() & 0x1F);
2518 }
2519 return legalizeToReg(Target, Swapped ? Src0 : Src1);
2520 }
2521
2522   bool isSrc1ImmediateZero() const {
2523 if (!swappedOperands() && hasConstOperand()) {
2524 return getConstantValue() == 0;
2525 }
2526 return false;
2527 }
2528
2529   bool immediateIsFlexEncodable() const {
2530 uint32_t Rotate, Imm8;
2531 return OperandARM32FlexImm::canHoldImm(getConstantValue(), &Rotate, &Imm8);
2532 }
2533
2534   bool negatedImmediateIsFlexEncodable() const {
2535 uint32_t Rotate, Imm8;
2536 return OperandARM32FlexImm::canHoldImm(
2537 -static_cast<int32_t>(getConstantValue()), &Rotate, &Imm8);
2538 }
2539
2540   Operand *negatedSrc1F(TargetARM32 *Target) const {
2541 return legalizeToRegOrFlex(Target,
2542 Target->getCtx()->getConstantInt32(
2543 -static_cast<int32_t>(getConstantValue())));
2544 }
2545
2546   bool invertedImmediateIsFlexEncodable() const {
2547 uint32_t Rotate, Imm8;
2548 return OperandARM32FlexImm::canHoldImm(
2549 ~static_cast<uint32_t>(getConstantValue()), &Rotate, &Imm8);
2550 }
2551
2552   Operand *invertedSrc1F(TargetARM32 *Target) const {
2553 return legalizeToRegOrFlex(Target,
2554 Target->getCtx()->getConstantInt32(
2555 ~static_cast<uint32_t>(getConstantValue())));
2556 }
2557 };
2558 } // end of anonymous namespace
2559
2560 void TargetARM32::preambleDivRem(const InstCall *Instr) {
2561 Operand *Src1 = Instr->getArg(1);
2562
2563 switch (Src1->getType()) {
2564 default:
2565 llvm::report_fatal_error("Invalid type for idiv.");
2566 case IceType_i64: {
2567 if (auto *C = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2568 if (C->getValue() == 0) {
2569 _trap();
2570 return;
2571 }
2572 }
2573 div0Check(IceType_i64, loOperand(Src1), hiOperand(Src1));
2574 return;
2575 }
2576 case IceType_i32: {
2577 // Src0 and Src1 have already been appropriately extended to an i32, so we
2578 // don't check for i8 and i16.
2579 if (auto *C = llvm::dyn_cast<ConstantInteger32>(Src1)) {
2580 if (C->getValue() == 0) {
2581 _trap();
2582 return;
2583 }
2584 }
2585 div0Check(IceType_i32, Src1, nullptr);
2586 return;
2587 }
2588 }
2589 }
2590
2591 void TargetARM32::lowerInt64Arithmetic(InstArithmetic::OpKind Op,
2592 Variable *Dest, Operand *Src0,
2593 Operand *Src1) {
2594 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
2595 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
2596 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
2597 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
2598
2599 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2600 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2601 Variable *T_Lo = makeReg(DestLo->getType());
2602 Variable *T_Hi = makeReg(DestHi->getType());
2603
2604 switch (Op) {
2605 case InstArithmetic::_num:
2606 llvm::report_fatal_error("Unknown arithmetic operator");
2607 return;
2608 case InstArithmetic::Add: {
2609 Variable *Src0LoR = SrcsLo.src0R(this);
2610 Operand *Src1LoRF = SrcsLo.src1RF(this);
2611 Variable *Src0HiR = SrcsHi.src0R(this);
2612 Operand *Src1HiRF = SrcsHi.src1RF(this);
2613 _adds(T_Lo, Src0LoR, Src1LoRF);
2614 _mov(DestLo, T_Lo);
2615 _adc(T_Hi, Src0HiR, Src1HiRF);
2616 _mov(DestHi, T_Hi);
2617 return;
2618 }
2619 case InstArithmetic::And: {
2620 Variable *Src0LoR = SrcsLo.src0R(this);
2621 Operand *Src1LoRF = SrcsLo.src1RF(this);
2622 Variable *Src0HiR = SrcsHi.src0R(this);
2623 Operand *Src1HiRF = SrcsHi.src1RF(this);
2624 _and(T_Lo, Src0LoR, Src1LoRF);
2625 _mov(DestLo, T_Lo);
2626 _and(T_Hi, Src0HiR, Src1HiRF);
2627 _mov(DestHi, T_Hi);
2628 return;
2629 }
2630 case InstArithmetic::Or: {
2631 Variable *Src0LoR = SrcsLo.src0R(this);
2632 Operand *Src1LoRF = SrcsLo.src1RF(this);
2633 Variable *Src0HiR = SrcsHi.src0R(this);
2634 Operand *Src1HiRF = SrcsHi.src1RF(this);
2635 _orr(T_Lo, Src0LoR, Src1LoRF);
2636 _mov(DestLo, T_Lo);
2637 _orr(T_Hi, Src0HiR, Src1HiRF);
2638 _mov(DestHi, T_Hi);
2639 return;
2640 }
2641 case InstArithmetic::Xor: {
2642 Variable *Src0LoR = SrcsLo.src0R(this);
2643 Operand *Src1LoRF = SrcsLo.src1RF(this);
2644 Variable *Src0HiR = SrcsHi.src0R(this);
2645 Operand *Src1HiRF = SrcsHi.src1RF(this);
2646 _eor(T_Lo, Src0LoR, Src1LoRF);
2647 _mov(DestLo, T_Lo);
2648 _eor(T_Hi, Src0HiR, Src1HiRF);
2649 _mov(DestHi, T_Hi);
2650 return;
2651 }
2652 case InstArithmetic::Sub: {
2653 Variable *Src0LoR = SrcsLo.src0R(this);
2654 Operand *Src1LoRF = SrcsLo.src1RF(this);
2655 Variable *Src0HiR = SrcsHi.src0R(this);
2656 Operand *Src1HiRF = SrcsHi.src1RF(this);
2657 if (SrcsLo.swappedOperands()) {
2658 _rsbs(T_Lo, Src0LoR, Src1LoRF);
2659 _mov(DestLo, T_Lo);
2660 _rsc(T_Hi, Src0HiR, Src1HiRF);
2661 _mov(DestHi, T_Hi);
2662 } else {
2663 _subs(T_Lo, Src0LoR, Src1LoRF);
2664 _mov(DestLo, T_Lo);
2665 _sbc(T_Hi, Src0HiR, Src1HiRF);
2666 _mov(DestHi, T_Hi);
2667 }
2668 return;
2669 }
2670 case InstArithmetic::Mul: {
2671 // GCC 4.8 does:
2672 // a=b*c ==>
2673 // t_acc =(mul) (b.lo * c.hi)
2674 // t_acc =(mla) (c.lo * b.hi) + t_acc
2675 // t.hi,t.lo =(umull) b.lo * c.lo
2676 // t.hi += t_acc
2677 // a.lo = t.lo
2678 // a.hi = t.hi
2679 //
2680 // LLVM does:
2681 // t.hi,t.lo =(umull) b.lo * c.lo
2682 // t.hi =(mla) (b.lo * c.hi) + t.hi
2683 // t.hi =(mla) (b.hi * c.lo) + t.hi
2684 // a.lo = t.lo
2685 // a.hi = t.hi
2686 //
2687 // LLVM's lowering has fewer instructions, but more register pressure:
2688 // t.lo is live from beginning to end, while GCC delays the two-dest
2689 // instruction till the end, and kills c.hi immediately.
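    // The sequence emitted below follows the GCC 4.8 version.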
2690 Variable *T_Acc = makeReg(IceType_i32);
2691 Variable *T_Acc1 = makeReg(IceType_i32);
2692 Variable *T_Hi1 = makeReg(IceType_i32);
2693 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2694 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2695 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2696 Variable *Src1RHi = SrcsHi.unswappedSrc1R(this);
2697 _mul(T_Acc, Src0RLo, Src1RHi);
2698 _mla(T_Acc1, Src1RLo, Src0RHi, T_Acc);
2699 _umull(T_Lo, T_Hi1, Src0RLo, Src1RLo);
2700 _add(T_Hi, T_Hi1, T_Acc1);
2701 _mov(DestLo, T_Lo);
2702 _mov(DestHi, T_Hi);
2703 return;
2704 }
2705 case InstArithmetic::Shl: {
2706 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2707 Variable *Src0RLo = SrcsLo.src0R(this);
2708 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2709 const int32_t ShAmtImm = SrcsLo.getConstantValue() & 0x3F;
2710 if (ShAmtImm == 0) {
2711 _mov(DestLo, Src0RLo);
2712 _mov(DestHi, SrcsHi.src0R(this));
2713 return;
2714 }
2715
2716 if (ShAmtImm >= 32) {
2717 if (ShAmtImm == 32) {
2718 _mov(DestHi, Src0RLo);
2719 } else {
2720 Operand *ShAmtOp = shAmtImm(ShAmtImm - 32);
2721 _lsl(T_Hi, Src0RLo, ShAmtOp);
2722 _mov(DestHi, T_Hi);
2723 }
2724
2725 Operand *_0 =
2726 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2727 _mov(T_Lo, _0);
2728 _mov(DestLo, T_Lo);
2729 return;
2730 }
2731
2732 Variable *Src0RHi = SrcsHi.src0R(this);
2733 Operand *ShAmtOp = shAmtImm(ShAmtImm);
2734 Operand *ComplShAmtOp = shAmtImm(32 - ShAmtImm);
2735 _lsl(T_Hi, Src0RHi, ShAmtOp);
2736 _orr(T_Hi, T_Hi,
2737 OperandARM32FlexReg::create(Func, IceType_i32, Src0RLo,
2738 OperandARM32::LSR, ComplShAmtOp));
2739 _mov(DestHi, T_Hi);
2740
2741 _lsl(T_Lo, Src0RLo, ShAmtOp);
2742 _mov(DestLo, T_Lo);
2743 return;
2744 }
2745
2746 // a=b<<c ==>
2747 // pnacl-llc does:
2748 // mov t_b.lo, b.lo
2749 // mov t_b.hi, b.hi
2750 // mov t_c.lo, c.lo
2751 // rsb T0, t_c.lo, #32
2752 // lsr T1, t_b.lo, T0
2753 // orr t_a.hi, T1, t_b.hi, lsl t_c.lo
2754 // sub T2, t_c.lo, #32
2755 // cmp T2, #0
2756 // lslge t_a.hi, t_b.lo, T2
2757 // lsl t_a.lo, t_b.lo, t_c.lo
2758 // mov a.lo, t_a.lo
2759 // mov a.hi, t_a.hi
2760 //
2761 // GCC 4.8 does:
2762 // sub t_c1, c.lo, #32
2763 // lsl t_hi, b.hi, c.lo
2764 // orr t_hi, t_hi, b.lo, lsl t_c1
2765 // rsb t_c2, c.lo, #32
2766 // orr t_hi, t_hi, b.lo, lsr t_c2
2767 // lsl t_lo, b.lo, c.lo
2768 // a.lo = t_lo
2769 // a.hi = t_hi
2770 //
2771 // These are incompatible, therefore we mimic pnacl-llc.
2772 // Can be strength-reduced for constant-shifts, but we don't do that for
2773 // now.
2774 // Given the sub/rsb T_C, C.lo, #32, one of the T_C will be negative. On
2775 // ARM, shifts only take the lower 8 bits of the shift register, and
2776 // saturate to the range 0-32, so the negative value will saturate to 32.
2777 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2778 Operand *_0 =
2779 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2780 Variable *T0 = makeReg(IceType_i32);
2781 Variable *T1 = makeReg(IceType_i32);
2782 Variable *T2 = makeReg(IceType_i32);
2783 Variable *TA_Hi = makeReg(IceType_i32);
2784 Variable *TA_Lo = makeReg(IceType_i32);
2785 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2786 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2787 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2788 _rsb(T0, Src1RLo, _32);
2789 _lsr(T1, Src0RLo, T0);
2790 _orr(TA_Hi, T1,
2791 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2792 OperandARM32::LSL, Src1RLo));
2793 _sub(T2, Src1RLo, _32);
2794 _cmp(T2, _0);
2795 _lsl(TA_Hi, Src0RLo, T2, CondARM32::GE);
2796 _set_dest_redefined();
2797 _lsl(TA_Lo, Src0RLo, Src1RLo);
2798 _mov(DestLo, TA_Lo);
2799 _mov(DestHi, TA_Hi);
2800 return;
2801 }
2802 case InstArithmetic::Lshr:
2803 case InstArithmetic::Ashr: {
2804 const bool ASR = Op == InstArithmetic::Ashr;
2805 if (!SrcsLo.swappedOperands() && SrcsLo.hasConstOperand()) {
2806 Variable *Src0RHi = SrcsHi.src0R(this);
2807 // Truncating the ShAmt to [0, 63] because that's what ARM does anyway.
2808 const int32_t ShAmt = SrcsLo.getConstantValue() & 0x3F;
2809 if (ShAmt == 0) {
2810 _mov(DestHi, Src0RHi);
2811 _mov(DestLo, SrcsLo.src0R(this));
2812 return;
2813 }
2814
2815 if (ShAmt >= 32) {
2816 if (ShAmt == 32) {
2817 _mov(DestLo, Src0RHi);
2818 } else {
2819 Operand *ShAmtImm = shAmtImm(ShAmt - 32);
2820 if (ASR) {
2821 _asr(T_Lo, Src0RHi, ShAmtImm);
2822 } else {
2823 _lsr(T_Lo, Src0RHi, ShAmtImm);
2824 }
2825 _mov(DestLo, T_Lo);
2826 }
2827
2828 if (ASR) {
2829 Operand *_31 = shAmtImm(31);
2830 _asr(T_Hi, Src0RHi, _31);
2831 } else {
2832 Operand *_0 = legalize(Ctx->getConstantZero(IceType_i32),
2833 Legal_Reg | Legal_Flex);
2834 _mov(T_Hi, _0);
2835 }
2836 _mov(DestHi, T_Hi);
2837 return;
2838 }
2839
2840 Variable *Src0RLo = SrcsLo.src0R(this);
2841 Operand *ShAmtImm = shAmtImm(ShAmt);
2842 Operand *ComplShAmtImm = shAmtImm(32 - ShAmt);
2843 _lsr(T_Lo, Src0RLo, ShAmtImm);
2844 _orr(T_Lo, T_Lo,
2845 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2846 OperandARM32::LSL, ComplShAmtImm));
2847 _mov(DestLo, T_Lo);
2848
2849 if (ASR) {
2850 _asr(T_Hi, Src0RHi, ShAmtImm);
2851 } else {
2852 _lsr(T_Hi, Src0RHi, ShAmtImm);
2853 }
2854 _mov(DestHi, T_Hi);
2855 return;
2856 }
2857
2858 // a=b>>c
2859 // pnacl-llc does:
2860 // mov t_b.lo, b.lo
2861 // mov t_b.hi, b.hi
2862 // mov t_c.lo, c.lo
2863 // lsr T0, t_b.lo, t_c.lo
2864 // rsb T1, t_c.lo, #32
2865 // orr t_a.lo, T0, t_b.hi, lsl T1
2866 // sub T2, t_c.lo, #32
2867 // cmp T2, #0
2868 // [al]srge t_a.lo, t_b.hi, T2
2869 // [al]sr t_a.hi, t_b.hi, t_c.lo
2870 // mov a.lo, t_a.lo
2871 // mov a.hi, t_a.hi
2872 //
2873 // GCC 4.8 does (lsr):
2874 // rsb t_c1, c.lo, #32
2875 // lsr t_lo, b.lo, c.lo
2876 // orr t_lo, t_lo, b.hi, lsl t_c1
2877 // sub t_c2, c.lo, #32
2878 // orr t_lo, t_lo, b.hi, lsr t_c2
2879 // lsr t_hi, b.hi, c.lo
2880 // mov a.lo, t_lo
2881 // mov a.hi, t_hi
2882 //
2883 // These are incompatible, therefore we mimic pnacl-llc.
2884 Operand *_32 = legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
2885 Operand *_0 =
2886 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
2887 Variable *T0 = makeReg(IceType_i32);
2888 Variable *T1 = makeReg(IceType_i32);
2889 Variable *T2 = makeReg(IceType_i32);
2890 Variable *TA_Lo = makeReg(IceType_i32);
2891 Variable *TA_Hi = makeReg(IceType_i32);
2892 Variable *Src0RLo = SrcsLo.unswappedSrc0R(this);
2893 Variable *Src0RHi = SrcsHi.unswappedSrc0R(this);
2894 Variable *Src1RLo = SrcsLo.unswappedSrc1R(this);
2895 _lsr(T0, Src0RLo, Src1RLo);
2896 _rsb(T1, Src1RLo, _32);
2897 _orr(TA_Lo, T0,
2898 OperandARM32FlexReg::create(Func, IceType_i32, Src0RHi,
2899 OperandARM32::LSL, T1));
2900 _sub(T2, Src1RLo, _32);
2901 _cmp(T2, _0);
2902 if (ASR) {
2903 _asr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2904 _set_dest_redefined();
2905 _asr(TA_Hi, Src0RHi, Src1RLo);
2906 } else {
2907 _lsr(TA_Lo, Src0RHi, T2, CondARM32::GE);
2908 _set_dest_redefined();
2909 _lsr(TA_Hi, Src0RHi, Src1RLo);
2910 }
2911 _mov(DestLo, TA_Lo);
2912 _mov(DestHi, TA_Hi);
2913 return;
2914 }
2915 case InstArithmetic::Fadd:
2916 case InstArithmetic::Fsub:
2917 case InstArithmetic::Fmul:
2918 case InstArithmetic::Fdiv:
2919 case InstArithmetic::Frem:
2920 llvm::report_fatal_error("FP instruction with i64 type");
2921 return;
2922 case InstArithmetic::Udiv:
2923 case InstArithmetic::Sdiv:
2924 case InstArithmetic::Urem:
2925 case InstArithmetic::Srem:
2926 llvm::report_fatal_error("Call-helper-involved instruction for i64 type "
2927 "should have already been handled before");
2928 return;
2929 }
2930 }
2931
2932 namespace {
2933 // StrengthReduction is a namespace with the strength reduction machinery. The
2934 // entry point is the StrengthReduction::tryToOptimize method. It returns true
2935 // if the optimization can be performed, and false otherwise.
2936 //
2937 // If the optimization can be performed, tryToOptimize sets its NumOperations
2938 // parameter to the number of shifts that are needed to perform the
2939 // multiplication; and it sets the Operations parameter with <ShAmt, AddOrSub>
2940 // tuples that describe how to materialize the multiplication.
2941 //
2942 // The algorithm finds contiguous 1s in the Multiplication source, and uses one
2943 // or two shifts to materialize it. A sequence of 1s, e.g.,
2944 //
2945 // M N
2946 // ...00000000000011111...111110000000...
2947 //
2948 // is materializable with (1 << (M + 1)) - (1 << N):
2949 //
2950 // ...00000000000100000...000000000000... [1 << (M + 1)]
2951 // ...00000000000000000...000010000000... (-) [1 << N]
2952 // --------------------------------------
2953 // ...00000000000011111...111110000000...
2954 //
2955 // And a single bit set, which is just a left shift.
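// For example, multiplying by 10 (0b1010) decomposes into two unitary bits, so
// b * 10 aggregates as (b << 3) + (b << 1); multiplying by 14 (0b1110) is one
// run of 1s from bit 3 down to bit 1, so it aggregates as (b << 4) - (b << 1).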
2956 namespace StrengthReduction {
2957 enum AggregationOperation {
2958 AO_Invalid,
2959 AO_Add,
2960 AO_Sub,
2961 };
2962
2963 // AggregateElement is a glorified <ShAmt, AddOrSub> tuple.
2964 class AggregationElement {
2965 AggregationElement(const AggregationElement &) = delete;
2966
2967 public:
2968 AggregationElement() = default;
2969 AggregationElement &operator=(const AggregationElement &) = default;
2970   AggregationElement(AggregationOperation Op, uint32_t ShAmt)
2971 : Op(Op), ShAmt(ShAmt) {}
2972
2973   Operand *createShiftedOperand(Cfg *Func, Variable *OpR) const {
2974 assert(OpR->mustHaveReg());
2975 if (ShAmt == 0) {
2976 return OpR;
2977 }
2978 return OperandARM32FlexReg::create(
2979 Func, IceType_i32, OpR, OperandARM32::LSL,
2980 OperandARM32ShAmtImm::create(
2981 Func, llvm::cast<ConstantInteger32>(
2982 Func->getContext()->getConstantInt32(ShAmt))));
2983 }
2984
2985   bool aggregateWithAdd() const {
2986 switch (Op) {
2987 case AO_Invalid:
2988 llvm::report_fatal_error("Invalid Strength Reduction Operations.");
2989 case AO_Add:
2990 return true;
2991 case AO_Sub:
2992 return false;
2993 }
2994 llvm_unreachable("(silence g++ warning)");
2995 }
2996
2997   uint32_t shAmt() const { return ShAmt; }
2998
2999 private:
3000 AggregationOperation Op = AO_Invalid;
3001 uint32_t ShAmt;
3002 };
3003
3004 // [RangeStart, RangeEnd] is a range of 1s in Src.
3005 template <std::size_t N>
3006 bool addOperations(uint32_t RangeStart, uint32_t RangeEnd, SizeT *NumOperations,
3007 std::array<AggregationElement, N> *Operations) {
3008 assert(*NumOperations < N);
3009 if (RangeStart == RangeEnd) {
3010 // Single bit set:
3011 // Src : 0...00010...
3012 // RangeStart : ^
3013 // RangeEnd : ^
3014 // NegSrc : 0...00001...
3015 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart);
3016 ++(*NumOperations);
3017 return true;
3018 }
3019
3020 // Sequence of 1s: (two operations required.)
3021 // Src : 0...00011...110...
3022 // RangeStart : ^
3023 // RangeEnd : ^
3024 // NegSrc : 0...00000...001...
3025 if (*NumOperations + 1 >= N) {
3026 return false;
3027 }
3028 (*Operations)[*NumOperations] = AggregationElement(AO_Add, RangeStart + 1);
3029 ++(*NumOperations);
3030 (*Operations)[*NumOperations] = AggregationElement(AO_Sub, RangeEnd);
3031 ++(*NumOperations);
3032 return true;
3033 }
3034
3035 // tryToOptimize scans Src looking for sequences of 1s (including the unitary bit
3036 // 1 surrounded by zeroes).
3037 template <std::size_t N>
3038 bool tryToOptimize(uint32_t Src, SizeT *NumOperations,
3039 std::array<AggregationElement, N> *Operations) {
3040 constexpr uint32_t SrcSizeBits = sizeof(Src) * CHAR_BIT;
3041 uint32_t NegSrc = ~Src;
3042
3043 *NumOperations = 0;
3044 while (Src != 0 && *NumOperations < N) {
3045 // Each step of the algorithm:
3046 // * finds L, the last bit set in Src;
3047 // * clears all the upper bits in NegSrc up to bit L;
3048 // * finds nL, the last bit set in NegSrc;
3049 // * clears all the upper bits in Src up to bit nL;
3050 //
3051 // if L == nL + 1, then a unitary 1 was found in Src. Otherwise, a sequence
3052 // of 1s starting at L, and ending at nL + 1, was found.
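    // For example, Src == 12 (0b1100): L == 3 and nL == 1, so addOperations(3, 2)
    // records (Add, 4) and (Sub, 2), i.e. (1 << 4) - (1 << 2) == 12, and the loop
    // then terminates with Src == 0.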
3053 const uint32_t SrcLastBitSet = llvm::findLastSet(Src);
3054 const uint32_t NegSrcClearMask =
3055 (SrcLastBitSet == 0) ? 0
3056 : (0xFFFFFFFFu) >> (SrcSizeBits - SrcLastBitSet);
3057 NegSrc &= NegSrcClearMask;
3058 if (NegSrc == 0) {
3059 if (addOperations(SrcLastBitSet, 0, NumOperations, Operations)) {
3060 return true;
3061 }
3062 return false;
3063 }
3064 const uint32_t NegSrcLastBitSet = llvm::findLastSet(NegSrc);
3065 assert(NegSrcLastBitSet < SrcLastBitSet);
3066 const uint32_t SrcClearMask =
3067 (NegSrcLastBitSet == 0)
3068 ? 0
3069 : (0xFFFFFFFFu) >> (SrcSizeBits - NegSrcLastBitSet);
3070 Src &= SrcClearMask;
3071 if (!addOperations(SrcLastBitSet, NegSrcLastBitSet + 1, NumOperations,
3072 Operations)) {
3073 return false;
3074 }
3075 }
3076
3077 return Src == 0;
3078 }
3079 } // end of namespace StrengthReduction
3080 } // end of anonymous namespace
3081
3082 void TargetARM32::lowerArithmetic(const InstArithmetic *Instr) {
3083 Variable *Dest = Instr->getDest();
3084
3085 if (Dest->isRematerializable()) {
3086 Context.insert<InstFakeDef>(Dest);
3087 return;
3088 }
3089
3090 Type DestTy = Dest->getType();
3091 if (DestTy == IceType_i1) {
3092 lowerInt1Arithmetic(Instr);
3093 return;
3094 }
3095
3096 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3097 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
3098 if (DestTy == IceType_i64) {
3099 lowerInt64Arithmetic(Instr->getOp(), Instr->getDest(), Src0, Src1);
3100 return;
3101 }
3102
3103 if (isVectorType(DestTy)) {
3104 switch (Instr->getOp()) {
3105 default:
3106 UnimplementedLoweringError(this, Instr);
3107 return;
3108 // Explicitly allow vector instructions we have implemented/enabled.
3109 case InstArithmetic::Add:
3110 case InstArithmetic::And:
3111 case InstArithmetic::Ashr:
3112 case InstArithmetic::Fadd:
3113 case InstArithmetic::Fmul:
3114 case InstArithmetic::Fsub:
3115 case InstArithmetic::Lshr:
3116 case InstArithmetic::Mul:
3117 case InstArithmetic::Or:
3118 case InstArithmetic::Shl:
3119 case InstArithmetic::Sub:
3120 case InstArithmetic::Xor:
3121 break;
3122 }
3123 }
3124
3125 Variable *T = makeReg(DestTy);
3126
3127 // * Handle div/rem separately. They require a non-legalized Src1 to inspect
3128 // whether or not Src1 is a non-zero constant. Once legalized it is more
3129 // difficult to determine (constant may be moved to a register).
3130 // * Handle floating point arithmetic separately: they require Src1 to be
3131 // legalized to a register.
3132 switch (Instr->getOp()) {
3133 default:
3134 break;
3135 case InstArithmetic::Udiv: {
3136 constexpr bool NotRemainder = false;
3137 Variable *Src0R = legalizeToReg(Src0);
3138 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3139 NotRemainder);
3140 return;
3141 }
3142 case InstArithmetic::Sdiv: {
3143 constexpr bool NotRemainder = false;
3144 Variable *Src0R = legalizeToReg(Src0);
3145 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3146 NotRemainder);
3147 return;
3148 }
3149 case InstArithmetic::Urem: {
3150 constexpr bool IsRemainder = true;
3151 Variable *Src0R = legalizeToReg(Src0);
3152 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_uxt, &TargetARM32::_udiv,
3153 IsRemainder);
3154 return;
3155 }
3156 case InstArithmetic::Srem: {
3157 constexpr bool IsRemainder = true;
3158 Variable *Src0R = legalizeToReg(Src0);
3159 lowerIDivRem(Dest, T, Src0R, Src1, &TargetARM32::_sxt, &TargetARM32::_sdiv,
3160 IsRemainder);
3161 return;
3162 }
3163 case InstArithmetic::Frem: {
3164 if (!isScalarFloatingType(DestTy)) {
3165 llvm::report_fatal_error("Unexpected type when lowering frem.");
3166 }
3167 llvm::report_fatal_error("Frem should have already been lowered.");
3168 }
3169 case InstArithmetic::Fadd: {
3170 Variable *Src0R = legalizeToReg(Src0);
3171 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3172 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3173 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3174 _vmla(Src0R, Src1R, Src2R);
3175 _mov(Dest, Src0R);
3176 return;
3177 }
3178
3179 Variable *Src1R = legalizeToReg(Src1);
3180 _vadd(T, Src0R, Src1R);
3181 _mov(Dest, T);
3182 return;
3183 }
3184 case InstArithmetic::Fsub: {
3185 Variable *Src0R = legalizeToReg(Src0);
3186 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3187 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3188 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3189 _vmls(Src0R, Src1R, Src2R);
3190 _mov(Dest, Src0R);
3191 return;
3192 }
3193 Variable *Src1R = legalizeToReg(Src1);
3194 _vsub(T, Src0R, Src1R);
3195 _mov(Dest, T);
3196 return;
3197 }
3198 case InstArithmetic::Fmul: {
3199 Variable *Src0R = legalizeToReg(Src0);
3200 Variable *Src1R = legalizeToReg(Src1);
3201 _vmul(T, Src0R, Src1R);
3202 _mov(Dest, T);
3203 return;
3204 }
3205 case InstArithmetic::Fdiv: {
3206 Variable *Src0R = legalizeToReg(Src0);
3207 Variable *Src1R = legalizeToReg(Src1);
3208 _vdiv(T, Src0R, Src1R);
3209 _mov(Dest, T);
3210 return;
3211 }
3212 }
3213
3214 // Handle everything else here.
3215 Int32Operands Srcs(Src0, Src1);
3216 switch (Instr->getOp()) {
3217 case InstArithmetic::_num:
3218 llvm::report_fatal_error("Unknown arithmetic operator");
3219 return;
3220 case InstArithmetic::Add: {
3221 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3222 assert(!isVectorType(DestTy));
3223 Variable *Src0R = legalizeToReg(Src0);
3224 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3225 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3226 _mla(T, Src1R, Src2R, Src0R);
3227 _mov(Dest, T);
3228 return;
3229 }
3230
3231 if (Srcs.hasConstOperand()) {
3232 if (!Srcs.immediateIsFlexEncodable() &&
3233 Srcs.negatedImmediateIsFlexEncodable()) {
3234 assert(!isVectorType(DestTy));
3235 Variable *Src0R = Srcs.src0R(this);
3236 Operand *Src1F = Srcs.negatedSrc1F(this);
3237 if (!Srcs.swappedOperands()) {
3238 _sub(T, Src0R, Src1F);
3239 } else {
3240 _rsb(T, Src0R, Src1F);
3241 }
3242 _mov(Dest, T);
3243 return;
3244 }
3245 }
3246 Variable *Src0R = Srcs.src0R(this);
3247 if (isVectorType(DestTy)) {
3248 Variable *Src1R = legalizeToReg(Src1);
3249 _vadd(T, Src0R, Src1R);
3250 } else {
3251 Operand *Src1RF = Srcs.src1RF(this);
3252 _add(T, Src0R, Src1RF);
3253 }
3254 _mov(Dest, T);
3255 return;
3256 }
3257 case InstArithmetic::And: {
3258 if (Srcs.hasConstOperand()) {
3259 if (!Srcs.immediateIsFlexEncodable() &&
3260 Srcs.invertedImmediateIsFlexEncodable()) {
3261 Variable *Src0R = Srcs.src0R(this);
3262 Operand *Src1F = Srcs.invertedSrc1F(this);
3263 _bic(T, Src0R, Src1F);
3264 _mov(Dest, T);
3265 return;
3266 }
3267 }
3268 assert(isIntegerType(DestTy));
3269 Variable *Src0R = Srcs.src0R(this);
3270 if (isVectorType(DestTy)) {
3271 Variable *Src1R = legalizeToReg(Src1);
3272 _vand(T, Src0R, Src1R);
3273 } else {
3274 Operand *Src1RF = Srcs.src1RF(this);
3275 _and(T, Src0R, Src1RF);
3276 }
3277 _mov(Dest, T);
3278 return;
3279 }
3280 case InstArithmetic::Or: {
3281 Variable *Src0R = Srcs.src0R(this);
3282 assert(isIntegerType(DestTy));
3283 if (isVectorType(DestTy)) {
3284 Variable *Src1R = legalizeToReg(Src1);
3285 _vorr(T, Src0R, Src1R);
3286 } else {
3287 Operand *Src1RF = Srcs.src1RF(this);
3288 _orr(T, Src0R, Src1RF);
3289 }
3290 _mov(Dest, T);
3291 return;
3292 }
3293 case InstArithmetic::Xor: {
3294 Variable *Src0R = Srcs.src0R(this);
3295 assert(isIntegerType(DestTy));
3296 if (isVectorType(DestTy)) {
3297 Variable *Src1R = legalizeToReg(Src1);
3298 _veor(T, Src0R, Src1R);
3299 } else {
3300 Operand *Src1RF = Srcs.src1RF(this);
3301 _eor(T, Src0R, Src1RF);
3302 }
3303 _mov(Dest, T);
3304 return;
3305 }
3306 case InstArithmetic::Sub: {
3307 if (const Inst *Src1Producer = Computations.getProducerOf(Src1)) {
3308 assert(!isVectorType(DestTy));
3309 Variable *Src0R = legalizeToReg(Src0);
3310 Variable *Src1R = legalizeToReg(Src1Producer->getSrc(0));
3311 Variable *Src2R = legalizeToReg(Src1Producer->getSrc(1));
3312 _mls(T, Src1R, Src2R, Src0R);
3313 _mov(Dest, T);
3314 return;
3315 }
3316
3317 if (Srcs.hasConstOperand()) {
3318 assert(!isVectorType(DestTy));
3319 if (Srcs.immediateIsFlexEncodable()) {
3320 Variable *Src0R = Srcs.src0R(this);
3321 Operand *Src1RF = Srcs.src1RF(this);
3322 if (Srcs.swappedOperands()) {
3323 _rsb(T, Src0R, Src1RF);
3324 } else {
3325 _sub(T, Src0R, Src1RF);
3326 }
3327 _mov(Dest, T);
3328 return;
3329 }
3330 if (!Srcs.swappedOperands() && Srcs.negatedImmediateIsFlexEncodable()) {
3331 Variable *Src0R = Srcs.src0R(this);
3332 Operand *Src1F = Srcs.negatedSrc1F(this);
3333 _add(T, Src0R, Src1F);
3334 _mov(Dest, T);
3335 return;
3336 }
3337 }
3338 Variable *Src0R = Srcs.unswappedSrc0R(this);
3339 Variable *Src1R = Srcs.unswappedSrc1R(this);
3340 if (isVectorType(DestTy)) {
3341 _vsub(T, Src0R, Src1R);
3342 } else {
3343 _sub(T, Src0R, Src1R);
3344 }
3345 _mov(Dest, T);
3346 return;
3347 }
3348 case InstArithmetic::Mul: {
3349 const bool OptM1 = Func->getOptLevel() == Opt_m1;
3350 if (!OptM1 && Srcs.hasConstOperand()) {
3351 constexpr std::size_t MaxShifts = 4;
3352 std::array<StrengthReduction::AggregationElement, MaxShifts> Shifts;
3353 SizeT NumOperations;
3354 int32_t Const = Srcs.getConstantValue();
3355 const bool Invert = Const < 0;
3356 const bool MultiplyByZero = Const == 0;
3357 Operand *_0 =
3358 legalize(Ctx->getConstantZero(DestTy), Legal_Reg | Legal_Flex);
3359
3360 if (MultiplyByZero) {
3361 _mov(T, _0);
3362 _mov(Dest, T);
3363 return;
3364 }
3365
3366 if (Invert) {
3367 Const = -Const;
3368 }
3369
3370 if (StrengthReduction::tryToOptimize(Const, &NumOperations, &Shifts)) {
3371 assert(NumOperations >= 1);
3372 Variable *Src0R = Srcs.src0R(this);
3373 int32_t Start;
3374 int32_t End;
3375 if (NumOperations == 1 || Shifts[NumOperations - 1].shAmt() != 0) {
3376 // Multiplication by a power of 2 (NumOperations == 1); or
3377 // Multiplication by an even number that is not a power of 2.
3378 Start = 1;
3379 End = NumOperations;
3380 assert(Shifts[0].aggregateWithAdd());
3381 _lsl(T, Src0R, shAmtImm(Shifts[0].shAmt()));
3382 } else {
3383 // Multiplication by an odd number. Put the free barrel shifter to a
3384 // good use.
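// For example, for a multiply by 7 the decomposition is {Add<<3, Sub<<0}, so
// the first instruction emitted is "rsb T, Src0R, Src0R, lsl #3", i.e.,
// T = (x << 3) - x.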
3385 Start = 0;
3386 End = NumOperations - 2;
3387 const StrengthReduction::AggregationElement &Last =
3388 Shifts[NumOperations - 1];
3389 const StrengthReduction::AggregationElement &SecondToLast =
3390 Shifts[NumOperations - 2];
3391 if (!Last.aggregateWithAdd()) {
3392 assert(SecondToLast.aggregateWithAdd());
3393 _rsb(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3394 } else if (!SecondToLast.aggregateWithAdd()) {
3395 assert(Last.aggregateWithAdd());
3396 _sub(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3397 } else {
3398 _add(T, Src0R, SecondToLast.createShiftedOperand(Func, Src0R));
3399 }
3400 }
3401
3402 // Odd numbers : S E I I
3403 // +---+---+---+---+---+---+ ... +---+---+---+---+
3404 // Shifts = | | | | | | | ... | | | | |
3405 // +---+---+---+---+---+---+ ... +---+---+---+---+
3406 // Even numbers: I S E
3407 //
3408 // S: Start; E: End; I: Init
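// E.g., a multiply by 20 (0b10100) decomposes into {Add<<4, Add<<2}: the lsl
// above initializes T = x << 4, and this loop then adds x << 2 to it.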
3409 for (int32_t I = Start; I < End; ++I) {
3410 const StrengthReduction::AggregationElement &Current = Shifts[I];
3411 Operand *SrcF = Current.createShiftedOperand(Func, Src0R);
3412 if (Current.aggregateWithAdd()) {
3413 _add(T, T, SrcF);
3414 } else {
3415 _sub(T, T, SrcF);
3416 }
3417 }
3418
3419 if (Invert) {
3420 // T = 0 - T.
3421 _rsb(T, T, _0);
3422 }
3423
3424 _mov(Dest, T);
3425 return;
3426 }
3427 }
3428 Variable *Src0R = Srcs.unswappedSrc0R(this);
3429 Variable *Src1R = Srcs.unswappedSrc1R(this);
3430 if (isVectorType(DestTy)) {
3431 _vmul(T, Src0R, Src1R);
3432 } else {
3433 _mul(T, Src0R, Src1R);
3434 }
3435 _mov(Dest, T);
3436 return;
3437 }
3438 case InstArithmetic::Shl: {
3439 Variable *Src0R = Srcs.unswappedSrc0R(this);
3440 if (!isVectorType(T->getType())) {
3441 if (Srcs.isSrc1ImmediateZero()) {
3442 _mov(T, Src0R);
3443 } else {
3444 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3445 _lsl(T, Src0R, Src1R);
3446 }
3447 } else {
3448 if (Srcs.hasConstOperand()) {
3449 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3450 _vshl(T, Src0R, ShAmt);
3451 } else {
3452 auto *Src1R = Srcs.unswappedSrc1R(this);
3453 _vshl(T, Src0R, Src1R)->setSignType(InstARM32::FS_Unsigned);
3454 }
3455 }
3456 _mov(Dest, T);
3457 return;
3458 }
3459 case InstArithmetic::Lshr: {
3460 Variable *Src0R = Srcs.unswappedSrc0R(this);
3461 if (!isVectorType(T->getType())) {
3462 if (DestTy != IceType_i32) {
3463 _uxt(Src0R, Src0R);
3464 }
3465 if (Srcs.isSrc1ImmediateZero()) {
3466 _mov(T, Src0R);
3467 } else {
3468 Operand *Src1R = Srcs.unswappedSrc1RShAmtImm(this);
3469 _lsr(T, Src0R, Src1R);
3470 }
3471 } else {
3472 if (Srcs.hasConstOperand()) {
3473 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3474 _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Unsigned);
3475 } else {
3476 auto *Src1R = Srcs.unswappedSrc1R(this);
3477 auto *Src1RNeg = makeReg(Src1R->getType());
3478 _vneg(Src1RNeg, Src1R);
3479 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Unsigned);
3480 }
3481 }
3482 _mov(Dest, T);
3483 return;
3484 }
3485 case InstArithmetic::Ashr: {
3486 Variable *Src0R = Srcs.unswappedSrc0R(this);
3487 if (!isVectorType(T->getType())) {
3488 if (DestTy != IceType_i32) {
3489 _sxt(Src0R, Src0R);
3490 }
3491 if (Srcs.isSrc1ImmediateZero()) {
3492 _mov(T, Src0R);
3493 } else {
3494 _asr(T, Src0R, Srcs.unswappedSrc1RShAmtImm(this));
3495 }
3496 } else {
3497 if (Srcs.hasConstOperand()) {
3498 ConstantInteger32 *ShAmt = llvm::cast<ConstantInteger32>(Srcs.src1());
3499 _vshr(T, Src0R, ShAmt)->setSignType(InstARM32::FS_Signed);
3500 } else {
3501 auto *Src1R = Srcs.unswappedSrc1R(this);
3502 auto *Src1RNeg = makeReg(Src1R->getType());
3503 _vneg(Src1RNeg, Src1R);
3504 _vshl(T, Src0R, Src1RNeg)->setSignType(InstARM32::FS_Signed);
3505 }
3506 }
3507 _mov(Dest, T);
3508 return;
3509 }
3510 case InstArithmetic::Udiv:
3511 case InstArithmetic::Sdiv:
3512 case InstArithmetic::Urem:
3513 case InstArithmetic::Srem:
3514 llvm::report_fatal_error(
3515 "Integer div/rem should have been handled earlier.");
3516 return;
3517 case InstArithmetic::Fadd:
3518 case InstArithmetic::Fsub:
3519 case InstArithmetic::Fmul:
3520 case InstArithmetic::Fdiv:
3521 case InstArithmetic::Frem:
3522 llvm::report_fatal_error(
3523 "Floating point arith should have been handled earlier.");
3524 return;
3525 }
3526 }
3527
3528 void TargetARM32::lowerAssign(const InstAssign *Instr) {
3529 Variable *Dest = Instr->getDest();
3530
3531 if (Dest->isRematerializable()) {
3532 Context.insert<InstFakeDef>(Dest);
3533 return;
3534 }
3535
3536 Operand *Src0 = Instr->getSrc(0);
3537 assert(Dest->getType() == Src0->getType());
3538 if (Dest->getType() == IceType_i64) {
3539 Src0 = legalizeUndef(Src0);
3540
3541 Variable *T_Lo = makeReg(IceType_i32);
3542 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3543 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg | Legal_Flex);
3544 _mov(T_Lo, Src0Lo);
3545 _mov(DestLo, T_Lo);
3546
3547 Variable *T_Hi = makeReg(IceType_i32);
3548 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3549 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg | Legal_Flex);
3550 _mov(T_Hi, Src0Hi);
3551 _mov(DestHi, T_Hi);
3552
3553 return;
3554 }
3555
3556 Operand *NewSrc;
3557 if (Dest->hasReg()) {
3558 // If Dest already has a physical register, then legalize the Src operand
3559 // into a Variable with the same register assignment. This especially
3560 // helps allow the use of Flex operands.
3561 NewSrc = legalize(Src0, Legal_Reg | Legal_Flex, Dest->getRegNum());
3562 } else {
3563 // Dest could be a stack operand. Since we could potentially need to do a
3564 // Store (and store can only have Register operands), legalize this to a
3565 // register.
3566 NewSrc = legalize(Src0, Legal_Reg);
3567 }
3568
3569 if (isVectorType(Dest->getType()) || isScalarFloatingType(Dest->getType())) {
3570 NewSrc = legalize(NewSrc, Legal_Reg | Legal_Mem);
3571 }
3572 _mov(Dest, NewSrc);
3573 }
3574
3575 TargetARM32::ShortCircuitCondAndLabel TargetARM32::lowerInt1ForBranch(
3576 Operand *Boolean, const LowerInt1BranchTarget &TargetTrue,
3577 const LowerInt1BranchTarget &TargetFalse, uint32_t ShortCircuitable) {
3578 InstARM32Label *NewShortCircuitLabel = nullptr;
3579 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
3580
3581 const Inst *Producer = Computations.getProducerOf(Boolean);
3582
3583 if (Producer == nullptr) {
3584 // No producer, no problem: just emit code to perform (Boolean & 1) and
3585 // set the flags register. The branch should be taken if the resulting flags
3586 // indicate a non-zero result.
3587 _tst(legalizeToReg(Boolean), _1);
3588 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3589 }
3590
3591 switch (Producer->getKind()) {
3592 default:
3593 llvm::report_fatal_error("Unexpected producer.");
3594 case Inst::Icmp: {
3595 return ShortCircuitCondAndLabel(
3596 lowerIcmpCond(llvm::cast<InstIcmp>(Producer)));
3597 } break;
3598 case Inst::Fcmp: {
3599 return ShortCircuitCondAndLabel(
3600 lowerFcmpCond(llvm::cast<InstFcmp>(Producer)));
3601 } break;
3602 case Inst::Cast: {
3603 const auto *CastProducer = llvm::cast<InstCast>(Producer);
3604 assert(CastProducer->getCastKind() == InstCast::Trunc);
3605 Operand *Src = CastProducer->getSrc(0);
3606 if (Src->getType() == IceType_i64)
3607 Src = loOperand(Src);
3608 _tst(legalizeToReg(Src), _1);
3609 return ShortCircuitCondAndLabel(CondWhenTrue(CondARM32::NE));
3610 } break;
3611 case Inst::Arithmetic: {
3612 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
3613 switch (ArithProducer->getOp()) {
3614 default:
3615 llvm::report_fatal_error("Unhandled Arithmetic Producer.");
3616 case InstArithmetic::And: {
3617 if (!(ShortCircuitable & SC_And)) {
3618 NewShortCircuitLabel = InstARM32Label::create(Func, this);
3619 }
3620
3621 LowerInt1BranchTarget NewTarget =
3622 TargetFalse.createForLabelOrDuplicate(NewShortCircuitLabel);
3623
3624 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3625 Producer->getSrc(0), TargetTrue, NewTarget, SC_And);
3626 const CondWhenTrue &Cond = CondAndLabel.Cond;
3627
3628 _br_short_circuit(NewTarget, Cond.invert());
3629
3630 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3631 if (ShortCircuitLabel != nullptr)
3632 Context.insert(ShortCircuitLabel);
3633
3634 return ShortCircuitCondAndLabel(
3635 lowerInt1ForBranch(Producer->getSrc(1), TargetTrue, NewTarget, SC_All)
3636 .assertNoLabelAndReturnCond(),
3637 NewShortCircuitLabel);
3638 } break;
3639 case InstArithmetic::Or: {
3640 if (!(ShortCircuitable & SC_Or)) {
3641 NewShortCircuitLabel = InstARM32Label::create(Func, this);
3642 }
3643
3644 LowerInt1BranchTarget NewTarget =
3645 TargetTrue.createForLabelOrDuplicate(NewShortCircuitLabel);
3646
3647 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3648 Producer->getSrc(0), NewTarget, TargetFalse, SC_Or);
3649 const CondWhenTrue &Cond = CondAndLabel.Cond;
3650
3651 _br_short_circuit(NewTarget, Cond);
3652
3653 InstARM32Label *const ShortCircuitLabel = CondAndLabel.ShortCircuitTarget;
3654 if (ShortCircuitLabel != nullptr)
3655 Context.insert(ShortCircuitLabel);
3656
3657 return ShortCircuitCondAndLabel(lowerInt1ForBranch(Producer->getSrc(1),
3658 NewTarget, TargetFalse,
3659 SC_All)
3660 .assertNoLabelAndReturnCond(),
3661 NewShortCircuitLabel);
3662 } break;
3663 }
3664 }
3665 }
3666 }
3667
3668 void TargetARM32::lowerBr(const InstBr *Instr) {
3669 if (Instr->isUnconditional()) {
3670 _br(Instr->getTargetUnconditional());
3671 return;
3672 }
3673
3674 CfgNode *TargetTrue = Instr->getTargetTrue();
3675 CfgNode *TargetFalse = Instr->getTargetFalse();
3676 ShortCircuitCondAndLabel CondAndLabel = lowerInt1ForBranch(
3677 Instr->getCondition(), LowerInt1BranchTarget(TargetTrue),
3678 LowerInt1BranchTarget(TargetFalse), SC_All);
3679 assert(CondAndLabel.ShortCircuitTarget == nullptr);
3680
3681 const CondWhenTrue &Cond = CondAndLabel.Cond;
3682 if (Cond.WhenTrue1 != CondARM32::kNone) {
3683 assert(Cond.WhenTrue0 != CondARM32::AL);
3684 _br(TargetTrue, Cond.WhenTrue1);
3685 }
3686
3687 switch (Cond.WhenTrue0) {
3688 default:
3689 _br(TargetTrue, TargetFalse, Cond.WhenTrue0);
3690 break;
3691 case CondARM32::kNone:
3692 _br(TargetFalse);
3693 break;
3694 case CondARM32::AL:
3695 _br(TargetTrue);
3696 break;
3697 }
3698 }
3699
3700 void TargetARM32::lowerCall(const InstCall *Instr) {
3701 Operand *CallTarget = Instr->getCallTarget();
3702 if (Instr->isTargetHelperCall()) {
3703 auto TargetHelperPreamble = ARM32HelpersPreamble.find(CallTarget);
3704 if (TargetHelperPreamble != ARM32HelpersPreamble.end()) {
3705 (this->*TargetHelperPreamble->second)(Instr);
3706 }
3707 }
3708 MaybeLeafFunc = false;
3709 NeedsStackAlignment = true;
3710
3711 // Assign arguments to registers and stack. Also reserve stack.
3712 TargetARM32::CallingConv CC;
3713 // Pair of Arg Operand -> GPR number assignments.
3714 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumGPRArgs> GPRArgs;
3715 llvm::SmallVector<std::pair<Operand *, RegNumT>, NumFP32Args> FPArgs;
3716 // Pair of Arg Operand -> stack offset.
3717 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3718 size_t ParameterAreaSizeBytes = 0;
3719
3720 // Classify each argument operand according to the location where the
3721 // argument is passed.
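// (Roughly, under the ARM AAPCS this means the first few integer arguments go
// in r0-r3, with an i64 taking an even/odd register pair, floating point and
// vector arguments go in VFP/NEON registers, and everything else is passed on
// the stack.)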
3722 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3723 Operand *Arg = legalizeUndef(Instr->getArg(i));
3724 const Type Ty = Arg->getType();
3725 bool InReg = false;
3726 RegNumT Reg;
3727 if (isScalarIntegerType(Ty)) {
3728 InReg = CC.argInGPR(Ty, &Reg);
3729 } else {
3730 InReg = CC.argInVFP(Ty, &Reg);
3731 }
3732
3733 if (!InReg) {
3734 ParameterAreaSizeBytes =
3735 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3736 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3737 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3738 continue;
3739 }
3740
3741 if (Ty == IceType_i64) {
3742 Operand *Lo = loOperand(Arg);
3743 Operand *Hi = hiOperand(Arg);
3744 GPRArgs.push_back(std::make_pair(
3745 Lo, RegNumT::fixme(RegARM32::getI64PairFirstGPRNum(Reg))));
3746 GPRArgs.push_back(std::make_pair(
3747 Hi, RegNumT::fixme(RegARM32::getI64PairSecondGPRNum(Reg))));
3748 } else if (isScalarIntegerType(Ty)) {
3749 GPRArgs.push_back(std::make_pair(Arg, Reg));
3750 } else {
3751 FPArgs.push_back(std::make_pair(Arg, Reg));
3752 }
3753 }
3754
3755 // Adjust the parameter area so that the stack is aligned. It is assumed that
3756 // the stack is already aligned at the start of the calling sequence.
3757 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3758
3759 if (ParameterAreaSizeBytes > MaxOutArgsSizeBytes) {
3760 llvm::report_fatal_error("MaxOutArgsSizeBytes is not really a max.");
3761 }
3762
3763 // Copy arguments that are passed on the stack to the appropriate stack
3764 // locations.
3765 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
3766 for (auto &StackArg : StackArgs) {
3767 ConstantInteger32 *Loc =
3768 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3769 Type Ty = StackArg.first->getType();
3770 OperandARM32Mem *Addr;
3771 constexpr bool SignExt = false;
3772 if (OperandARM32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3773 Addr = OperandARM32Mem::create(Func, Ty, SP, Loc);
3774 } else {
3775 Variable *NewBase = Func->makeVariable(SP->getType());
3776 lowerArithmetic(
3777 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3778 Addr = formMemoryOperand(NewBase, Ty);
3779 }
3780 lowerStore(InstStore::create(Func, StackArg.first, Addr));
3781 }
3782
3783 // Generate the call instruction. Assign its result to a temporary with high
3784 // register allocation weight.
3785 Variable *Dest = Instr->getDest();
3786 // ReturnReg doubles as ReturnRegLo as necessary.
3787 Variable *ReturnReg = nullptr;
3788 Variable *ReturnRegHi = nullptr;
3789 if (Dest) {
3790 switch (Dest->getType()) {
3791 case IceType_NUM:
3792 llvm::report_fatal_error("Invalid Call dest type");
3793 break;
3794 case IceType_void:
3795 break;
3796 case IceType_i1:
3797 assert(Computations.getProducerOf(Dest) == nullptr);
3798 // Fall-through intended.
3799 case IceType_i8:
3800 case IceType_i16:
3801 case IceType_i32:
3802 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_r0);
3803 break;
3804 case IceType_i64:
3805 ReturnReg = makeReg(IceType_i32, RegARM32::Reg_r0);
3806 ReturnRegHi = makeReg(IceType_i32, RegARM32::Reg_r1);
3807 break;
3808 case IceType_f32:
3809 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_s0);
3810 break;
3811 case IceType_f64:
3812 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_d0);
3813 break;
3814 case IceType_v4i1:
3815 case IceType_v8i1:
3816 case IceType_v16i1:
3817 case IceType_v16i8:
3818 case IceType_v8i16:
3819 case IceType_v4i32:
3820 case IceType_v4f32:
3821 ReturnReg = makeReg(Dest->getType(), RegARM32::Reg_q0);
3822 break;
3823 }
3824 }
3825
3826 // Allow ConstantRelocatable to be left alone as a direct call, but force
3827 // other constants like ConstantInteger32 to be in a register and make it an
3828 // indirect call.
3829 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3830 CallTarget = legalize(CallTarget, Legal_Reg);
3831 }
3832
3833 // Copy arguments to be passed in registers to the appropriate registers.
3834 CfgVector<Variable *> RegArgs;
3835 for (auto &FPArg : FPArgs) {
3836 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3837 }
3838 for (auto &GPRArg : GPRArgs) {
3839 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3840 }
3841
3842 // Generate a FakeUse of register arguments so that they do not get dead code
3843 // eliminated as a result of the FakeKill of scratch registers after the call.
3844 // These fake-uses need to be placed here to prevent the argument registers
3845 // from being used during the legalizeToReg() calls above.
3846 for (auto *RegArg : RegArgs) {
3847 Context.insert<InstFakeUse>(RegArg);
3848 }
3849
3850 InstARM32Call *NewCall =
3851 Sandboxer(this, InstBundleLock::Opt_AlignToEnd).bl(ReturnReg, CallTarget);
3852
3853 if (ReturnRegHi)
3854 Context.insert<InstFakeDef>(ReturnRegHi);
3855
3856 // Insert a register-kill pseudo instruction.
3857 Context.insert<InstFakeKill>(NewCall);
3858
3859 // Generate a FakeUse to keep the call live if necessary.
3860 if (Instr->hasSideEffects() && ReturnReg) {
3861 Context.insert<InstFakeUse>(ReturnReg);
3862 }
3863
3864 if (Dest != nullptr) {
3865 // Assign the result of the call to Dest.
3866 if (ReturnReg != nullptr) {
3867 if (ReturnRegHi) {
3868 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3869 Variable *DestLo = Dest64On32->getLo();
3870 Variable *DestHi = Dest64On32->getHi();
3871 _mov(DestLo, ReturnReg);
3872 _mov(DestHi, ReturnRegHi);
3873 } else {
3874 if (isFloatingType(Dest->getType()) || isVectorType(Dest->getType())) {
3875 _mov(Dest, ReturnReg);
3876 } else {
3877 assert(isIntegerType(Dest->getType()) &&
3878 typeWidthInBytes(Dest->getType()) <= 4);
3879 _mov(Dest, ReturnReg);
3880 }
3881 }
3882 }
3883 }
3884
3885 if (Instr->isTargetHelperCall()) {
3886 auto TargetHelpersPostamble = ARM32HelpersPostamble.find(CallTarget);
3887 if (TargetHelpersPostamble != ARM32HelpersPostamble.end()) {
3888 (this->*TargetHelpersPostamble->second)(Instr);
3889 }
3890 }
3891 }
3892
3893 namespace {
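// A bitcast between i64 and f64 is done through a Variable64On32 temporary:
// the 64-bit variable itself is kept out of the register file, but its lo/hi
// halves must be in core registers so that a single vmov between a D register
// and a core register pair can be emitted.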
3894 void configureBitcastTemporary(Variable64On32 *Var) {
3895 Var->setMustNotHaveReg();
3896 Var->getHi()->setMustHaveReg();
3897 Var->getLo()->setMustHaveReg();
3898 }
3899 } // end of anonymous namespace
3900
3901 void TargetARM32::lowerCast(const InstCast *Instr) {
3902 InstCast::OpKind CastKind = Instr->getCastKind();
3903 Variable *Dest = Instr->getDest();
3904 const Type DestTy = Dest->getType();
3905 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3906 switch (CastKind) {
3907 default:
3908 Func->setError("Cast type not supported");
3909 return;
3910 case InstCast::Sext: {
3911 if (isVectorType(DestTy)) {
3912 Variable *T0 = makeReg(DestTy);
3913 Variable *T1 = makeReg(DestTy);
3914 ConstantInteger32 *ShAmt = nullptr;
3915 switch (DestTy) {
3916 default:
3917 llvm::report_fatal_error("Unexpected type in vector sext.");
3918 case IceType_v16i8:
3919 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(7));
3920 break;
3921 case IceType_v8i16:
3922 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(15));
3923 break;
3924 case IceType_v4i32:
3925 ShAmt = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(31));
3926 break;
3927 }
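// Shifting each lane left and then arithmetic-right by (element width - 1)
// replicates the boolean bit across the whole lane, turning 0/1 into 0/-1.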
3928 auto *Src0R = legalizeToReg(Src0);
3929 _vshl(T0, Src0R, ShAmt);
3930 _vshr(T1, T0, ShAmt)->setSignType(InstARM32::FS_Signed);
3931 _mov(Dest, T1);
3932 } else if (DestTy == IceType_i64) {
3933 // t1=sxtb src; t2= mov t1 asr #31; dst.lo=t1; dst.hi=t2
3934 Constant *ShiftAmt = Ctx->getConstantInt32(31);
3935 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3936 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3937 Variable *T_Lo = makeReg(DestLo->getType());
3938 if (Src0->getType() == IceType_i32) {
3939 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
3940 _mov(T_Lo, Src0RF);
3941 } else if (Src0->getType() != IceType_i1) {
3942 Variable *Src0R = legalizeToReg(Src0);
3943 _sxt(T_Lo, Src0R);
3944 } else {
3945 Operand *_0 = Ctx->getConstantZero(IceType_i32);
3946 Operand *_m1 = Ctx->getConstantInt32(-1);
3947 lowerInt1ForSelect(T_Lo, Src0, _m1, _0);
3948 }
3949 _mov(DestLo, T_Lo);
3950 Variable *T_Hi = makeReg(DestHi->getType());
3951 if (Src0->getType() != IceType_i1) {
3952 _mov(T_Hi, OperandARM32FlexReg::create(Func, IceType_i32, T_Lo,
3953 OperandARM32::ASR, ShiftAmt));
3954 } else {
3955 // For i1, T_Lo already holds 0 or -1 (from the select above), so the high word is just a copy of it.
3956 _mov(T_Hi, T_Lo);
3957 }
3958 _mov(DestHi, T_Hi);
3959 } else if (Src0->getType() != IceType_i1) {
3960 // t1 = sxt src; dst = t1
3961 Variable *Src0R = legalizeToReg(Src0);
3962 Variable *T = makeReg(DestTy);
3963 _sxt(T, Src0R);
3964 _mov(Dest, T);
3965 } else {
3966 Constant *_0 = Ctx->getConstantZero(IceType_i32);
3967 Operand *_m1 = Ctx->getConstantInt(DestTy, -1);
3968 Variable *T = makeReg(DestTy);
3969 lowerInt1ForSelect(T, Src0, _m1, _0);
3970 _mov(Dest, T);
3971 }
3972 break;
3973 }
3974 case InstCast::Zext: {
3975 if (isVectorType(DestTy)) {
3976 auto *Mask = makeReg(DestTy);
3977 auto *_1 = Ctx->getConstantInt32(1);
3978 auto *T = makeReg(DestTy);
3979 auto *Src0R = legalizeToReg(Src0);
3980 _mov(Mask, _1);
3981 _vand(T, Src0R, Mask);
3982 _mov(Dest, T);
3983 } else if (DestTy == IceType_i64) {
3984 // t1=uxtb src; dst.lo=t1; dst.hi=0
3985 Operand *_0 =
3986 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
3987 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3988 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3989 Variable *T_Lo = makeReg(DestLo->getType());
3990
3991 switch (Src0->getType()) {
3992 default: {
3993 assert(Src0->getType() != IceType_i64);
3994 _uxt(T_Lo, legalizeToReg(Src0));
3995 } break;
3996 case IceType_i32: {
3997 _mov(T_Lo, legalize(Src0, Legal_Reg | Legal_Flex));
3998 } break;
3999 case IceType_i1: {
4000 SafeBoolChain Safe = lowerInt1(T_Lo, Src0);
4001 if (Safe == SBC_No) {
4002 Operand *_1 =
4003 legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
4004 _and(T_Lo, T_Lo, _1);
4005 }
4006 } break;
4007 }
4008
4009 _mov(DestLo, T_Lo);
4010
4011 Variable *T_Hi = makeReg(DestLo->getType());
4012 _mov(T_Hi, _0);
4013 _mov(DestHi, T_Hi);
4014 } else if (Src0->getType() == IceType_i1) {
4015 Variable *T = makeReg(DestTy);
4016
4017 SafeBoolChain Safe = lowerInt1(T, Src0);
4018 if (Safe == SBC_No) {
4019 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
4020 _and(T, T, _1);
4021 }
4022
4023 _mov(Dest, T);
4024 } else {
4025 // t1 = uxt src; dst = t1
4026 Variable *Src0R = legalizeToReg(Src0);
4027 Variable *T = makeReg(DestTy);
4028 _uxt(T, Src0R);
4029 _mov(Dest, T);
4030 }
4031 break;
4032 }
4033 case InstCast::Trunc: {
4034 if (isVectorType(DestTy)) {
4035 auto *T = makeReg(DestTy);
4036 auto *Src0R = legalizeToReg(Src0);
4037 _mov(T, Src0R);
4038 _mov(Dest, T);
4039 } else {
4040 if (Src0->getType() == IceType_i64)
4041 Src0 = loOperand(Src0);
4042 Operand *Src0RF = legalize(Src0, Legal_Reg | Legal_Flex);
4043 // t1 = trunc Src0RF; Dest = t1
4044 Variable *T = makeReg(DestTy);
4045 _mov(T, Src0RF);
4046 if (DestTy == IceType_i1)
4047 _and(T, T, Ctx->getConstantInt1(1));
4048 _mov(Dest, T);
4049 }
4050 break;
4051 }
4052 case InstCast::Fptrunc:
4053 case InstCast::Fpext: {
4054 // fptrunc: dest.f32 = fptrunc src0.f64
4055 // fpext: dest.f64 = fpext src0.f32
4056 const bool IsTrunc = CastKind == InstCast::Fptrunc;
4057 assert(!isVectorType(DestTy));
4058 assert(DestTy == (IsTrunc ? IceType_f32 : IceType_f64));
4059 assert(Src0->getType() == (IsTrunc ? IceType_f64 : IceType_f32));
4060 Variable *Src0R = legalizeToReg(Src0);
4061 Variable *T = makeReg(DestTy);
4062 _vcvt(T, Src0R, IsTrunc ? InstARM32Vcvt::D2s : InstARM32Vcvt::S2d);
4063 _mov(Dest, T);
4064 break;
4065 }
4066 case InstCast::Fptosi:
4067 case InstCast::Fptoui: {
4068 const bool DestIsSigned = CastKind == InstCast::Fptosi;
4069 Variable *Src0R = legalizeToReg(Src0);
4070
4071 if (isVectorType(DestTy)) {
4072 assert(typeElementType(Src0->getType()) == IceType_f32);
4073 auto *T = makeReg(DestTy);
4074 _vcvt(T, Src0R,
4075 DestIsSigned ? InstARM32Vcvt::Vs2si : InstARM32Vcvt::Vs2ui);
4076 _mov(Dest, T);
4077 break;
4078 }
4079
4080 const bool Src0IsF32 = isFloat32Asserting32Or64(Src0->getType());
4081 if (llvm::isa<Variable64On32>(Dest)) {
4082 llvm::report_fatal_error("fp-to-i64 should have been pre-lowered.");
4083 }
4084 // fptosi:
4085 // t1.fp = vcvt src0.fp
4086 // t2.i32 = vmov t1.fp
4087 // dest.int = conv t2.i32 @ Truncates the result if needed.
4088 // fptoui:
4089 // t1.fp = vcvt src0.fp
4090 // t2.u32 = vmov t1.fp
4091 // dest.uint = conv t2.u32 @ Truncates the result if needed.
4092 Variable *T_fp = makeReg(IceType_f32);
4093 const InstARM32Vcvt::VcvtVariant Conversion =
4094 Src0IsF32 ? (DestIsSigned ? InstARM32Vcvt::S2si : InstARM32Vcvt::S2ui)
4095 : (DestIsSigned ? InstARM32Vcvt::D2si : InstARM32Vcvt::D2ui);
4096 _vcvt(T_fp, Src0R, Conversion);
4097 Variable *T = makeReg(IceType_i32);
4098 _mov(T, T_fp);
4099 if (DestTy != IceType_i32) {
4100 Variable *T_1 = makeReg(DestTy);
4101 lowerCast(InstCast::create(Func, InstCast::Trunc, T_1, T));
4102 T = T_1;
4103 }
4104 _mov(Dest, T);
4105 break;
4106 }
4107 case InstCast::Sitofp:
4108 case InstCast::Uitofp: {
4109 const bool SourceIsSigned = CastKind == InstCast::Sitofp;
4110
4111 if (isVectorType(DestTy)) {
4112 assert(typeElementType(DestTy) == IceType_f32);
4113 auto *T = makeReg(DestTy);
4114 Variable *Src0R = legalizeToReg(Src0);
4115 _vcvt(T, Src0R,
4116 SourceIsSigned ? InstARM32Vcvt::Vsi2s : InstARM32Vcvt::Vui2s);
4117 _mov(Dest, T);
4118 break;
4119 }
4120
4121 const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
4122 if (Src0->getType() == IceType_i64) {
4123 llvm::report_fatal_error("i64-to-fp should have been pre-lowered.");
4124 }
4125 // sitofp:
4126 // t1.i32 = sext src.int @ sign-extends src0 if needed.
4127 // t2.fp32 = vmov t1.i32
4128 // t3.fp = vcvt.{fp}.s32 @ fp is either f32 or f64
4129 // uitofp:
4130 // t1.i32 = zext src.int @ zero-extends src0 if needed.
4131 // t2.fp32 = vmov t1.i32
4132 // t3.fp = vcvt.{fp}.u32 @ fp is either f32 or f64
4133 if (Src0->getType() != IceType_i32) {
4134 Variable *Src0R_32 = makeReg(IceType_i32);
4135 lowerCast(InstCast::create(
4136 Func, SourceIsSigned ? InstCast::Sext : InstCast::Zext, Src0R_32,
4137 Src0));
4138 Src0 = Src0R_32;
4139 }
4140 Variable *Src0R = legalizeToReg(Src0);
4141 Variable *Src0R_f32 = makeReg(IceType_f32);
4142 _mov(Src0R_f32, Src0R);
4143 Src0R = Src0R_f32;
4144 Variable *T = makeReg(DestTy);
4145 const InstARM32Vcvt::VcvtVariant Conversion =
4146 DestIsF32
4147 ? (SourceIsSigned ? InstARM32Vcvt::Si2s : InstARM32Vcvt::Ui2s)
4148 : (SourceIsSigned ? InstARM32Vcvt::Si2d : InstARM32Vcvt::Ui2d);
4149 _vcvt(T, Src0R, Conversion);
4150 _mov(Dest, T);
4151 break;
4152 }
4153 case InstCast::Bitcast: {
4154 Operand *Src0 = Instr->getSrc(0);
4155 if (DestTy == Src0->getType()) {
4156 auto *Assign = InstAssign::create(Func, Dest, Src0);
4157 lowerAssign(Assign);
4158 return;
4159 }
4160 switch (DestTy) {
4161 case IceType_NUM:
4162 case IceType_void:
4163 llvm::report_fatal_error("Unexpected bitcast.");
4164 case IceType_i1:
4165 UnimplementedLoweringError(this, Instr);
4166 break;
4167 case IceType_i8:
4168 assert(Src0->getType() == IceType_v8i1);
4169 llvm::report_fatal_error(
4170 "v8i1 to i8 conversion should have been prelowered.");
4171 break;
4172 case IceType_i16:
4173 assert(Src0->getType() == IceType_v16i1);
4174 llvm::report_fatal_error(
4175 "v16i1 to i16 conversion should have been prelowered.");
4176 break;
4177 case IceType_i32:
4178 case IceType_f32: {
4179 Variable *Src0R = legalizeToReg(Src0);
4180 Variable *T = makeReg(DestTy);
4181 _mov(T, Src0R);
4182 lowerAssign(InstAssign::create(Func, Dest, T));
4183 break;
4184 }
4185 case IceType_i64: {
4186 // t0, t1 <- src0
4187 // dest[31..0] = t0
4188 // dest[63..32] = t1
4189 assert(Src0->getType() == IceType_f64);
4190 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4191 T->initHiLo(Func);
4192 configureBitcastTemporary(T);
4193 Variable *Src0R = legalizeToReg(Src0);
4194 _mov(T, Src0R);
4195 Context.insert<InstFakeUse>(T->getHi());
4196 Context.insert<InstFakeUse>(T->getLo());
4197 lowerAssign(InstAssign::create(Func, Dest, T));
4198 break;
4199 }
4200 case IceType_f64: {
4201 // T0 <- lo(src)
4202 // T1 <- hi(src)
4203 // vmov T2, T0, T1
4204 // Dest <- T2
4205 assert(Src0->getType() == IceType_i64);
4206 Variable *T = makeReg(DestTy);
4207 auto *Src64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4208 Src64->initHiLo(Func);
4209 configureBitcastTemporary(Src64);
4210 lowerAssign(InstAssign::create(Func, Src64, Src0));
4211 _mov(T, Src64);
4212 lowerAssign(InstAssign::create(Func, Dest, T));
4213 break;
4214 }
4215 case IceType_v8i1:
4216 assert(Src0->getType() == IceType_i8);
4217 llvm::report_fatal_error(
4218 "i8 to v8i1 conversion should have been prelowered.");
4219 break;
4220 case IceType_v16i1:
4221 assert(Src0->getType() == IceType_i16);
4222 llvm::report_fatal_error(
4223 "i16 to v16i1 conversion should have been prelowered.");
4224 break;
4225 case IceType_v4i1:
4226 case IceType_v8i16:
4227 case IceType_v16i8:
4228 case IceType_v4f32:
4229 case IceType_v4i32: {
4230 assert(typeWidthInBytes(DestTy) == typeWidthInBytes(Src0->getType()));
4231 assert(isVectorType(DestTy) == isVectorType(Src0->getType()));
4232 Variable *T = makeReg(DestTy);
4233 _mov(T, Src0);
4234 _mov(Dest, T);
4235 break;
4236 }
4237 }
4238 break;
4239 }
4240 }
4241 }
4242
4243 void TargetARM32::lowerExtractElement(const InstExtractElement *Instr) {
4244 Variable *Dest = Instr->getDest();
4245 Type DestTy = Dest->getType();
4246
4247 Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4248 Operand *Src1 = Instr->getSrc(1);
4249
4250 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
4251 const uint32_t Index = Imm->getValue();
4252 Variable *T = makeReg(DestTy);
4253 Variable *TSrc0 = makeReg(Src0->getType());
4254
4255 if (isFloatingType(DestTy)) {
4256 // We need to make sure the source is in a suitable register.
4257 TSrc0->setRegClass(RegARM32::RCARM32_QtoS);
4258 }
4259
4260 _mov(TSrc0, Src0);
4261 _extractelement(T, TSrc0, Index);
4262 _mov(Dest, T);
4263 return;
4264 }
4265 assert(false && "extractelement requires a constant index");
4266 }
4267
4268 namespace {
4269 // Validates FCMPARM32_TABLE's declaration w.r.t. InstFcmp::FCondition ordering
4270 // (and naming).
4271 enum {
4272 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) _fcmp_ll_##val,
4273 FCMPARM32_TABLE
4274 #undef X
4275 _fcmp_ll_NUM
4276 };
4277
4278 enum {
4279 #define X(tag, str) _fcmp_hl_##tag = InstFcmp::tag,
4280 ICEINSTFCMP_TABLE
4281 #undef X
4282 _fcmp_hl_NUM
4283 };
4284
4285 static_assert((uint32_t)_fcmp_hl_NUM == (uint32_t)_fcmp_ll_NUM,
4286 "Inconsistency between high-level and low-level fcmp tags.");
4287 #define X(tag, str) \
4288 static_assert( \
4289 (uint32_t)_fcmp_hl_##tag == (uint32_t)_fcmp_ll_##tag, \
4290 "Inconsistency between high-level and low-level fcmp tag " #tag);
4291 ICEINSTFCMP_TABLE
4292 #undef X
4293
4294 struct {
4295 CondARM32::Cond CC0;
4296 CondARM32::Cond CC1;
4297 } TableFcmp[] = {
4298 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
4299 {CondARM32::CC0, CondARM32::CC1},
4300 FCMPARM32_TABLE
4301 #undef X
4302 };
4303
4304 bool isFloatingPointZero(const Operand *Src) {
4305 if (const auto *F32 = llvm::dyn_cast<const ConstantFloat>(Src)) {
4306 return Utils::isPositiveZero(F32->getValue());
4307 }
4308
4309 if (const auto *F64 = llvm::dyn_cast<const ConstantDouble>(Src)) {
4310 return Utils::isPositiveZero(F64->getValue());
4311 }
4312
4313 return false;
4314 }
4315 } // end of anonymous namespace
4316
4317 TargetARM32::CondWhenTrue TargetARM32::lowerFcmpCond(const InstFcmp *Instr) {
4318 InstFcmp::FCond Condition = Instr->getCondition();
4319 switch (Condition) {
4320 case InstFcmp::False:
4321 return CondWhenTrue(CondARM32::kNone);
4322 case InstFcmp::True:
4323 return CondWhenTrue(CondARM32::AL);
4324 break;
4325 default: {
4326 Variable *Src0R = legalizeToReg(Instr->getSrc(0));
4327 Operand *Src1 = Instr->getSrc(1);
4328 if (isFloatingPointZero(Src1)) {
4329 _vcmp(Src0R, OperandARM32FlexFpZero::create(Func, Src0R->getType()));
4330 } else {
4331 _vcmp(Src0R, legalizeToReg(Src1));
4332 }
4333 _vmrs();
4334 assert(Condition < llvm::array_lengthof(TableFcmp));
4335 return CondWhenTrue(TableFcmp[Condition].CC0, TableFcmp[Condition].CC1);
4336 }
4337 }
4338 }
4339
4340 void TargetARM32::lowerFcmp(const InstFcmp *Instr) {
4341 Variable *Dest = Instr->getDest();
4342 const Type DestTy = Dest->getType();
4343
4344 if (isVectorType(DestTy)) {
4345 if (Instr->getCondition() == InstFcmp::False) {
4346 constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4347 auto *T = makeReg(SafeTypeForMovingConstant);
4348 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(0)));
4349 _mov(Dest, T);
4350 return;
4351 }
4352
4353 if (Instr->getCondition() == InstFcmp::True) {
4354 constexpr Type SafeTypeForMovingConstant = IceType_v4i32;
4355 auto *T = makeReg(SafeTypeForMovingConstant);
4356 _mov(T, llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(1)));
4357 _mov(Dest, T);
4358 return;
4359 }
4360
4361 Variable *T0;
4362 Variable *T1;
4363 bool Negate = false;
4364 auto *Src0 = legalizeToReg(Instr->getSrc(0));
4365 auto *Src1 = legalizeToReg(Instr->getSrc(1));
4366
4367 switch (Instr->getCondition()) {
4368 default:
4369 llvm::report_fatal_error("Unhandled fp comparison.");
4370 #define _Vcnone(Tptr, S0, S1) \
4371 do { \
4372 *(Tptr) = nullptr; \
4373 } while (0)
4374 #define _Vceq(Tptr, S0, S1) \
4375 do { \
4376 *(Tptr) = makeReg(DestTy); \
4377 _vceq(*(Tptr), S0, S1); \
4378 } while (0)
4379 #define _Vcge(Tptr, S0, S1) \
4380 do { \
4381 *(Tptr) = makeReg(DestTy); \
4382 _vcge(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
4383 } while (0)
4384 #define _Vcgt(Tptr, S0, S1) \
4385 do { \
4386 *(Tptr) = makeReg(DestTy); \
4387 _vcgt(*(Tptr), S0, S1)->setSignType(InstARM32::FS_Signed); \
4388 } while (0)
4389 #define X(val, CC0, CC1, CC0_V, CC1_V, INV_V, NEG_V) \
4390 case InstFcmp::val: { \
4391 _Vc##CC0_V(&T0, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1); \
4392 _Vc##CC1_V(&T1, (INV_V) ? Src0 : Src1, (INV_V) ? Src1 : Src0); \
4393 Negate = NEG_V; \
4394 } break;
4395 FCMPARM32_TABLE
4396 #undef X
4397 #undef _Vcgt
4398 #undef _Vcge
4399 #undef _Vceq
4400 #undef _Vcnone
4401 }
4402 assert(T0 != nullptr);
4403 Variable *T = T0;
4404 if (T1 != nullptr) {
4405 T = makeReg(DestTy);
4406 _vorr(T, T0, T1);
4407 }
4408
4409 if (Negate) {
4410 auto *TNeg = makeReg(DestTy);
4411 _vmvn(TNeg, T);
4412 T = TNeg;
4413 }
4414
4415 _mov(Dest, T);
4416 return;
4417 }
4418
4419 Variable *T = makeReg(IceType_i1);
4420 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4421 Operand *_0 =
4422 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4423
4424 CondWhenTrue Cond = lowerFcmpCond(Instr);
4425
4426 bool RedefineT = false;
4427 if (Cond.WhenTrue0 != CondARM32::AL) {
4428 _mov(T, _0);
4429 RedefineT = true;
4430 }
4431
4432 if (Cond.WhenTrue0 == CondARM32::kNone) {
4433 _mov(Dest, T);
4434 return;
4435 }
4436
4437 if (RedefineT) {
4438 _mov_redefined(T, _1, Cond.WhenTrue0);
4439 } else {
4440 _mov(T, _1, Cond.WhenTrue0);
4441 }
4442
4443 if (Cond.WhenTrue1 != CondARM32::kNone) {
4444 _mov_redefined(T, _1, Cond.WhenTrue1);
4445 }
4446
4447 _mov(Dest, T);
4448 }
4449
4450 TargetARM32::CondWhenTrue
4451 TargetARM32::lowerInt64IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4452 Operand *Src1) {
4453 assert(Condition < llvm::array_lengthof(TableIcmp64));
4454
4455 Int32Operands SrcsLo(loOperand(Src0), loOperand(Src1));
4456 Int32Operands SrcsHi(hiOperand(Src0), hiOperand(Src1));
4457 assert(SrcsLo.hasConstOperand() == SrcsHi.hasConstOperand());
4458 assert(SrcsLo.swappedOperands() == SrcsHi.swappedOperands());
4459
4460 if (SrcsLo.hasConstOperand()) {
4461 const uint32_t ValueLo = SrcsLo.getConstantValue();
4462 const uint32_t ValueHi = SrcsHi.getConstantValue();
4463 const uint64_t Value = (static_cast<uint64_t>(ValueHi) << 32) | ValueLo;
4464 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) &&
4465 Value == 0) {
4466 Variable *T = makeReg(IceType_i32);
4467 Variable *Src0LoR = SrcsLo.src0R(this);
4468 Variable *Src0HiR = SrcsHi.src0R(this);
4469 _orrs(T, Src0LoR, Src0HiR);
4470 Context.insert<InstFakeUse>(T);
4471 return CondWhenTrue(TableIcmp64[Condition].C1);
4472 }
4473
4474 Variable *Src0RLo = SrcsLo.src0R(this);
4475 Variable *Src0RHi = SrcsHi.src0R(this);
4476 Operand *Src1RFLo = SrcsLo.src1RF(this);
4477 Operand *Src1RFHi = ValueLo == ValueHi ? Src1RFLo : SrcsHi.src1RF(this);
4478
4479 const bool UseRsb =
4480 TableIcmp64[Condition].Swapped != SrcsLo.swappedOperands();
4481
4482 if (UseRsb) {
4483 if (TableIcmp64[Condition].IsSigned) {
4484 Variable *T = makeReg(IceType_i32);
4485 _rsbs(T, Src0RLo, Src1RFLo);
4486 Context.insert<InstFakeUse>(T);
4487
4488 T = makeReg(IceType_i32);
4489 _rscs(T, Src0RHi, Src1RFHi);
4490 // We need to add a FakeUse here because liveness gets mad at us (Def
4491 // without Use.) Note that flag-setting instructions are considered to
4492 // have side effects and, therefore, are not DCE'ed.
4493 Context.insert<InstFakeUse>(T);
4494 } else {
4495 Variable *T = makeReg(IceType_i32);
4496 _rsbs(T, Src0RHi, Src1RFHi);
4497 Context.insert<InstFakeUse>(T);
4498
4499 T = makeReg(IceType_i32);
4500 _rsbs(T, Src0RLo, Src1RFLo, CondARM32::EQ);
4501 Context.insert<InstFakeUse>(T);
4502 }
4503 } else {
4504 if (TableIcmp64[Condition].IsSigned) {
4505 _cmp(Src0RLo, Src1RFLo);
4506 Variable *T = makeReg(IceType_i32);
4507 _sbcs(T, Src0RHi, Src1RFHi);
4508 Context.insert<InstFakeUse>(T);
4509 } else {
4510 _cmp(Src0RHi, Src1RFHi);
4511 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4512 }
4513 }
4514
4515 return CondWhenTrue(TableIcmp64[Condition].C1);
4516 }
4517
4518 Variable *Src0RLo, *Src0RHi;
4519 Operand *Src1RFLo, *Src1RFHi;
4520 if (TableIcmp64[Condition].Swapped) {
4521 Src0RLo = legalizeToReg(loOperand(Src1));
4522 Src0RHi = legalizeToReg(hiOperand(Src1));
4523 Src1RFLo = legalizeToReg(loOperand(Src0));
4524 Src1RFHi = legalizeToReg(hiOperand(Src0));
4525 } else {
4526 Src0RLo = legalizeToReg(loOperand(Src0));
4527 Src0RHi = legalizeToReg(hiOperand(Src0));
4528 Src1RFLo = legalizeToReg(loOperand(Src1));
4529 Src1RFHi = legalizeToReg(hiOperand(Src1));
4530 }
4531
4532 // a=icmp cond, b, c ==>
4533 // GCC does:
4534 // cmp b.hi, c.hi or cmp b.lo, c.lo
4535 // cmp.eq b.lo, c.lo sbcs t1, b.hi, c.hi
4536 // mov.<C1> t, #1 mov.<C1> t, #1
4537 // mov.<C2> t, #0 mov.<C2> t, #0
4538 // mov a, t mov a, t
4539 // where the "cmp.eq b.lo, c.lo" is used for unsigned and "sbcs t1, hi, hi"
4540 // is used for signed compares. In some cases, b and c need to be swapped as
4541 // well.
4542 //
4543 // LLVM does:
4544 // for EQ and NE:
4545 // eor t1, b.hi, c.hi
4546 // eor t2, b.lo, c.lo
4547 // orrs t, t1, t2
4548 // mov.<C> t, #1
4549 // mov a, t
4550 //
4551 // that's nice in that it's just as short but has fewer dependencies for
4552 // better ILP at the cost of more registers.
4553 //
4554 // Otherwise for signed/unsigned <, <=, etc. LLVM uses a sequence with two
4555 // unconditional mov #0, two cmps, two conditional mov #1, and one
4556 // conditional reg mov. That has few dependencies for good ILP, but is a
4557 // longer sequence.
4558 //
4559 // So, we are going with the GCC version since it's usually better (except
4560 // perhaps for eq/ne). We could revisit special-casing eq/ne later.
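// Concretely, for signed conditions the flags come from a full 64-bit subtract
// (cmp on the low words, then sbcs on the high words), while unsigned
// conditions chain two cmps, with the low-word compare executed only when the
// high words are equal.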
4561 if (TableIcmp64[Condition].IsSigned) {
4562 Variable *ScratchReg = makeReg(IceType_i32);
4563 _cmp(Src0RLo, Src1RFLo);
4564 _sbcs(ScratchReg, Src0RHi, Src1RFHi);
4565 // ScratchReg isn't going to be used, but we need the side-effect of
4566 // setting flags from this operation.
4567 Context.insert<InstFakeUse>(ScratchReg);
4568 } else {
4569 _cmp(Src0RHi, Src1RFHi);
4570 _cmp(Src0RLo, Src1RFLo, CondARM32::EQ);
4571 }
4572 return CondWhenTrue(TableIcmp64[Condition].C1);
4573 }
4574
4575 TargetARM32::CondWhenTrue
4576 TargetARM32::lowerInt32IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4577 Operand *Src1) {
4578 Int32Operands Srcs(Src0, Src1);
4579 if (!Srcs.hasConstOperand()) {
4580
4581 Variable *Src0R = Srcs.src0R(this);
4582 Operand *Src1RF = Srcs.src1RF(this);
4583 _cmp(Src0R, Src1RF);
4584 return CondWhenTrue(getIcmp32Mapping(Condition));
4585 }
4586
4587 Variable *Src0R = Srcs.src0R(this);
4588 const int32_t Value = Srcs.getConstantValue();
4589 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4590 _tst(Src0R, Src0R);
4591 return CondWhenTrue(getIcmp32Mapping(Condition));
4592 }
4593
4594 if (!Srcs.swappedOperands() && !Srcs.immediateIsFlexEncodable() &&
4595 Srcs.negatedImmediateIsFlexEncodable()) {
4596 Operand *Src1F = Srcs.negatedSrc1F(this);
4597 _cmn(Src0R, Src1F);
4598 return CondWhenTrue(getIcmp32Mapping(Condition));
4599 }
4600
4601 Operand *Src1RF = Srcs.src1RF(this);
4602 if (!Srcs.swappedOperands()) {
4603 _cmp(Src0R, Src1RF);
4604 } else {
4605 Variable *T = makeReg(IceType_i32);
4606 _rsbs(T, Src0R, Src1RF);
4607 Context.insert<InstFakeUse>(T);
4608 }
4609 return CondWhenTrue(getIcmp32Mapping(Condition));
4610 }
4611
4612 TargetARM32::CondWhenTrue
4613 TargetARM32::lowerInt8AndInt16IcmpCond(InstIcmp::ICond Condition, Operand *Src0,
4614 Operand *Src1) {
4615 Int32Operands Srcs(Src0, Src1);
4616 const int32_t ShAmt = 32 - getScalarIntBitWidth(Src0->getType());
4617 assert(ShAmt >= 0);
4618
4619 if (!Srcs.hasConstOperand()) {
4620 Variable *Src0R = makeReg(IceType_i32);
4621 Operand *ShAmtImm = shAmtImm(ShAmt);
4622 _lsl(Src0R, legalizeToReg(Src0), ShAmtImm);
4623
4624 Variable *Src1R = legalizeToReg(Src1);
4625 auto *Src1F = OperandARM32FlexReg::create(Func, IceType_i32, Src1R,
4626 OperandARM32::LSL, ShAmtImm);
4627 _cmp(Src0R, Src1F);
4628 return CondWhenTrue(getIcmp32Mapping(Condition));
4629 }
4630
4631 const int32_t Value = Srcs.getConstantValue();
4632 if ((Condition == InstIcmp::Eq || Condition == InstIcmp::Ne) && Value == 0) {
4633 Operand *ShAmtImm = shAmtImm(ShAmt);
4634 Variable *T = makeReg(IceType_i32);
4635 _lsls(T, Srcs.src0R(this), ShAmtImm);
4636 Context.insert<InstFakeUse>(T);
4637 return CondWhenTrue(getIcmp32Mapping(Condition));
4638 }
4639
4640 Variable *ConstR = makeReg(IceType_i32);
4641 _mov(ConstR,
4642 legalize(Ctx->getConstantInt32(Value << ShAmt), Legal_Reg | Legal_Flex));
4643 Operand *NonConstF = OperandARM32FlexReg::create(
4644 Func, IceType_i32, Srcs.src0R(this), OperandARM32::LSL,
4645 Ctx->getConstantInt32(ShAmt));
4646
4647 if (Srcs.swappedOperands()) {
4648 _cmp(ConstR, NonConstF);
4649 } else {
4650 Variable *T = makeReg(IceType_i32);
4651 _rsbs(T, ConstR, NonConstF);
4652 Context.insert<InstFakeUse>(T);
4653 }
4654 return CondWhenTrue(getIcmp32Mapping(Condition));
4655 }
4656
4657 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(const InstIcmp *Instr) {
4658 return lowerIcmpCond(Instr->getCondition(), Instr->getSrc(0),
4659 Instr->getSrc(1));
4660 }
4661
4662 TargetARM32::CondWhenTrue TargetARM32::lowerIcmpCond(InstIcmp::ICond Condition,
4663 Operand *Src0,
4664 Operand *Src1) {
4665 Src0 = legalizeUndef(Src0);
4666 Src1 = legalizeUndef(Src1);
4667
4668 // a=icmp cond b, c ==>
4669 // GCC does:
4670 // <u/s>xtb tb, b
4671 // <u/s>xtb tc, c
4672 // cmp tb, tc
4673 // mov.C1 t, #0
4674 // mov.C2 t, #1
4675 // mov a, t
4676 // where the unsigned/sign extension is not needed for 32-bit. They also have
4677 // special cases for EQ and NE. E.g., for NE:
4678 // <extend to tb, tc>
4679 // subs t, tb, tc
4680 // movne t, #1
4681 // mov a, t
4682 //
4683 // LLVM does:
4684 // lsl tb, b, #<N>
4685 // mov t, #0
4686 // cmp tb, c, lsl #<N>
4687 // mov.<C> t, #1
4688 // mov a, t
4689 //
4690 // the left shift is by 0, 16, or 24, which allows the comparison to focus on
4691 // the bits that actually matter (for 16-bit or 8-bit signed/unsigned). For
4692 // the unsigned case, for some reason it does something similar to GCC and does
4693 // a uxtb first. It's not clear to me why that special-casing is needed.
4694 //
4695 // We'll go with the LLVM way for now, since it's shorter and has just as few
4696 // dependencies.
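// For an i8 compare, for instance, the shift amount is 24 (32 - 8): both
// operands end up in the top byte, so a single 32-bit cmp (with the barrel
// shifter applied to the second operand) produces the correct signed and
// unsigned flags.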
4697 switch (Src0->getType()) {
4698 default:
4699 llvm::report_fatal_error("Unhandled type in lowerIcmpCond");
4700 case IceType_i1:
4701 case IceType_i8:
4702 case IceType_i16:
4703 return lowerInt8AndInt16IcmpCond(Condition, Src0, Src1);
4704 case IceType_i32:
4705 return lowerInt32IcmpCond(Condition, Src0, Src1);
4706 case IceType_i64:
4707 return lowerInt64IcmpCond(Condition, Src0, Src1);
4708 }
4709 }
4710
4711 void TargetARM32::lowerIcmp(const InstIcmp *Instr) {
4712 Variable *Dest = Instr->getDest();
4713 const Type DestTy = Dest->getType();
4714
4715 if (isVectorType(DestTy)) {
4716 auto *T = makeReg(DestTy);
4717 auto *Src0 = legalizeToReg(Instr->getSrc(0));
4718 auto *Src1 = legalizeToReg(Instr->getSrc(1));
4719 const Type SrcTy = Src0->getType();
4720
4721 bool NeedsShl = false;
4722 Type NewTypeAfterShl;
4723 SizeT ShAmt;
4724 switch (SrcTy) {
4725 default:
4726 break;
4727 case IceType_v16i1:
4728 NeedsShl = true;
4729 NewTypeAfterShl = IceType_v16i8;
4730 ShAmt = 7;
4731 break;
4732 case IceType_v8i1:
4733 NeedsShl = true;
4734 NewTypeAfterShl = IceType_v8i16;
4735 ShAmt = 15;
4736 break;
4737 case IceType_v4i1:
4738 NeedsShl = true;
4739 NewTypeAfterShl = IceType_v4i32;
4740 ShAmt = 31;
4741 break;
4742 }
4743
4744 if (NeedsShl) {
4745 auto *Imm = llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmt));
4746 auto *Src0T = makeReg(NewTypeAfterShl);
4747 auto *Src0Shl = makeReg(NewTypeAfterShl);
4748 _mov(Src0T, Src0);
4749 _vshl(Src0Shl, Src0T, Imm);
4750 Src0 = Src0Shl;
4751
4752 auto *Src1T = makeReg(NewTypeAfterShl);
4753 auto *Src1Shl = makeReg(NewTypeAfterShl);
4754 _mov(Src1T, Src1);
4755 _vshl(Src1Shl, Src1T, Imm);
4756 Src1 = Src1Shl;
4757 }
4758
4759 switch (Instr->getCondition()) {
4760 default:
4761 llvm::report_fatal_error("Unhandled integer comparison.");
4762 #define _Vceq(T, S0, S1, Signed) _vceq(T, S0, S1)
4763 #define _Vcge(T, S0, S1, Signed) \
4764 _vcge(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed \
4765 : InstARM32::FS_Unsigned)
4766 #define _Vcgt(T, S0, S1, Signed) \
4767 _vcgt(T, S0, S1)->setSignType(Signed ? InstARM32::FS_Signed \
4768 : InstARM32::FS_Unsigned)
4769 #define X(val, is_signed, swapped64, C_32, C1_64, C2_64, C_V, INV_V, NEG_V) \
4770 case InstIcmp::val: { \
4771 _Vc##C_V(T, (INV_V) ? Src1 : Src0, (INV_V) ? Src0 : Src1, is_signed); \
4772 if (NEG_V) { \
4773 auto *TInv = makeReg(DestTy); \
4774 _vmvn(TInv, T); \
4775 T = TInv; \
4776 } \
4777 } break;
4778 ICMPARM32_TABLE
4779 #undef X
4780 #undef _Vcgt
4781 #undef _Vcge
4782 #undef _Vceq
4783 }
4784 _mov(Dest, T);
4785 return;
4786 }
4787
4788 Operand *_0 =
4789 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
4790 Operand *_1 = legalize(Ctx->getConstantInt32(1), Legal_Reg | Legal_Flex);
4791 Variable *T = makeReg(IceType_i1);
4792
4793 _mov(T, _0);
4794 CondWhenTrue Cond = lowerIcmpCond(Instr);
4795 _mov_redefined(T, _1, Cond.WhenTrue0);
4796 _mov(Dest, T);
4797
4798 assert(Cond.WhenTrue1 == CondARM32::kNone);
4799
4800 return;
4801 }
4802
4803 void TargetARM32::lowerInsertElement(const InstInsertElement *Instr) {
4804 Variable *Dest = Instr->getDest();
4805 Type DestTy = Dest->getType();
4806
4807 Variable *Src0 = legalizeToReg(Instr->getSrc(0));
4808 Variable *Src1 = legalizeToReg(Instr->getSrc(1));
4809 Operand *Src2 = Instr->getSrc(2);
4810
4811 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4812 const uint32_t Index = Imm->getValue();
4813 Variable *T = makeReg(DestTy);
4814
4815 if (isFloatingType(DestTy)) {
4816 T->setRegClass(RegARM32::RCARM32_QtoS);
4817 }
4818
4819 _mov(T, Src0);
4820 _insertelement(T, Src1, Index);
4821 _set_dest_redefined();
4822 _mov(Dest, T);
4823 return;
4824 }
4825 assert(false && "insertelement requires a constant index");
4826 }
4827
4828 namespace {
4829 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
4830 if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
4831 return Integer->getValue();
4832 return Intrinsics::MemoryOrderInvalid;
4833 }
4834 } // end of anonymous namespace
4835
4836 void TargetARM32::lowerLoadLinkedStoreExclusive(
4837 Type Ty, Operand *Addr, std::function<Variable *(Variable *)> Operation,
4838 CondARM32::Cond Cond) {
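  // Roughly, this helper emits the following load-linked/store-exclusive
  // retry loop (sketch for the 32-bit case; register names are illustrative):
  //
  //   retry:
  //     ldrex    tmp, [addr]
  //     ...                        @ StoreValue = Operation(tmp)
  //     mov<!c>  success, #0       @ only when Cond != AL
  //     strex<c> success, StoreValue, [addr]
  //     cmp      success, #0
  //     bne      retry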
4839
4840 auto *Retry = Context.insert<InstARM32Label>(this);
4841
4842 { // scoping for loop highlighting.
4843 Variable *Success = makeReg(IceType_i32);
4844 Variable *Tmp = (Ty == IceType_i64) ? makeI64RegPair() : makeReg(Ty);
4845 auto *_0 = Ctx->getConstantZero(IceType_i32);
4846
4847 Context.insert<InstFakeDef>(Tmp);
4848 Context.insert<InstFakeUse>(Tmp);
4849 Variable *AddrR = legalizeToReg(Addr);
4850 _ldrex(Tmp, formMemoryOperand(AddrR, Ty))->setDestRedefined();
4851 auto *StoreValue = Operation(Tmp);
4852 assert(StoreValue->mustHaveReg());
4853 // strex requires Dest to be a register other than Value or Addr. This
4854 // restriction is cleanly represented by adding an "early" definition of
4855 // Dest (or a later use of all the sources.)
4856 Context.insert<InstFakeDef>(Success);
4857 if (Cond != CondARM32::AL) {
4858 _mov_redefined(Success, legalize(_0, Legal_Reg | Legal_Flex),
4859 InstARM32::getOppositeCondition(Cond));
4860 }
4861 _strex(Success, StoreValue, formMemoryOperand(AddrR, Ty), Cond)
4862 ->setDestRedefined();
4863 _cmp(Success, _0);
4864 }
4865
4866 _br(Retry, CondARM32::NE);
4867 }
4868
4869 namespace {
4870 InstArithmetic *createArithInst(Cfg *Func, uint32_t Operation, Variable *Dest,
4871 Variable *Src0, Operand *Src1) {
4872 InstArithmetic::OpKind Oper;
4873 switch (Operation) {
4874 default:
4875 llvm::report_fatal_error("Unknown AtomicRMW operation");
4876 case Intrinsics::AtomicExchange:
4877 llvm::report_fatal_error("Can't handle Atomic xchg operation");
4878 case Intrinsics::AtomicAdd:
4879 Oper = InstArithmetic::Add;
4880 break;
4881 case Intrinsics::AtomicAnd:
4882 Oper = InstArithmetic::And;
4883 break;
4884 case Intrinsics::AtomicSub:
4885 Oper = InstArithmetic::Sub;
4886 break;
4887 case Intrinsics::AtomicOr:
4888 Oper = InstArithmetic::Or;
4889 break;
4890 case Intrinsics::AtomicXor:
4891 Oper = InstArithmetic::Xor;
4892 break;
4893 }
4894 return InstArithmetic::create(Func, Oper, Dest, Src0, Src1);
4895 }
4896 } // end of anonymous namespace
4897
4898 void TargetARM32::lowerAtomicRMW(Variable *Dest, uint32_t Operation,
4899 Operand *Addr, Operand *Val) {
4900 // retry:
4901 // ldrex tmp, [addr]
4902 // mov contents, tmp
4903 // op result, contents, Val
4904 // strex success, result, [addr]
4905 // cmp success, 0
4906 // jne retry
4907 // fake-use(addr, operand) @ prevents undesirable clobbering.
4908 // mov dest, contents
4909 auto DestTy = Dest->getType();
4910
4911 if (DestTy == IceType_i64) {
4912 lowerInt64AtomicRMW(Dest, Operation, Addr, Val);
4913 return;
4914 }
4915
4916 Operand *ValRF = nullptr;
4917 if (llvm::isa<ConstantInteger32>(Val)) {
4918 ValRF = Val;
4919 } else {
4920 ValRF = legalizeToReg(Val);
4921 }
4922 auto *ContentsR = makeReg(DestTy);
4923 auto *ResultR = makeReg(DestTy);
4924
4925 _dmb();
4926 lowerLoadLinkedStoreExclusive(
4927 DestTy, Addr,
4928 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4929 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4930 if (Operation == Intrinsics::AtomicExchange) {
4931 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4932 } else {
4933 lowerArithmetic(
4934 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4935 }
4936 return ResultR;
4937 });
4938 _dmb();
4939 if (auto *ValR = llvm::dyn_cast<Variable>(ValRF)) {
4940 Context.insert<InstFakeUse>(ValR);
4941 }
4942 // Can't dce ContentsR.
4943 Context.insert<InstFakeUse>(ContentsR);
4944 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4945 }
4946
4947 void TargetARM32::lowerInt64AtomicRMW(Variable *Dest, uint32_t Operation,
4948 Operand *Addr, Operand *Val) {
4949 assert(Dest->getType() == IceType_i64);
4950
4951 auto *ResultR = makeI64RegPair();
4952
4953 Context.insert<InstFakeDef>(ResultR);
4954
4955 Operand *ValRF = nullptr;
4956 if (llvm::dyn_cast<ConstantInteger64>(Val)) {
4957 ValRF = Val;
4958 } else {
4959 auto *ValR64 = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4960 ValR64->initHiLo(Func);
4961 ValR64->setMustNotHaveReg();
4962 ValR64->getLo()->setMustHaveReg();
4963 ValR64->getHi()->setMustHaveReg();
4964 lowerAssign(InstAssign::create(Func, ValR64, Val));
4965 ValRF = ValR64;
4966 }
4967
4968 auto *ContentsR = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
4969 ContentsR->initHiLo(Func);
4970 ContentsR->setMustNotHaveReg();
4971 ContentsR->getLo()->setMustHaveReg();
4972 ContentsR->getHi()->setMustHaveReg();
4973
4974 _dmb();
4975 lowerLoadLinkedStoreExclusive(
4976 IceType_i64, Addr,
4977 [this, Operation, ResultR, ContentsR, ValRF](Variable *Tmp) {
4978 lowerAssign(InstAssign::create(Func, ContentsR, Tmp));
4979 Context.insert<InstFakeUse>(Tmp);
4980 if (Operation == Intrinsics::AtomicExchange) {
4981 lowerAssign(InstAssign::create(Func, ResultR, ValRF));
4982 } else {
4983 lowerArithmetic(
4984 createArithInst(Func, Operation, ResultR, ContentsR, ValRF));
4985 }
4986 Context.insert<InstFakeUse>(ResultR->getHi());
4987 Context.insert<InstFakeDef>(ResultR, ResultR->getLo())
4988 ->setDestRedefined();
4989 return ResultR;
4990 });
4991 _dmb();
4992 if (auto *ValR64 = llvm::dyn_cast<Variable64On32>(ValRF)) {
4993 Context.insert<InstFakeUse>(ValR64->getLo());
4994 Context.insert<InstFakeUse>(ValR64->getHi());
4995 }
4996 lowerAssign(InstAssign::create(Func, Dest, ContentsR));
4997 }
4998
4999 void TargetARM32::postambleCtpop64(const InstCall *Instr) {
5000 Operand *Arg0 = Instr->getArg(0);
5001 if (isInt32Asserting32Or64(Arg0->getType())) {
5002 return;
5003 }
5004 // The popcount helpers always return 32-bit values, while the intrinsic's
5005 // signature matches some 64-bit platforms' native instructions and expects to
5006 // fill a 64-bit reg. Thus, clear the upper bits of the dest just in case the
5007 // user doesn't do that in the IR or doesn't discard the bits via a truncate.
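  // E.g., for a 64-bit ctpop the helper's 32-bit count already sits in the
  // low half of Dest, so the code below only needs to zero DestHi.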
5008 auto *DestHi = llvm::cast<Variable>(hiOperand(Instr->getDest()));
5009 Variable *T = makeReg(IceType_i32);
5010 Operand *_0 =
5011 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5012 _mov(T, _0);
5013 _mov(DestHi, T);
5014 }
5015
5016 void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
5017 Variable *Dest = Instr->getDest();
5018 Type DestTy = (Dest != nullptr) ? Dest->getType() : IceType_void;
5019 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
5020 switch (ID) {
5021 case Intrinsics::AtomicFence:
5022 case Intrinsics::AtomicFenceAll:
5023 assert(Dest == nullptr);
5024 _dmb();
5025 return;
5026 case Intrinsics::AtomicIsLockFree: {
5027 Operand *ByteSize = Instr->getArg(0);
5028 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
5029 if (CI == nullptr) {
5030 // The PNaCl ABI requires the byte size to be a compile-time constant.
5031 Func->setError("AtomicIsLockFree byte size should be compile-time const");
5032 return;
5033 }
5034 static constexpr int32_t NotLockFree = 0;
5035 static constexpr int32_t LockFree = 1;
5036 int32_t Result = NotLockFree;
5037 switch (CI->getValue()) {
5038 case 1:
5039 case 2:
5040 case 4:
5041 case 8:
5042 Result = LockFree;
5043 break;
5044 }
5045 _mov(Dest, legalizeToReg(Ctx->getConstantInt32(Result)));
5046 return;
5047 }
5048 case Intrinsics::AtomicLoad: {
5049 assert(isScalarIntegerType(DestTy));
5050 // We require the memory address to be naturally aligned. Given that, normal
5051 // loads are atomic.
5052 if (!Intrinsics::isMemoryOrderValid(
5053 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
5054 Func->setError("Unexpected memory ordering for AtomicLoad");
5055 return;
5056 }
5057 Variable *T;
5058
5059 if (DestTy == IceType_i64) {
5060 // ldrex is the only ARM instruction that is guaranteed to load a 64-bit
5061 // integer atomically. Everything else works with a regular ldr.
5062 T = makeI64RegPair();
5063 _ldrex(T, formMemoryOperand(Instr->getArg(0), IceType_i64));
5064 } else {
5065 T = makeReg(DestTy);
5066 _ldr(T, formMemoryOperand(Instr->getArg(0), DestTy));
5067 }
5068 _dmb();
5069 lowerAssign(InstAssign::create(Func, Dest, T));
5070 // Adding a fake-use T to ensure the atomic load is not removed if Dest is
5071 // unused.
5072 Context.insert<InstFakeUse>(T);
5073 return;
5074 }
5075 case Intrinsics::AtomicStore: {
5076 // We require the memory address to be naturally aligned. Given that, normal
5077 // stores are atomic.
5078 if (!Intrinsics::isMemoryOrderValid(
5079 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
5080 Func->setError("Unexpected memory ordering for AtomicStore");
5081 return;
5082 }
5083
5084 auto *Value = Instr->getArg(0);
5085 if (Value->getType() == IceType_i64) {
5086 auto *ValueR = makeI64RegPair();
5087 Context.insert<InstFakeDef>(ValueR);
5088 lowerAssign(InstAssign::create(Func, ValueR, Value));
5089 _dmb();
5090 lowerLoadLinkedStoreExclusive(
5091 IceType_i64, Instr->getArg(1), [this, ValueR](Variable *Tmp) {
5092 // The following fake-use prevents the ldrex instruction from being
5093 // dead code eliminated.
5094 Context.insert<InstFakeUse>(llvm::cast<Variable>(loOperand(Tmp)));
5095 Context.insert<InstFakeUse>(llvm::cast<Variable>(hiOperand(Tmp)));
5096 Context.insert<InstFakeUse>(Tmp);
5097 return ValueR;
5098 });
5099 Context.insert<InstFakeUse>(ValueR);
5100 _dmb();
5101 return;
5102 }
5103
5104 auto *ValueR = legalizeToReg(Instr->getArg(0));
5105 const auto ValueTy = ValueR->getType();
5106 assert(isScalarIntegerType(ValueTy));
5107 auto *Addr = legalizeToReg(Instr->getArg(1));
5108
5109 // Non-64-bit stores are atomic as long as the address is aligned. This is
5110 // PNaCl, so addresses are aligned.
5111 _dmb();
5112 _str(ValueR, formMemoryOperand(Addr, ValueTy));
5113 _dmb();
5114 return;
5115 }
5116 case Intrinsics::AtomicCmpxchg: {
5117 // retry:
5118 // ldrex tmp, [addr]
5119 // cmp tmp, expected
5120 // mov expected, tmp
5121 // strexeq success, new, [addr]
5122 // cmpeq success, #0
5123 // bne retry
5124 // mov dest, expected
5125 assert(isScalarIntegerType(DestTy));
5126 // We require the memory address to be naturally aligned. Given that, normal
5127 // loads are atomic.
5128 if (!Intrinsics::isMemoryOrderValid(
5129 ID, getConstantMemoryOrder(Instr->getArg(3)),
5130 getConstantMemoryOrder(Instr->getArg(4)))) {
5131 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
5132 return;
5133 }
5134
5135 if (DestTy == IceType_i64) {
5136 Variable *LoadedValue = nullptr;
5137
5138 auto *New = makeI64RegPair();
5139 Context.insert<InstFakeDef>(New);
5140 lowerAssign(InstAssign::create(Func, New, Instr->getArg(2)));
5141
5142 auto *Expected = makeI64RegPair();
5143 Context.insert<InstFakeDef>(Expected);
5144 lowerAssign(InstAssign::create(Func, Expected, Instr->getArg(1)));
5145
5146 _dmb();
5147 lowerLoadLinkedStoreExclusive(
5148 DestTy, Instr->getArg(0),
5149 [this, Expected, New, &LoadedValue](Variable *Tmp) {
5150 auto *ExpectedLoR = llvm::cast<Variable>(loOperand(Expected));
5151 auto *ExpectedHiR = llvm::cast<Variable>(hiOperand(Expected));
5152 auto *TmpLoR = llvm::cast<Variable>(loOperand(Tmp));
5153 auto *TmpHiR = llvm::cast<Variable>(hiOperand(Tmp));
5154 _cmp(TmpLoR, ExpectedLoR);
5155 _cmp(TmpHiR, ExpectedHiR, CondARM32::EQ);
5156 LoadedValue = Tmp;
5157 return New;
5158 },
5159 CondARM32::EQ);
5160 _dmb();
5161
5162 Context.insert<InstFakeUse>(LoadedValue);
5163 lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5164 // The fake-use Expected prevents the assignments to Expected (above)
5165 // from being removed if Dest is not used.
5166 Context.insert<InstFakeUse>(Expected);
5167 // New needs to be alive here, or its live range will end in the
5168 // strex instruction.
5169 Context.insert<InstFakeUse>(New);
5170 return;
5171 }
5172
5173 auto *New = legalizeToReg(Instr->getArg(2));
5174 auto *Expected = legalizeToReg(Instr->getArg(1));
5175 Variable *LoadedValue = nullptr;
5176
5177 _dmb();
5178 lowerLoadLinkedStoreExclusive(
5179 DestTy, Instr->getArg(0),
5180 [this, Expected, New, &LoadedValue](Variable *Tmp) {
5181 lowerIcmpCond(InstIcmp::Eq, Tmp, Expected);
5182 LoadedValue = Tmp;
5183 return New;
5184 },
5185 CondARM32::EQ);
5186 _dmb();
5187
5188 lowerAssign(InstAssign::create(Func, Dest, LoadedValue));
5189 Context.insert<InstFakeUse>(Expected);
5190 Context.insert<InstFakeUse>(New);
5191 return;
5192 }
5193 case Intrinsics::AtomicRMW: {
5194 if (!Intrinsics::isMemoryOrderValid(
5195 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
5196 Func->setError("Unexpected memory ordering for AtomicRMW");
5197 return;
5198 }
5199 lowerAtomicRMW(
5200 Dest,
5201 static_cast<uint32_t>(
5202 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue()),
5203 Instr->getArg(1), Instr->getArg(2));
5204 return;
5205 }
5206 case Intrinsics::Bswap: {
5207 Operand *Val = Instr->getArg(0);
5208 Type Ty = Val->getType();
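    // Sketch of the lowering (illustrative): a 64-bit bswap becomes two
    // 32-bit revs with the result halves swapped, while a 16-bit bswap is a
    // rev followed by a logical shift right:
    //   rev t, val
    //   lsr t, t, #16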
5209 if (Ty == IceType_i64) {
5210 Val = legalizeUndef(Val);
5211 Variable *Val_Lo = legalizeToReg(loOperand(Val));
5212 Variable *Val_Hi = legalizeToReg(hiOperand(Val));
5213 Variable *T_Lo = makeReg(IceType_i32);
5214 Variable *T_Hi = makeReg(IceType_i32);
5215 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5216 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5217 _rev(T_Lo, Val_Lo);
5218 _rev(T_Hi, Val_Hi);
5219 _mov(DestLo, T_Hi);
5220 _mov(DestHi, T_Lo);
5221 } else {
5222 assert(Ty == IceType_i32 || Ty == IceType_i16);
5223 Variable *ValR = legalizeToReg(Val);
5224 Variable *T = makeReg(Ty);
5225 _rev(T, ValR);
5226 if (Val->getType() == IceType_i16) {
5227 Operand *_16 = shAmtImm(16);
5228 _lsr(T, T, _16);
5229 }
5230 _mov(Dest, T);
5231 }
5232 return;
5233 }
5234 case Intrinsics::Ctpop: {
5235 llvm::report_fatal_error("Ctpop should have been prelowered.");
5236 }
5237 case Intrinsics::Ctlz: {
5238 // The "is zero undef" parameter is ignored and we always return a
5239 // well-defined value.
5240 Operand *Val = Instr->getArg(0);
5241 Variable *ValLoR;
5242 Variable *ValHiR = nullptr;
5243 if (Val->getType() == IceType_i64) {
5244 Val = legalizeUndef(Val);
5245 ValLoR = legalizeToReg(loOperand(Val));
5246 ValHiR = legalizeToReg(hiOperand(Val));
5247 } else {
5248 ValLoR = legalizeToReg(Val);
5249 }
5250 lowerCLZ(Dest, ValLoR, ValHiR);
5251 return;
5252 }
5253 case Intrinsics::Cttz: {
5254 // Essentially like Clz, but reverse the bits first.
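    // E.g., a 32-bit cttz is roughly (illustrative registers):
    //   rbit t, val
    //   clz  dest, t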
5255 Operand *Val = Instr->getArg(0);
5256 Variable *ValLoR;
5257 Variable *ValHiR = nullptr;
5258 if (Val->getType() == IceType_i64) {
5259 Val = legalizeUndef(Val);
5260 ValLoR = legalizeToReg(loOperand(Val));
5261 ValHiR = legalizeToReg(hiOperand(Val));
5262 Variable *TLo = makeReg(IceType_i32);
5263 Variable *THi = makeReg(IceType_i32);
5264 _rbit(TLo, ValLoR);
5265 _rbit(THi, ValHiR);
5266 ValLoR = THi;
5267 ValHiR = TLo;
5268 } else {
5269 ValLoR = legalizeToReg(Val);
5270 Variable *T = makeReg(IceType_i32);
5271 _rbit(T, ValLoR);
5272 ValLoR = T;
5273 }
5274 lowerCLZ(Dest, ValLoR, ValHiR);
5275 return;
5276 }
5277 case Intrinsics::Fabs: {
5278 Variable *T = makeReg(DestTy);
5279 _vabs(T, legalizeToReg(Instr->getArg(0)));
5280 _mov(Dest, T);
5281 return;
5282 }
5283 case Intrinsics::Longjmp: {
5284 llvm::report_fatal_error("longjmp should have been prelowered.");
5285 }
5286 case Intrinsics::Memcpy: {
5287 llvm::report_fatal_error("memcpy should have been prelowered.");
5288 }
5289 case Intrinsics::Memmove: {
5290 llvm::report_fatal_error("memmove should have been prelowered.");
5291 }
5292 case Intrinsics::Memset: {
5293 llvm::report_fatal_error("memset should have been prelowered.");
5294 }
5295 case Intrinsics::NaClReadTP: {
5296 if (SandboxingType != ST_NaCl) {
5297 llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5298 }
5299 Variable *TP = legalizeToReg(OperandARM32Mem::create(
5300 Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
5301 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5302 _mov(Dest, TP);
5303 return;
5304 }
5305 case Intrinsics::Setjmp: {
5306 llvm::report_fatal_error("setjmp should have been prelowered.");
5307 }
5308 case Intrinsics::Sqrt: {
5309 assert(isScalarFloatingType(Dest->getType()) ||
5310 getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5311 Variable *Src = legalizeToReg(Instr->getArg(0));
5312 Variable *T = makeReg(DestTy);
5313 _vsqrt(T, Src);
5314 _mov(Dest, T);
5315 return;
5316 }
5317 case Intrinsics::Stacksave: {
5318 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5319 _mov(Dest, SP);
5320 return;
5321 }
5322 case Intrinsics::Stackrestore: {
5323 Variable *Val = legalizeToReg(Instr->getArg(0));
5324 Sandboxer(this).reset_sp(Val);
5325 return;
5326 }
5327 case Intrinsics::Trap:
5328 _trap();
5329 return;
5330 case Intrinsics::AddSaturateSigned:
5331 case Intrinsics::AddSaturateUnsigned: {
5332 bool Unsigned = (ID == Intrinsics::AddSaturateUnsigned);
5333 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5334 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5335 Variable *T = makeReg(DestTy);
5336 _vqadd(T, Src0, Src1, Unsigned);
5337 _mov(Dest, T);
5338 return;
5339 }
5340 case Intrinsics::LoadSubVector: {
5341 assert(llvm::isa<ConstantInteger32>(Instr->getArg(1)) &&
5342 "LoadSubVector second argument must be a constant");
5343 Variable *Dest = Instr->getDest();
5344 Type Ty = Dest->getType();
5345 auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(1));
5346 Operand *Addr = Instr->getArg(0);
5347 OperandARM32Mem *Src = formMemoryOperand(Addr, Ty);
5348 doMockBoundsCheck(Src);
5349
5350 if (Dest->isRematerializable()) {
5351 Context.insert<InstFakeDef>(Dest);
5352 return;
5353 }
5354
5355 auto *T = makeReg(Ty);
5356 switch (SubVectorSize->getValue()) {
5357 case 4:
5358 _vldr1d(T, Src);
5359 break;
5360 case 8:
5361 _vldr1q(T, Src);
5362 break;
5363 default:
5364 Func->setError("Unexpected size for LoadSubVector");
5365 return;
5366 }
5367 _mov(Dest, T);
5368 return;
5369 }
5370 case Intrinsics::StoreSubVector: {
5371 assert(llvm::isa<ConstantInteger32>(Instr->getArg(2)) &&
5372 "StoreSubVector third argument must be a constant");
5373 auto *SubVectorSize = llvm::cast<ConstantInteger32>(Instr->getArg(2));
5374 Variable *Value = legalizeToReg(Instr->getArg(0));
5375 Operand *Addr = Instr->getArg(1);
5376 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5377 doMockBoundsCheck(NewAddr);
5378
5379 Value = legalizeToReg(Value);
5380
5381 switch (SubVectorSize->getValue()) {
5382 case 4:
5383 _vstr1d(Value, NewAddr);
5384 break;
5385 case 8:
5386 _vstr1q(Value, NewAddr);
5387 break;
5388 default:
5389 Func->setError("Unexpected size for StoreSubVector");
5390 return;
5391 }
5392 return;
5393 }
5394 case Intrinsics::MultiplyAddPairs: {
5395 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5396 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5397 Variable *T = makeReg(DestTy);
5398 _vmlap(T, Src0, Src1);
5399 _mov(Dest, T);
5400 return;
5401 }
5402 case Intrinsics::MultiplyHighSigned:
5403 case Intrinsics::MultiplyHighUnsigned: {
5404 bool Unsigned = (ID == Intrinsics::MultiplyHighUnsigned);
5405 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5406 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5407 Variable *T = makeReg(DestTy);
5408 _vmulh(T, Src0, Src1, Unsigned);
5409 _mov(Dest, T);
5410 return;
5411 }
5412 case Intrinsics::Nearbyint: {
5413 UnimplementedLoweringError(this, Instr);
5414 return;
5415 }
5416 case Intrinsics::Round: {
5417 UnimplementedLoweringError(this, Instr);
5418 return;
5419 }
5420 case Intrinsics::SignMask: {
5421 UnimplementedLoweringError(this, Instr);
5422 return;
5423 }
5424 case Intrinsics::SubtractSaturateSigned:
5425 case Intrinsics::SubtractSaturateUnsigned: {
5426 bool Unsigned = (ID == Intrinsics::SubtractSaturateUnsigned);
5427 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5428 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5429 Variable *T = makeReg(DestTy);
5430 _vqsub(T, Src0, Src1, Unsigned);
5431 _mov(Dest, T);
5432 return;
5433 }
5434 case Intrinsics::VectorPackSigned:
5435 case Intrinsics::VectorPackUnsigned: {
5436 bool Unsigned = (ID == Intrinsics::VectorPackUnsigned);
5437 bool Saturating = true;
5438 Variable *Src0 = legalizeToReg(Instr->getArg(0));
5439 Variable *Src1 = legalizeToReg(Instr->getArg(1));
5440 Variable *T = makeReg(DestTy);
5441 _vqmovn2(T, Src0, Src1, Unsigned, Saturating);
5442 _mov(Dest, T);
5443 return;
5444 }
5445 default: // UnknownIntrinsic
5446 Func->setError("Unexpected intrinsic");
5447 return;
5448 }
5449 return;
5450 }
5451
5452 void TargetARM32::lowerCLZ(Variable *Dest, Variable *ValLoR, Variable *ValHiR) {
5453 Type Ty = Dest->getType();
5454 assert(Ty == IceType_i32 || Ty == IceType_i64);
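  // For a 64-bit operand this emits, roughly (illustrative registers):
  //   clz    t, loR
  //   cmp    hiR, #0
  //   add    t2, t, #32
  //   clzne  t2, hiR
  //   mov    destlo, t2
  //   mov    desthi, #0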
5455 Variable *T = makeReg(IceType_i32);
5456 _clz(T, ValLoR);
5457 if (Ty == IceType_i64) {
5458 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5459 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5460 Operand *Zero =
5461 legalize(Ctx->getConstantZero(IceType_i32), Legal_Reg | Legal_Flex);
5462 Operand *ThirtyTwo =
5463 legalize(Ctx->getConstantInt32(32), Legal_Reg | Legal_Flex);
5464 _cmp(ValHiR, Zero);
5465 Variable *T2 = makeReg(IceType_i32);
5466 _add(T2, T, ThirtyTwo);
5467 _clz(T2, ValHiR, CondARM32::NE);
5468 // T2 is actually a source as well when the predicate is not AL (since it
5469 // may leave T2 alone). We use _set_dest_redefined to prolong the liveness
5470 // of T2 as if it was used as a source.
5471 _set_dest_redefined();
5472 _mov(DestLo, T2);
5473 Variable *T3 = makeReg(Zero->getType());
5474 _mov(T3, Zero);
5475 _mov(DestHi, T3);
5476 return;
5477 }
5478 _mov(Dest, T);
5479 return;
5480 }
5481
5482 void TargetARM32::lowerLoad(const InstLoad *Load) {
5483 // A Load instruction can be treated the same as an Assign instruction, after
5484 // the source operand is transformed into an OperandARM32Mem operand.
5485 Type Ty = Load->getDest()->getType();
5486 Operand *Src0 = formMemoryOperand(Load->getSourceAddress(), Ty);
5487 Variable *DestLoad = Load->getDest();
5488
5489 // TODO(jvoung): handle folding opportunities. Sign and zero extension can
5490 // be folded into a load.
5491 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5492 lowerAssign(Assign);
5493 }
5494
5495 namespace {
5496 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5497 const Variable *OffsetReg, int16_t OffsetRegShAmt,
5498 const Inst *Reason) {
5499 if (!BuildDefs::dump())
5500 return;
5501 if (!Func->isVerbose(IceV_AddrOpt))
5502 return;
5503 OstreamLocker _(Func->getContext());
5504 Ostream &Str = Func->getContext()->getStrDump();
5505 Str << "Instruction: ";
5506 Reason->dumpDecorated(Func);
5507 Str << " results in Base=";
5508 if (Base)
5509 Base->dump(Func);
5510 else
5511 Str << "<null>";
5512 Str << ", OffsetReg=";
5513 if (OffsetReg)
5514 OffsetReg->dump(Func);
5515 else
5516 Str << "<null>";
5517 Str << ", Shift=" << OffsetRegShAmt << ", Offset=" << Offset << "\n";
5518 }
5519
5520 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5521 int32_t *Offset, const Inst **Reason) {
5522 // Var originates from Var=SrcVar ==> set Var:=SrcVar
5523 if (*Var == nullptr)
5524 return false;
5525 const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5526 if (!VarAssign)
5527 return false;
5528 assert(!VMetadata->isMultiDef(*Var));
5529 if (!llvm::isa<InstAssign>(VarAssign))
5530 return false;
5531
5532 Operand *SrcOp = VarAssign->getSrc(0);
5533 bool Optimized = false;
5534 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5535 if (!VMetadata->isMultiDef(SrcVar) ||
5536 // TODO: ensure SrcVar stays single-BB
5537 false) {
5538 Optimized = true;
5539 *Var = SrcVar;
5540 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5541 int32_t MoreOffset = Const->getValue();
5542 int32_t NewOffset = MoreOffset + *Offset;
5543 if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5544 return false;
5545 *Var = nullptr;
5546 *Offset = NewOffset;
5547 Optimized = true;
5548 }
5549 }
5550
5551 if (Optimized) {
5552 *Reason = VarAssign;
5553 }
5554
5555 return Optimized;
5556 }
5557
5558 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5559 if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5560 switch (Arith->getOp()) {
5561 default:
5562 return false;
5563 case InstArithmetic::Add:
5564 case InstArithmetic::Sub:
5565 *Kind = Arith->getOp();
5566 return true;
5567 }
5568 }
5569 return false;
5570 }
5571
5572 bool matchCombinedBaseIndex(const VariablesMetadata *VMetadata, Variable **Base,
5573 Variable **OffsetReg, int32_t OffsetRegShamt,
5574 const Inst **Reason) {
5575 // OffsetReg==nullptr && Base is Base=Var1+Var2 ==>
5576 // set Base=Var1, OffsetReg=Var2, Shift=0
5577 if (*Base == nullptr)
5578 return false;
5579 if (*OffsetReg != nullptr)
5580 return false;
5581 (void)OffsetRegShamt;
5582 assert(OffsetRegShamt == 0);
5583 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5584 if (BaseInst == nullptr)
5585 return false;
5586 assert(!VMetadata->isMultiDef(*Base));
5587 if (BaseInst->getSrcSize() < 2)
5588 return false;
5589 auto *Var1 = llvm::dyn_cast<Variable>(BaseInst->getSrc(0));
5590 if (!Var1)
5591 return false;
5592 if (VMetadata->isMultiDef(Var1))
5593 return false;
5594 auto *Var2 = llvm::dyn_cast<Variable>(BaseInst->getSrc(1));
5595 if (!Var2)
5596 return false;
5597 if (VMetadata->isMultiDef(Var2))
5598 return false;
5599 InstArithmetic::OpKind _;
5600 if (!isAddOrSub(BaseInst, &_) ||
5601 // TODO: ensure Var1 and Var2 stay single-BB
5602 false)
5603 return false;
5604 *Base = Var1;
5605 *OffsetReg = Var2;
5606 // OffsetRegShamt is already 0.
5607 *Reason = BaseInst;
5608 return true;
5609 }
5610
5611 bool matchShiftedOffsetReg(const VariablesMetadata *VMetadata,
5612 Variable **OffsetReg, OperandARM32::ShiftKind *Kind,
5613 int32_t *OffsetRegShamt, const Inst **Reason) {
5614 // OffsetReg is OffsetReg=Var*Const && log2(Const)+Shift<=32 ==>
5615 // OffsetReg=Var, Shift+=log2(Const)
5616 // OffsetReg is OffsetReg=Var<<Const && Const+Shift<=32 ==>
5617 // OffsetReg=Var, Shift+=Const
5618 // OffsetReg is OffsetReg=Var>>Const && Const-Shift>=-32 ==>
5619 // OffsetReg=Var, Shift-=Const
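  // For example (hypothetical input), if OffsetReg was defined by t = x << 2
  // and the running shift is 0, the match rewrites OffsetReg := x and
  // Shift := 2 (LSL), which can later form an operand like [base, x, lsl #2].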
5620 OperandARM32::ShiftKind NewShiftKind = OperandARM32::kNoShift;
5621 if (*OffsetReg == nullptr)
5622 return false;
5623 auto *IndexInst = VMetadata->getSingleDefinition(*OffsetReg);
5624 if (IndexInst == nullptr)
5625 return false;
5626 assert(!VMetadata->isMultiDef(*OffsetReg));
5627 if (IndexInst->getSrcSize() < 2)
5628 return false;
5629 auto *ArithInst = llvm::dyn_cast<InstArithmetic>(IndexInst);
5630 if (ArithInst == nullptr)
5631 return false;
5632 auto *Var = llvm::dyn_cast<Variable>(ArithInst->getSrc(0));
5633 if (Var == nullptr)
5634 return false;
5635 auto *Const = llvm::dyn_cast<ConstantInteger32>(ArithInst->getSrc(1));
5636 if (Const == nullptr) {
5637 assert(!llvm::isa<ConstantInteger32>(ArithInst->getSrc(0)));
5638 return false;
5639 }
5640 if (VMetadata->isMultiDef(Var) || Const->getType() != IceType_i32)
5641 return false;
5642
5643 uint32_t NewShamt = -1;
5644 switch (ArithInst->getOp()) {
5645 default:
5646 return false;
5647 case InstArithmetic::Shl: {
5648 NewShiftKind = OperandARM32::LSL;
5649 NewShamt = Const->getValue();
5650 if (NewShamt > 31)
5651 return false;
5652 } break;
5653 case InstArithmetic::Lshr: {
5654 NewShiftKind = OperandARM32::LSR;
5655 NewShamt = Const->getValue();
5656 if (NewShamt > 31)
5657 return false;
5658 } break;
5659 case InstArithmetic::Ashr: {
5660 NewShiftKind = OperandARM32::ASR;
5661 NewShamt = Const->getValue();
5662 if (NewShamt > 31)
5663 return false;
5664 } break;
5665 case InstArithmetic::Udiv:
5666 case InstArithmetic::Mul: {
5667 const uint32_t UnsignedConst = Const->getValue();
5668 NewShamt = llvm::findFirstSet(UnsignedConst);
5669 if (NewShamt != llvm::findLastSet(UnsignedConst)) {
5670 // First bit set is not the same as the last bit set, so Const is not
5671 // a power of 2.
5672 return false;
5673 }
5674 NewShiftKind = ArithInst->getOp() == InstArithmetic::Udiv
5675 ? OperandARM32::LSR
5676 : OperandARM32::LSL;
5677 } break;
5678 }
5679 // Allowed "transitions":
5680 // kNoShift -> * iff NewShamt < 31
5681 // LSL -> LSL iff NewShamt + OffsetRegShamt < 31
5682 // LSR -> LSR iff NewShamt + OffsetRegShamt < 31
5683 // ASR -> ASR iff NewShamt + OffsetRegShamt < 31
5684 if (*Kind != OperandARM32::kNoShift && *Kind != NewShiftKind) {
5685 return false;
5686 }
5687 const int32_t NewOffsetRegShamt = *OffsetRegShamt + NewShamt;
5688 if (NewOffsetRegShamt > 31)
5689 return false;
5690 *OffsetReg = Var;
5691 *OffsetRegShamt = NewOffsetRegShamt;
5692 *Kind = NewShiftKind;
5693 *Reason = IndexInst;
5694 return true;
5695 }
5696
5697 bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
5698 int32_t *Offset, const Inst **Reason) {
5699 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5700 // set Base=Var, Offset+=Const
5701 // Base is Base=Var-Const ==>
5702 // set Base=Var, Offset-=Const
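  // For example (hypothetical input), if Base was defined by b = a + 16, the
  // match rewrites Base := a and Offset += 16, so the constant can later be
  // folded into a [Base, #Offset] operand.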
5703 if (*Base == nullptr)
5704 return false;
5705 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5706 if (BaseInst == nullptr) {
5707 return false;
5708 }
5709 assert(!VMetadata->isMultiDef(*Base));
5710
5711 auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5712 if (ArithInst == nullptr)
5713 return false;
5714 InstArithmetic::OpKind Kind;
5715 if (!isAddOrSub(ArithInst, &Kind))
5716 return false;
5717 bool IsAdd = Kind == InstArithmetic::Add;
5718 Operand *Src0 = ArithInst->getSrc(0);
5719 Operand *Src1 = ArithInst->getSrc(1);
5720 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5721 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5722 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5723 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5724 Variable *NewBase = nullptr;
5725 int32_t NewOffset = *Offset;
5726
5727 if (Var0 == nullptr && Const0 == nullptr) {
5728 assert(llvm::isa<ConstantRelocatable>(Src0));
5729 return false;
5730 }
5731
5732 if (Var1 == nullptr && Const1 == nullptr) {
5733 assert(llvm::isa<ConstantRelocatable>(Src1));
5734 return false;
5735 }
5736
5737 if (Var0 && Var1)
5738 // TODO(jpp): merge base/index splitting into here.
5739 return false;
5740 if (!IsAdd && Var1)
5741 return false;
5742 if (Var0)
5743 NewBase = Var0;
5744 else if (Var1)
5745 NewBase = Var1;
5746 // Compute the updated constant offset.
5747 if (Const0) {
5748 int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5749 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5750 return false;
5751 NewOffset += MoreOffset;
5752 }
5753 if (Const1) {
5754 int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5755 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5756 return false;
5757 NewOffset += MoreOffset;
5758 }
5759
5760 // Update the computed address parameters once we are sure optimization
5761 // is valid.
5762 *Base = NewBase;
5763 *Offset = NewOffset;
5764 *Reason = BaseInst;
5765 return true;
5766 }
5767 } // end of anonymous namespace
5768
5769 OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
5770 const Inst *LdSt,
5771 Operand *Base) {
5772 assert(Base != nullptr);
5773 int32_t OffsetImm = 0;
5774 Variable *OffsetReg = nullptr;
5775 int32_t OffsetRegShamt = 0;
5776 OperandARM32::ShiftKind ShiftKind = OperandARM32::kNoShift;
5777
5778 Func->resetCurrentNode();
5779 if (Func->isVerbose(IceV_AddrOpt)) {
5780 OstreamLocker _(Func->getContext());
5781 Ostream &Str = Func->getContext()->getStrDump();
5782 Str << "\nAddress mode formation:\t";
5783 LdSt->dumpDecorated(Func);
5784 }
5785
5786 if (isVectorType(Ty))
5787 // Vector loads and stores do not allow offsets, and only support the
5788 // "[reg]" addressing mode (the other supported modes are write-back).
5789 return nullptr;
5790
5791 auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5792 if (BaseVar == nullptr)
5793 return nullptr;
5794
5795 (void)MemTraitsSize;
5796 assert(Ty < MemTraitsSize);
5797 auto *TypeTraits = &MemTraits[Ty];
5798 const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex;
5799 const bool CanHaveShiftedIndex =
5800 !NeedSandboxing && TypeTraits->CanHaveShiftedIndex;
5801 const bool CanHaveImm = TypeTraits->CanHaveImm;
5802 const int32_t ValidImmMask = TypeTraits->ValidImmMask;
5803 (void)ValidImmMask;
5804 assert(!CanHaveImm || ValidImmMask >= 0);
5805
5806 const VariablesMetadata *VMetadata = Func->getVMetadata();
5807 const Inst *Reason = nullptr;
5808
5809 do {
5810 if (Reason != nullptr) {
5811 dumpAddressOpt(Func, BaseVar, OffsetImm, OffsetReg, OffsetRegShamt,
5812 Reason);
5813 Reason = nullptr;
5814 }
5815
5816 if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5817 continue;
5818 }
5819
5820 if (CanHaveIndex &&
5821 matchAssign(VMetadata, &OffsetReg, &OffsetImm, &Reason)) {
5822 continue;
5823 }
5824
5825 if (CanHaveIndex && matchCombinedBaseIndex(VMetadata, &BaseVar, &OffsetReg,
5826 OffsetRegShamt, &Reason)) {
5827 continue;
5828 }
5829
5830 if (CanHaveShiftedIndex) {
5831 if (matchShiftedOffsetReg(VMetadata, &OffsetReg, &ShiftKind,
5832 &OffsetRegShamt, &Reason)) {
5833 continue;
5834 }
5835
5836 if ((OffsetRegShamt == 0) &&
5837 matchShiftedOffsetReg(VMetadata, &BaseVar, &ShiftKind,
5838 &OffsetRegShamt, &Reason)) {
5839 std::swap(BaseVar, OffsetReg);
5840 continue;
5841 }
5842 }
5843
5844 if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5845 continue;
5846 }
5847 } while (Reason);
5848
5849 if (BaseVar == nullptr) {
5850 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] is not legal in ARM, so we have to
5851 // legalize the addressing mode to [BaseReg, OffsetReg{, LSL Shamt}].
5852 // Instead of a zeroed BaseReg, we initialize it with OffsetImm:
5853 //
5854 // [OffsetReg{, LSL Shamt}{, #OffsetImm}] ->
5855 // mov BaseReg, #OffsetImm
5856 // use of [BaseReg, OffsetReg{, LSL Shamt}]
5857 //
5858 const Type PointerType = getPointerType();
5859 BaseVar = makeReg(PointerType);
5860 Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5861 OffsetImm = 0;
5862 } else if (OffsetImm != 0) {
5863 // ARM Ldr/Str instructions have limited range immediates. The formation
5864 // loop above materialized an Immediate carelessly, so we ensure the
5865 // generated offset is sane.
5866 const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5867 const InstArithmetic::OpKind Op =
5868 OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5869
5870 if (!CanHaveImm || !isLegalMemOffset(Ty, OffsetImm) ||
5871 OffsetReg != nullptr) {
5872 if (OffsetReg == nullptr) {
5873 // We formed a [Base, #const] addressing mode which is not encodable in
5874 // ARM. There is little point in forming an address mode now if we don't
5875 // have an offset register. Effectively, we would end up with something like
5876 //
5877 // [Base, #const] -> add T, Base, #const
5878 // use of [T]
5879 //
5880 // Which is exactly what we already have. So we just bite the bullet
5881 // here and don't form any address mode.
5882 return nullptr;
5883 }
5884 // We formed [Base, Offset {, LSL Amnt}, #const]. Oops. Legalize it to
5885 //
5886 // [Base, Offset, {LSL amount}, #const] ->
5887 // add T, Base, #const
5888 // use of [T, Offset {, LSL amount}]
5889 const Type PointerType = getPointerType();
5890 Variable *T = makeReg(PointerType);
5891 Context.insert<InstArithmetic>(Op, T, BaseVar,
5892 Ctx->getConstantInt32(PositiveOffset));
5893 BaseVar = T;
5894 OffsetImm = 0;
5895 }
5896 }
5897
5898 assert(BaseVar != nullptr);
5899 assert(OffsetImm == 0 || OffsetReg == nullptr);
5900 assert(OffsetReg == nullptr || CanHaveIndex);
5901 assert(OffsetImm < 0 ? (ValidImmMask & -OffsetImm) == -OffsetImm
5902 : (ValidImmMask & OffsetImm) == OffsetImm);
5903
5904 if (OffsetReg != nullptr) {
5905 Variable *OffsetR = makeReg(getPointerType());
5906 Context.insert<InstAssign>(OffsetR, OffsetReg);
5907 return OperandARM32Mem::create(Func, Ty, BaseVar, OffsetR, ShiftKind,
5908 OffsetRegShamt);
5909 }
5910
5911 return OperandARM32Mem::create(
5912 Func, Ty, BaseVar,
5913 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5914 }
5915
5916 void TargetARM32::doAddressOptLoad() {
5917 Inst *Instr = iteratorToInst(Context.getCur());
5918 assert(llvm::isa<InstLoad>(Instr));
5919 Variable *Dest = Instr->getDest();
5920 Operand *Addr = Instr->getSrc(0);
5921 if (OperandARM32Mem *Mem =
5922 formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5923 Instr->setDeleted();
5924 Context.insert<InstLoad>(Dest, Mem);
5925 }
5926 }
5927
5928 void TargetARM32::randomlyInsertNop(float Probability,
5929 RandomNumberGenerator &RNG) {
5930 RandomNumberGeneratorWrapper RNGW(RNG);
5931 if (RNGW.getTrueWithProbability(Probability)) {
5932 _nop();
5933 }
5934 }
5935
5936 void TargetARM32::lowerPhi(const InstPhi * /*Instr*/) {
5937 Func->setError("Phi found in regular instruction list");
5938 }
5939
5940 void TargetARM32::lowerRet(const InstRet *Instr) {
5941 Variable *Reg = nullptr;
5942 if (Instr->hasRetValue()) {
5943 Operand *Src0 = Instr->getRetValue();
5944 Type Ty = Src0->getType();
5945 if (Ty == IceType_i64) {
5946 Src0 = legalizeUndef(Src0);
5947 Variable *R0 = legalizeToReg(loOperand(Src0), RegARM32::Reg_r0);
5948 Variable *R1 = legalizeToReg(hiOperand(Src0), RegARM32::Reg_r1);
5949 Reg = R0;
5950 Context.insert<InstFakeUse>(R1);
5951 } else if (Ty == IceType_f32) {
5952 Variable *S0 = legalizeToReg(Src0, RegARM32::Reg_s0);
5953 Reg = S0;
5954 } else if (Ty == IceType_f64) {
5955 Variable *D0 = legalizeToReg(Src0, RegARM32::Reg_d0);
5956 Reg = D0;
5957 } else if (isVectorType(Src0->getType())) {
5958 Variable *Q0 = legalizeToReg(Src0, RegARM32::Reg_q0);
5959 Reg = Q0;
5960 } else {
5961 Operand *Src0F = legalize(Src0, Legal_Reg | Legal_Flex);
5962 Reg = makeReg(Src0F->getType(), RegARM32::Reg_r0);
5963 _mov(Reg, Src0F, CondARM32::AL);
5964 }
5965 }
5966 // Add a ret instruction even if sandboxing is enabled, because addEpilog
5967 // explicitly looks for a ret instruction as a marker for where to insert the
5968 // frame removal instructions. addEpilog is responsible for restoring the
5969 // "lr" register as needed prior to this ret instruction.
5970 _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
5971
5972 // Add a fake use of sp to make sure sp stays alive for the entire function.
5973 // Otherwise post-call sp adjustments get dead-code eliminated.
5974 // TODO: Are there more places where the fake use should be inserted? E.g.
5975 // "void f(int n){while(1) g(n);}" may not have a ret instruction.
5976 Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
5977 Context.insert<InstFakeUse>(SP);
5978 }
5979
5980 void TargetARM32::lowerShuffleVector(const InstShuffleVector *Instr) {
5981 auto *Dest = Instr->getDest();
5982 const Type DestTy = Dest->getType();
5983
5984 auto *T = makeReg(DestTy);
5985 auto *Src0 = Instr->getSrc(0);
5986 auto *Src1 = Instr->getSrc(1);
5987 const SizeT NumElements = typeNumElements(DestTy);
5988 const Type ElementType = typeElementType(DestTy);
5989
5990 bool Replicate = true;
5991 for (SizeT I = 1; Replicate && I < Instr->getNumIndexes(); ++I) {
5992 if (Instr->getIndexValue(I) != Instr->getIndexValue(0)) {
5993 Replicate = false;
5994 }
5995 }
5996
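  // E.g., a shuffle whose mask repeats a single index, such as <2, 2, 2, 2>
  // on a v4i32 source, is handled below with a single vdup of that lane
  // (illustrative).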
5997 if (Replicate) {
5998 Variable *Src0Var = legalizeToReg(Src0);
5999 _vdup(T, Src0Var, Instr->getIndexValue(0));
6000 _mov(Dest, T);
6001 return;
6002 }
6003
6004 switch (DestTy) {
6005 case IceType_v8i1:
6006 case IceType_v8i16: {
6007 static constexpr SizeT ExpectedNumElements = 8;
6008 assert(ExpectedNumElements == Instr->getNumIndexes());
6009 (void)ExpectedNumElements;
6010
6011 if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3)) {
6012 Variable *Src0R = legalizeToReg(Src0);
6013 _vzip(T, Src0R, Src0R);
6014 _mov(Dest, T);
6015 return;
6016 }
6017
6018 if (Instr->indexesAre(0, 8, 1, 9, 2, 10, 3, 11)) {
6019 Variable *Src0R = legalizeToReg(Src0);
6020 Variable *Src1R = legalizeToReg(Src1);
6021 _vzip(T, Src0R, Src1R);
6022 _mov(Dest, T);
6023 return;
6024 }
6025
6026 if (Instr->indexesAre(0, 2, 4, 6, 0, 2, 4, 6)) {
6027 Variable *Src0R = legalizeToReg(Src0);
6028 _vqmovn2(T, Src0R, Src0R, false, false);
6029 _mov(Dest, T);
6030 return;
6031 }
6032 } break;
6033 case IceType_v16i1:
6034 case IceType_v16i8: {
6035 static constexpr SizeT ExpectedNumElements = 16;
6036 assert(ExpectedNumElements == Instr->getNumIndexes());
6037 (void)ExpectedNumElements;
6038
6039 if (Instr->indexesAre(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7)) {
6040 Variable *Src0R = legalizeToReg(Src0);
6041 _vzip(T, Src0R, Src0R);
6042 _mov(Dest, T);
6043 return;
6044 }
6045
6046 if (Instr->indexesAre(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7,
6047 23)) {
6048 Variable *Src0R = legalizeToReg(Src0);
6049 Variable *Src1R = legalizeToReg(Src1);
6050 _vzip(T, Src0R, Src1R);
6051 _mov(Dest, T);
6052 return;
6053 }
6054 } break;
6055 case IceType_v4i1:
6056 case IceType_v4i32:
6057 case IceType_v4f32: {
6058 static constexpr SizeT ExpectedNumElements = 4;
6059 assert(ExpectedNumElements == Instr->getNumIndexes());
6060 (void)ExpectedNumElements;
6061
6062 if (Instr->indexesAre(0, 0, 1, 1)) {
6063 Variable *Src0R = legalizeToReg(Src0);
6064 _vzip(T, Src0R, Src0R);
6065 _mov(Dest, T);
6066 return;
6067 }
6068
6069 if (Instr->indexesAre(0, 4, 1, 5)) {
6070 Variable *Src0R = legalizeToReg(Src0);
6071 Variable *Src1R = legalizeToReg(Src1);
6072 _vzip(T, Src0R, Src1R);
6073 _mov(Dest, T);
6074 return;
6075 }
6076
6077 if (Instr->indexesAre(0, 1, 4, 5)) {
6078 Variable *Src0R = legalizeToReg(Src0);
6079 Variable *Src1R = legalizeToReg(Src1);
6080 _vmovlh(T, Src0R, Src1R);
6081 _mov(Dest, T);
6082 return;
6083 }
6084
6085 if (Instr->indexesAre(2, 3, 2, 3)) {
6086 Variable *Src0R = legalizeToReg(Src0);
6087 _vmovhl(T, Src0R, Src0R);
6088 _mov(Dest, T);
6089 return;
6090 }
6091
6092 if (Instr->indexesAre(2, 3, 6, 7)) {
6093 Variable *Src0R = legalizeToReg(Src0);
6094 Variable *Src1R = legalizeToReg(Src1);
6095 _vmovhl(T, Src1R, Src0R);
6096 _mov(Dest, T);
6097 return;
6098 }
6099 } break;
6100 default:
6101 break;
6102 // TODO(jpp): figure out how to properly lower this without scalarization.
6103 }
6104
6105 // Unoptimized shuffle. Perform a series of inserts and extracts.
6106 Context.insert<InstFakeDef>(T);
6107 for (SizeT I = 0; I < Instr->getNumIndexes(); ++I) {
6108 auto *Index = Instr->getIndex(I);
6109 const SizeT Elem = Index->getValue();
6110 auto *ExtElmt = makeReg(ElementType);
6111 if (Elem < NumElements) {
6112 lowerExtractElement(
6113 InstExtractElement::create(Func, ExtElmt, Src0, Index));
6114 } else {
6115 lowerExtractElement(InstExtractElement::create(
6116 Func, ExtElmt, Src1,
6117 Ctx->getConstantInt32(Index->getValue() - NumElements)));
6118 }
6119 auto *NewT = makeReg(DestTy);
6120 lowerInsertElement(InstInsertElement::create(Func, NewT, T, ExtElmt,
6121 Ctx->getConstantInt32(I)));
6122 T = NewT;
6123 }
6124 _mov(Dest, T);
6125 }
6126
6127 void TargetARM32::lowerSelect(const InstSelect *Instr) {
6128 Variable *Dest = Instr->getDest();
6129 Type DestTy = Dest->getType();
6130 Operand *SrcT = Instr->getTrueOperand();
6131 Operand *SrcF = Instr->getFalseOperand();
6132 Operand *Condition = Instr->getCondition();
6133
6134 if (!isVectorType(DestTy)) {
6135 lowerInt1ForSelect(Dest, Condition, legalizeUndef(SrcT),
6136 legalizeUndef(SrcF));
6137 return;
6138 }
6139
6140 Type TType = DestTy;
6141 switch (DestTy) {
6142 default:
6143 llvm::report_fatal_error("Unexpected type for vector select.");
6144 case IceType_v4i1:
6145 TType = IceType_v4i32;
6146 break;
6147 case IceType_v8i1:
6148 TType = IceType_v8i16;
6149 break;
6150 case IceType_v16i1:
6151 TType = IceType_v16i8;
6152 break;
6153 case IceType_v4f32:
6154 TType = IceType_v4i32;
6155 break;
6156 case IceType_v4i32:
6157 case IceType_v8i16:
6158 case IceType_v16i8:
6159 break;
6160 }
6161 auto *T = makeReg(TType);
6162 lowerCast(InstCast::create(Func, InstCast::Sext, T, Condition));
6163 auto *SrcTR = legalizeToReg(SrcT);
6164 auto *SrcFR = legalizeToReg(SrcF);
6165 _vbsl(T, SrcTR, SrcFR)->setDestRedefined();
6166 _mov(Dest, T);
6167 }
6168
6169 void TargetARM32::lowerStore(const InstStore *Instr) {
6170 Operand *Value = Instr->getData();
6171 Operand *Addr = Instr->getAddr();
6172 OperandARM32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
6173 Type Ty = NewAddr->getType();
6174
6175 if (Ty == IceType_i64) {
6176 Value = legalizeUndef(Value);
6177 Variable *ValueHi = legalizeToReg(hiOperand(Value));
6178 Variable *ValueLo = legalizeToReg(loOperand(Value));
6179 _str(ValueHi, llvm::cast<OperandARM32Mem>(hiOperand(NewAddr)));
6180 _str(ValueLo, llvm::cast<OperandARM32Mem>(loOperand(NewAddr)));
6181 } else {
6182 Variable *ValueR = legalizeToReg(Value);
6183 _str(ValueR, NewAddr);
6184 }
6185 }
6186
6187 void TargetARM32::doAddressOptStore() {
6188 Inst *Instr = iteratorToInst(Context.getCur());
6189 assert(llvm::isa<InstStore>(Instr));
6190 Operand *Src = Instr->getSrc(0);
6191 Operand *Addr = Instr->getSrc(1);
6192 if (OperandARM32Mem *Mem =
6193 formAddressingMode(Src->getType(), Func, Instr, Addr)) {
6194 Instr->setDeleted();
6195 Context.insert<InstStore>(Src, Mem);
6196 }
6197 }
6198
6199 void TargetARM32::lowerSwitch(const InstSwitch *Instr) {
6200 // This implements the most naive possible lowering.
6201 // cmp a,val[0]; jeq label[0]; cmp a,val[1]; jeq label[1]; ... jmp default
6202 Operand *Src0 = Instr->getComparison();
6203 SizeT NumCases = Instr->getNumCases();
6204 if (Src0->getType() == IceType_i64) {
6205 Src0 = legalizeUndef(Src0);
6206 Variable *Src0Lo = legalizeToReg(loOperand(Src0));
6207 Variable *Src0Hi = legalizeToReg(hiOperand(Src0));
6208 for (SizeT I = 0; I < NumCases; ++I) {
6209 Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
6210 Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
6211 ValueLo = legalize(ValueLo, Legal_Reg | Legal_Flex);
6212 ValueHi = legalize(ValueHi, Legal_Reg | Legal_Flex);
6213 _cmp(Src0Lo, ValueLo);
6214 _cmp(Src0Hi, ValueHi, CondARM32::EQ);
6215 _br(Instr->getLabel(I), CondARM32::EQ);
6216 }
6217 _br(Instr->getLabelDefault());
6218 return;
6219 }
6220
6221 Variable *Src0Var = legalizeToReg(Src0);
6222 // If Src0 is not an i32, we left shift it -- see the icmp lowering for the
6223 // reason.
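  // E.g., an i16 comparison value is shifted left by 16 and each case
  // constant below is shifted by the same amount, so the 32-bit cmp only
  // observes the 16 significant bits.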
6224 assert(Src0Var->mustHaveReg());
6225 const size_t ShiftAmt = 32 - getScalarIntBitWidth(Src0->getType());
6226 assert(ShiftAmt < 32);
6227 if (ShiftAmt > 0) {
6228 Operand *ShAmtImm = shAmtImm(ShiftAmt);
6229 Variable *T = makeReg(IceType_i32);
6230 _lsl(T, Src0Var, ShAmtImm);
6231 Src0Var = T;
6232 }
6233
6234 for (SizeT I = 0; I < NumCases; ++I) {
6235 Operand *Value = Ctx->getConstantInt32(Instr->getValue(I) << ShiftAmt);
6236 Value = legalize(Value, Legal_Reg | Legal_Flex);
6237 _cmp(Src0Var, Value);
6238 _br(Instr->getLabel(I), CondARM32::EQ);
6239 }
6240 _br(Instr->getLabelDefault());
6241 }
6242
6243 void TargetARM32::lowerBreakpoint(const InstBreakpoint *Instr) {
6244 UnimplementedLoweringError(this, Instr);
6245 }
6246
6247 void TargetARM32::lowerUnreachable(const InstUnreachable * /*Instr*/) {
6248 _trap();
6249 }
6250
6251 namespace {
6252 // Returns whether Opnd needs the GOT address. Currently, ConstantRelocatables
6253 // and fp constants need access to the GOT address.
6254 bool operandNeedsGot(const Operand *Opnd) {
6255 if (llvm::isa<ConstantRelocatable>(Opnd)) {
6256 return true;
6257 }
6258
6259 if (llvm::isa<ConstantFloat>(Opnd)) {
6260 uint32_t _;
6261 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_);
6262 }
6263
6264 const auto *F64 = llvm::dyn_cast<ConstantDouble>(Opnd);
6265 if (F64 != nullptr) {
6266 uint32_t _;
6267 return !OperandARM32FlexFpImm::canHoldImm(Opnd, &_) &&
6268 !isFloatingPointZero(F64);
6269 }
6270
6271 return false;
6272 }
6273
6274 // Returns whether Phi needs the GOT address (which it does if any of its
6275 // operands needs the GOT address.)
6276 bool phiNeedsGot(const InstPhi *Phi) {
6277 if (Phi->isDeleted()) {
6278 return false;
6279 }
6280
6281 for (SizeT I = 0; I < Phi->getSrcSize(); ++I) {
6282 if (operandNeedsGot(Phi->getSrc(I))) {
6283 return true;
6284 }
6285 }
6286
6287 return false;
6288 }
6289
6290 // Returns whether **any** phi in Node needs the GOT address.
6291 bool anyPhiInNodeNeedsGot(CfgNode *Node) {
6292 for (auto &Inst : Node->getPhis()) {
6293 if (phiNeedsGot(llvm::cast<InstPhi>(&Inst))) {
6294 return true;
6295 }
6296 }
6297 return false;
6298 }
6299
6300 } // end of anonymous namespace
6301
6302 void TargetARM32::prelowerPhis() {
6303 CfgNode *Node = Context.getNode();
6304
6305 if (SandboxingType == ST_Nonsfi) {
6306 assert(GotPtr != nullptr);
6307 if (anyPhiInNodeNeedsGot(Node)) {
6308 // If any phi instruction needs the GOT address, we place a fake-use of
6309 // GotPtr in Node to prevent the GotPtr's initialization from being
6310 // dead-code eliminated.
6312 Node->getInsts().push_front(InstFakeUse::create(Func, GotPtr));
6313 }
6314 }
6315
6316 PhiLowering::prelowerPhis32Bit(this, Node, Func);
6317 }
6318
6319 Variable *TargetARM32::makeVectorOfZeros(Type Ty, RegNumT RegNum) {
6320 Variable *Reg = makeReg(Ty, RegNum);
6321 Context.insert<InstFakeDef>(Reg);
6322 assert(isVectorType(Ty));
6323 _veor(Reg, Reg, Reg);
6324 return Reg;
6325 }
6326
6327 // Helper for legalize() to emit the right code to lower an operand to a
6328 // register of the appropriate type.
6329 Variable *TargetARM32::copyToReg(Operand *Src, RegNumT RegNum) {
6330 Type Ty = Src->getType();
6331 Variable *Reg = makeReg(Ty, RegNum);
6332 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Src)) {
6333 _ldr(Reg, Mem);
6334 } else {
6335 _mov(Reg, Src);
6336 }
6337 return Reg;
6338 }
6339
6340 // TODO(jpp): remove unneeded else clauses in legalize.
6341 Operand *TargetARM32::legalize(Operand *From, LegalMask Allowed,
6342 RegNumT RegNum) {
6343 Type Ty = From->getType();
6344 // Assert that a physical register is allowed. To date, all calls to
6345 // legalize() allow a physical register. Legal_Flex converts registers to the
6346 // right type OperandARM32FlexReg as needed.
6347 assert(Allowed & Legal_Reg);
6348
6349 // Copied verbatim from TargetX86Base<Machine>.
6350 if (RegNum.hasNoValue()) {
6351 if (Variable *Subst = getContext().availabilityGet(From)) {
6352 // At this point we know there is a potential substitution available.
6353 if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
6354 !Subst->hasReg()) {
6355 // At this point we know the substitution will have a register.
6356 if (From->getType() == Subst->getType()) {
6357 // At this point we know the substitution's register is compatible.
6358 return Subst;
6359 }
6360 }
6361 }
6362 }
6363
6364 // Go through the various types of operands: OperandARM32Mem,
6365 // OperandARM32Flex, Constant, and Variable. Given the above assertion, if
6366 // type of operand is not legal (e.g., OperandARM32Mem and !Legal_Mem), we
6367 // can always copy to a register.
6368 if (auto *Mem = llvm::dyn_cast<OperandARM32Mem>(From)) {
6369 // Before doing anything with a Mem operand, we need to ensure that the
6370 // Base and Index components are in physical registers.
6371 Variable *Base = Mem->getBase();
6372 Variable *Index = Mem->getIndex();
6373 ConstantInteger32 *Offset = Mem->getOffset();
6374 assert(Index == nullptr || Offset == nullptr);
6375 Variable *RegBase = nullptr;
6376 Variable *RegIndex = nullptr;
6377 assert(Base);
6378 RegBase = llvm::cast<Variable>(
6379 legalize(Base, Legal_Reg | Legal_Rematerializable));
6380 assert(Ty < MemTraitsSize);
6381 if (Index) {
6382 assert(Offset == nullptr);
6383 assert(MemTraits[Ty].CanHaveIndex);
6384 RegIndex = legalizeToReg(Index);
6385 }
6386 if (Offset && Offset->getValue() != 0) {
6387 assert(Index == nullptr);
6388 static constexpr bool ZeroExt = false;
6389 assert(MemTraits[Ty].CanHaveImm);
6390 if (!OperandARM32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
6391 llvm::report_fatal_error("Invalid memory offset.");
6392 }
6393 }
6394
6395 // Create a new operand if there was a change.
6396 if (Base != RegBase || Index != RegIndex) {
6397 // There is only a reg +/- reg or reg + imm form.
6398 // Figure out which to re-create.
6399 if (RegIndex) {
6400 Mem = OperandARM32Mem::create(Func, Ty, RegBase, RegIndex,
6401 Mem->getShiftOp(), Mem->getShiftAmt(),
6402 Mem->getAddrMode());
6403 } else {
6404 Mem = OperandARM32Mem::create(Func, Ty, RegBase, Offset,
6405 Mem->getAddrMode());
6406 }
6407 }
6408 if (Allowed & Legal_Mem) {
6409 From = Mem;
6410 } else {
6411 Variable *Reg = makeReg(Ty, RegNum);
6412 _ldr(Reg, Mem);
6413 From = Reg;
6414 }
6415 return From;
6416 }
6417
6418 if (auto *Flex = llvm::dyn_cast<OperandARM32Flex>(From)) {
6419 if (!(Allowed & Legal_Flex)) {
6420 if (auto *FlexReg = llvm::dyn_cast<OperandARM32FlexReg>(Flex)) {
6421 if (FlexReg->getShiftOp() == OperandARM32::kNoShift) {
6422 From = FlexReg->getReg();
6423 // Fall through and let From be checked as a Variable below, where it
6424 // may or may not need a register.
6425 } else {
6426 return copyToReg(Flex, RegNum);
6427 }
6428 } else {
6429 return copyToReg(Flex, RegNum);
6430 }
6431 } else {
6432 return From;
6433 }
6434 }
6435
6436 if (llvm::isa<Constant>(From)) {
6437 if (llvm::isa<ConstantUndef>(From)) {
6438 From = legalizeUndef(From, RegNum);
6439 if (isVectorType(Ty))
6440 return From;
6441 }
6442 // There should be no constants of vector type (other than undef).
6443 assert(!isVectorType(Ty));
6444 if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
6445 uint32_t RotateAmt;
6446 uint32_t Immed_8;
6447 uint32_t Value = static_cast<uint32_t>(C32->getValue());
6448 if (OperandARM32FlexImm::canHoldImm(Value, &RotateAmt, &Immed_8)) {
6449 // The immediate can be encoded as a Flex immediate. We may return the
6450 // Flex operand if the caller has Allow'ed it.
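        // Illustrative example: 0xFF000000 fits (it is the 8-bit pattern 0xFF
        // rotated right by 8 bits), while a value such as 0x12345678 does not.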
6451 auto *OpF = OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6452 const bool CanBeFlex = Allowed & Legal_Flex;
6453 if (CanBeFlex)
6454 return OpF;
6455 return copyToReg(OpF, RegNum);
6456 } else if (OperandARM32FlexImm::canHoldImm(~Value, &RotateAmt,
6457 &Immed_8)) {
6458         // Even though the immediate can't be encoded as a Flex operand, its
6459         // inverted bit pattern can, so we use ARM's mvn to load the 32-bit
6460         // constant with a single instruction.
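        // Illustrative example: 0xFFFFFF00 has no Flex encoding, but its
        // complement 0x000000FF does, so "mvn reg, #0xFF" materializes
        // 0xFFFFFF00 in one instruction.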
6461 auto *InvOpF =
6462 OperandARM32FlexImm::create(Func, Ty, Immed_8, RotateAmt);
6463 Variable *Reg = makeReg(Ty, RegNum);
6464 _mvn(Reg, InvOpF);
6465 return Reg;
6466 } else {
6467 // Do a movw/movt to a register.
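        // Schematically (hypothetical value): loading 0x12345678 emits
        // "movw reg, #0x5678" followed by "movt reg, #0x1234"; a value whose
        // upper 16 bits are zero needs only the movw.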
6468 Variable *Reg = makeReg(Ty, RegNum);
6469 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
6470 _movw(Reg,
6471 UpperBits != 0 ? Ctx->getConstantInt32(Value & 0xFFFF) : C32);
6472 if (UpperBits != 0) {
6473 _movt(Reg, Ctx->getConstantInt32(UpperBits));
6474 }
6475 return Reg;
6476 }
6477 } else if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
6478 Variable *Reg = makeReg(Ty, RegNum);
6479 if (SandboxingType != ST_Nonsfi) {
6480 _movw(Reg, C);
6481 _movt(Reg, C);
6482 } else {
6483 auto *GotAddr = legalizeToReg(GotPtr);
6484 GlobalString CGotoffName = createGotoffRelocation(C);
6485 loadNamedConstantRelocatablePIC(
6486 CGotoffName, Reg, [this, Reg](Variable *PC) {
6487 _ldr(Reg, OperandARM32Mem::create(Func, IceType_i32, PC, Reg));
6488 });
6489 _add(Reg, GotAddr, Reg);
6490 }
6491 return Reg;
6492 } else {
6493 assert(isScalarFloatingType(Ty));
6494 uint32_t ModifiedImm;
6495 if (OperandARM32FlexFpImm::canHoldImm(From, &ModifiedImm)) {
6496 Variable *T = makeReg(Ty, RegNum);
6497 _mov(T,
6498 OperandARM32FlexFpImm::create(Func, From->getType(), ModifiedImm));
6499 return T;
6500 }
6501
6502 if (Ty == IceType_f64 && isFloatingPointZero(From)) {
6503 // Use T = T ^ T to load a 64-bit fp zero. This does not work for f32
6504 // because ARM does not have a veor instruction with S registers.
6505 Variable *T = makeReg(IceType_f64, RegNum);
6506 Context.insert<InstFakeDef>(T);
6507 _veor(T, T, T);
6508 return T;
6509 }
6510
6511 // Load floats/doubles from literal pool.
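      // Roughly (illustrative), the non-nonsfi path below materializes the
      // pool entry's address with a movw/movt pair and then loads the value
      // from that address; the exact label/relocation syntax depends on the
      // emitter.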
6512 auto *CFrom = llvm::cast<Constant>(From);
6513 assert(CFrom->getShouldBePooled());
6514 Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
6515 Variable *BaseReg = nullptr;
6516 if (SandboxingType == ST_Nonsfi) {
6517 // vldr does not support the [base, index] addressing mode, so we need
6518 // to legalize Offset to a register. Otherwise, we could simply
6519 // vldr dest, [got, reg(Offset)]
6520 BaseReg = legalizeToReg(Offset);
6521 } else {
6522 BaseReg = makeReg(getPointerType());
6523 _movw(BaseReg, Offset);
6524 _movt(BaseReg, Offset);
6525 }
6526 From = formMemoryOperand(BaseReg, Ty);
6527 return copyToReg(From, RegNum);
6528 }
6529 }
6530
6531 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
6532 if (Var->isRematerializable()) {
6533 if (Allowed & Legal_Rematerializable) {
6534 return From;
6535 }
6536
6537 Variable *T = makeReg(Var->getType(), RegNum);
6538 _mov(T, Var);
6539 return T;
6540 }
6541 // Check if the variable is guaranteed a physical register. This can happen
6542 // either when the variable is pre-colored or when it is assigned infinite
6543 // weight.
6544 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
6545 // We need a new physical register for the operand if:
6546 // Mem is not allowed and Var isn't guaranteed a physical
6547 // register, or
6548 // RegNum is required and Var->getRegNum() doesn't match.
6549 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
6550 (RegNum.hasValue() && (RegNum != Var->getRegNum()))) {
6551 From = copyToReg(From, RegNum);
6552 }
6553 return From;
6554 }
6555 llvm::report_fatal_error("Unhandled operand kind in legalize()");
6556
6557 return From;
6558 }
6559
6560 /// Provide a trivial wrapper to legalize() for this common usage.
6561 Variable *TargetARM32::legalizeToReg(Operand *From, RegNumT RegNum) {
6562 return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
6563 }
6564
6565 /// Legalize undef values to concrete values.
6566 Operand *TargetARM32::legalizeUndef(Operand *From, RegNumT RegNum) {
6567 Type Ty = From->getType();
6568 if (llvm::isa<ConstantUndef>(From)) {
6569 // Lower undefs to zero. Another option is to lower undefs to an
6570 // uninitialized register; however, using an uninitialized register results
6571 // in less predictable code.
6572 //
6573 // If in the future the implementation is changed to lower undef values to
6574 // uninitialized registers, a FakeDef will be needed:
6575   //   Context.insert(InstFakeDef::create(Func, Reg));
6576   // This ensures that the live range of Reg is not overestimated. If the
6577   // constant being lowered is a 64-bit value, then the result should be
6578   // split and the lo and hi components will need to go in uninitialized
  // registers.
6579 if (isVectorType(Ty))
6580 return makeVectorOfZeros(Ty, RegNum);
6581 return Ctx->getConstantZero(Ty);
6582 }
6583 return From;
6584 }
6585
6586 OperandARM32Mem *TargetARM32::formMemoryOperand(Operand *Operand, Type Ty) {
6587 auto *Mem = llvm::dyn_cast<OperandARM32Mem>(Operand);
6588   // Address mode optimization may already have created an OperandARM32Mem,
6589   // in which case it does not need another level of transformation here
6590   // beyond legalization.
6591 if (Mem) {
6592 return llvm::cast<OperandARM32Mem>(legalize(Mem));
6593 }
6594 // If we didn't do address mode optimization, then we only have a
6595 // base/offset to work with. ARM always requires a base register, so
6596 // just use that to hold the operand.
6597 auto *Base = llvm::cast<Variable>(
6598 legalize(Operand, Legal_Reg | Legal_Rematerializable));
6599 return OperandARM32Mem::create(
6600 Func, Ty, Base,
6601 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32)));
6602 }
6603
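// makeI64RegPair creates an i64 variable that must be allocated to a core
// register pair, while its lo/hi halves are deliberately kept out of registers
// of their own; callers use it for instructions (e.g., ldrexd/strexd) that
// consume a register pair.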
6604 Variable64On32 *TargetARM32::makeI64RegPair() {
6605 Variable64On32 *Reg =
6606 llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
6607 Reg->setMustHaveReg();
6608 Reg->initHiLo(Func);
6609 Reg->getLo()->setMustNotHaveReg();
6610 Reg->getHi()->setMustNotHaveReg();
6611 return Reg;
6612 }
6613
6614 Variable *TargetARM32::makeReg(Type Type, RegNumT RegNum) {
6615 // There aren't any 64-bit integer registers for ARM32.
6616 assert(Type != IceType_i64);
6617 assert(AllowTemporaryWithNoReg || RegNum.hasValue());
6618 Variable *Reg = Func->makeVariable(Type);
6619 if (RegNum.hasValue())
6620 Reg->setRegNum(RegNum);
6621 else
6622 Reg->setMustHaveReg();
6623 return Reg;
6624 }
6625
6626 void TargetARM32::alignRegisterPow2(Variable *Reg, uint32_t Align,
6627 RegNumT TmpRegNum) {
6628 assert(llvm::isPowerOf2_32(Align));
6629 uint32_t RotateAmt;
6630 uint32_t Immed_8;
6631 Operand *Mask;
6632 // Use AND or BIC to mask off the bits, depending on which immediate fits (if
6633 // it fits at all). Assume Align is usually small, in which case BIC works
6634 // better. Thus, this rounds down to the alignment.
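  // Illustrative example: Align == 8 yields "bic Reg, Reg, #7", clearing the
  // low three bits and rounding Reg down to an 8-byte boundary.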
6635 if (OperandARM32FlexImm::canHoldImm(Align - 1, &RotateAmt, &Immed_8)) {
6636 Mask = legalize(Ctx->getConstantInt32(Align - 1), Legal_Reg | Legal_Flex,
6637 TmpRegNum);
6638 _bic(Reg, Reg, Mask);
6639 } else {
6640 Mask = legalize(Ctx->getConstantInt32(-Align), Legal_Reg | Legal_Flex,
6641 TmpRegNum);
6642 _and(Reg, Reg, Mask);
6643 }
6644 }
6645
6646 void TargetARM32::postLower() {
6647 if (Func->getOptLevel() == Opt_m1)
6648 return;
6649 markRedefinitions();
6650 Context.availabilityUpdate();
6651 }
6652
6653 void TargetARM32::makeRandomRegisterPermutation(
6654 llvm::SmallVectorImpl<RegNumT> &Permutation,
6655 const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
6656 (void)Permutation;
6657 (void)ExcludeRegisters;
6658 (void)Salt;
6659 UnimplementedError(getFlags());
6660 }
6661
6662 void TargetARM32::emit(const ConstantInteger32 *C) const {
6663 if (!BuildDefs::dump())
6664 return;
6665 Ostream &Str = Ctx->getStrEmit();
6666 Str << "#" << C->getValue();
6667 }
6668
6669 void TargetARM32::emit(const ConstantInteger64 *) const {
6670 llvm::report_fatal_error("Not expecting to emit 64-bit integers");
6671 }
6672
6673 void TargetARM32::emit(const ConstantFloat *C) const {
6674 (void)C;
6675 UnimplementedError(getFlags());
6676 }
6677
6678 void TargetARM32::emit(const ConstantDouble *C) const {
6679 (void)C;
6680 UnimplementedError(getFlags());
6681 }
6682
6683 void TargetARM32::emit(const ConstantUndef *) const {
6684 llvm::report_fatal_error("undef value encountered by emitter.");
6685 }
6686
6687 void TargetARM32::emit(const ConstantRelocatable *C) const {
6688 if (!BuildDefs::dump())
6689 return;
6690 Ostream &Str = Ctx->getStrEmit();
6691 Str << "#";
6692 emitWithoutPrefix(C);
6693 }
6694
6695 void TargetARM32::lowerInt1ForSelect(Variable *Dest, Operand *Boolean,
6696 Operand *TrueValue, Operand *FalseValue) {
6697 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6698
6699 assert(Boolean->getType() == IceType_i1);
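  // Schematically, the emitted pattern is (illustrative condition/operands):
  //   mov   T, <FalseValue>
  //   <set flags from Boolean: a folded cmp/fcmp, or the tst below>
  //   movCC T, <TrueValue>   ; executed only when Boolean is true
  //   mov   Dest, T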
6700
6701 bool NeedsAnd1 = false;
6702 if (TrueValue->getType() == IceType_i1) {
6703 assert(FalseValue->getType() == IceType_i1);
6704
6705 Variable *TrueValueV = Func->makeVariable(IceType_i1);
6706 SafeBoolChain Src0Safe = lowerInt1(TrueValueV, TrueValue);
6707 TrueValue = TrueValueV;
6708
6709 Variable *FalseValueV = Func->makeVariable(IceType_i1);
6710 SafeBoolChain Src1Safe = lowerInt1(FalseValueV, FalseValue);
6711 FalseValue = FalseValueV;
6712
6713 NeedsAnd1 = Src0Safe == SBC_No || Src1Safe == SBC_No;
6714 }
6715
6716 Variable *DestLo = (Dest->getType() == IceType_i64)
6717 ? llvm::cast<Variable>(loOperand(Dest))
6718 : Dest;
6719 Variable *DestHi = (Dest->getType() == IceType_i64)
6720 ? llvm::cast<Variable>(hiOperand(Dest))
6721 : nullptr;
6722 Operand *FalseValueLo = (FalseValue->getType() == IceType_i64)
6723 ? loOperand(FalseValue)
6724 : FalseValue;
6725 Operand *FalseValueHi =
6726 (FalseValue->getType() == IceType_i64) ? hiOperand(FalseValue) : nullptr;
6727
6728 Operand *TrueValueLo =
6729 (TrueValue->getType() == IceType_i64) ? loOperand(TrueValue) : TrueValue;
6730 Operand *TrueValueHi =
6731 (TrueValue->getType() == IceType_i64) ? hiOperand(TrueValue) : nullptr;
6732
6733 Variable *T_Lo = makeReg(DestLo->getType());
6734 Variable *T_Hi = (DestHi == nullptr) ? nullptr : makeReg(DestHi->getType());
6735
6736 _mov(T_Lo, legalize(FalseValueLo, Legal_Reg | Legal_Flex));
6737 if (DestHi) {
6738 _mov(T_Hi, legalize(FalseValueHi, Legal_Reg | Legal_Flex));
6739 }
6740
6741 CondWhenTrue Cond(CondARM32::kNone);
6742   // FlagsWereSet is used to determine whether Boolean was folded or not. If
6743   // not, an explicit _tst instruction is added below.
6744 bool FlagsWereSet = false;
6745 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6746 switch (Producer->getKind()) {
6747 default:
6748 llvm::report_fatal_error("Unexpected producer.");
6749 case Inst::Icmp: {
6750 Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6751 FlagsWereSet = true;
6752 } break;
6753 case Inst::Fcmp: {
6754 Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6755 FlagsWereSet = true;
6756 } break;
6757 case Inst::Cast: {
6758 const auto *CastProducer = llvm::cast<InstCast>(Producer);
6759 assert(CastProducer->getCastKind() == InstCast::Trunc);
6760 Boolean = CastProducer->getSrc(0);
6761 // No flags were set, so a _tst(Src, 1) will be emitted below. Don't
6762 // bother legalizing Src to a Reg because it will be legalized before
6763 // emitting the tst instruction.
6764 FlagsWereSet = false;
6765 } break;
6766 case Inst::Arithmetic: {
6767 // This is a special case: we eagerly assumed Producer could be folded,
6768 // but in reality, it can't. No reason to panic: we just lower it using
6769 // the regular lowerArithmetic helper.
6770 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6771 lowerArithmetic(ArithProducer);
6772 Boolean = ArithProducer->getDest();
6773 // No flags were set, so a _tst(Dest, 1) will be emitted below. Don't
6774 // bother legalizing Dest to a Reg because it will be legalized before
6775 // emitting the tst instruction.
6776 FlagsWereSet = false;
6777 } break;
6778 }
6779 }
6780
6781 if (!FlagsWereSet) {
6782 // No flags have been set, so emit a tst Boolean, 1.
6783 Variable *Src = legalizeToReg(Boolean);
6784 _tst(Src, _1);
6785 Cond = CondWhenTrue(CondARM32::NE); // i.e., CondARM32::NotZero.
6786 }
6787
6788 if (Cond.WhenTrue0 == CondARM32::kNone) {
6789 assert(Cond.WhenTrue1 == CondARM32::kNone);
6790 } else {
6791 _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6792 Cond.WhenTrue0);
6793 if (DestHi) {
6794 _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6795 Cond.WhenTrue0);
6796 }
6797 }
6798
6799 if (Cond.WhenTrue1 != CondARM32::kNone) {
6800 _mov_redefined(T_Lo, legalize(TrueValueLo, Legal_Reg | Legal_Flex),
6801 Cond.WhenTrue1);
6802 if (DestHi) {
6803 _mov_redefined(T_Hi, legalize(TrueValueHi, Legal_Reg | Legal_Flex),
6804 Cond.WhenTrue1);
6805 }
6806 }
6807
6808 if (NeedsAnd1) {
6809 // We lowered something that is unsafe (i.e., can't provably be zero or
6810 // one). Truncate the result.
6811 _and(T_Lo, T_Lo, _1);
6812 }
6813
6814 _mov(DestLo, T_Lo);
6815 if (DestHi) {
6816 _mov(DestHi, T_Hi);
6817 }
6818 }
6819
6820 TargetARM32::SafeBoolChain TargetARM32::lowerInt1(Variable *Dest,
6821 Operand *Boolean) {
6822 assert(Boolean->getType() == IceType_i1);
6823 Variable *T = makeReg(IceType_i1);
6824 Operand *_0 =
6825 legalize(Ctx->getConstantZero(IceType_i1), Legal_Reg | Legal_Flex);
6826 Operand *_1 = legalize(Ctx->getConstantInt1(1), Legal_Reg | Legal_Flex);
6827
6828 SafeBoolChain Safe = SBC_Yes;
6829 if (const Inst *Producer = Computations.getProducerOf(Boolean)) {
6830 switch (Producer->getKind()) {
6831 default:
6832 llvm::report_fatal_error("Unexpected producer.");
6833 case Inst::Icmp: {
6834 _mov(T, _0);
6835 CondWhenTrue Cond = lowerIcmpCond(llvm::cast<InstIcmp>(Producer));
6836 assert(Cond.WhenTrue0 != CondARM32::AL);
6837 assert(Cond.WhenTrue0 != CondARM32::kNone);
6838 assert(Cond.WhenTrue1 == CondARM32::kNone);
6839 _mov_redefined(T, _1, Cond.WhenTrue0);
6840 } break;
6841 case Inst::Fcmp: {
6842 _mov(T, _0);
6843 Inst *MovZero = Context.getLastInserted();
6844 CondWhenTrue Cond = lowerFcmpCond(llvm::cast<InstFcmp>(Producer));
6845 if (Cond.WhenTrue0 == CondARM32::AL) {
6846 assert(Cond.WhenTrue1 == CondARM32::kNone);
6847 MovZero->setDeleted();
6848 _mov(T, _1);
6849 } else if (Cond.WhenTrue0 != CondARM32::kNone) {
6850 _mov_redefined(T, _1, Cond.WhenTrue0);
6851 }
6852 if (Cond.WhenTrue1 != CondARM32::kNone) {
6853 assert(Cond.WhenTrue0 != CondARM32::kNone);
6854 assert(Cond.WhenTrue0 != CondARM32::AL);
6855 _mov_redefined(T, _1, Cond.WhenTrue1);
6856 }
6857 } break;
6858 case Inst::Cast: {
6859 const auto *CastProducer = llvm::cast<InstCast>(Producer);
6860 assert(CastProducer->getCastKind() == InstCast::Trunc);
6861 Operand *Src = CastProducer->getSrc(0);
6862 if (Src->getType() == IceType_i64)
6863 Src = loOperand(Src);
6864 _mov(T, legalize(Src, Legal_Reg | Legal_Flex));
6865 Safe = SBC_No;
6866 } break;
6867 case Inst::Arithmetic: {
6868 const auto *ArithProducer = llvm::cast<InstArithmetic>(Producer);
6869 Safe = lowerInt1Arithmetic(ArithProducer);
6870 _mov(T, ArithProducer->getDest());
6871 } break;
6872 }
6873 } else {
6874 _mov(T, legalize(Boolean, Legal_Reg | Legal_Flex));
6875 }
6876
6877 _mov(Dest, T);
6878 return Safe;
6879 }
6880
6881 namespace {
6882 namespace BoolFolding {
6883 bool shouldTrackProducer(const Inst &Instr) {
6884 switch (Instr.getKind()) {
6885 default:
6886 return false;
6887 case Inst::Icmp:
6888 case Inst::Fcmp:
6889 return true;
6890 case Inst::Cast: {
6891 switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6892 default:
6893 return false;
6894 case InstCast::Trunc:
6895 return true;
6896 }
6897 }
6898 case Inst::Arithmetic: {
6899 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6900 default:
6901 return false;
6902 case InstArithmetic::And:
6903 case InstArithmetic::Or:
6904 return true;
6905 }
6906 }
6907 }
6908 }
6909
6910 bool isValidConsumer(const Inst &Instr) {
6911 switch (Instr.getKind()) {
6912 default:
6913 return false;
6914 case Inst::Br:
6915 return true;
6916 case Inst::Select:
6917 return !isVectorType(Instr.getDest()->getType());
6918 case Inst::Cast: {
6919 switch (llvm::cast<InstCast>(&Instr)->getCastKind()) {
6920 default:
6921 return false;
6922 case InstCast::Sext:
6923 return !isVectorType(Instr.getDest()->getType());
6924 case InstCast::Zext:
6925 return !isVectorType(Instr.getDest()->getType());
6926 }
6927 }
6928 case Inst::Arithmetic: {
6929 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6930 default:
6931 return false;
6932 case InstArithmetic::And:
6933 return !isVectorType(Instr.getDest()->getType());
6934 case InstArithmetic::Or:
6935 return !isVectorType(Instr.getDest()->getType());
6936 }
6937 }
6938 }
6939 }
6940 } // end of namespace BoolFolding
6941
6942 namespace FpFolding {
6943 bool shouldTrackProducer(const Inst &Instr) {
6944 switch (Instr.getKind()) {
6945 default:
6946 return false;
6947 case Inst::Arithmetic: {
6948 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6949 default:
6950 return false;
6951 case InstArithmetic::Fmul:
6952 return true;
6953 }
6954 }
6955 }
6956 }
6957
6958 bool isValidConsumer(const Inst &Instr) {
6959 switch (Instr.getKind()) {
6960 default:
6961 return false;
6962 case Inst::Arithmetic: {
6963 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6964 default:
6965 return false;
6966 case InstArithmetic::Fadd:
6967 case InstArithmetic::Fsub:
6968 return true;
6969 }
6970 }
6971 }
6972 }
6973 } // end of namespace FpFolding
6974
6975 namespace IntFolding {
6976 bool shouldTrackProducer(const Inst &Instr) {
6977 switch (Instr.getKind()) {
6978 default:
6979 return false;
6980 case Inst::Arithmetic: {
6981 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6982 default:
6983 return false;
6984 case InstArithmetic::Mul:
6985 return true;
6986 }
6987 }
6988 }
6989 }
6990
6991 bool isValidConsumer(const Inst &Instr) {
6992 switch (Instr.getKind()) {
6993 default:
6994 return false;
6995 case Inst::Arithmetic: {
6996 switch (llvm::cast<InstArithmetic>(&Instr)->getOp()) {
6997 default:
6998 return false;
6999 case InstArithmetic::Add:
7000 case InstArithmetic::Sub:
7001 return true;
7002 }
7003 }
7004 }
7005 }
7006 } // namespace IntFolding
7007 } // end of anonymous namespace
7008
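// Illustrative targets of the producer/consumer whitelists above:
//   icmp/fcmp feeding a br or select -> folded into a flag-setting compare
//                                       plus predicated instructions;
//   fmul feeding an fadd/fsub        -> candidate for a fused vmla/vmls;
//   mul feeding an add/sub           -> candidate for mla/mls.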
7009 void TargetARM32::ComputationTracker::recordProducers(CfgNode *Node) {
7010 for (Inst &Instr : Node->getInsts()) {
7011 // Check whether Instr is a valid producer.
7012 Variable *Dest = Instr.getDest();
7013 if (!Instr.isDeleted() // only consider non-deleted instructions; and
7014 && Dest // only instructions with an actual dest var; and
7015 && Dest->getType() == IceType_i1 // only bool-type dest vars; and
7016 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
7017 KnownComputations.emplace(Dest->getIndex(),
7018 ComputationEntry(&Instr, IceType_i1));
7019 }
7020 if (!Instr.isDeleted() // only consider non-deleted instructions; and
7021 && Dest // only instructions with an actual dest var; and
7022 && isScalarFloatingType(Dest->getType()) // fp-type only dest vars; and
7023 && FpFolding::shouldTrackProducer(Instr)) { // white-listed instr.
7024 KnownComputations.emplace(Dest->getIndex(),
7025 ComputationEntry(&Instr, Dest->getType()));
7026 }
7027 if (!Instr.isDeleted() // only consider non-deleted instructions; and
7028 && Dest // only instructions with an actual dest var; and
7029 && Dest->getType() == IceType_i32 // i32 only dest vars; and
7030 && IntFolding::shouldTrackProducer(Instr)) { // white-listed instr.
7031 KnownComputations.emplace(Dest->getIndex(),
7032 ComputationEntry(&Instr, IceType_i32));
7033 }
7034 // Check each src variable against the map.
7035 FOREACH_VAR_IN_INST(Var, Instr) {
7036 SizeT VarNum = Var->getIndex();
7037 auto ComputationIter = KnownComputations.find(VarNum);
7038 if (ComputationIter == KnownComputations.end()) {
7039 continue;
7040 }
7041
7042 ++ComputationIter->second.NumUses;
7043 switch (ComputationIter->second.ComputationType) {
7044 default:
7045 KnownComputations.erase(VarNum);
7046 continue;
7047 case IceType_i1:
7048 if (!BoolFolding::isValidConsumer(Instr)) {
7049 KnownComputations.erase(VarNum);
7050 continue;
7051 }
7052 break;
7053 case IceType_i32:
7054 if (IndexOfVarInInst(Var) != 1 || !IntFolding::isValidConsumer(Instr)) {
7055 KnownComputations.erase(VarNum);
7056 continue;
7057 }
7058 break;
7059 case IceType_f32:
7060 case IceType_f64:
7061 if (IndexOfVarInInst(Var) != 1 || !FpFolding::isValidConsumer(Instr)) {
7062 KnownComputations.erase(VarNum);
7063 continue;
7064 }
7065 break;
7066 }
7067
7068 if (Instr.isLastUse(Var)) {
7069 ComputationIter->second.IsLiveOut = false;
7070 }
7071 }
7072 }
7073
7074 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
7075 Iter != End;) {
7076 // Disable the folding if its dest may be live beyond this block.
7077 if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
7078 Iter = KnownComputations.erase(Iter);
7079 continue;
7080 }
7081
7082 // Mark as "dead" rather than outright deleting. This is so that other
7083 // peephole style optimizations during or before lowering have access to
7084 // this instruction in undeleted form. See for example
7085 // tryOptimizedCmpxchgCmpBr().
7086 Iter->second.Instr->setDead();
7087 ++Iter;
7088 }
7089 }
7090
7091 TargetARM32::Sandboxer::Sandboxer(TargetARM32 *Target,
7092 InstBundleLock::Option BundleOption)
7093 : Target(Target), BundleOption(BundleOption) {}
7094
7095 TargetARM32::Sandboxer::~Sandboxer() {}
7096
7097 namespace {
7098 OperandARM32FlexImm *indirectBranchBicMask(Cfg *Func) {
7099 constexpr uint32_t Imm8 = 0xFC; // 0xC000000F
7100 constexpr uint32_t RotateAmt = 2;
7101 return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
7102 }
7103
7104 OperandARM32FlexImm *memOpBicMask(Cfg *Func) {
7105 constexpr uint32_t Imm8 = 0x0C; // 0xC0000000
7106 constexpr uint32_t RotateAmt = 2;
7107 return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
7108 }
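// Illustrative use of these masks: a sandboxed load through r0 is emitted as a
// single bundle containing "bic r0, r0, #0xC0000000" followed by
// "ldr r1, [r0]", so the address is masked immediately before the access
// (the register names here are hypothetical).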
7109
7110 static bool baseNeedsBic(Variable *Base) {
7111 return Base->getRegNum() != RegARM32::Reg_r9 &&
7112 Base->getRegNum() != RegARM32::Reg_sp;
7113 }
7114 } // end of anonymous namespace
7115
7116 void TargetARM32::Sandboxer::createAutoBundle() {
7117 Bundler = makeUnique<AutoBundle>(Target, BundleOption);
7118 }
7119
7120 void TargetARM32::Sandboxer::add_sp(Operand *AddAmount) {
7121 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7122 if (!Target->NeedSandboxing) {
7123 Target->_add(SP, SP, AddAmount);
7124 return;
7125 }
7126 createAutoBundle();
7127 Target->_add(SP, SP, AddAmount);
7128 Target->_bic(SP, SP, memOpBicMask(Target->Func));
7129 }
7130
7131 void TargetARM32::Sandboxer::align_sp(size_t Alignment) {
7132 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7133 if (!Target->NeedSandboxing) {
7134 Target->alignRegisterPow2(SP, Alignment);
7135 return;
7136 }
7137 createAutoBundle();
7138 Target->alignRegisterPow2(SP, Alignment);
7139 Target->_bic(SP, SP, memOpBicMask(Target->Func));
7140 }
7141
7142 InstARM32Call *TargetARM32::Sandboxer::bl(Variable *ReturnReg,
7143 Operand *CallTarget) {
7144 if (Target->NeedSandboxing) {
7145 createAutoBundle();
7146 if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
7147 Target->_bic(CallTargetR, CallTargetR,
7148 indirectBranchBicMask(Target->Func));
7149 }
7150 }
7151 return Target->Context.insert<InstARM32Call>(ReturnReg, CallTarget);
7152 }
7153
7154 void TargetARM32::Sandboxer::ldr(Variable *Dest, OperandARM32Mem *Mem,
7155 CondARM32::Cond Pred) {
7156 Variable *MemBase = Mem->getBase();
7157 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7158 createAutoBundle();
7159 assert(!Mem->isRegReg());
7160 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7161 }
7162 Target->_ldr(Dest, Mem, Pred);
7163 }
7164
7165 void TargetARM32::Sandboxer::ldrex(Variable *Dest, OperandARM32Mem *Mem,
7166 CondARM32::Cond Pred) {
7167 Variable *MemBase = Mem->getBase();
7168 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7169 createAutoBundle();
7170 assert(!Mem->isRegReg());
7171 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7172 }
7173 Target->_ldrex(Dest, Mem, Pred);
7174 }
7175
7176 void TargetARM32::Sandboxer::reset_sp(Variable *Src) {
7177 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7178 if (!Target->NeedSandboxing) {
7179 Target->_mov_redefined(SP, Src);
7180 return;
7181 }
7182 createAutoBundle();
7183 Target->_mov_redefined(SP, Src);
7184 Target->_bic(SP, SP, memOpBicMask(Target->Func));
7185 }
7186
7187 void TargetARM32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
7188 if (Target->NeedSandboxing) {
7189 createAutoBundle();
7190 Target->_bic(RetAddr, RetAddr, indirectBranchBicMask(Target->Func));
7191 }
7192 Target->_ret(RetAddr, RetValue);
7193 }
7194
7195 void TargetARM32::Sandboxer::str(Variable *Src, OperandARM32Mem *Mem,
7196 CondARM32::Cond Pred) {
7197 Variable *MemBase = Mem->getBase();
7198 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7199 createAutoBundle();
7200 assert(!Mem->isRegReg());
7201 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7202 }
7203 Target->_str(Src, Mem, Pred);
7204 }
7205
7206 void TargetARM32::Sandboxer::strex(Variable *Dest, Variable *Src,
7207 OperandARM32Mem *Mem, CondARM32::Cond Pred) {
7208 Variable *MemBase = Mem->getBase();
7209 if (Target->NeedSandboxing && baseNeedsBic(MemBase)) {
7210 createAutoBundle();
7211 assert(!Mem->isRegReg());
7212 Target->_bic(MemBase, MemBase, memOpBicMask(Target->Func), Pred);
7213 }
7214 Target->_strex(Dest, Src, Mem, Pred);
7215 }
7216
7217 void TargetARM32::Sandboxer::sub_sp(Operand *SubAmount) {
7218 Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
7219 if (!Target->NeedSandboxing) {
7220 Target->_sub(SP, SP, SubAmount);
7221 return;
7222 }
7223 createAutoBundle();
7224 Target->_sub(SP, SP, SubAmount);
7225 Target->_bic(SP, SP, memOpBicMask(Target->Func));
7226 }
7227
7228 TargetDataARM32::TargetDataARM32(GlobalContext *Ctx)
7229 : TargetDataLowering(Ctx) {}
7230
7231 void TargetDataARM32::lowerGlobals(const VariableDeclarationList &Vars,
7232 const std::string &SectionSuffix) {
7233 const bool IsPIC = getFlags().getUseNonsfi();
7234 switch (getFlags().getOutFileType()) {
7235 case FT_Elf: {
7236 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7237 Writer->writeDataSection(Vars, llvm::ELF::R_ARM_ABS32, SectionSuffix,
7238 IsPIC);
7239 } break;
7240 case FT_Asm:
7241 case FT_Iasm: {
7242 OstreamLocker _(Ctx);
7243 for (const VariableDeclaration *Var : Vars) {
7244 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
7245 emitGlobal(*Var, SectionSuffix);
7246 }
7247 }
7248 } break;
7249 }
7250 }
7251
7252 namespace {
7253 template <typename T> struct ConstantPoolEmitterTraits;
7254
7255 static_assert(sizeof(uint64_t) == 8,
7256 "uint64_t is supposed to be 8 bytes wide.");
7257
7258 // TODO(jpp): implement the following when implementing constant randomization:
7259 // * template <> struct ConstantPoolEmitterTraits<uint8_t>
7260 // * template <> struct ConstantPoolEmitterTraits<uint16_t>
7261 // * template <> struct ConstantPoolEmitterTraits<uint32_t>
7262 template <> struct ConstantPoolEmitterTraits<float> {
7263 using ConstantType = ConstantFloat;
7264 static constexpr Type IceType = IceType_f32;
7265 // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
7266 // about them being constexpr.
7267 static const char AsmTag[];
7268 static const char TypeName[];
7269   static uint64_t bitcastToUint64(float Value) {
7270 static_assert(sizeof(Value) == sizeof(uint32_t),
7271 "Float should be 4 bytes.");
7272 const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
7273 return static_cast<uint64_t>(IntValue);
7274 }
7275 };
7276 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".long";
7277 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
7278
7279 template <> struct ConstantPoolEmitterTraits<double> {
7280 using ConstantType = ConstantDouble;
7281 static constexpr Type IceType = IceType_f64;
7282 static const char AsmTag[];
7283 static const char TypeName[];
7284   static uint64_t bitcastToUint64(double Value) {
7285 static_assert(sizeof(double) == sizeof(uint64_t),
7286 "Double should be 8 bytes.");
7287 return Utils::bitCopy<uint64_t>(Value);
7288 }
7289 };
7290 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
7291 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
7292
7293 template <typename T>
7294 void emitConstant(
7295 Ostream &Str,
7296 const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
7297 using Traits = ConstantPoolEmitterTraits<T>;
7298 Str << Const->getLabelName();
7299 Str << ":\n\t" << Traits::AsmTag << "\t0x";
7300 T Value = Const->getValue();
7301 Str.write_hex(Traits::bitcastToUint64(Value));
7302 Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
7303 }
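// For example, a pooled f32 constant with value 1.0 would be emitted roughly
// as (the label text comes from getLabelName()):
//   <label>:
//       .long 0x3f800000 /* f32 1 */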
7304
7305 template <typename T> void emitConstantPool(GlobalContext *Ctx) {
7306 if (!BuildDefs::dump()) {
7307 return;
7308 }
7309
7310 using Traits = ConstantPoolEmitterTraits<T>;
7311 static constexpr size_t MinimumAlignment = 4;
7312 SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
7313 assert((Align % 4) == 0 && "Constants should be aligned");
7314 Ostream &Str = Ctx->getStrEmit();
7315 ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
7316
7317 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
7318 << "\n"
7319 << "\t.align\t" << Align << "\n";
7320
7321 if (getFlags().getReorderPooledConstants()) {
7322 // TODO(jpp): add constant pooling.
7323 UnimplementedError(getFlags());
7324 }
7325
7326 for (Constant *C : Pool) {
7327 if (!C->getShouldBePooled()) {
7328 continue;
7329 }
7330
7331 emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
7332 }
7333 }
7334 } // end of anonymous namespace
7335
7336 void TargetDataARM32::lowerConstants() {
7337 if (getFlags().getDisableTranslation())
7338 return;
7339 switch (getFlags().getOutFileType()) {
7340 case FT_Elf: {
7341 ELFObjectWriter *Writer = Ctx->getObjectWriter();
7342 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
7343 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
7344 } break;
7345 case FT_Asm:
7346 case FT_Iasm: {
7347 OstreamLocker _(Ctx);
7348 emitConstantPool<float>(Ctx);
7349 emitConstantPool<double>(Ctx);
7350 break;
7351 }
7352 }
7353 }
7354
7355 void TargetDataARM32::lowerJumpTables() {
7356 if (getFlags().getDisableTranslation())
7357 return;
7358 switch (getFlags().getOutFileType()) {
7359 case FT_Elf:
7360 if (!Ctx->getJumpTables().empty()) {
7361 llvm::report_fatal_error("ARM32 does not support jump tables yet.");
7362 }
7363 break;
7364 case FT_Asm:
7365 // Already emitted from Cfg
7366 break;
7367 case FT_Iasm: {
7368 // TODO(kschimpf): Fill this in when we get more information.
7369 break;
7370 }
7371 }
7372 }
7373
7374 TargetHeaderARM32::TargetHeaderARM32(GlobalContext *Ctx)
7375 : TargetHeaderLowering(Ctx), CPUFeatures(getFlags()) {}
7376
7377 void TargetHeaderARM32::lower() {
7378 OstreamLocker _(Ctx);
7379 Ostream &Str = Ctx->getStrEmit();
7380 Str << ".syntax unified\n";
7381 // Emit build attributes in format: .eabi_attribute TAG, VALUE. See Sec. 2 of
7382 // "Addenda to, and Errata in the ABI for the ARM architecture"
7383 // http://infocenter.arm.com
7384 // /help/topic/com.arm.doc.ihi0045d/IHI0045D_ABI_addenda.pdf
7385 //
7386   // Tag_conformance should be emitted first in a file-scope sub-subsection
7387 // of the first public subsection of the attributes.
7388 Str << ".eabi_attribute 67, \"2.09\" @ Tag_conformance\n";
7389   // Chromebooks are at least Cortex-A15, but target A9 for broader compat.
7390   // For some reason, the LLVM ARM asm parser has the .cpu directive override
7391   // the mattr flags specified on the command line. So to test hwdiv, we need
7392   // to set the .cpu directive higher (we can't just rely on --mattr=...).
7393 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
7394 Str << ".cpu cortex-a15\n";
7395 } else {
7396 Str << ".cpu cortex-a9\n";
7397 }
7398 Str << ".eabi_attribute 6, 10 @ Tag_CPU_arch: ARMv7\n"
7399 << ".eabi_attribute 7, 65 @ Tag_CPU_arch_profile: App profile\n";
7400 Str << ".eabi_attribute 8, 1 @ Tag_ARM_ISA_use: Yes\n"
7401 << ".eabi_attribute 9, 2 @ Tag_THUMB_ISA_use: Thumb-2\n";
7402 Str << ".fpu neon\n"
7403 << ".eabi_attribute 17, 1 @ Tag_ABI_PCS_GOT_use: permit directly\n"
7404 << ".eabi_attribute 20, 1 @ Tag_ABI_FP_denormal\n"
7405 << ".eabi_attribute 21, 1 @ Tag_ABI_FP_exceptions\n"
7406 << ".eabi_attribute 23, 3 @ Tag_ABI_FP_number_model: IEEE 754\n"
7407 << ".eabi_attribute 34, 1 @ Tag_CPU_unaligned_access\n"
7408 << ".eabi_attribute 24, 1 @ Tag_ABI_align_needed: 8-byte\n"
7409 << ".eabi_attribute 25, 1 @ Tag_ABI_align_preserved: 8-byte\n"
7410 << ".eabi_attribute 28, 1 @ Tag_ABI_VFP_args\n"
7411 << ".eabi_attribute 36, 1 @ Tag_FP_HP_extension\n"
7412 << ".eabi_attribute 38, 1 @ Tag_ABI_FP_16bit_format\n"
7413 << ".eabi_attribute 42, 1 @ Tag_MPextension_use\n"
7414 << ".eabi_attribute 68, 1 @ Tag_Virtualization_use\n";
7415 if (CPUFeatures.hasFeature(TargetARM32Features::HWDivArm)) {
7416 Str << ".eabi_attribute 44, 2 @ Tag_DIV_use\n";
7417 }
7418 // Technically R9 is used for TLS with Sandboxing, and we reserve it.
7419 // However, for compatibility with current NaCl LLVM, don't claim that.
7420 Str << ".eabi_attribute 14, 3 @ Tag_ABI_PCS_R9_use: Not used\n";
7421 }
7422
7423 SmallBitVector TargetARM32::TypeToRegisterSet[RegARM32::RCARM32_NUM];
7424 SmallBitVector TargetARM32::TypeToRegisterSetUnfiltered[RegARM32::RCARM32_NUM];
7425 SmallBitVector TargetARM32::RegisterAliases[RegARM32::Reg_NUM];
7426
7427 } // end of namespace ARM32
7428 } // end of namespace Ice
7429