//
//                        The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringMIPS32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//
14 
15 #include "IceTargetLoweringMIPS32.h"
16 
17 #include "IceCfg.h"
18 #include "IceCfgNode.h"
19 #include "IceClFlags.h"
20 #include "IceDefs.h"
21 #include "IceELFObjectWriter.h"
22 #include "IceGlobalInits.h"
23 #include "IceInstMIPS32.h"
24 #include "IceInstVarIter.h"
25 #include "IceLiveness.h"
26 #include "IceOperand.h"
27 #include "IcePhiLoweringImpl.h"
28 #include "IceRegistersMIPS32.h"
29 #include "IceTargetLoweringMIPS32.def"
30 #include "IceUtils.h"
31 #include "llvm/Support/MathExtras.h"
32 
33 namespace MIPS32 {
createTargetLowering(::Ice::Cfg * Func)34 std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
35   return ::Ice::MIPS32::TargetMIPS32::create(Func);
36 }
37 
38 std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext * Ctx)39 createTargetDataLowering(::Ice::GlobalContext *Ctx) {
40   return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
41 }
42 
43 std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext * Ctx)44 createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
45   return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
46 }
47 
staticInit(::Ice::GlobalContext * Ctx)48 void staticInit(::Ice::GlobalContext *Ctx) {
49   ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
50 }
51 
shouldBePooled(const::Ice::Constant * C)52 bool shouldBePooled(const ::Ice::Constant *C) {
53   return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
54 }
55 
getPointerType()56 ::Ice::Type getPointerType() {
57   return ::Ice::MIPS32::TargetMIPS32::getPointerType();
58 }
59 
60 } // end of namespace MIPS32
61 
62 namespace Ice {
63 namespace MIPS32 {
64 
65 using llvm::isInt;
66 
67 namespace {
68 
69 // The maximum number of arguments to pass in GPR registers.
70 constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;
71 
72 std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
73 std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;
74 
75 constexpr uint32_t MIPS32_MAX_FP_ARG = 2;
76 
77 std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
78 std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;
79 
getRegClassName(RegClass C)80 const char *getRegClassName(RegClass C) {
81   auto ClassNum = static_cast<RegClassMIPS32>(C);
82   assert(ClassNum < RCMIPS32_NUM);
83   switch (ClassNum) {
84   default:
85     assert(C < RC_Target);
86     return regClassString(C);
87     // Add handling of new register classes below.
88   }
89 }
90 
91 // Stack alignment
92 constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;
93 
94 // Value is in bytes. Return Value adjusted to the next highest multiple of the
95 // stack alignment required for the given type.
applyStackAlignmentTy(uint32_t Value,Type Ty)96 uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
97   size_t typeAlignInBytes = typeWidthInBytes(Ty);
98   // Vectors are stored on stack with the same alignment as that of int type
99   if (isVectorType(Ty))
100     typeAlignInBytes = typeWidthInBytes(IceType_i64);
101   return Utils::applyAlignment(Value, typeAlignInBytes);
102 }
103 
104 // Value is in bytes. Return Value adjusted to the next highest multiple of the
105 // stack alignment.
applyStackAlignment(uint32_t Value)106 uint32_t applyStackAlignment(uint32_t Value) {
107   return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
108 }
109 
110 } // end of anonymous namespace
111 
// Constructs the MIPS32 lowering for \p Func. Sandboxing is required only
// when targeting Native Client.
TargetMIPS32::TargetMIPS32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {}
114 
assignVarStackSlots(VarList & SortedSpilledVariables,size_t SpillAreaPaddingBytes,size_t SpillAreaSizeBytes,size_t GlobalsAndSubsequentPaddingSize)115 void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
116                                        size_t SpillAreaPaddingBytes,
117                                        size_t SpillAreaSizeBytes,
118                                        size_t GlobalsAndSubsequentPaddingSize) {
119   const VariablesMetadata *VMetadata = Func->getVMetadata();
120   size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
121   size_t NextStackOffset = SpillAreaPaddingBytes;
122   CfgVector<size_t> LocalsSize(Func->getNumNodes());
123   const bool SimpleCoalescing = !callsReturnsTwice();
124   for (Variable *Var : SortedSpilledVariables) {
125     size_t Increment = typeWidthInBytesOnStack(Var->getType());
126     if (SimpleCoalescing && VMetadata->isTracked(Var)) {
127       if (VMetadata->isMultiBlock(Var)) {
128         GlobalsSpaceUsed += Increment;
129         NextStackOffset = GlobalsSpaceUsed;
130       } else {
131         SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
132         LocalsSize[NodeIndex] += Increment;
133         NextStackOffset = SpillAreaPaddingBytes +
134                           GlobalsAndSubsequentPaddingSize +
135                           LocalsSize[NodeIndex];
136       }
137     } else {
138       NextStackOffset += Increment;
139     }
140     Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
141   }
142 }
143 
// Performs one-time, process-wide initialization of MIPS32 register data:
// register class bit vectors, register aliasing, the per-type register sets,
// and the fixed argument-passing register sequences.
void TargetMIPS32::staticInit(GlobalContext *Ctx) {
  (void)Ctx;
  // Make RegNumT range-check against the MIPS32 register count.
  RegNumT::setLimit(RegMIPS32::Reg_NUM);
  SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
  SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
  SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
  // Populate the class membership bit vectors and the alias sets from the
  // register table; each X() row describes one register.
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  IntegerRegisters[RegMIPS32::val] = isInt;                                    \
  I64PairRegisters[RegMIPS32::val] = isI64Pair;                                \
  Float32Registers[RegMIPS32::val] = isFP32;                                   \
  Float64Registers[RegMIPS32::val] = isFP64;                                   \
  VectorRegisters[RegMIPS32::val] = isVec128;                                  \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  for (SizeT RegAlias : alias_init) {                                          \
    assert(!RegisterAliases[RegMIPS32::val][RegAlias] &&                       \
           "Duplicate alias for " #val);                                       \
    RegisterAliases[RegMIPS32::val].set(RegAlias);                             \
  }                                                                            \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
  REGMIPS32_TABLE;
#undef X

  // TODO(mohit.bhakkad): Change these inits once we provide argument related
  // field in register tables
  // GPR arguments: A0..A3.
  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
    GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);

  // i64 arguments: the GPR pairs A0A1 and A2A3.
  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
    I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);

  // f32 arguments use even-numbered FP registers starting at F12; f64
  // arguments use the corresponding register pairs starting at F12F13.
  for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
    FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
    FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
  }

  // Map each IR type to the set of registers that may hold a value of that
  // type.
  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;

  // Keep an unfiltered copy before command-line register restrictions are
  // applied.
  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
                          llvm::array_lengthof(TypeToRegisterSet),
                          RegMIPS32::getRegName, getRegClassName);
}
207 
unsetIfNonLeafFunc()208 void TargetMIPS32::unsetIfNonLeafFunc() {
209   for (CfgNode *Node : Func->getNodes()) {
210     for (Inst &Instr : Node->getInsts()) {
211       if (llvm::isa<InstCall>(&Instr)) {
212         // Unset MaybeLeafFunc if call instruction exists.
213         MaybeLeafFunc = false;
214         return;
215       }
216     }
217   }
218 }
219 
// Returns the fixed stack alignment, in bytes, used by this target.
uint32_t TargetMIPS32::getStackAlignment() const {
  return MIPS32_STACK_ALIGNMENT_BYTES;
}
223 
getCallStackArgumentsSizeBytes(const InstCall * Call)224 uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
225   TargetMIPS32::CallingConv CC;
226   RegNumT DummyReg;
227   size_t OutArgsSizeBytes = 0;
228   Variable *Dest = Call->getDest();
229   bool PartialOnStack = false;
230   if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
231     CC.discardReg(RegMIPS32::Reg_A0);
232     // Next vector is partially on stack
233     PartialOnStack = true;
234   }
235   for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
236     Operand *Arg = legalizeUndef(Call->getArg(i));
237     const Type Ty = Arg->getType();
238     RegNumT RegNum;
239     if (CC.argInReg(Ty, i, &RegNum)) {
240       // If PartialOnStack is true and if this is a vector type then last two
241       // elements are on stack
242       if (PartialOnStack && isVectorType(Ty)) {
243         OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
244         OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
245       }
246       continue;
247     }
248     OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
249     OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
250   }
251   // Add size of argument save area
252   constexpr int BytesPerStackArg = 4;
253   OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
254   return applyStackAlignment(OutArgsSizeBytes);
255 }
256 
257 namespace {
getConstantMemoryOrder(Operand * Opnd)258 inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
259   if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
260     return Integer->getValue();
261   return Intrinsics::MemoryOrderInvalid;
262 }
263 } // namespace
264 
genTargetHelperCallFor(Inst * Instr)265 void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
266   constexpr bool NoTailCall = false;
267   constexpr bool IsTargetHelperCall = true;
268   Variable *Dest = Instr->getDest();
269   const Type DestTy = Dest ? Dest->getType() : IceType_void;
270 
271   switch (Instr->getKind()) {
272   default:
273     return;
274   case Inst::Select: {
275     if (isVectorType(DestTy)) {
276       Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
277       Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
278       Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
279       Variable *T = Func->makeVariable(DestTy);
280       auto *Undef = ConstantUndef::create(Ctx, DestTy);
281       Context.insert<InstAssign>(T, Undef);
282       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
283       VarVecOn32->initVecElement(Func);
284       for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
285         auto *Index = Ctx->getConstantInt32(I);
286         auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
287         Context.insert<InstExtractElement>(OpC, Cond, Index);
288         auto *OpT = Func->makeVariable(typeElementType(DestTy));
289         Context.insert<InstExtractElement>(OpT, SrcT, Index);
290         auto *OpF = Func->makeVariable(typeElementType(DestTy));
291         Context.insert<InstExtractElement>(OpF, SrcF, Index);
292         auto *Dst = Func->makeVariable(typeElementType(DestTy));
293         Variable *DestT = Func->makeVariable(DestTy);
294         Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
295         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
296         T = DestT;
297       }
298       Context.insert<InstAssign>(Dest, T);
299       Instr->setDeleted();
300     }
301     return;
302   }
303   case Inst::Fcmp: {
304     if (isVectorType(DestTy)) {
305       InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
306       Operand *Src0 = Instr->getSrc(0);
307       Operand *Src1 = Instr->getSrc(1);
308       Variable *T = Func->makeVariable(IceType_v4f32);
309       auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
310       Context.insert<InstAssign>(T, Undef);
311       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
312       VarVecOn32->initVecElement(Func);
313       for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
314         auto *Index = Ctx->getConstantInt32(I);
315         auto *Op0 = Func->makeVariable(IceType_f32);
316         Context.insert<InstExtractElement>(Op0, Src0, Index);
317         auto *Op1 = Func->makeVariable(IceType_f32);
318         Context.insert<InstExtractElement>(Op1, Src1, Index);
319         auto *Dst = Func->makeVariable(IceType_f32);
320         Variable *DestT = Func->makeVariable(IceType_v4f32);
321         Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
322         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
323         T = DestT;
324       }
325       Context.insert<InstAssign>(Dest, T);
326       Instr->setDeleted();
327     }
328     return;
329   }
330   case Inst::Icmp: {
331     if (isVectorType(DestTy)) {
332       InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
333       Operand *Src0 = Instr->getSrc(0);
334       Operand *Src1 = Instr->getSrc(1);
335       const Type SrcType = Src0->getType();
336       Variable *T = Func->makeVariable(DestTy);
337       auto *Undef = ConstantUndef::create(Ctx, DestTy);
338       Context.insert<InstAssign>(T, Undef);
339       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
340       VarVecOn32->initVecElement(Func);
341       for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
342         auto *Index = Ctx->getConstantInt32(I);
343         auto *Op0 = Func->makeVariable(typeElementType(SrcType));
344         Context.insert<InstExtractElement>(Op0, Src0, Index);
345         auto *Op1 = Func->makeVariable(typeElementType(SrcType));
346         Context.insert<InstExtractElement>(Op1, Src1, Index);
347         auto *Dst = Func->makeVariable(typeElementType(DestTy));
348         Variable *DestT = Func->makeVariable(DestTy);
349         Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
350         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
351         T = DestT;
352       }
353       Context.insert<InstAssign>(Dest, T);
354       Instr->setDeleted();
355     }
356     return;
357   }
358   case Inst::Arithmetic: {
359     const InstArithmetic::OpKind Op =
360         llvm::cast<InstArithmetic>(Instr)->getOp();
361     if (isVectorType(DestTy)) {
362       scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
363       Instr->setDeleted();
364       return;
365     }
366     switch (DestTy) {
367     default:
368       return;
369     case IceType_i64: {
370       RuntimeHelper HelperID = RuntimeHelper::H_Num;
371       switch (Op) {
372       default:
373         return;
374       case InstArithmetic::Udiv:
375         HelperID = RuntimeHelper::H_udiv_i64;
376         break;
377       case InstArithmetic::Sdiv:
378         HelperID = RuntimeHelper::H_sdiv_i64;
379         break;
380       case InstArithmetic::Urem:
381         HelperID = RuntimeHelper::H_urem_i64;
382         break;
383       case InstArithmetic::Srem:
384         HelperID = RuntimeHelper::H_srem_i64;
385         break;
386       }
387 
388       if (HelperID == RuntimeHelper::H_Num) {
389         return;
390       }
391 
392       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
393       constexpr SizeT MaxArgs = 2;
394       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
395                                             NoTailCall, IsTargetHelperCall);
396       Call->addArg(Instr->getSrc(0));
397       Call->addArg(Instr->getSrc(1));
398       Instr->setDeleted();
399       return;
400     }
401     case IceType_f32:
402     case IceType_f64: {
403       if (Op != InstArithmetic::Frem) {
404         return;
405       }
406       constexpr SizeT MaxArgs = 2;
407       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
408           DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
409                                 : RuntimeHelper::H_frem_f64);
410       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
411                                             NoTailCall, IsTargetHelperCall);
412       Call->addArg(Instr->getSrc(0));
413       Call->addArg(Instr->getSrc(1));
414       Instr->setDeleted();
415       return;
416     }
417     }
418     llvm::report_fatal_error("Control flow should never have reached here.");
419   }
420   case Inst::Cast: {
421     Operand *Src0 = Instr->getSrc(0);
422     const Type SrcTy = Src0->getType();
423     auto *CastInstr = llvm::cast<InstCast>(Instr);
424     const InstCast::OpKind CastKind = CastInstr->getCastKind();
425 
426     if (isVectorType(DestTy)) {
427       Variable *T = Func->makeVariable(DestTy);
428       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
429       VarVecOn32->initVecElement(Func);
430       auto *Undef = ConstantUndef::create(Ctx, DestTy);
431       Context.insert<InstAssign>(T, Undef);
432       for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
433         auto *Index = Ctx->getConstantInt32(I);
434         auto *Op = Func->makeVariable(typeElementType(SrcTy));
435         Context.insert<InstExtractElement>(Op, Src0, Index);
436         auto *Dst = Func->makeVariable(typeElementType(DestTy));
437         Variable *DestT = Func->makeVariable(DestTy);
438         Context.insert<InstCast>(CastKind, Dst, Op);
439         Context.insert<InstInsertElement>(DestT, T, Dst, Index);
440         T = DestT;
441       }
442       Context.insert<InstAssign>(Dest, T);
443       Instr->setDeleted();
444       return;
445     }
446 
447     switch (CastKind) {
448     default:
449       return;
450     case InstCast::Fptosi:
451     case InstCast::Fptoui: {
452       if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
453         return;
454       }
455       const bool DestIs32 = DestTy == IceType_i32;
456       const bool DestIsSigned = CastKind == InstCast::Fptosi;
457       const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
458       RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
459       if (DestIsSigned) {
460         if (DestIs32) {
461           return;
462         }
463         RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
464                             : RuntimeHelper::H_fptosi_f64_i64;
465       } else {
466         RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
467                                         : RuntimeHelper::H_fptoui_f32_i64)
468                             : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
469                                         : RuntimeHelper::H_fptoui_f64_i64);
470       }
471       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
472       static constexpr SizeT MaxArgs = 1;
473       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
474                                             NoTailCall, IsTargetHelperCall);
475       Call->addArg(Src0);
476       Instr->setDeleted();
477       return;
478     }
479     case InstCast::Sitofp:
480     case InstCast::Uitofp: {
481       if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
482         return;
483       }
484       const bool SourceIs32 = SrcTy == IceType_i32;
485       const bool SourceIsSigned = CastKind == InstCast::Sitofp;
486       const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
487       RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
488       if (SourceIsSigned) {
489         if (SourceIs32) {
490           return;
491         }
492         RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
493                             : RuntimeHelper::H_sitofp_i64_f64;
494       } else {
495         RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
496                                           : RuntimeHelper::H_uitofp_i64_f32)
497                             : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
498                                           : RuntimeHelper::H_uitofp_i64_f64);
499       }
500       Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
501       static constexpr SizeT MaxArgs = 1;
502       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
503                                             NoTailCall, IsTargetHelperCall);
504       Call->addArg(Src0);
505       Instr->setDeleted();
506       return;
507     }
508     case InstCast::Bitcast: {
509       if (DestTy == SrcTy) {
510         return;
511       }
512       Variable *CallDest = Dest;
513       RuntimeHelper HelperID = RuntimeHelper::H_Num;
514       switch (DestTy) {
515       default:
516         return;
517       case IceType_i8:
518         assert(SrcTy == IceType_v8i1);
519         HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
520         CallDest = Func->makeVariable(IceType_i32);
521         break;
522       case IceType_i16:
523         assert(SrcTy == IceType_v16i1);
524         HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
525         CallDest = Func->makeVariable(IceType_i32);
526         break;
527       case IceType_v8i1: {
528         assert(SrcTy == IceType_i8);
529         HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
530         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
531         // Arguments to functions are required to be at least 32 bits wide.
532         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
533         Src0 = Src0AsI32;
534       } break;
535       case IceType_v16i1: {
536         assert(SrcTy == IceType_i16);
537         HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
538         Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
539         // Arguments to functions are required to be at least 32 bits wide.
540         Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
541         Src0 = Src0AsI32;
542       } break;
543       }
544       constexpr SizeT MaxSrcs = 1;
545       InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
546       Call->addArg(Src0);
547       Context.insert(Call);
548       // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
549       // call result to the appropriate type as necessary.
550       if (CallDest->getType() != DestTy)
551         Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
552       Instr->setDeleted();
553       return;
554     }
555     case InstCast::Trunc: {
556       if (DestTy == SrcTy) {
557         return;
558       }
559       if (!isVectorType(SrcTy)) {
560         return;
561       }
562       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
563       assert(typeElementType(DestTy) == IceType_i1);
564       assert(isVectorIntegerType(SrcTy));
565       return;
566     }
567     case InstCast::Sext:
568     case InstCast::Zext: {
569       if (DestTy == SrcTy) {
570         return;
571       }
572       if (!isVectorType(DestTy)) {
573         return;
574       }
575       assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
576       assert(typeElementType(SrcTy) == IceType_i1);
577       assert(isVectorIntegerType(DestTy));
578       return;
579     }
580     }
581     llvm::report_fatal_error("Control flow should never have reached here.");
582   }
583   case Inst::IntrinsicCall: {
584     auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
585     Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
586     if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
587       Operand *Src0 = IntrinsicCall->getArg(0);
588       GlobalString FabsFloat = Ctx->getGlobalString("llvm.fabs.f32");
589       Operand *CallTarget = Ctx->getConstantExternSym(FabsFloat);
590       GlobalString FabsVec = Ctx->getGlobalString("llvm.fabs.v4f32");
591       bool BadIntrinsic = false;
592       const Intrinsics::FullIntrinsicInfo *FullInfo =
593           Ctx->getIntrinsicsInfo().find(FabsVec, BadIntrinsic);
594       Intrinsics::IntrinsicInfo Info = FullInfo->Info;
595 
596       Variable *T = Func->makeVariable(IceType_v4f32);
597       auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
598       Context.insert<InstAssign>(T, Undef);
599       auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
600       VarVecOn32->initVecElement(Func);
601 
602       for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
603         auto *Index = Ctx->getConstantInt32(i);
604         auto *Op = Func->makeVariable(IceType_f32);
605         Context.insert<InstExtractElement>(Op, Src0, Index);
606         auto *Res = Func->makeVariable(IceType_f32);
607         Variable *DestT = Func->makeVariable(IceType_v4f32);
608         auto *Call =
609             Context.insert<InstIntrinsicCall>(1, Res, CallTarget, Info);
610         Call->addArg(Op);
611         Context.insert<InstInsertElement>(DestT, T, Res, Index);
612         T = DestT;
613       }
614 
615       Context.insert<InstAssign>(Dest, T);
616 
617       Instr->setDeleted();
618       return;
619     }
620     switch (ID) {
621     default:
622       return;
623     case Intrinsics::AtomicLoad: {
624       if (DestTy != IceType_i64)
625         return;
626       if (!Intrinsics::isMemoryOrderValid(
627               ID, getConstantMemoryOrder(IntrinsicCall->getArg(1)))) {
628         Func->setError("Unexpected memory ordering for AtomicLoad");
629         return;
630       }
631       Operand *Addr = IntrinsicCall->getArg(0);
632       Operand *TargetHelper = Ctx->getConstantExternSym(
633           Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
634       static constexpr SizeT MaxArgs = 3;
635       auto *_0 = Ctx->getConstantZero(IceType_i64);
636       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
637                                             NoTailCall, IsTargetHelperCall);
638       Call->addArg(Addr);
639       Call->addArg(_0);
640       Call->addArg(_0);
641       Context.insert<InstMIPS32Sync>();
642       Instr->setDeleted();
643       return;
644     }
645     case Intrinsics::AtomicStore: {
646       Operand *Val = IntrinsicCall->getArg(0);
647       if (Val->getType() != IceType_i64)
648         return;
649       if (!Intrinsics::isMemoryOrderValid(
650               ID, getConstantMemoryOrder(IntrinsicCall->getArg(2)))) {
651         Func->setError("Unexpected memory ordering for AtomicStore");
652         return;
653       }
654       Operand *Addr = IntrinsicCall->getArg(1);
655       Variable *NoDest = nullptr;
656       Operand *TargetHelper = Ctx->getConstantExternSym(
657           Ctx->getGlobalString("__sync_lock_test_and_set_8"));
658       Context.insert<InstMIPS32Sync>();
659       static constexpr SizeT MaxArgs = 2;
660       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
661                                             NoTailCall, IsTargetHelperCall);
662       Call->addArg(Addr);
663       Call->addArg(Val);
664       Context.insert<InstMIPS32Sync>();
665       Instr->setDeleted();
666       return;
667     }
668     case Intrinsics::AtomicCmpxchg: {
669       if (DestTy != IceType_i64)
670         return;
671       if (!Intrinsics::isMemoryOrderValid(
672               ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)),
673               getConstantMemoryOrder(IntrinsicCall->getArg(4)))) {
674         Func->setError("Unexpected memory ordering for AtomicCmpxchg");
675         return;
676       }
677       Operand *Addr = IntrinsicCall->getArg(0);
678       Operand *Oldval = IntrinsicCall->getArg(1);
679       Operand *Newval = IntrinsicCall->getArg(2);
680       Operand *TargetHelper = Ctx->getConstantExternSym(
681           Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
682       Context.insert<InstMIPS32Sync>();
683       static constexpr SizeT MaxArgs = 3;
684       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
685                                             NoTailCall, IsTargetHelperCall);
686       Call->addArg(Addr);
687       Call->addArg(Oldval);
688       Call->addArg(Newval);
689       Context.insert<InstMIPS32Sync>();
690       Instr->setDeleted();
691       return;
692     }
693     case Intrinsics::AtomicRMW: {
694       if (DestTy != IceType_i64)
695         return;
696       if (!Intrinsics::isMemoryOrderValid(
697               ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)))) {
698         Func->setError("Unexpected memory ordering for AtomicRMW");
699         return;
700       }
701       auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
702           llvm::cast<ConstantInteger32>(IntrinsicCall->getArg(0))->getValue());
703       auto *Addr = IntrinsicCall->getArg(1);
704       auto *Newval = IntrinsicCall->getArg(2);
705       Operand *TargetHelper;
706       switch (Operation) {
707       case Intrinsics::AtomicAdd:
708         TargetHelper = Ctx->getConstantExternSym(
709             Ctx->getGlobalString("__sync_fetch_and_add_8"));
710         break;
711       case Intrinsics::AtomicSub:
712         TargetHelper = Ctx->getConstantExternSym(
713             Ctx->getGlobalString("__sync_fetch_and_sub_8"));
714         break;
715       case Intrinsics::AtomicOr:
716         TargetHelper = Ctx->getConstantExternSym(
717             Ctx->getGlobalString("__sync_fetch_and_or_8"));
718         break;
719       case Intrinsics::AtomicAnd:
720         TargetHelper = Ctx->getConstantExternSym(
721             Ctx->getGlobalString("__sync_fetch_and_and_8"));
722         break;
723       case Intrinsics::AtomicXor:
724         TargetHelper = Ctx->getConstantExternSym(
725             Ctx->getGlobalString("__sync_fetch_and_xor_8"));
726         break;
727       case Intrinsics::AtomicExchange:
728         TargetHelper = Ctx->getConstantExternSym(
729             Ctx->getGlobalString("__sync_lock_test_and_set_8"));
730         break;
731       default:
732         llvm::report_fatal_error("Unknown AtomicRMW operation");
733         return;
734       }
735       Context.insert<InstMIPS32Sync>();
736       static constexpr SizeT MaxArgs = 2;
737       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
738                                             NoTailCall, IsTargetHelperCall);
739       Call->addArg(Addr);
740       Call->addArg(Newval);
741       Context.insert<InstMIPS32Sync>();
742       Instr->setDeleted();
743       return;
744     }
745     case Intrinsics::Ctpop: {
746       Operand *Src0 = IntrinsicCall->getArg(0);
747       Operand *TargetHelper =
748           Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
749                                         ? RuntimeHelper::H_call_ctpop_i32
750                                         : RuntimeHelper::H_call_ctpop_i64);
751       static constexpr SizeT MaxArgs = 1;
752       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
753                                             NoTailCall, IsTargetHelperCall);
754       Call->addArg(Src0);
755       Instr->setDeleted();
756       return;
757     }
758     case Intrinsics::Longjmp: {
759       static constexpr SizeT MaxArgs = 2;
760       static constexpr Variable *NoDest = nullptr;
761       Operand *TargetHelper =
762           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
763       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
764                                             NoTailCall, IsTargetHelperCall);
765       Call->addArg(IntrinsicCall->getArg(0));
766       Call->addArg(IntrinsicCall->getArg(1));
767       Instr->setDeleted();
768       return;
769     }
770     case Intrinsics::Memcpy: {
771       static constexpr SizeT MaxArgs = 3;
772       static constexpr Variable *NoDest = nullptr;
773       Operand *TargetHelper =
774           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
775       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
776                                             NoTailCall, IsTargetHelperCall);
777       Call->addArg(IntrinsicCall->getArg(0));
778       Call->addArg(IntrinsicCall->getArg(1));
779       Call->addArg(IntrinsicCall->getArg(2));
780       Instr->setDeleted();
781       return;
782     }
783     case Intrinsics::Memmove: {
784       static constexpr SizeT MaxArgs = 3;
785       static constexpr Variable *NoDest = nullptr;
786       Operand *TargetHelper =
787           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
788       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
789                                             NoTailCall, IsTargetHelperCall);
790       Call->addArg(IntrinsicCall->getArg(0));
791       Call->addArg(IntrinsicCall->getArg(1));
792       Call->addArg(IntrinsicCall->getArg(2));
793       Instr->setDeleted();
794       return;
795     }
796     case Intrinsics::Memset: {
797       Operand *ValOp = IntrinsicCall->getArg(1);
798       assert(ValOp->getType() == IceType_i8);
799       Variable *ValExt = Func->makeVariable(stackSlotType());
800       Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);
801 
802       static constexpr SizeT MaxArgs = 3;
803       static constexpr Variable *NoDest = nullptr;
804       Operand *TargetHelper =
805           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
806       auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
807                                             NoTailCall, IsTargetHelperCall);
808       Call->addArg(IntrinsicCall->getArg(0));
809       Call->addArg(ValExt);
810       Call->addArg(IntrinsicCall->getArg(2));
811       Instr->setDeleted();
812       return;
813     }
814     case Intrinsics::NaClReadTP: {
815       if (SandboxingType == ST_NaCl) {
816         return;
817       }
818       static constexpr SizeT MaxArgs = 0;
819       assert(SandboxingType != ST_Nonsfi);
820       Operand *TargetHelper =
821           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
822       Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
823                                IsTargetHelperCall);
824       Instr->setDeleted();
825       return;
826     }
827     case Intrinsics::Setjmp: {
828       static constexpr SizeT MaxArgs = 1;
829       Operand *TargetHelper =
830           Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
831       auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
832                                             NoTailCall, IsTargetHelperCall);
833       Call->addArg(IntrinsicCall->getArg(0));
834       Instr->setDeleted();
835       return;
836     }
837     }
838     llvm::report_fatal_error("Control flow should never have reached here.");
839   }
840   }
841 }
842 
findMaxStackOutArgsSize()843 void TargetMIPS32::findMaxStackOutArgsSize() {
844   // MinNeededOutArgsBytes should be updated if the Target ever creates a
845   // high-level InstCall that requires more stack bytes.
846   size_t MinNeededOutArgsBytes = 0;
847   if (!MaybeLeafFunc)
848     MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
849   MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
850   for (CfgNode *Node : Func->getNodes()) {
851     Context.init(Node);
852     while (!Context.atEnd()) {
853       PostIncrLoweringContext PostIncrement(Context);
854       Inst *CurInstr = iteratorToInst(Context.getCur());
855       if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
856         SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
857         MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
858       }
859     }
860   }
861   CurrentAllocaOffset = MaxOutArgsSizeBytes;
862 }
863 
// Runs the full O2 (optimizing) translation pipeline over the current
// function: helper-call expansion, alloca merging, Phi lowering, address
// mode optimization, argument lowering, target code generation, global
// register allocation, stack frame construction, and final branch
// optimization / optional nop insertion. The pass order is significant.
void TargetMIPS32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();

  // Refresh the leaf-function flag before computing the out-args area size
  // (findMaxStackOutArgsSize reads MaybeLeafFunc).
  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial MIPS32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
974 
// Runs the Om1 (minimal optimization) translation pipeline: the same
// skeleton as translateO2 but with no alloca merging, no address mode
// optimization, no liveness-driven lowering, and register allocation only
// for infinite-weight variables.
void TargetMIPS32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();

  // Refresh the leaf-function flag before computing the out-args area size
  // (findMaxStackOutArgsSize reads MaybeLeafFunc).
  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  // Phi lowering is done unconditionally (no Phi edge splitting at Om1).
  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial MIPS32 codegen");

  // Only infinite-weight (must-have-register) variables are register
  // allocated at Om1.
  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}
1028 
doBranchOpt(Inst * Instr,const CfgNode * NextNode)1029 bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
1030   if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
1031     return Br->optimizeBranch(NextNode);
1032   }
1033   return false;
1034 }
1035 
namespace {

// Printable register names indexed by register number. The entries are
// expanded from the REGMIPS32_TABLE X-macro so they stay in sync with the
// register enumeration in IceRegistersMIPS32.h.
const char *RegNames[RegMIPS32::Reg_NUM] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  name,
    REGMIPS32_TABLE
#undef X
};

} // end of anonymous namespace
1047 
getRegName(RegNumT RegNum)1048 const char *RegMIPS32::getRegName(RegNumT RegNum) {
1049   RegNum.assertIsValid();
1050   return RegNames[RegNum];
1051 }
1052 
getRegName(RegNumT RegNum,Type Ty) const1053 const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
1054   (void)Ty;
1055   return RegMIPS32::getRegName(RegNum);
1056 }
1057 
getPhysicalRegister(RegNumT RegNum,Type Ty)1058 Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
1059   if (Ty == IceType_void)
1060     Ty = IceType_i32;
1061   if (PhysicalRegisters[Ty].empty())
1062     PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
1063   RegNum.assertIsValid();
1064   Variable *Reg = PhysicalRegisters[Ty][RegNum];
1065   if (Reg == nullptr) {
1066     Reg = Func->makeVariable(Ty);
1067     Reg->setRegNum(RegNum);
1068     PhysicalRegisters[Ty][RegNum] = Reg;
1069     // Specially mark a named physical register as an "argument" so that it is
1070     // considered live upon function entry.  Otherwise it's possible to get
1071     // liveness validation errors for saving callee-save registers.
1072     Func->addImplicitArg(Reg);
1073     // Don't bother tracking the live range of a named physical register.
1074     Reg->setIgnoreLiveness();
1075   }
1076   return Reg;
1077 }
1078 
emitJumpTable(const Cfg * Func,const InstJumpTable * JumpTable) const1079 void TargetMIPS32::emitJumpTable(const Cfg *Func,
1080                                  const InstJumpTable *JumpTable) const {
1081   (void)Func;
1082   (void)JumpTable;
1083   UnimplementedError(getFlags());
1084 }
1085 
1086 /// Provide a trivial wrapper to legalize() for this common usage.
legalizeToReg(Operand * From,RegNumT RegNum)1087 Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
1088   return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
1089 }
1090 
1091 /// Legalize undef values to concrete values.
legalizeUndef(Operand * From,RegNumT RegNum)1092 Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
1093   (void)RegNum;
1094   Type Ty = From->getType();
1095   if (llvm::isa<ConstantUndef>(From)) {
1096     // Lower undefs to zero.  Another option is to lower undefs to an
1097     // uninitialized register; however, using an uninitialized register
1098     // results in less predictable code.
1099     //
1100     // If in the future the implementation is changed to lower undef
1101     // values to uninitialized registers, a FakeDef will be needed:
1102     //     Context.insert(InstFakeDef::create(Func, Reg));
1103     // This is in order to ensure that the live range of Reg is not
1104     // overestimated.  If the constant being lowered is a 64 bit value,
1105     // then the result should be split and the lo and hi components will
1106     // need to go in uninitialized registers.
1107     if (isVectorType(Ty)) {
1108       Variable *Var = makeReg(Ty, RegNum);
1109       auto *Reg = llvm::cast<VariableVecOn32>(Var);
1110       Reg->initVecElement(Func);
1111       auto *Zero = getZero();
1112       for (Variable *Var : Reg->getContainers()) {
1113         _mov(Var, Zero);
1114       }
1115       return Reg;
1116     }
1117     return Ctx->getConstantZero(Ty);
1118   }
1119   return From;
1120 }
1121 
makeReg(Type Type,RegNumT RegNum)1122 Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
1123   // There aren't any 64-bit integer registers for Mips32.
1124   assert(Type != IceType_i64);
1125   Variable *Reg = Func->makeVariable(Type);
1126   if (RegNum.hasValue())
1127     Reg->setRegNum(RegNum);
1128   else
1129     Reg->setMustHaveReg();
1130   return Reg;
1131 }
1132 
formMemoryOperand(Operand * Operand,Type Ty)1133 OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
1134   // It may be the case that address mode optimization already creates an
1135   // OperandMIPS32Mem, so in that case it wouldn't need another level of
1136   // transformation.
1137   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
1138     return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
1139   }
1140 
1141   // If we didn't do address mode optimization, then we only have a base/offset
1142   // to work with. MIPS always requires a base register, so just use that to
1143   // hold the operand.
1144   auto *Base = llvm::cast<Variable>(
1145       legalize(Operand, Legal_Reg | Legal_Rematerializable));
1146   const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
1147   return OperandMIPS32Mem::create(
1148       Func, Ty, Base,
1149       llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
1150 }
1151 
emitVariable(const Variable * Var) const1152 void TargetMIPS32::emitVariable(const Variable *Var) const {
1153   if (!BuildDefs::dump())
1154     return;
1155   Ostream &Str = Ctx->getStrEmit();
1156   const Type FrameSPTy = IceType_i32;
1157   if (Var->hasReg()) {
1158     Str << '$' << getRegName(Var->getRegNum(), Var->getType());
1159     return;
1160   }
1161   if (Var->mustHaveReg()) {
1162     llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
1163                              ") has no register assigned - function " +
1164                              Func->getFunctionName());
1165   }
1166   const int32_t Offset = Var->getStackOffset();
1167   Str << Offset;
1168   Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
1169   Str << ")";
1170 }
1171 
// Initializes the per-call argument-register state. The argument register
// lists are built from the initializers in reverse so that registers can
// be consumed in ABI order via back()/pop_back().
TargetMIPS32::CallingConv::CallingConv()
    : GPRegsUsed(RegMIPS32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegMIPS32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}
1179 
1180 // In MIPS O32 abi FP argument registers can be used only if first argument is
1181 // of type float/double. UseFPRegs flag is used to care of that. Also FP arg
1182 // registers can be used only for first 2 arguments, so we require argument
1183 // number to make register allocation decisions.
argInReg(Type Ty,uint32_t ArgNo,RegNumT * Reg)1184 bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
1185                                          RegNumT *Reg) {
1186   if (isScalarIntegerType(Ty) || isVectorType(Ty))
1187     return argInGPR(Ty, Reg);
1188   if (isScalarFloatingType(Ty)) {
1189     if (ArgNo == 0) {
1190       UseFPRegs = true;
1191       return argInVFP(Ty, Reg);
1192     }
1193     if (UseFPRegs && ArgNo == 1) {
1194       UseFPRegs = false;
1195       return argInVFP(Ty, Reg);
1196     }
1197     return argInGPR(Ty, Reg);
1198   }
1199   llvm::report_fatal_error("argInReg: Invalid type.");
1200   return false;
1201 }
1202 
argInGPR(Type Ty,RegNumT * Reg)1203 bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
1204   CfgVector<RegNumT> *Source;
1205 
1206   switch (Ty) {
1207   default: {
1208     llvm::report_fatal_error("argInGPR: Invalid type.");
1209     return false;
1210   } break;
1211   case IceType_v4i1:
1212   case IceType_v8i1:
1213   case IceType_v16i1:
1214   case IceType_v16i8:
1215   case IceType_v8i16:
1216   case IceType_v4i32:
1217   case IceType_v4f32:
1218   case IceType_i32:
1219   case IceType_f32: {
1220     Source = &GPRArgs;
1221   } break;
1222   case IceType_i64:
1223   case IceType_f64: {
1224     Source = &I64Args;
1225   } break;
1226   }
1227 
1228   discardUnavailableGPRsAndTheirAliases(Source);
1229 
1230   // If $4 is used for any scalar type (or returining v4f32) then the next
1231   // vector type if passed in $6:$7:stack:stack
1232   if (isVectorType(Ty)) {
1233     alignGPR(Source);
1234   }
1235 
1236   if (Source->empty()) {
1237     GPRegsUsed.set();
1238     return false;
1239   }
1240 
1241   *Reg = Source->back();
1242   // Note that we don't Source->pop_back() here. This is intentional. Notice how
1243   // we mark all of Reg's aliases as Used. So, for the next argument,
1244   // Source->back() is marked as unavailable, and it is thus implicitly popped
1245   // from the stack.
1246   GPRegsUsed |= RegisterAliases[*Reg];
1247 
1248   // All vector arguments irrespective of their base type are passed in GP
1249   // registers. First vector argument is passed in $4:$5:$6:$7 and 2nd
1250   // is passed in $6:$7:stack:stack. If it is 1st argument then discard
1251   // $4:$5:$6:$7 otherwise discard $6:$7 only.
1252   if (isVectorType(Ty)) {
1253     if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
1254       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
1255       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
1256       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
1257     } else {
1258       GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
1259     }
1260   }
1261 
1262   return true;
1263 }
1264 
discardNextGPRAndItsAliases(CfgVector<RegNumT> * Regs)1265 inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
1266     CfgVector<RegNumT> *Regs) {
1267   GPRegsUsed |= RegisterAliases[Regs->back()];
1268   Regs->pop_back();
1269 }
1270 
alignGPR(CfgVector<RegNumT> * Regs)1271 inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
1272   if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
1273     discardNextGPRAndItsAliases(Regs);
1274 }
1275 
1276 // GPR are not packed when passing parameters. Thus, a function foo(i32, i64,
1277 // i32) will have the first argument in a0, the second in a2-a3, and the third
1278 // on the stack. To model this behavior, whenever we pop a register from Regs,
1279 // we remove all of its aliases from the pool of available GPRs. This has the
1280 // effect of computing the "closure" on the GPR registers.
discardUnavailableGPRsAndTheirAliases(CfgVector<RegNumT> * Regs)1281 void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
1282     CfgVector<RegNumT> *Regs) {
1283   while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
1284     discardNextGPRAndItsAliases(Regs);
1285   }
1286 }
1287 
argInVFP(Type Ty,RegNumT * Reg)1288 bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
1289   CfgVector<RegNumT> *Source;
1290 
1291   switch (Ty) {
1292   default: {
1293     llvm::report_fatal_error("argInVFP: Invalid type.");
1294     return false;
1295   } break;
1296   case IceType_f32: {
1297     Source = &FP32Args;
1298   } break;
1299   case IceType_f64: {
1300     Source = &FP64Args;
1301   } break;
1302   }
1303 
1304   discardUnavailableVFPRegsAndTheirAliases(Source);
1305 
1306   if (Source->empty()) {
1307     VFPRegsUsed.set();
1308     return false;
1309   }
1310 
1311   *Reg = Source->back();
1312   VFPRegsUsed |= RegisterAliases[*Reg];
1313 
1314   // In MIPS O32 abi if fun arguments are (f32, i32) then one can not use reg_a0
1315   // for second argument even though it's free. f32 arg goes in reg_f12, i32 arg
1316   // goes in reg_a1. Similarly if arguments are (f64, i32) second argument goes
1317   // in reg_a3 and a0, a1 are not used.
1318   Source = &GPRArgs;
1319   // Discard one GPR reg for f32(4 bytes), two for f64(4 + 4 bytes)
1320   if (Ty == IceType_f64) {
1321     // In MIPS o32 abi, when we use GPR argument pairs to store F64 values, pair
1322     // must be aligned at even register. Similarly when we discard GPR registers
1323     // when some arguments from starting 16 bytes goes in FPR, we must take care
1324     // of alignment. For example if fun args are (f32, f64, f32), for first f32
1325     // we discard a0, now for f64 argument, which will go in F14F15, we must
1326     // first align GPR vector to even register by discarding a1, then discard
1327     // two GPRs a2 and a3. Now last f32 argument will go on stack.
1328     alignGPR(Source);
1329     discardNextGPRAndItsAliases(Source);
1330   }
1331   discardNextGPRAndItsAliases(Source);
1332   return true;
1333 }
1334 
discardUnavailableVFPRegsAndTheirAliases(CfgVector<RegNumT> * Regs)1335 void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
1336     CfgVector<RegNumT> *Regs) {
1337   while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
1338     Regs->pop_back();
1339   }
1340 }
1341 
// Replaces each register-passed argument with a "home register" variable
// pinned to its ABI-assigned register, and inserts prolog assignments
// copying the home register into the original argument variable. Also
// materializes the implicit pointer argument used when returning v4f32
// through memory.
void TargetMIPS32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetMIPS32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // v4f32 is returned through stack. $4 is setup by the caller and passed as
  // first argument implicitly. Callee then copies the return vector at $4.
  Variable *ImplicitRetVec = nullptr;
  if (isVectorFloatingType(Func->getReturnType())) {
    ImplicitRetVec = Func->makeVariable(IceType_i32);
    ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
    ImplicitRetVec->setIsArg();
    Args.insert(Args.begin(), ImplicitRetVec);
    setImplicitRet(ImplicitRetVec);
  }

  for (SizeT i = 0, E = Args.size(); i < E; ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    // Stack-passed arguments keep their original variable; they are
    // handled later by finishArgumentLowering().
    if (!CC.argInReg(Ty, i, &RegNum)) {
      continue;
    }
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[i] = RegisterArg;

    if (isVectorType(Ty)) {
      // Vector arguments occupy consecutive GPRs starting at RegNum.
      auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
      RegisterArgVec->initVecElement(Func);
      RegisterArgVec->getContainers()[0]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 0));
      RegisterArgVec->getContainers()[1]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 1));
      // The first two elements of a second vector argument are passed in
      // $6:$7 and the remaining two on the stack, so do not assign
      // registers to elements 2 and 3 in that case.
      if (i == 0) {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 2));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 3));
      } else {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme(RegNumT()));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme(RegNumT()));
      }
    } else {
      switch (Ty) {
      default: {
        RegisterArg->setRegNum(RegNum);
      } break;
      case IceType_i64: {
        // 64-bit arguments are split across a GPR pair.
        auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
        RegisterArg64->initHiLo(Func);
        RegisterArg64->getLo()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
        RegisterArg64->getHi()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
      } break;
      }
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }

  // Insert a fake use of ImplicitRet_v4f32 before each return, to keep the
  // implicit return pointer live for the whole function.
  if (ImplicitRetVec) {
    for (CfgNode *Node : Func->getNodes()) {
      for (Inst &Instr : Node->getInsts()) {
        if (llvm::isa<InstRet>(&Instr)) {
          Context.setInsertPoint(instToIterator(&Instr));
          Context.insert<InstFakeUse>(ImplicitRetVec);
          break;
        }
      }
    }
  }
}
1430 
stackSlotType()1431 Type TargetMIPS32::stackSlotType() { return IceType_i32; }
1432 
// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
                                          Variable *FramePtr,
                                          size_t BasicFrameOffset,
                                          size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  // Honor the stack alignment required by Arg's type before assigning it a
  // slot.
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  // If $4 is used for any scalar type (or returning v4f32) then the next
  // vector type is passed in $6:$7:stack:stack. Load the 3rd and 4th
  // elements from the argument stack; when PartialOnStack is true, only
  // elements 2 and 3 live on the stack (0 and 1 arrived in registers).
  if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
    if (PartialOnStack == false) {
      auto *Elem0 = ArgVecOn32->getContainers()[0];
      auto *Elem1 = ArgVecOn32->getContainers()[1];
      finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
      finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
    }
    auto *Elem2 = ArgVecOn32->getContainers()[2];
    auto *Elem3 = ArgVecOn32->getContainers()[3];
    finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    // Lo is handled first so it lands at the lower address (little-endian).
    finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  // At this point Arg must be a scalar 32-bit-or-smaller value.
  assert(Ty != IceType_i64);
  assert(!isVectorType(Ty));

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  // If the argument was not register-allocated, its home is the stack slot.
  if (!Arg->hasReg()) {
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy the
  // value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}
1497 
// Emit the function prologue into the entry node: compute the stack frame
// layout, save callee-saved registers, adjust SP (and set up FP when used),
// then assign stack offsets to incoming arguments and spilled variables.
addProlog(CfgNode * Node)1498 void TargetMIPS32::addProlog(CfgNode *Node) {
1499   // Stack frame layout:
1500   //
1501   // +------------------------+
1502   // | 1. preserved registers |
1503   // +------------------------+
1504   // | 2. padding             |
1505   // +------------------------+
1506   // | 3. global spill area   |
1507   // +------------------------+
1508   // | 4. padding             |
1509   // +------------------------+
1510   // | 5. local spill area    |
1511   // +------------------------+
1512   // | 6. padding             |
1513   // +------------------------+
1514   // | 7. allocas             |
1515   // +------------------------+
1516   // | 8. padding             |
1517   // +------------------------+
1518   // | 9. out args            |
1519   // +------------------------+ <--- StackPointer
1520   //
1521   // The following variables record the size in bytes of the given areas:
1522   //  * PreservedRegsSizeBytes: area 1
1523   //  * SpillAreaPaddingBytes:  area 2
1524   //  * GlobalsSize:            area 3
1525   //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
1526   //  * LocalsSpillAreaSize:    area 5
1527   //  * SpillAreaSizeBytes:     areas 2 - 9
1528   //  * maxOutArgsSizeBytes():  area 9
1529 
1530   Context.init(Node);
1531   Context.setInsertPoint(Context.getCur());
1532 
1533   SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
1534   RegsUsed = SmallBitVector(CalleeSaves.size());
1535 
1536   VarList SortedSpilledVariables;
1537 
1538   size_t GlobalsSize = 0;
1539   // If there is a separate locals area, this represents that area. Otherwise
1540   // it counts any variable not counted by GlobalsSize.
1541   SpillAreaSizeBytes = 0;
1542   // If there is a separate locals area, this specifies the alignment for it.
1543   uint32_t LocalsSlotsAlignmentBytes = 0;
1544   // The entire spill locations area gets aligned to largest natural alignment
1545   // of the variables that have a spill slot.
1546   uint32_t SpillAreaAlignmentBytes = 0;
1547   // For now, we don't have target-specific variables that need special
1548   // treatment (no stack-slot-linked SpillVariable type).
  // The hook tells getVarStackSlotParams to skip Variable64On32 pairs; their
  // halves get their own slots instead of the combined 64-bit variable.
1549   std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
1550     static constexpr bool AssignStackSlot = false;
1551     static constexpr bool DontAssignStackSlot = !AssignStackSlot;
1552     if (llvm::isa<Variable64On32>(Var)) {
1553       return DontAssignStackSlot;
1554     }
1555     return AssignStackSlot;
1556   };
1557 
1558   // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1559   getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1560                         &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1561                         &LocalsSlotsAlignmentBytes, TargetVarHook);
1562   uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1563   SpillAreaSizeBytes += GlobalsSize;
1564 
1565   PreservedGPRs.reserve(CalleeSaves.size());
1566 
1567   // Consider FP and RA as callee-save / used as needed.
1568   if (UsesFramePointer) {
1569     if (RegsUsed[RegMIPS32::Reg_FP]) {
1570       llvm::report_fatal_error("Frame pointer has been used.");
1571     }
1572     CalleeSaves[RegMIPS32::Reg_FP] = true;
1573     RegsUsed[RegMIPS32::Reg_FP] = true;
1574   }
  // MaybeLeafFunc == false presumably means a call was observed, so RA gets
  // clobbered and must be preserved -- TODO confirm where the flag is set.
1575   if (!MaybeLeafFunc) {
1576     CalleeSaves[RegMIPS32::Reg_RA] = true;
1577     RegsUsed[RegMIPS32::Reg_RA] = true;
1578   }
1579 
1580   // Make two passes over the used registers. The first pass records all the
1581   // used registers -- and their aliases. Then, we figure out which GPR
1582   // registers should be saved.
1583   SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
1584   for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1585     if (CalleeSaves[i] && RegsUsed[i]) {
1586       ToPreserve |= RegisterAliases[i];
1587     }
1588   }
1589 
1590   uint32_t NumCallee = 0;
1591 
1592   // RegClasses is a tuple of
1593   //
1594   // <First Register in Class, Last Register in Class, Vector of Save Registers>
1595   //
1596   // We use this tuple to figure out which register we should save/restore
1597   // during prolog/epilog.
1598   //
1599   using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1600   const RegClassType RegClass = RegClassType(
1601       RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
1602   const uint32_t FirstRegInClass = std::get<0>(RegClass);
1603   const uint32_t LastRegInClass = std::get<1>(RegClass);
1604   VarList *const PreservedRegsInClass = std::get<2>(RegClass);
  // Walk registers from high to low so saves are emitted in a fixed order
  // that addEpilog can undo by iterating PreservedGPRs in reverse.
  // NOTE(review): the bound is `Reg > FirstRegInClass`, so FirstRegInClass
  // itself is never preserved -- presumably safe if the first GPR never needs
  // saving, but confirm against the register table.
1605   for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
1606     if (!ToPreserve[Reg]) {
1607       continue;
1608     }
1609     ++NumCallee;
1610     Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1611     PreservedRegsSizeBytes +=
1612         typeWidthInBytesOnStack(PhysicalRegister->getType());
1613     PreservedRegsInClass->push_back(PhysicalRegister);
1614   }
1615 
1616   Ctx->statsUpdateRegistersSaved(NumCallee);
1617 
1618   // Align the variables area. SpillAreaPaddingBytes is the size of the region
1619   // after the preserved registers and before the spill areas.
1620   // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1621   // locals area if they are separate.
1622   assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
1623   (void)MIPS32_STACK_ALIGNMENT_BYTES;
1624   assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1625   uint32_t SpillAreaPaddingBytes = 0;
1626   uint32_t LocalsSlotsPaddingBytes = 0;
1627   alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1628                        GlobalsSize, LocalsSlotsAlignmentBytes,
1629                        &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1630   SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1631   uint32_t GlobalsAndSubsequentPaddingSize =
1632       GlobalsSize + LocalsSlotsPaddingBytes;
1633 
1634   // Adds the out args space to the stack, and align SP if necessary.
  // When a variable-sized alloca is used, the out-args area is folded into the
  // alloca handling instead of being reserved here (hence the `? 0 : 1`).
1635   if (!NeedsStackAlignment) {
1636     SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
1637   } else {
1638     SpillAreaSizeBytes = applyStackAlignment(
1639         SpillAreaSizeBytes +
1640         (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
1641   }
1642 
1643   // Combine fixed alloca with SpillAreaSize.
1644   SpillAreaSizeBytes += FixedAllocaSizeBytes;
1645 
1646   TotalStackSizeBytes =
1647       applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);
1648 
1649   // Generate "addiu sp, sp, -TotalStackSizeBytes"
1650   if (TotalStackSizeBytes) {
1651     // Use the scratch register if needed to legalize the immediate.
1652     Sandboxer(this).addiu_sp(-TotalStackSizeBytes);
1653   }
1654 
1655   Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
1656 
  // Store each preserved register just below the old SP, walking downward.
1657   if (!PreservedGPRs.empty()) {
1658     uint32_t StackOffset = TotalStackSizeBytes;
1659     for (Variable *Var : *PreservedRegsInClass) {
1660       Type RegType;
1661       if (RegMIPS32::isFPRReg(Var->getRegNum()))
1662         RegType = IceType_f32;
1663       else
1664         RegType = IceType_i32;
1665       auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
1666       StackOffset -= typeWidthInBytesOnStack(RegType);
1667       Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1668       OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1669           Func, RegType, SP,
1670           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1671       Sandboxer(this).sw(PhysicalRegister, MemoryLocation);
1672     }
1673   }
1674 
1675   Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1676 
1677   // Generate "mov FP, SP" if needed.
1678   if (UsesFramePointer) {
1679     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1680     _mov(FP, SP);
1681     // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1682     Context.insert<InstFakeUse>(FP);
1683   }
1684 
1685   // Fill in stack offsets for stack args, and copy args into registers for
1686   // those that were register-allocated. Args are pushed right to left, so
1687   // Arg[0] is closest to the stack/frame pointer.
1688   const VarList &Args = Func->getArgs();
  // Start past the GPR argument home area -- presumably the first
  // MIPS32_MAX_GPR_ARG * 4 bytes belong to register-passed arguments.
1689   size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
1690   TargetMIPS32::CallingConv CC;
1691   uint32_t ArgNo = 0;
1692 
1693   for (Variable *Arg : Args) {
1694     RegNumT DummyReg;
1695     const Type Ty = Arg->getType();
1696     bool PartialOnStack;
1697     // Skip arguments passed in registers.
1698     if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
1699       // Load argument from stack:
1700       // 1. If this is first vector argument and return type is v4f32.
1701       //    In this case $4 is used to pass stack address implicitly.
1702       //    3rd and 4th element of vector argument is passed through stack.
1703       // 2. If this is second vector argument.
1704       if (ArgNo != 0 && isVectorType(Ty)) {
1705         PartialOnStack = true;
1706         finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1707                                &InArgsSizeBytes);
1708       }
1709     } else {
1710       PartialOnStack = false;
1711       finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1712                              &InArgsSizeBytes);
1713     }
1714     ++ArgNo;
1715   }
1716 
1717   // Fill in stack offsets for locals.
1718   assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1719                       SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize);
1720   this->HasComputedFrame = true;
1721 
1722   if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1723     OstreamLocker _(Func->getContext());
1724     Ostream &Str = Func->getContext()->getStrDump();
1725 
1726     Str << "Stack layout:\n";
1727     uint32_t SPAdjustmentPaddingSize =
1728         SpillAreaSizeBytes - LocalsSpillAreaSize -
1729         GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1730         MaxOutArgsSizeBytes;
1731     Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1732         << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1733         << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1734         << " globals spill area = " << GlobalsSize << " bytes\n"
1735         << " globals-locals spill areas intermediate padding = "
1736         << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1737         << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1738         << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1739 
1740     Str << "Stack details:\n"
1741         << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1742         << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1743         << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1744         << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1745         << " bytes\n"
        // NOTE(review): always prints 1 even though UsesFramePointer is
        // conditional above -- consider printing UsesFramePointer; confirm.
1746         << " is FP based = " << 1 << "\n";
1747   }
1748   return;
1749 }
1750 
// Emit the function epilogue just before the node's ret instruction (if any):
// restore SP (from FP when a frame pointer is used), reload callee-saved
// registers, pop the frame, and -- under sandboxing only -- replace the ret
// with a sandboxed return sequence.
addEpilog(CfgNode * Node)1751 void TargetMIPS32::addEpilog(CfgNode *Node) {
1752   InstList &Insts = Node->getInsts();
1753   InstList::reverse_iterator RI, E;
  // Scan backwards for the ret; nodes without one need no epilogue.
1754   for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1755     if (llvm::isa<InstMIPS32Ret>(*RI))
1756       break;
1757   }
1758   if (RI == E)
1759     return;
1760 
1761   // Convert the reverse_iterator position into its corresponding (forward)
1762   // iterator position.
1763   InstList::iterator InsertPoint = reverseToForwardIterator(RI);
  // Step back one so the restore code is inserted before the ret itself.
1764   --InsertPoint;
1765   Context.init(Node);
1766   Context.setInsertPoint(InsertPoint);
1767 
1768   Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1769   if (UsesFramePointer) {
1770     Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1771     // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1772     // use of SP before the assignment of SP=FP keeps previous SP adjustments
1773     // from being dead-code eliminated.
1774     Context.insert<InstFakeUse>(SP);
1775     Sandboxer(this).reset_sp(FP);
1776   }
1777 
1778   VarList::reverse_iterator RIter, END;
1779 
  // Reload preserved registers in reverse of the order addProlog saved them,
  // walking offsets upward from the bottom of the preserved-register area.
1780   if (!PreservedGPRs.empty()) {
1781     uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
1782     for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
1783          RIter != END; ++RIter) {
1784       Type RegType;
1785       if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
1786         RegType = IceType_f32;
1787       else
1788         RegType = IceType_i32;
1789       auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
1790       Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1791       OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1792           Func, RegType, SP,
1793           llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1794       _lw(PhysicalRegister, MemoryLocation);
1795       StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
1796     }
1797   }
1798 
  // Pop the whole frame in one SP adjustment.
1799   if (TotalStackSizeBytes) {
1800     Sandboxer(this).addiu_sp(TotalStackSizeBytes);
1801   }
  // Without sandboxing the original ret instruction is kept as-is.
1802   if (!getFlags().getUseSandboxing())
1803     return;
1804 
  // Sandboxed return: re-emit the ret through the Sandboxer (keeping its
  // return value operand, if present) and delete the original.
1805   Variable *RA = getPhysicalRegister(RegMIPS32::Reg_RA);
1806   Variable *RetValue = nullptr;
1807   if (RI->getSrcSize())
1808     RetValue = llvm::cast<Variable>(RI->getSrc(0));
1809 
1810   Sandboxer(this).ret(RA, RetValue);
1811 
1812   RI->setDeleted();
1813 }
1814 
newBaseRegister(Variable * Base,int32_t Offset,RegNumT ScratchRegNum)1815 Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister(
1816     Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1817   // Legalize will likely need a lui/ori combination, but if the top bits are
1818   // all 0 from negating the offset and subtracting, we could use that instead.
1819   const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1820   Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1821   if (ShouldSub) {
1822     Target->_addi(ScratchReg, Base, -Offset);
1823   } else {
1824     constexpr bool SignExt = true;
1825     if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) {
1826       const uint32_t UpperBits = (Offset >> 16) & 0xFFFF;
1827       const uint32_t LowerBits = Offset & 0xFFFF;
1828       Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits));
1829       if (LowerBits)
1830         Target->_ori(ScratchReg, ScratchReg, LowerBits);
1831       Target->_addu(ScratchReg, ScratchReg, Base);
1832     } else {
1833       Target->_addiu(ScratchReg, Base, Offset);
1834     }
1835   }
1836 
1837   return ScratchReg;
1838 }
1839 
legalizeMovFp(InstMIPS32MovFP64ToI64 * MovInstr)1840 void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
1841     InstMIPS32MovFP64ToI64 *MovInstr) {
1842   Variable *Dest = MovInstr->getDest();
1843   Operand *Src = MovInstr->getSrc(0);
1844   const Type SrcTy = Src->getType();
1845 
1846   if (Dest != nullptr && SrcTy == IceType_f64) {
1847     int32_t Offset = Dest->getStackOffset();
1848     auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1849     OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1850         Target->Func, IceType_f32, Base,
1851         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1852     OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1853     auto *SrcV = llvm::cast<Variable>(Src);
1854     Variable *SrcR;
1855     if (MovInstr->getInt64Part() == Int64_Lo) {
1856       SrcR = Target->makeReg(
1857           IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
1858     } else {
1859       SrcR = Target->makeReg(
1860           IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
1861     }
1862     Sandboxer(Target).sw(SrcR, Addr);
1863     if (MovInstr->isDestRedefined()) {
1864       Target->_set_dest_redefined();
1865     }
1866     MovInstr->setDeleted();
1867     return;
1868   }
1869 
1870   llvm::report_fatal_error("legalizeMovFp: Invalid operands");
1871 }
1872 
// Legalize a pseudo mov into real MIPS32 instructions. Three shapes are
// handled: (1) GP<->FP register moves (mtc1/mfc1, possibly split for 64-bit
// pairs), (2) stores to a stack slot when Dest has no register, and
// (3) loads / rematerialized address computations when Src has no register.
// When a case applies, real instructions are emitted and the pseudo mov is
// deleted (propagating the dest-redefined flag); otherwise it is left alone.
legalizeMov(InstMIPS32Mov * MovInstr)1873 void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
1874   Variable *Dest = MovInstr->getDest();
1875   assert(Dest != nullptr);
1876   const Type DestTy = Dest->getType();
1877   assert(DestTy != IceType_i64);
1878 
1879   Operand *Src = MovInstr->getSrc(0);
1880   const Type SrcTy = Src->getType();
1881   (void)SrcTy;
1882   assert(SrcTy != IceType_i64);
1883 
1884   bool Legalized = false;
  // Case (1): both sides have registers -- only cross-bank (GP vs FP) moves
  // need legalization here.
1885   auto *SrcR = llvm::cast<Variable>(Src);
1886   if (Dest->hasReg() && SrcR->hasReg()) {
1887     // This might be a GP to/from FP move generated due to argument passing.
1888     // Use mtc1/mfc1 instead of mov.[s/d] if src and dst registers are of
1889     // different types.
1890     const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
1891     const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
1892     const RegNumT SRegNum = SrcR->getRegNum();
1893     const RegNumT DRegNum = Dest->getRegNum();
1894     if (IsDstGPR != IsSrcGPR) {
1895       if (IsDstGPR) {
1896         // Dest is GPR and SrcR is FPR. Use mfc1.
1897         int32_t TypeWidth = typeWidthInBytes(DestTy);
1898         if (MovInstr->getDestHi() != nullptr)
1899           TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType());
1900         if (TypeWidth == 8) {
1901           // Split it into two mfc1 instructions
1902           Variable *SrcGPRHi = Target->makeReg(
1903               IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1904           Variable *SrcGPRLo = Target->makeReg(
1905               IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1906           Variable *DstFPRHi, *DstFPRLo;
            // Prefer the explicit Dest/DestHi pair when the instruction
            // carries one; otherwise derive the pair from DRegNum.
1907           if (MovInstr->getDestHi() != nullptr && Dest != nullptr) {
1908             DstFPRHi = Target->makeReg(IceType_i32,
1909                                        MovInstr->getDestHi()->getRegNum());
1910             DstFPRLo = Target->makeReg(IceType_i32, Dest->getRegNum());
1911           } else {
1912             DstFPRHi = Target->makeReg(
1913                 IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1914             DstFPRLo = Target->makeReg(
1915                 IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1916           }
1917           Target->_mov(DstFPRHi, SrcGPRHi);
1918           Target->_mov(DstFPRLo, SrcGPRLo);
1919           Legalized = true;
1920         } else {
1921           Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
1922           Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum);
1923           Target->_mov(DstFPR, SrcGPR);
1924           Legalized = true;
1925         }
1926       } else {
1927         // Dest is FPR and SrcR is GPR. Use mtc1.
1928         if (typeWidthInBytes(Dest->getType()) == 8) {
1929           Variable *SrcGPRHi, *SrcGPRLo;
1930           // SrcR could be $zero which is i32
1931           if (SRegNum == RegMIPS32::Reg_ZERO) {
1932             SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
1933             SrcGPRLo = SrcGPRHi;
1934           } else {
1935             // Split it into two mtc1 instructions
1936             if (MovInstr->getSrcSize() == 2) {
                // Two explicit sources name the pair directly.
1937               const auto FirstReg =
1938                   (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1939               const auto SecondReg =
1940                   (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1941               SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1942               SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1943             } else {
1944               SrcGPRLo = Target->makeReg(
1945                   IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1946               SrcGPRHi = Target->makeReg(
1947                   IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1948             }
1949           }
1950           Variable *DstFPRHi = Target->makeReg(
1951               IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1952           Variable *DstFPRLo = Target->makeReg(
1953               IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
            // NOTE(review): Hi receives Lo and vice versa here; the Hi/Lo
            // naming is inconsistent across the branches above, so verify the
            // pairing is intentional rather than a swap bug.
1954           Target->_mov(DstFPRHi, SrcGPRLo);
1955           Target->_mov(DstFPRLo, SrcGPRHi);
1956           Legalized = true;
1957         } else {
1958           Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
1959           Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
1960           Target->_mov(DstFPR, SrcGPR);
1961           Legalized = true;
1962         }
1963       }
1964     }
1965     if (Legalized) {
1966       if (MovInstr->isDestRedefined()) {
1967         Target->_set_dest_redefined();
1968       }
1969       MovInstr->setDeleted();
1970       return;
1971     }
1972   }
1973 
  // Case (2): Dest has no register -- this is a store to Dest's stack slot.
1974   if (!Dest->hasReg()) {
    // Shadows the outer SrcR (same value); kept for the local asserts.
1975     auto *SrcR = llvm::cast<Variable>(Src);
1976     assert(SrcR->hasReg());
1977     assert(!SrcR->isRematerializable());
1978     int32_t Offset = Dest->getStackOffset();
1979 
1980     // This is a _mov(Mem(), Variable), i.e., a store.
1981     auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1982 
1983     OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1984         Target->Func, DestTy, Base,
1985         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
    // TAddrHi addresses the upper word of a 64-bit value (Offset + 4); it is
    // only legalized in the branches that actually use it.
1986     OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
1987         Target->Func, DestTy, Base,
1988         llvm::cast<ConstantInteger32>(
1989             Target->Ctx->getConstantInt32(Offset + 4)));
1990     OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1991 
1992     // FP arguments are passed in GP reg if first argument is in GP. In this
1993     // case type of the SrcR is still FP thus we need to explicitly generate sw
1994     // instead of swc1.
1995     const RegNumT RegNum = SrcR->getRegNum();
1996     const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
1997     if (SrcTy == IceType_f32 && IsSrcGPReg) {
1998       Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
1999       Sandboxer(Target).sw(SrcGPR, Addr);
2000     } else if (SrcTy == IceType_f64 && IsSrcGPReg) {
2001       Variable *SrcGPRHi =
2002           Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2003       Variable *SrcGPRLo = Target->makeReg(
2004           IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2005       Sandboxer(Target).sw(SrcGPRHi, Addr);
2006       OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2007       Sandboxer(Target).sw(SrcGPRLo, AddrHi);
2008     } else if (DestTy == IceType_f64 && IsSrcGPReg) {
2009       const auto FirstReg =
2010           (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2011       const auto SecondReg =
2012           (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2013       Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
2014       Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
2015       Sandboxer(Target).sw(SrcGPRLo, Addr);
2016       OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2017       Sandboxer(Target).sw(SrcGPRHi, AddrHi);
2018     } else {
2019       Sandboxer(Target).sw(SrcR, Addr);
2020     }
2021 
    // Fake def keeps Dest defined for later liveness passes since the real
    // definition now lives in memory.
2022     Target->Context.insert<InstFakeDef>(Dest);
2023     Legalized = true;
2024   } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
    // Case (3a): rematerializable source -- compute its address instead of
    // loading a value.
2025     if (Var->isRematerializable()) {
2026       // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
2027 
2028       // ExtraOffset is only needed for stack-pointer based frames as we have
2029       // to account for spill storage.
2030       const int32_t ExtraOffset =
2031           (Var->getRegNum() == Target->getFrameOrStackReg())
2032               ? Target->getFrameFixedAllocaOffset()
2033               : 0;
2034 
2035       const int32_t Offset = Var->getStackOffset() + ExtraOffset;
2036       Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
2037       Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
2038       Target->_mov(Dest, T);
2039       Legalized = true;
2040     } else {
      // Case (3b): source has no register -- load from its stack slot.
2041       if (!Var->hasReg()) {
2042         // This is a _mov(Variable, Mem()), i.e., a load.
2043         const int32_t Offset = Var->getStackOffset();
2044         auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
2045         const RegNumT RegNum = Dest->getRegNum();
2046         const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum());
2047         // If we are moving i64 to a double using stack then the address may
2048         // not be aligned to 8-byte boundary as we split i64 into Hi-Lo parts
2049         // and store them individually with 4-byte alignment. Load the Hi-Lo
2050         // parts in TmpReg and move them to the dest using mtc1.
2051         if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
2052             !IsDstGPReg) {
2053           auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
            // Shadows the outer RegNum (same value).
2054           const RegNumT RegNum = Dest->getRegNum();
2055           Variable *DestLo = Target->makeReg(
2056               IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
2057           Variable *DestHi = Target->makeReg(
2058               IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
2059           OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
2060               Target->Func, IceType_i32, Base,
2061               llvm::cast<ConstantInteger32>(
2062                   Target->Ctx->getConstantInt32(Offset)));
2063           OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
2064               Target->Func, IceType_i32, Base,
2065               llvm::cast<ConstantInteger32>(
2066                   Target->Ctx->getConstantInt32(Offset + 4)));
2067           Sandboxer(Target).lw(Reg, AddrLo);
2068           Target->_mov(DestLo, Reg);
2069           Sandboxer(Target).lw(Reg, AddrHi);
2070           Target->_mov(DestHi, Reg);
2071         } else {
2072           OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
2073               Target->Func, DestTy, Base,
2074               llvm::cast<ConstantInteger32>(
2075                   Target->Ctx->getConstantInt32(Offset)));
2076           OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
2077           OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
2078               Target->Func, DestTy, Base,
2079               llvm::cast<ConstantInteger32>(
2080                   Target->Ctx->getConstantInt32(Offset + 4)));
2081           // FP arguments are passed in GP reg if first argument is in GP.
2082           // In this case type of the Dest is still FP thus we need to
2083           // explicitly generate lw instead of lwc1.
2084           if (DestTy == IceType_f32 && IsDstGPReg) {
2085             Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
2086             Sandboxer(Target).lw(DstGPR, Addr);
2087           } else if (DestTy == IceType_f64 && IsDstGPReg) {
2088             Variable *DstGPRHi = Target->makeReg(
2089                 IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2090             Variable *DstGPRLo = Target->makeReg(
2091                 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2092             Sandboxer(Target).lw(DstGPRHi, Addr);
2093             OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2094             Sandboxer(Target).lw(DstGPRLo, AddrHi);
            // NOTE(review): this condition is identical to the branch above
            // (DestTy == IceType_f64 && IsDstGPReg), so this block is
            // unreachable dead code. The analogous store path above uses a
            // different predicate for its two-source branch -- confirm the
            // intended condition before removing or fixing.
2095           } else if (DestTy == IceType_f64 && IsDstGPReg) {
2096             const auto FirstReg =
2097                 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2098             const auto SecondReg =
2099                 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2100             Variable *DstGPRHi = Target->makeReg(IceType_i32, FirstReg);
2101             Variable *DstGPRLo = Target->makeReg(IceType_i32, SecondReg);
2102             Sandboxer(Target).lw(DstGPRLo, Addr);
2103             OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2104             Sandboxer(Target).lw(DstGPRHi, AddrHi);
2105           } else {
2106             Sandboxer(Target).lw(Dest, Addr);
2107           }
2108         }
2109         Legalized = true;
2110       }
2111     }
2112   }
2113 
2114   if (Legalized) {
2115     if (MovInstr->isDestRedefined()) {
2116       Target->_set_dest_redefined();
2117     }
2118     MovInstr->setDeleted();
2119   }
2120 }
2121 
2122 OperandMIPS32Mem *
legalizeMemOperand(OperandMIPS32Mem * Mem)2123 TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
2124   if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
2125     return nullptr;
2126   }
2127   Variable *Base = Mem->getBase();
2128   auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
2129   int32_t Offset = Ci32->getValue();
2130 
2131   if (Base->isRematerializable()) {
2132     const int32_t ExtraOffset =
2133         (Base->getRegNum() == Target->getFrameOrStackReg())
2134             ? Target->getFrameFixedAllocaOffset()
2135             : 0;
2136     Offset += Base->getStackOffset() + ExtraOffset;
2137     Base = Target->getPhysicalRegister(Base->getRegNum());
2138   }
2139 
2140   constexpr bool SignExt = true;
2141   if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) {
2142     Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2143     Offset = 0;
2144   }
2145 
2146   return OperandMIPS32Mem::create(
2147       Target->Func, Mem->getType(), Base,
2148       llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
2149 }
2150 
legalizeImmediate(int32_t Imm)2151 Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) {
2152   Variable *Reg = nullptr;
2153   if (!((std::numeric_limits<int16_t>::min() <= Imm) &&
2154         (Imm <= std::numeric_limits<int16_t>::max()))) {
2155     const uint32_t UpperBits = (Imm >> 16) & 0xFFFF;
2156     const uint32_t LowerBits = Imm & 0xFFFF;
2157     Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2158     Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2159     if (LowerBits) {
2160       Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits));
2161       Target->_ori(Reg, TReg, LowerBits);
2162     } else {
2163       Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits));
2164     }
2165   }
2166   return Reg;
2167 }
2168 
// Post-lowering legalization pass: walk every instruction in every node and
// rewrite pseudo moves, memory operands with out-of-range offsets, and addiu
// instructions with out-of-range immediates into encodable sequences. Each
// legalized instruction is re-emitted and the original marked deleted.
postLowerLegalization()2169 void TargetMIPS32::postLowerLegalization() {
2170   Func->dump("Before postLowerLegalization");
2171   assert(hasComputedFrame());
2172   for (CfgNode *Node : Func->getNodes()) {
2173     Context.init(Node);
2174     PostLoweringLegalizer Legalizer(this);
2175     while (!Context.atEnd()) {
2176       PostIncrLoweringContext PostIncrement(Context);
2177       Inst *CurInstr = iteratorToInst(Context.getCur());
      // Pre-extract up to two sources and pre-cast them to the operand kinds
      // the cases below need; dyn_cast_or_null yields nullptr on a mismatch.
2178       const SizeT NumSrcs = CurInstr->getSrcSize();
2179       Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0);
2180       Operand *Src1 = NumSrcs < 2 ? nullptr : CurInstr->getSrc(1);
2181       auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0);
2182       auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0);
2183       auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1);
2184       Variable *Dst = CurInstr->getDest();
2185       if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) {
2186         Legalizer.legalizeMov(MovInstr);
2187         continue;
2188       }
2189       if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
2190         Legalizer.legalizeMovFp(MovInstr);
2191         continue;
2192       }
      // Loads/stores: legalizeMemOperand returns nullptr only for relocatable
      // offsets (left untouched); otherwise the instruction is re-emitted
      // with the legalized address and the original deleted.
2193       if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
2194         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2195           Sandboxer(this).sw(Src0V, LegalMem);
2196           CurInstr->setDeleted();
2197         }
2198         continue;
2199       }
2200       if (llvm::isa<InstMIPS32Swc1>(CurInstr)) {
2201         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2202           _swc1(Src0V, LegalMem);
2203           CurInstr->setDeleted();
2204         }
2205         continue;
2206       }
2207       if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) {
2208         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2209           _sdc1(Src0V, LegalMem);
2210           CurInstr->setDeleted();
2211         }
2212         continue;
2213       }
2214       if (llvm::isa<InstMIPS32Lw>(CurInstr)) {
2215         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2216           Sandboxer(this).lw(Dst, LegalMem);
2217           CurInstr->setDeleted();
2218         }
2219         continue;
2220       }
2221       if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) {
2222         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2223           _lwc1(Dst, LegalMem);
2224           CurInstr->setDeleted();
2225         }
2226         continue;
2227       }
2228       if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) {
2229         if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2230           _ldc1(Dst, LegalMem);
2231           CurInstr->setDeleted();
2232         }
2233         continue;
2234       }
      // addiu with an immediate that needs lui/ori is rewritten as a register
      // add; legalizeImmediate returns nullptr when the immediate fits.
2235       if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) {
2236         if (auto *LegalImm = Legalizer.legalizeImmediate(
2237                 static_cast<int32_t>(AddiuInstr->getImmediateValue()))) {
2238           _addu(Dst, Src0V, LegalImm);
2239           CurInstr->setDeleted();
2240         }
2241         continue;
2242       }
2243     }
2244   }
2245 }
2246 
loOperand(Operand * Operand)2247 Operand *TargetMIPS32::loOperand(Operand *Operand) {
2248   assert(Operand->getType() == IceType_i64);
2249   if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2250     return Var64On32->getLo();
2251   if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2252     return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2253   }
2254   if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2255     // Conservatively disallow memory operands with side-effects (pre/post
2256     // increment) in case of duplication.
2257     assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2258     return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(),
2259                                     Mem->getOffset(), Mem->getAddrMode());
2260   }
2261   llvm_unreachable("Unsupported operand type");
2262   return nullptr;
2263 }
2264 
/// Returns the Index'th i32-sized piece of a vector operand. For a memory
/// operand this is a new memory operand advanced by
/// Index * typeWidthInBytes(BaseType); for a VariableVecOn32 it is the
/// Index'th i32 container. Fatal error if the operand is not a vector.
Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
                                         uint32_t Index) {
  if (!isVectorType(Operand->getType())) {
    llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
    return nullptr;
  }

  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    // Only simple base+offset addressing is supported here (no side-effecting
    // modes), since the operand may be duplicated.
    assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
    Variable *Base = Mem->getBase();
    auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
    int32_t NextOffsetVal =
        Offset->getValue() + (Index * typeWidthInBytes(BaseType));
    constexpr bool NoSignExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) {
      // The advanced offset does not fit the instruction's immediate field:
      // materialize a new base register and keep the old offset.
      // NOTE(review): only 4 is added to the base here, mirroring hiOperand's
      // fixed "+4" split, while the in-range path above advances by
      // Index * typeWidthInBytes(BaseType) — confirm this overflow path can
      // only be reached when the element step is exactly 4.
      Constant *_4 = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4));
      Base = NewBase;
    } else {
      Offset =
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
    }
    return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
                                    Mem->getAddrMode());
  }

  if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
    return VarVecOn32->getContainers()[Index];

  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
2300 
/// Returns an i32 operand designating the high 32 bits of the given i64
/// operand: the Hi half of a register pair, the upper word of a constant, or
/// a memory operand at offset+4.
Operand *TargetMIPS32::hiOperand(Operand *Operand) {
  assert(Operand->getType() == IceType_i64);
  // Defensive fallback for release builds where the assert is compiled out.
  if (Operand->getType() != IceType_i64)
    return Operand;
  if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
    return Var64On32->getHi();
  if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
    // Upper 32 bits of the 64-bit constant.
    return Ctx->getConstantInt32(
        static_cast<uint32_t>(Const->getValue() >> 32));
  }
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    // Conservatively disallow memory operands with side-effects
    // in case of duplication.
    assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
    const Type SplitType = IceType_i32;
    Variable *Base = Mem->getBase();
    auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
    // The high word lives 4 bytes above the low word.
    int32_t NextOffsetVal = Offset->getValue() + 4;
    constexpr bool SignExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
      // We have to make a temp variable and add 4 to either Base or Offset.
      // If we add 4 to Offset, this will convert a non-RegReg addressing
      // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
      // RegReg addressing modes, prefer adding to base and replacing instead.
      // Thus we leave the old offset alone.
      Constant *Four = Ctx->getConstantInt32(4);
      Variable *NewBase = Func->makeVariable(Base->getType());
      lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
                                             Base, Four));
      Base = NewBase;
    } else {
      Offset =
          llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
    }
    return OperandMIPS32Mem::create(Func, SplitType, Base, Offset,
                                    Mem->getAddrMode());
  }
  llvm_unreachable("Unsupported operand type");
  return nullptr;
}
2342 
/// Computes the set of physical registers selected by the Include mask and
/// not rejected by the Exclude mask. Exclusion takes precedence over
/// inclusion when a register belongs to both.
SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
                                            RegSetMask Exclude) const {
  SmallBitVector Registers(RegMIPS32::Reg_NUM);

// For each register in REGMIPS32_TABLE: set its bit for every matching
// Include category, then clear it for every matching Exclude category. All
// Include tests run before any Exclude test, so Exclude wins on overlap.
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  if (scratch && (Include & RegSet_CallerSave))                                \
    Registers[RegMIPS32::val] = true;                                          \
  if (preserved && (Include & RegSet_CalleeSave))                              \
    Registers[RegMIPS32::val] = true;                                          \
  if (stackptr && (Include & RegSet_StackPointer))                             \
    Registers[RegMIPS32::val] = true;                                          \
  if (frameptr && (Include & RegSet_FramePointer))                             \
    Registers[RegMIPS32::val] = true;                                          \
  if (scratch && (Exclude & RegSet_CallerSave))                                \
    Registers[RegMIPS32::val] = false;                                         \
  if (preserved && (Exclude & RegSet_CalleeSave))                              \
    Registers[RegMIPS32::val] = false;                                         \
  if (stackptr && (Exclude & RegSet_StackPointer))                             \
    Registers[RegMIPS32::val] = false;                                         \
  if (frameptr && (Exclude & RegSet_FramePointer))                             \
    Registers[RegMIPS32::val] = false;

  REGMIPS32_TABLE

#undef X

  // T6/T7/T8 are withheld from allocation under NaCl sandboxing —
  // presumably reserved for sandboxing instruction sequences (see the
  // Sandboxer uses elsewhere in this file).
  if (NeedSandboxing) {
    Registers[RegMIPS32::Reg_T6] = false;
    Registers[RegMIPS32::Reg_T7] = false;
    Registers[RegMIPS32::Reg_T8] = false;
  }
  return Registers;
}
2377 
/// Lowers an alloca instruction. Constant-size allocas are folded into the
/// frame (either at a known SP offset or via the frame pointer); variable
/// sized allocas adjust SP at runtime after rounding the size up to the
/// stack alignment.
void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
  // Conservatively require the stack to be aligned. Some stack adjustment
  // operations implemented below assume that the stack is aligned before the
  // alloca. All the alloca code ensures that the stack alignment is preserved
  // after the alloca. The stack alignment restriction can be relaxed in some
  // cases.
  NeedsStackAlignment = true;

  // For default align=0, set it to the real value 1, to avoid any
  // bit-manipulation problems below.
  const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());

  // LLVM enforces power of 2 alignment.
  assert(llvm::isPowerOf2_32(AlignmentParam));
  assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));

  const uint32_t Alignment =
      std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
  const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
  const bool OptM1 = Func->getOptLevel() == Opt_m1;
  const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
  // A frame pointer is required whenever the SP offset of the alloca cannot
  // be known statically (over-alignment, unknown offset, or Om1).
  const bool UseFramePointer =
      hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;

  if (UseFramePointer)
    setHasFramePointer();

  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);

  Variable *Dest = Instr->getDest();
  Operand *TotalSize = Instr->getSizeInBytes();

  if (const auto *ConstantTotalSize =
          llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
    // Round the requested size up to the required alignment and account for
    // it in the fixed portion of the frame.
    const uint32_t Value =
        Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
    FixedAllocaSizeBytes += Value;
    // Constant size alloca.
    if (!UseFramePointer) {
      // If we don't need a Frame Pointer, this alloca has a known offset to the
      // stack pointer. We don't need adjust the stack pointer, nor assign any
      // value to Dest, as Dest is rematerializable.
      assert(Dest->isRematerializable());
      Context.insert<InstFakeDef>(Dest);
      return;
    }

    if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
      CurrentAllocaOffset =
          Utils::applyAlignment(CurrentAllocaOffset, Alignment);
    }
    // Dest = SP + CurrentAllocaOffset, then reserve Value bytes for the next
    // constant alloca.
    auto *T = I32Reg();
    _addiu(T, SP, CurrentAllocaOffset);
    _mov(Dest, T);
    CurrentAllocaOffset += Value;
    return;

  } else {
    // Non-constant sizes need to be adjusted to the next highest multiple of
    // the required alignment at runtime.
    VariableAllocaUsed = true;
    VariableAllocaAlignBytes = AlignmentParam;
    Variable *AlignAmount;
    auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    // T3 = (TotalSize + align - 1) & -align  — round size up to stack
    // alignment; T4 = SP - T3 is the new stack top.
    _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
    _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
    _and(T3, T1, T2);
    _subu(T4, SP, T3);
    if (Instr->getAlignInBytes()) {
      // Over-align the resulting pointer itself by masking with -align.
      AlignAmount =
          legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
      _and(T5, T4, AlignAmount);
      _mov(Dest, T5);
    } else {
      _mov(Dest, T4);
    }
    // Commit the new SP; the sandboxed form masks SP as required by NaCl.
    if (OptM1)
      _mov(SP, Dest);
    else
      Sandboxer(this).reset_sp(Dest);
    return;
  }
}
2466 
/// Lowers 64-bit integer arithmetic by operating on the i64 value as a
/// lo/hi pair of 32-bit registers. Division and remainder are expected to
/// have been prelowered to runtime calls; FP ops with i64 type are invalid.
void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
                                        Variable *Dest, Operand *Src0,
                                        Operand *Src1) {
  InstArithmetic::OpKind Op = Instr->getOp();
  auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
  auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
  Variable *Src0LoR = nullptr;
  Variable *Src1LoR = nullptr;
  Variable *Src0HiR = nullptr;
  Variable *Src1HiR = nullptr;

  switch (Op) {
  case InstArithmetic::_num:
    llvm::report_fatal_error("Unknown arithmetic operator");
    return;
  case InstArithmetic::Add: {
    // lo = lo0 + lo1; carry = (lo < lo0); hi = hi0 + hi1 + carry.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
         *T_Hi2 = I32Reg();
    _addu(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    // Unsigned overflow occurred iff the sum is smaller than an addend.
    _sltu(T_Carry, T_Lo, Src0LoR);
    _addu(T_Hi, T_Carry, Src0HiR);
    _addu(T_Hi2, Src1HiR, T_Hi);
    _mov(DestHi, T_Hi2);
    return;
  }
  case InstArithmetic::And: {
    // Bitwise ops act independently on each half.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _and(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _and(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Sub: {
    // lo = lo0 - lo1; borrow = (lo0 < lo1); hi = hi0 - (hi1 + borrow).
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
         *T_Hi2 = I32Reg();
    _subu(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _sltu(T_Borrow, Src0LoR, Src1LoR);
    _addu(T_Hi, T_Borrow, Src1HiR);
    _subu(T_Hi2, Src0HiR, T_Hi);
    _mov(DestHi, T_Hi2);
    return;
  }
  case InstArithmetic::Or: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _or(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _or(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Xor: {
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
    _xor(T_Lo, Src0LoR, Src1LoR);
    _mov(DestLo, T_Lo);
    _xor(T_Hi, Src0HiR, Src1HiR);
    _mov(DestHi, T_Hi);
    return;
  }
  case InstArithmetic::Mul: {
    // TODO(rkotler): Make sure that mul has the side effect of clobbering
    // LO, HI. Check for any other LO, HI quirkiness in this section.
    // 64x64 multiply via partial products:
    //   {hi,lo} = lo0*lo1 (multu), then hi += hi0*lo1 + lo0*hi1.
    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));
    Src1HiR = legalizeToReg(hiOperand(Src1));
    auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
    auto *T1 = I32Reg(), *T2 = I32Reg();
    auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
    _multu(T_Lo, Src0LoR, Src1LoR);
    // multu also defines HI; record that for liveness.
    Context.insert<InstFakeDef>(T_Hi, T_Lo);
    _mflo(T1, T_Lo);
    _mfhi(T2, T_Hi);
    _mov(DestLo, T1);
    _mul(TM1, Src0HiR, Src1LoR);
    _mul(TM2, Src0LoR, Src1HiR);
    _addu(TM3, TM1, T2);
    _addu(TM4, TM3, TM2);
    _mov(DestHi, TM4);
    return;
  }
  case InstArithmetic::Shl: {
    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();

    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      // Constant shift amount: pick a specialized sequence per range.
      // NOTE(review): a shift amount of 0 falls into the "< INT32_BITS"
      // branch, where "srl lo, 32-0" relies on hardware masking — confirm
      // zero shifts are folded away before reaching this lowering.
      Src0LoR = legalizeToReg(loOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount == 1) {
        // Shift-left-by-one as an add-with-carry of the value to itself.
        Src0HiR = legalizeToReg(hiOperand(Src0));
        _addu(T_Lo, Src0LoR, Src0LoR);
        _sltu(T1, T_Lo, Src0LoR);
        _addu(T2, T1, Src0HiR);
        _addu(T_Hi, Src0HiR, T2);
      } else if (ShiftAmount < INT32_BITS) {
        // hi = (lo >> (32-s)) | (hi << s); lo = lo << s.
        Src0HiR = legalizeToReg(hiOperand(Src0));
        _srl(T1, Src0LoR, INT32_BITS - ShiftAmount);
        _sll(T2, Src0HiR, ShiftAmount);
        _or(T_Hi, T1, T2);
        _sll(T_Lo, Src0LoR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        // Whole-word move: hi = lo, lo = 0.
        _addiu(T_Lo, getZero(), 0);
        _mov(T_Hi, Src0LoR);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS);
        _addiu(T_Lo, getZero(), 0);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    // Variable shift: compute the (shift < 32) result branchlessly, then use
    // bit 5 of the shift amount with movn to select the (shift >= 32) result.
    _sllv(T1, Src0HiR, Src1LoR);
    _not(T2, Src1LoR);
    _srl(T3, Src0LoR, 1);
    _srlv(T4, T3, T2);
    _or(T_Hi, T1, T4);
    _sllv(T_Lo, Src0LoR, Src1LoR);

    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Hi, T_Lo, T5);
    _movn(T1_Lo, getZero(), T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
  case InstArithmetic::Lshr: {

    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();

    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      // Constant logical shift right; zero-fills the vacated high bits.
      Src0HiR = legalizeToReg(hiOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount < INT32_BITS) {
        // lo = (hi << (32-s)) | (lo >> s); hi = hi >> s.
        Src0LoR = legalizeToReg(loOperand(Src0));
        _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
        _srl(T2, Src0LoR, ShiftAmount);
        _or(T_Lo, T1, T2);
        _srl(T_Hi, Src0HiR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        _mov(T_Lo, Src0HiR);
        _addiu(T_Hi, getZero(), 0);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
        _addiu(T_Hi, getZero(), 0);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    // Variable shift: mirror of the Shl sequence, selecting on bit 5 of the
    // shift amount for the (shift >= 32) case.
    _srlv(T1, Src0LoR, Src1LoR);
    _not(T2, Src1LoR);
    _sll(T3, Src0HiR, 1);
    _sllv(T4, T3, T2);
    _or(T_Lo, T1, T4);
    _srlv(T_Hi, Src0HiR, Src1LoR);

    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Lo, T_Hi, T5);
    _movn(T1_Hi, getZero(), T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
  case InstArithmetic::Ashr: {

    auto *T_Lo = I32Reg();
    auto *T_Hi = I32Reg();
    auto *T1_Lo = I32Reg();
    auto *T1_Hi = I32Reg();
    auto *T1 = I32Reg();
    auto *T2 = I32Reg();
    auto *T3 = I32Reg();
    auto *T4 = I32Reg();
    auto *T5 = I32Reg();
    auto *T6 = I32Reg();

    if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
      // Constant arithmetic shift right; replicates the sign bit of hi.
      Src0HiR = legalizeToReg(hiOperand(Src0));
      int64_t ShiftAmount = Const->getValue();
      if (ShiftAmount < INT32_BITS) {
        Src0LoR = legalizeToReg(loOperand(Src0));
        _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
        _srl(T2, Src0LoR, ShiftAmount);
        _or(T_Lo, T1, T2);
        _sra(T_Hi, Src0HiR, ShiftAmount);
      } else if (ShiftAmount == INT32_BITS) {
        // hi becomes all sign bits; lo takes the old hi.
        _sra(T_Hi, Src0HiR, INT32_BITS - 1);
        _mov(T_Lo, Src0HiR);
      } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
        _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
        _sra(T_Hi, Src0HiR, INT32_BITS - 1);
      }
      _mov(DestLo, T_Lo);
      _mov(DestHi, T_Hi);
      return;
    }

    Src0LoR = legalizeToReg(loOperand(Src0));
    Src1LoR = legalizeToReg(loOperand(Src1));
    Src0HiR = legalizeToReg(hiOperand(Src0));

    // Variable shift: like Lshr but srav preserves the sign, and the
    // (shift >= 32) hi result is the sign extension of the original hi.
    _srlv(T1, Src0LoR, Src1LoR);
    _not(T2, Src1LoR);
    _sll(T3, Src0HiR, 1);
    _sllv(T4, T3, T2);
    _or(T_Lo, T1, T4);
    _srav(T_Hi, Src0HiR, Src1LoR);

    _mov(T1_Hi, T_Hi);
    _mov(T1_Lo, T_Lo);
    _andi(T5, Src1LoR, INT32_BITS);
    _movn(T1_Lo, T_Hi, T5);
    _sra(T6, Src0HiR, INT32_BITS - 1);
    _movn(T1_Hi, T6, T5);
    _mov(DestHi, T1_Hi);
    _mov(DestLo, T1_Lo);
    return;
  }
  case InstArithmetic::Fadd:
  case InstArithmetic::Fsub:
  case InstArithmetic::Fmul:
  case InstArithmetic::Fdiv:
  case InstArithmetic::Frem:
    llvm::report_fatal_error("FP instruction with i64 type");
    return;
  case InstArithmetic::Udiv:
  case InstArithmetic::Sdiv:
  case InstArithmetic::Urem:
  case InstArithmetic::Srem:
    llvm::report_fatal_error("64-bit div and rem should have been prelowered");
    return;
  }
}
2751 
lowerArithmetic(const InstArithmetic * Instr)2752 void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
2753   Variable *Dest = Instr->getDest();
2754 
2755   if (Dest->isRematerializable()) {
2756     Context.insert<InstFakeDef>(Dest);
2757     return;
2758   }
2759 
2760   // We need to signal all the UnimplementedLoweringError errors before any
2761   // legalization into new variables, otherwise Om1 register allocation may fail
2762   // when it sees variables that are defined but not used.
2763   Type DestTy = Dest->getType();
2764   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2765   Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2766   if (DestTy == IceType_i64) {
2767     lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1);
2768     return;
2769   }
2770   if (isVectorType(Dest->getType())) {
2771     llvm::report_fatal_error("Arithmetic: Destination type is vector");
2772     return;
2773   }
2774 
2775   Variable *T = makeReg(Dest->getType());
2776   Variable *Src0R = legalizeToReg(Src0);
2777   Variable *Src1R = nullptr;
2778   uint32_t Value = 0;
2779   bool IsSrc1Imm16 = false;
2780 
2781   switch (Instr->getOp()) {
2782   case InstArithmetic::Add:
2783   case InstArithmetic::Sub: {
2784     auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2785     if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
2786       IsSrc1Imm16 = true;
2787       Value = Const32->getValue();
2788     } else {
2789       Src1R = legalizeToReg(Src1);
2790     }
2791     break;
2792   }
2793   case InstArithmetic::And:
2794   case InstArithmetic::Or:
2795   case InstArithmetic::Xor:
2796   case InstArithmetic::Shl:
2797   case InstArithmetic::Lshr:
2798   case InstArithmetic::Ashr: {
2799     auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2800     if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
2801       IsSrc1Imm16 = true;
2802       Value = Const32->getValue();
2803     } else {
2804       Src1R = legalizeToReg(Src1);
2805     }
2806     break;
2807   }
2808   default:
2809     Src1R = legalizeToReg(Src1);
2810     break;
2811   }
2812   constexpr uint32_t DivideByZeroTrapCode = 7;
2813 
2814   switch (Instr->getOp()) {
2815   case InstArithmetic::_num:
2816     break;
2817   case InstArithmetic::Add: {
2818     auto *T0R = Src0R;
2819     auto *T1R = Src1R;
2820     if (Dest->getType() != IceType_i32) {
2821       T0R = makeReg(IceType_i32);
2822       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2823       if (!IsSrc1Imm16) {
2824         T1R = makeReg(IceType_i32);
2825         lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2826       }
2827     }
2828     if (IsSrc1Imm16) {
2829       _addiu(T, T0R, Value);
2830     } else {
2831       _addu(T, T0R, T1R);
2832     }
2833     _mov(Dest, T);
2834     return;
2835   }
2836   case InstArithmetic::And:
2837     if (IsSrc1Imm16) {
2838       _andi(T, Src0R, Value);
2839     } else {
2840       _and(T, Src0R, Src1R);
2841     }
2842     _mov(Dest, T);
2843     return;
2844   case InstArithmetic::Or:
2845     if (IsSrc1Imm16) {
2846       _ori(T, Src0R, Value);
2847     } else {
2848       _or(T, Src0R, Src1R);
2849     }
2850     _mov(Dest, T);
2851     return;
2852   case InstArithmetic::Xor:
2853     if (IsSrc1Imm16) {
2854       _xori(T, Src0R, Value);
2855     } else {
2856       _xor(T, Src0R, Src1R);
2857     }
2858     _mov(Dest, T);
2859     return;
2860   case InstArithmetic::Sub: {
2861     auto *T0R = Src0R;
2862     auto *T1R = Src1R;
2863     if (Dest->getType() != IceType_i32) {
2864       T0R = makeReg(IceType_i32);
2865       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2866       if (!IsSrc1Imm16) {
2867         T1R = makeReg(IceType_i32);
2868         lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2869       }
2870     }
2871     if (IsSrc1Imm16) {
2872       _addiu(T, T0R, -Value);
2873     } else {
2874       _subu(T, T0R, T1R);
2875     }
2876     _mov(Dest, T);
2877     return;
2878   }
2879   case InstArithmetic::Mul: {
2880     _mul(T, Src0R, Src1R);
2881     _mov(Dest, T);
2882     return;
2883   }
2884   case InstArithmetic::Shl: {
2885     if (IsSrc1Imm16) {
2886       _sll(T, Src0R, Value);
2887     } else {
2888       _sllv(T, Src0R, Src1R);
2889     }
2890     _mov(Dest, T);
2891     return;
2892   }
2893   case InstArithmetic::Lshr: {
2894     auto *T0R = Src0R;
2895     auto *T1R = Src1R;
2896     if (Dest->getType() != IceType_i32) {
2897       T0R = makeReg(IceType_i32);
2898       lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2899       if (!IsSrc1Imm16) {
2900         T1R = makeReg(IceType_i32);
2901         lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2902       }
2903     }
2904     if (IsSrc1Imm16) {
2905       _srl(T, T0R, Value);
2906     } else {
2907       _srlv(T, T0R, T1R);
2908     }
2909     _mov(Dest, T);
2910     return;
2911   }
2912   case InstArithmetic::Ashr: {
2913     auto *T0R = Src0R;
2914     auto *T1R = Src1R;
2915     if (Dest->getType() != IceType_i32) {
2916       T0R = makeReg(IceType_i32);
2917       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2918       if (!IsSrc1Imm16) {
2919         T1R = makeReg(IceType_i32);
2920         lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2921       }
2922     }
2923     if (IsSrc1Imm16) {
2924       _sra(T, T0R, Value);
2925     } else {
2926       _srav(T, T0R, T1R);
2927     }
2928     _mov(Dest, T);
2929     return;
2930   }
2931   case InstArithmetic::Udiv: {
2932     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2933     auto *T0R = Src0R;
2934     auto *T1R = Src1R;
2935     if (Dest->getType() != IceType_i32) {
2936       T0R = makeReg(IceType_i32);
2937       lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2938       T1R = makeReg(IceType_i32);
2939       lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2940     }
2941     _divu(T_Zero, T0R, T1R);
2942     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2943     _mflo(T, T_Zero);
2944     _mov(Dest, T);
2945     return;
2946   }
2947   case InstArithmetic::Sdiv: {
2948     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2949     auto *T0R = Src0R;
2950     auto *T1R = Src1R;
2951     if (Dest->getType() != IceType_i32) {
2952       T0R = makeReg(IceType_i32);
2953       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2954       T1R = makeReg(IceType_i32);
2955       lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2956     }
2957     _div(T_Zero, T0R, T1R);
2958     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2959     _mflo(T, T_Zero);
2960     _mov(Dest, T);
2961     return;
2962   }
2963   case InstArithmetic::Urem: {
2964     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2965     auto *T0R = Src0R;
2966     auto *T1R = Src1R;
2967     if (Dest->getType() != IceType_i32) {
2968       T0R = makeReg(IceType_i32);
2969       lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2970       T1R = makeReg(IceType_i32);
2971       lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2972     }
2973     _divu(T_Zero, T0R, T1R);
2974     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2975     _mfhi(T, T_Zero);
2976     _mov(Dest, T);
2977     return;
2978   }
2979   case InstArithmetic::Srem: {
2980     auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2981     auto *T0R = Src0R;
2982     auto *T1R = Src1R;
2983     if (Dest->getType() != IceType_i32) {
2984       T0R = makeReg(IceType_i32);
2985       lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2986       T1R = makeReg(IceType_i32);
2987       lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2988     }
2989     _div(T_Zero, T0R, T1R);
2990     _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2991     _mfhi(T, T_Zero);
2992     _mov(Dest, T);
2993     return;
2994   }
2995   case InstArithmetic::Fadd: {
2996     if (DestTy == IceType_f32) {
2997       _add_s(T, Src0R, Src1R);
2998       _mov(Dest, T);
2999       return;
3000     }
3001     if (DestTy == IceType_f64) {
3002       _add_d(T, Src0R, Src1R);
3003       _mov(Dest, T);
3004       return;
3005     }
3006     break;
3007   }
3008   case InstArithmetic::Fsub:
3009     if (DestTy == IceType_f32) {
3010       _sub_s(T, Src0R, Src1R);
3011       _mov(Dest, T);
3012       return;
3013     }
3014     if (DestTy == IceType_f64) {
3015       _sub_d(T, Src0R, Src1R);
3016       _mov(Dest, T);
3017       return;
3018     }
3019     break;
3020   case InstArithmetic::Fmul:
3021     if (DestTy == IceType_f32) {
3022       _mul_s(T, Src0R, Src1R);
3023       _mov(Dest, T);
3024       return;
3025     }
3026     if (DestTy == IceType_f64) {
3027       _mul_d(T, Src0R, Src1R);
3028       _mov(Dest, T);
3029       return;
3030     }
3031     break;
3032   case InstArithmetic::Fdiv:
3033     if (DestTy == IceType_f32) {
3034       _div_s(T, Src0R, Src1R);
3035       _mov(Dest, T);
3036       return;
3037     }
3038     if (DestTy == IceType_f64) {
3039       _div_d(T, Src0R, Src1R);
3040       _mov(Dest, T);
3041       return;
3042     }
3043     break;
3044   case InstArithmetic::Frem:
3045     llvm::report_fatal_error("frem should have been prelowered.");
3046     break;
3047   }
3048   llvm::report_fatal_error("Unknown arithmetic operator");
3049 }
3050 
lowerAssign(const InstAssign * Instr)3051 void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3052   Variable *Dest = Instr->getDest();
3053 
3054   if (Dest->isRematerializable()) {
3055     Context.insert<InstFakeDef>(Dest);
3056     return;
3057   }
3058 
3059   // Source type may not be same as destination
3060   if (isVectorType(Dest->getType())) {
3061     Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3062     auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3063     for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3064       auto *DCont = DstVec->getContainers()[i];
3065       auto *SCont =
3066           legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3067       auto *TReg = makeReg(IceType_i32);
3068       _mov(TReg, SCont);
3069       _mov(DCont, TReg);
3070     }
3071     return;
3072   }
3073   Operand *Src0 = Instr->getSrc(0);
3074   assert(Dest->getType() == Src0->getType());
3075   if (Dest->getType() == IceType_i64) {
3076     Src0 = legalizeUndef(Src0);
3077     Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3078     Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3079     auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3080     auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3081     auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3082     _mov(T_Lo, Src0Lo);
3083     _mov(DestLo, T_Lo);
3084     _mov(T_Hi, Src0Hi);
3085     _mov(DestHi, T_Hi);
3086     return;
3087   }
3088   Operand *SrcR;
3089   if (Dest->hasReg()) {
3090     // If Dest already has a physical register, then legalize the Src operand
3091     // into a Variable with the same register assignment.  This especially
3092     // helps allow the use of Flex operands.
3093     SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3094   } else {
3095     // Dest could be a stack operand. Since we could potentially need
3096     // to do a Store (and store can only have Register operands),
3097     // legalize this to a register.
3098     SrcR = legalize(Src0, Legal_Reg);
3099   }
3100   _mov(Dest, SrcR);
3101 }
3102 
/// Lowers a branch instruction.  An unconditional branch emits a single jump.
/// For a conditional branch, if the boolean condition was produced by an icmp
/// tracked in the Computations folding map, the comparison is fused directly
/// into the branch instead of first materializing a 0/1 boolean.
///
/// NOTE(review): throughout this function the emitted branch condition is the
/// *negation* of the icmp condition (e.g. InstIcmp::Eq emits a NE/NEZ branch).
/// This looks intentional — presumably the conditional _br transfers to
/// TargetFalse when its condition holds — but confirm against the
/// InstMIPS32Br emission code.
void TargetMIPS32::lowerBr(const InstBr *Instr) {
  if (Instr->isUnconditional()) {
    _br(Instr->getTargetUnconditional());
    return;
  }
  CfgNode *TargetTrue = Instr->getTargetTrue();
  CfgNode *TargetFalse = Instr->getTargetFalse();
  Operand *Boolean = Instr->getCondition();
  const Inst *Producer = Computations.getProducerOf(Boolean);
  if (Producer == nullptr) {
    // Since we don't know the producer of this boolean we will assume its
    // producer will keep it in positive logic and just emit beqz with this
    // Boolean as an operand.
    auto *BooleanR = legalizeToReg(Boolean);
    _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ);
    return;
  }
  if (Producer->getKind() == Inst::Icmp) {
    const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer);
    Operand *Src0 = CompareInst->getSrc(0);
    Operand *Src1 = CompareInst->getSrc(1);
    const Type Src0Ty = Src0->getType();
    assert(Src0Ty == Src1->getType());

    // For i64 operands, SrcNR holds the low word and SrcNHiR the high word;
    // narrower integer operands are widened to i32 below.
    Variable *Src0R = nullptr;
    Variable *Src1R = nullptr;
    Variable *Src0HiR = nullptr;
    Variable *Src1HiR = nullptr;
    if (Src0Ty == IceType_i64) {
      Src0R = legalizeToReg(loOperand(Src0));
      Src1R = legalizeToReg(loOperand(Src1));
      Src0HiR = legalizeToReg(hiOperand(Src0));
      Src1HiR = legalizeToReg(hiOperand(Src1));
    } else {
      auto *Src0RT = legalizeToReg(Src0);
      auto *Src1RT = legalizeToReg(Src1);
      // Sign/Zero extend the source operands
      if (Src0Ty != IceType_i32) {
        InstCast::OpKind CastKind;
        // Signed comparisons (and Eq/Ne) sign-extend; unsigned ones
        // zero-extend, so the 32-bit compare sees the correct value.
        switch (CompareInst->getCondition()) {
        case InstIcmp::Eq:
        case InstIcmp::Ne:
        case InstIcmp::Sgt:
        case InstIcmp::Sge:
        case InstIcmp::Slt:
        case InstIcmp::Sle:
          CastKind = InstCast::Sext;
          break;
        default:
          CastKind = InstCast::Zext;
          break;
        }
        Src0R = makeReg(IceType_i32);
        Src1R = makeReg(IceType_i32);
        lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
        lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
      } else {
        Src0R = Src0RT;
        Src1R = Src1RT;
      }
    }
    auto *DestT = makeReg(IceType_i32);

    // In the i64 cases below, movz rd, rs, rt copies rs into rd iff rt == 0
    // (MIPS ISA), so the low-word comparison result is selected only when the
    // high words are equal (the hi-word XOR is zero).
    switch (CompareInst->getCondition()) {
    default:
      llvm_unreachable("unexpected condition");
      return;
    case InstIcmp::Eq: {
      if (Src0Ty == IceType_i64) {
        // (lo0 ^ lo1) | (hi0 ^ hi1) is zero iff the 64-bit values are equal.
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _xor(T2, Src0R, Src1R);
        _or(T3, T1, T2);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE);
      }
      return;
    }
    case InstIcmp::Ne: {
      if (Src0Ty == IceType_i64) {
        // Same OR-of-XORs as Eq, with the branch polarity flipped.
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _xor(T2, Src0R, Src1R);
        _or(T3, T1, T2);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      } else {
        _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ);
      }
      return;
    }
    case InstIcmp::Ugt: {
      if (Src0Ty == IceType_i64) {
        // T3 = !(hi1 < hi0); T5 = !(lo1 < lo0); pick the low-word result when
        // the high words are equal (T1 == 0).  Result != 0 means NOT (a > b).
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        auto *T4 = I32Reg();
        auto *T5 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _sltu(T2, Src1HiR, Src0HiR);
        _xori(T3, T2, 1);
        _sltu(T4, Src1R, Src0R);
        _xori(T5, T4, 1);
        _movz(T3, T5, T1);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _sltu(DestT, Src1R, Src0R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      }
      return;
    }
    case InstIcmp::Uge: {
      if (Src0Ty == IceType_i64) {
        // Result != 0 means (a < b), i.e. NOT (a >= b).
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _sltu(T2, Src0HiR, Src1HiR);
        _sltu(T3, Src0R, Src1R);
        _movz(T2, T3, T1);
        _mov(DestT, T2);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _sltu(DestT, Src0R, Src1R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      }
      return;
    }
    case InstIcmp::Ult: {
      if (Src0Ty == IceType_i64) {
        // T3 = !(hi0 < hi1); T5 = !(lo0 < lo1); result != 0 means NOT (a < b).
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        auto *T4 = I32Reg();
        auto *T5 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _sltu(T2, Src0HiR, Src1HiR);
        _xori(T3, T2, 1);
        _sltu(T4, Src0R, Src1R);
        _xori(T5, T4, 1);
        _movz(T3, T5, T1);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _sltu(DestT, Src0R, Src1R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      }
      return;
    }
    case InstIcmp::Ule: {
      if (Src0Ty == IceType_i64) {
        // Result != 0 means (b < a), i.e. NOT (a <= b).
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _sltu(T2, Src1HiR, Src0HiR);
        _sltu(T3, Src1R, Src0R);
        _movz(T2, T3, T1);
        _mov(DestT, T2);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _sltu(DestT, Src1R, Src0R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      }
      return;
    }
    case InstIcmp::Sgt: {
      if (Src0Ty == IceType_i64) {
        // Signed multiword compare: high words use signed slt, low words use
        // unsigned sltu (low words compare as unsigned in a signed compare).
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        auto *T4 = I32Reg();
        auto *T5 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _slt(T2, Src1HiR, Src0HiR);
        _xori(T3, T2, 1);
        _sltu(T4, Src1R, Src0R);
        _xori(T5, T4, 1);
        _movz(T3, T5, T1);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _slt(DestT, Src1R, Src0R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      }
      return;
    }
    case InstIcmp::Sge: {
      if (Src0Ty == IceType_i64) {
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _slt(T2, Src0HiR, Src1HiR);
        _sltu(T3, Src0R, Src1R);
        _movz(T2, T3, T1);
        _mov(DestT, T2);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _slt(DestT, Src0R, Src1R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      }
      return;
    }
    case InstIcmp::Slt: {
      if (Src0Ty == IceType_i64) {
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        auto *T4 = I32Reg();
        auto *T5 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _slt(T2, Src0HiR, Src1HiR);
        _xori(T3, T2, 1);
        _sltu(T4, Src0R, Src1R);
        _xori(T5, T4, 1);
        _movz(T3, T5, T1);
        _mov(DestT, T3);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _slt(DestT, Src0R, Src1R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
      }
      return;
    }
    case InstIcmp::Sle: {
      if (Src0Ty == IceType_i64) {
        auto *T1 = I32Reg();
        auto *T2 = I32Reg();
        auto *T3 = I32Reg();
        _xor(T1, Src0HiR, Src1HiR);
        _slt(T2, Src1HiR, Src0HiR);
        _sltu(T3, Src1R, Src0R);
        _movz(T2, T3, T1);
        _mov(DestT, T2);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      } else {
        _slt(DestT, Src1R, Src0R);
        _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
      }
      return;
    }
    }
  }
}
3355 
/// Lowers a call instruction: classifies each argument into GPRs, FP
/// registers, or stack slots per the calling convention (CC), stores
/// stack-passed arguments into the outgoing argument area, emits the
/// (sandboxed) call, and copies the return register(s) into Dest.
void TargetMIPS32::lowerCall(const InstCall *Instr) {
  CfgVector<Variable *> RegArgs;
  NeedsStackAlignment = true;

  //  Assign arguments to registers and stack. Also reserve stack.
  TargetMIPS32::CallingConv CC;

  // Pair of Arg Operand -> GPR number assignments.
  llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
  llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
  // Pair of Arg Operand -> stack offset.
  llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
  // Stack offsets start past the first 16 bytes — the argument build area
  // reserved for the four argument registers (see also the addiu_sp
  // adjustment below when variable alloca is used).
  size_t ParameterAreaSizeBytes = 16;

  // Classify each argument operand according to the location where the
  // argument is passed.

  // v4f32 is returned through stack. $4 is setup by the caller and passed as
  // first argument implicitly. Callee then copies the return vector at $4.
  SizeT ArgNum = 0;
  Variable *Dest = Instr->getDest();
  Variable *RetVecFloat = nullptr;
  if (Dest && isVectorFloatingType(Dest->getType())) {
    // Allocate a 16-byte stack buffer for the returned vector and pass its
    // address as the implicit first argument in $a0.
    ArgNum = 1;
    CC.discardReg(RegMIPS32::Reg_A0);
    RetVecFloat = Func->makeVariable(IceType_i32);
    auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
    constexpr SizeT Alignment = 4;
    lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
    RegArgs.emplace_back(
        legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
  }

  for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Instr->getArg(i));
    const Type Ty = Arg->getType();
    bool InReg = false;
    RegNumT Reg;

    InReg = CC.argInReg(Ty, i, &Reg);

    if (!InReg) {
      // Stack-passed argument: record its offset; vectors go one i32
      // container at a time, aligned as i64.
      if (isVectorType(Ty)) {
        auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
        for (Variable *Elem : ArgVec->getContainers()) {
          StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
          ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
        }
      } else {
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
        StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
      }
      ++ArgNum;
      continue;
    }

    if (isVectorType(Ty)) {
      // Vector in registers: the first two containers always go in GPRs.
      auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
      Operand *Elem0 = ArgVec->getContainers()[0];
      Operand *Elem1 = ArgVec->getContainers()[1];
      GPRArgs.push_back(
          std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
      GPRArgs.push_back(
          std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
      Operand *Elem2 = ArgVec->getContainers()[2];
      Operand *Elem3 = ArgVec->getContainers()[3];
      // First argument is passed in $4:$5:$6:$7
      // Second and rest arguments are passed in $6:$7:stack:stack
      if (ArgNum == 0) {
        GPRArgs.push_back(
            std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
        GPRArgs.push_back(
            std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
      } else {
        ParameterAreaSizeBytes =
            applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
        StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
        StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
        ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
      }
    } else if (Ty == IceType_i64) {
      // i64 occupies an even/odd GPR pair.
      Operand *Lo = loOperand(Arg);
      Operand *Hi = hiOperand(Arg);
      GPRArgs.push_back(
          std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
      GPRArgs.push_back(
          std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
    } else if (isScalarIntegerType(Ty)) {
      GPRArgs.push_back(std::make_pair(Arg, Reg));
    } else {
      FPArgs.push_back(std::make_pair(Arg, Reg));
    }
    ++ArgNum;
  }

  // Adjust the parameter area so that the stack is aligned. It is assumed that
  // the stack is already aligned at the start of the calling sequence.
  ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);

  // Copy arguments that are passed on the stack to the appropriate stack
  // locations.
  Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
  for (auto &StackArg : StackArgs) {
    ConstantInteger32 *Loc =
        llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
    Type Ty = StackArg.first->getType();
    OperandMIPS32Mem *Addr;
    constexpr bool SignExt = false;
    // If the offset does not fit a memory operand immediate, materialize
    // SP + offset into a new base register first.
    if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
      Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
    } else {
      Variable *NewBase = Func->makeVariable(SP->getType());
      lowerArithmetic(
          InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
      Addr = formMemoryOperand(NewBase, Ty);
    }
    lowerStore(InstStore::create(Func, StackArg.first, Addr));
  }

  // Generate the call instruction.  Assign its result to a temporary with high
  // register allocation weight.

  // ReturnReg doubles as ReturnRegLo as necessary.
  Variable *ReturnReg = nullptr;
  Variable *ReturnRegHi = nullptr;
  if (Dest) {
    switch (Dest->getType()) {
    case IceType_NUM:
      llvm_unreachable("Invalid Call dest type");
      return;
    case IceType_void:
      break;
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32:
      // Scalar integers come back in $v0.
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
      break;
    case IceType_i64:
      // i64 comes back in the $v0/$v1 pair.
      ReturnReg = I32Reg(RegMIPS32::Reg_V0);
      ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
      break;
    case IceType_f32:
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
      break;
    case IceType_f64:
      ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
      break;
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32: {
      // Integer vectors come back in consecutive registers starting at $v0.
      ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
      auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
      RetVec->initVecElement(Func);
      for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
        auto *Var = RetVec->getContainers()[i];
        Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
      }
      break;
    }
    case IceType_v4f32:
      // Float vectors are returned through the stack buffer set up above;
      // $v0 is only used as a plain i32 here.
      ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
      break;
    }
  }
  Operand *CallTarget = Instr->getCallTarget();
  // Allow ConstantRelocatable to be left alone as a direct call,
  // but force other constants like ConstantInteger32 to be in
  // a register and make it an indirect call.
  if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
    CallTarget = legalize(CallTarget, Legal_Reg);
  }

  // Copy arguments to be passed in registers to the appropriate registers.
  for (auto &FPArg : FPArgs) {
    RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
  }
  for (auto &GPRArg : GPRArgs) {
    RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
  }

  // Generate a FakeUse of register arguments so that they do not get dead code
  // eliminated as a result of the FakeKill of scratch registers after the call.
  // These fake-uses need to be placed here to avoid argument registers from
  // being used during the legalizeToReg() calls above.
  for (auto *RegArg : RegArgs) {
    Context.insert<InstFakeUse>(RegArg);
  }

  // If variable alloca is used the extra 16 bytes for argument build area
  // will be allocated on stack before a call.
  if (VariableAllocaUsed)
    Sandboxer(this).addiu_sp(-MaxOutArgsSizeBytes);

  Inst *NewCall;

  // We don't need to define the return register if it is a vector.
  // We have inserted fake defs of it just after the call.
  if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
    Variable *RetReg = nullptr;
    NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
    Context.insert(NewCall);
  } else {
    NewCall = Sandboxer(this, InstBundleLock::Opt_AlignToEnd)
                  .jal(ReturnReg, CallTarget);
  }

  if (VariableAllocaUsed)
    Sandboxer(this).addiu_sp(MaxOutArgsSizeBytes);

  // Insert a fake use of stack pointer to avoid dead code elimination of addiu
  // instruction.
  Context.insert<InstFakeUse>(SP);

  if (ReturnRegHi)
    Context.insert(InstFakeDef::create(Func, ReturnRegHi));

  if (ReturnReg) {
    // Vector returns need a FakeDef per container so each $v register is
    // considered defined by the call.
    if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      for (Variable *Var : RetVec->getContainers()) {
        Context.insert(InstFakeDef::create(Func, Var));
      }
    }
  }

  // Insert a register-kill pseudo instruction.
  Context.insert(InstFakeKill::create(Func, NewCall));

  // Generate a FakeUse to keep the call live if necessary.
  if (Instr->hasSideEffects() && ReturnReg) {
    if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      for (Variable *Var : RetVec->getContainers()) {
        Context.insert<InstFakeUse>(Var);
      }
    } else {
      Context.insert<InstFakeUse>(ReturnReg);
    }
  }

  if (Dest == nullptr)
    return;

  // Assign the result of the call to Dest.
  if (ReturnReg) {
    if (RetVecFloat) {
      // Float-vector return: load each i32 container back from the stack
      // buffer whose address we passed in $a0.
      auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
      auto *TBase = legalizeToReg(RetVecFloat);
      for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
        auto *Var = DestVecOn32->getContainers()[i];
        auto *TVar = makeReg(IceType_i32);
        OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
            Func, IceType_i32, TBase,
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
        _lw(TVar, Mem);
        _mov(Var, TVar);
      }
    } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
      // Integer-vector return: copy container-by-container.
      auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
      for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
        _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
      }
    } else if (ReturnRegHi) {
      assert(Dest->getType() == IceType_i64);
      auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
      Variable *DestLo = Dest64On32->getLo();
      Variable *DestHi = Dest64On32->getHi();
      _mov(DestLo, ReturnReg);
      _mov(DestHi, ReturnRegHi);
    } else {
      assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
             Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
             isScalarFloatingType(Dest->getType()) ||
             isVectorType(Dest->getType()));
      _mov(Dest, ReturnReg);
    }
  }
}
3641 
lowerCast(const InstCast * Instr)3642 void TargetMIPS32::lowerCast(const InstCast *Instr) {
3643   InstCast::OpKind CastKind = Instr->getCastKind();
3644   Variable *Dest = Instr->getDest();
3645   Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3646   const Type DestTy = Dest->getType();
3647   const Type Src0Ty = Src0->getType();
3648   const uint32_t ShiftAmount =
3649       (Src0Ty == IceType_i1
3650            ? INT32_BITS - 1
3651            : INT32_BITS - (CHAR_BITS * typeWidthInBytes(Src0Ty)));
3652   const uint32_t Mask =
3653       (Src0Ty == IceType_i1
3654            ? 1
3655            : (1 << (CHAR_BITS * typeWidthInBytes(Src0Ty))) - 1);
3656 
3657   if (isVectorType(DestTy)) {
3658     llvm::report_fatal_error("Cast: Destination type is vector");
3659     return;
3660   }
3661   switch (CastKind) {
3662   default:
3663     Func->setError("Cast type not supported");
3664     return;
3665   case InstCast::Sext: {
3666     if (DestTy == IceType_i64) {
3667       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3668       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3669       Variable *Src0R = legalizeToReg(Src0);
3670       Variable *T1_Lo = I32Reg();
3671       Variable *T2_Lo = I32Reg();
3672       Variable *T_Hi = I32Reg();
3673       if (Src0Ty == IceType_i1) {
3674         _sll(T1_Lo, Src0R, INT32_BITS - 1);
3675         _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
3676         _mov(DestHi, T2_Lo);
3677         _mov(DestLo, T2_Lo);
3678       } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
3679         _sll(T1_Lo, Src0R, ShiftAmount);
3680         _sra(T2_Lo, T1_Lo, ShiftAmount);
3681         _sra(T_Hi, T2_Lo, INT32_BITS - 1);
3682         _mov(DestHi, T_Hi);
3683         _mov(DestLo, T2_Lo);
3684       } else if (Src0Ty == IceType_i32) {
3685         _mov(T1_Lo, Src0R);
3686         _sra(T_Hi, T1_Lo, INT32_BITS - 1);
3687         _mov(DestHi, T_Hi);
3688         _mov(DestLo, T1_Lo);
3689       }
3690     } else {
3691       Variable *Src0R = legalizeToReg(Src0);
3692       Variable *T1 = makeReg(DestTy);
3693       Variable *T2 = makeReg(DestTy);
3694       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3695           Src0Ty == IceType_i16) {
3696         _sll(T1, Src0R, ShiftAmount);
3697         _sra(T2, T1, ShiftAmount);
3698         _mov(Dest, T2);
3699       }
3700     }
3701     break;
3702   }
3703   case InstCast::Zext: {
3704     if (DestTy == IceType_i64) {
3705       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3706       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3707       Variable *Src0R = legalizeToReg(Src0);
3708       Variable *T_Lo = I32Reg();
3709       Variable *T_Hi = I32Reg();
3710 
3711       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 || Src0Ty == IceType_i16)
3712         _andi(T_Lo, Src0R, Mask);
3713       else if (Src0Ty == IceType_i32)
3714         _mov(T_Lo, Src0R);
3715       else
3716         assert(Src0Ty != IceType_i64);
3717       _mov(DestLo, T_Lo);
3718 
3719       auto *Zero = getZero();
3720       _addiu(T_Hi, Zero, 0);
3721       _mov(DestHi, T_Hi);
3722     } else {
3723       Variable *Src0R = legalizeToReg(Src0);
3724       Variable *T = makeReg(DestTy);
3725       if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3726           Src0Ty == IceType_i16) {
3727         _andi(T, Src0R, Mask);
3728         _mov(Dest, T);
3729       }
3730     }
3731     break;
3732   }
3733   case InstCast::Trunc: {
3734     if (Src0Ty == IceType_i64)
3735       Src0 = loOperand(Src0);
3736     Variable *Src0R = legalizeToReg(Src0);
3737     Variable *T = makeReg(DestTy);
3738     switch (DestTy) {
3739     case IceType_i1:
3740       _andi(T, Src0R, 0x1);
3741       break;
3742     case IceType_i8:
3743       _andi(T, Src0R, 0xff);
3744       break;
3745     case IceType_i16:
3746       _andi(T, Src0R, 0xffff);
3747       break;
3748     default:
3749       _mov(T, Src0R);
3750       break;
3751     }
3752     _mov(Dest, T);
3753     break;
3754   }
3755   case InstCast::Fptrunc: {
3756     assert(Dest->getType() == IceType_f32);
3757     assert(Src0->getType() == IceType_f64);
3758     auto *DestR = legalizeToReg(Dest);
3759     auto *Src0R = legalizeToReg(Src0);
3760     _cvt_s_d(DestR, Src0R);
3761     _mov(Dest, DestR);
3762     break;
3763   }
3764   case InstCast::Fpext: {
3765     assert(Dest->getType() == IceType_f64);
3766     assert(Src0->getType() == IceType_f32);
3767     auto *DestR = legalizeToReg(Dest);
3768     auto *Src0R = legalizeToReg(Src0);
3769     _cvt_d_s(DestR, Src0R);
3770     _mov(Dest, DestR);
3771     break;
3772   }
3773   case InstCast::Fptosi:
3774   case InstCast::Fptoui: {
3775     if (llvm::isa<Variable64On32>(Dest)) {
3776       llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
3777       return;
3778     }
3779     if (DestTy != IceType_i64) {
3780       if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
3781         Variable *Src0R = legalizeToReg(Src0);
3782         Variable *FTmp = makeReg(IceType_f32);
3783         _trunc_w_s(FTmp, Src0R);
3784         _mov(Dest, FTmp);
3785         return;
3786       }
3787       if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
3788         Variable *Src0R = legalizeToReg(Src0);
3789         Variable *FTmp = makeReg(IceType_f64);
3790         _trunc_w_d(FTmp, Src0R);
3791         _mov(Dest, FTmp);
3792         return;
3793       }
3794     }
3795     llvm::report_fatal_error("Destination is i64 in fp-to-i32");
3796     break;
3797   }
3798   case InstCast::Sitofp:
3799   case InstCast::Uitofp: {
3800     if (llvm::isa<Variable64On32>(Dest)) {
3801       llvm::report_fatal_error("i64-to-fp should have been prelowered.");
3802       return;
3803     }
3804     if (Src0Ty != IceType_i64) {
3805       Variable *Src0R = legalizeToReg(Src0);
3806       auto *T0R = Src0R;
3807       if (Src0Ty != IceType_i32) {
3808         T0R = makeReg(IceType_i32);
3809         if (CastKind == InstCast::Uitofp)
3810           lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
3811         else
3812           lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
3813       }
3814       if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
3815         Variable *FTmp1 = makeReg(IceType_f32);
3816         Variable *FTmp2 = makeReg(IceType_f32);
3817         _mtc1(FTmp1, T0R);
3818         _cvt_s_w(FTmp2, FTmp1);
3819         _mov(Dest, FTmp2);
3820         return;
3821       }
3822       if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
3823         Variable *FTmp1 = makeReg(IceType_f64);
3824         Variable *FTmp2 = makeReg(IceType_f64);
3825         _mtc1(FTmp1, T0R);
3826         _cvt_d_w(FTmp2, FTmp1);
3827         _mov(Dest, FTmp2);
3828         return;
3829       }
3830     }
3831     llvm::report_fatal_error("Source is i64 in i32-to-fp");
3832     break;
3833   }
3834   case InstCast::Bitcast: {
3835     Operand *Src0 = Instr->getSrc(0);
3836     if (DestTy == Src0->getType()) {
3837       auto *Assign = InstAssign::create(Func, Dest, Src0);
3838       lowerAssign(Assign);
3839       return;
3840     }
3841     if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
3842       llvm::report_fatal_error(
3843           "Bitcast: vector type should have been prelowered.");
3844       return;
3845     }
3846     switch (DestTy) {
3847     case IceType_NUM:
3848     case IceType_void:
3849       llvm::report_fatal_error("Unexpected bitcast.");
3850     case IceType_i1:
3851       UnimplementedLoweringError(this, Instr);
3852       break;
3853     case IceType_i8:
3854       assert(Src0->getType() == IceType_v8i1);
3855       llvm::report_fatal_error(
3856           "i8 to v8i1 conversion should have been prelowered.");
3857       break;
3858     case IceType_i16:
3859       assert(Src0->getType() == IceType_v16i1);
3860       llvm::report_fatal_error(
3861           "i16 to v16i1 conversion should have been prelowered.");
3862       break;
3863     case IceType_i32:
3864     case IceType_f32: {
3865       Variable *Src0R = legalizeToReg(Src0);
3866       _mov(Dest, Src0R);
3867       break;
3868     }
3869     case IceType_i64: {
3870       assert(Src0->getType() == IceType_f64);
3871       Variable *Src0R = legalizeToReg(Src0);
3872       auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3873       T->initHiLo(Func);
3874       T->getHi()->setMustNotHaveReg();
3875       T->getLo()->setMustNotHaveReg();
3876       Context.insert<InstFakeDef>(T->getHi());
3877       Context.insert<InstFakeDef>(T->getLo());
3878       _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
3879       _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
3880       lowerAssign(InstAssign::create(Func, Dest, T));
3881       break;
3882     }
3883     case IceType_f64: {
3884       assert(Src0->getType() == IceType_i64);
3885       const uint32_t Mask = 0xFFFFFFFF;
3886       if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
3887         Variable *RegHi, *RegLo;
3888         const uint64_t Value = C64->getValue();
3889         uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
3890         uint64_t Lower32Bits = Value & Mask;
3891         RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
3892         RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
3893         _mov(Dest, RegHi, RegLo);
3894       } else {
3895         auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
3896         auto *RegLo = legalizeToReg(loOperand(Var64On32));
3897         auto *RegHi = legalizeToReg(hiOperand(Var64On32));
3898         _mov(Dest, RegHi, RegLo);
3899       }
3900       break;
3901     }
3902     default:
3903       llvm::report_fatal_error("Unexpected bitcast.");
3904     }
3905     break;
3906   }
3907   }
3908 }
3909 
lowerExtractElement(const InstExtractElement * Instr)3910 void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
3911   Variable *Dest = Instr->getDest();
3912   const Type DestTy = Dest->getType();
3913   Operand *Src1 = Instr->getSrc(1);
3914   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
3915     const uint32_t Index = Imm->getValue();
3916     Variable *TDest = makeReg(DestTy);
3917     Variable *TReg = makeReg(DestTy);
3918     auto *Src0 = legalizeUndef(Instr->getSrc(0));
3919     auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
3920     // Number of elements in each container
3921     uint32_t ElemPerCont =
3922         typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
3923     auto *Src = Src0R->getContainers()[Index / ElemPerCont];
3924     auto *SrcE = legalizeToReg(Src);
3925     // Position of the element in the container
3926     uint32_t PosInCont = Index % ElemPerCont;
3927     if (ElemPerCont == 1) {
3928       _mov(TDest, SrcE);
3929     } else if (ElemPerCont == 2) {
3930       switch (PosInCont) {
3931       case 0:
3932         _andi(TDest, SrcE, 0xffff);
3933         break;
3934       case 1:
3935         _srl(TDest, SrcE, 16);
3936         break;
3937       default:
3938         llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3939         break;
3940       }
3941     } else if (ElemPerCont == 4) {
3942       switch (PosInCont) {
3943       case 0:
3944         _andi(TDest, SrcE, 0xff);
3945         break;
3946       case 1:
3947         _srl(TReg, SrcE, 8);
3948         _andi(TDest, TReg, 0xff);
3949         break;
3950       case 2:
3951         _srl(TReg, SrcE, 16);
3952         _andi(TDest, TReg, 0xff);
3953         break;
3954       case 3:
3955         _srl(TDest, SrcE, 24);
3956         break;
3957       default:
3958         llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3959         break;
3960       }
3961     }
3962     if (typeElementType(Src0R->getType()) == IceType_i1) {
3963       Variable *TReg1 = makeReg(DestTy);
3964       _andi(TReg1, TDest, 0x1);
3965       _mov(Dest, TReg1);
3966     } else {
3967       _mov(Dest, TDest);
3968     }
3969     return;
3970   }
3971   llvm::report_fatal_error("ExtractElement requires a constant index");
3972 }
3973 
lowerFcmp(const InstFcmp * Instr)3974 void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
3975   Variable *Dest = Instr->getDest();
3976   if (isVectorType(Dest->getType())) {
3977     llvm::report_fatal_error("Fcmp: Destination type is vector");
3978     return;
3979   }
3980 
3981   auto *Src0 = Instr->getSrc(0);
3982   auto *Src1 = Instr->getSrc(1);
3983   auto *Zero = getZero();
3984 
3985   InstFcmp::FCond Cond = Instr->getCondition();
3986   auto *DestR = makeReg(IceType_i32);
3987   auto *Src0R = legalizeToReg(Src0);
3988   auto *Src1R = legalizeToReg(Src1);
3989   const Type Src0Ty = Src0->getType();
3990 
3991   Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);
3992 
3993   switch (Cond) {
3994   default: {
3995     llvm::report_fatal_error("Unhandled fp comparison.");
3996     return;
3997   }
3998   case InstFcmp::False: {
3999     Context.insert<InstFakeUse>(Src0R);
4000     Context.insert<InstFakeUse>(Src1R);
4001     _addiu(DestR, Zero, 0);
4002     _mov(Dest, DestR);
4003     break;
4004   }
4005   case InstFcmp::Oeq: {
4006     if (Src0Ty == IceType_f32) {
4007       _c_eq_s(Src0R, Src1R);
4008     } else {
4009       _c_eq_d(Src0R, Src1R);
4010     }
4011     _addiu(DestR, Zero, 1);
4012     _movf(DestR, Zero, FCC0);
4013     _mov(Dest, DestR);
4014     break;
4015   }
4016   case InstFcmp::Ogt: {
4017     if (Src0Ty == IceType_f32) {
4018       _c_ule_s(Src0R, Src1R);
4019     } else {
4020       _c_ule_d(Src0R, Src1R);
4021     }
4022     _addiu(DestR, Zero, 1);
4023     _movt(DestR, Zero, FCC0);
4024     _mov(Dest, DestR);
4025     break;
4026   }
4027   case InstFcmp::Oge: {
4028     if (Src0Ty == IceType_f32) {
4029       _c_ult_s(Src0R, Src1R);
4030     } else {
4031       _c_ult_d(Src0R, Src1R);
4032     }
4033     _addiu(DestR, Zero, 1);
4034     _movt(DestR, Zero, FCC0);
4035     _mov(Dest, DestR);
4036     break;
4037   }
4038   case InstFcmp::Olt: {
4039     if (Src0Ty == IceType_f32) {
4040       _c_olt_s(Src0R, Src1R);
4041     } else {
4042       _c_olt_d(Src0R, Src1R);
4043     }
4044     _addiu(DestR, Zero, 1);
4045     _movf(DestR, Zero, FCC0);
4046     _mov(Dest, DestR);
4047     break;
4048   }
4049   case InstFcmp::Ole: {
4050     if (Src0Ty == IceType_f32) {
4051       _c_ole_s(Src0R, Src1R);
4052     } else {
4053       _c_ole_d(Src0R, Src1R);
4054     }
4055     _addiu(DestR, Zero, 1);
4056     _movf(DestR, Zero, FCC0);
4057     _mov(Dest, DestR);
4058     break;
4059   }
4060   case InstFcmp::One: {
4061     if (Src0Ty == IceType_f32) {
4062       _c_ueq_s(Src0R, Src1R);
4063     } else {
4064       _c_ueq_d(Src0R, Src1R);
4065     }
4066     _addiu(DestR, Zero, 1);
4067     _movt(DestR, Zero, FCC0);
4068     _mov(Dest, DestR);
4069     break;
4070   }
4071   case InstFcmp::Ord: {
4072     if (Src0Ty == IceType_f32) {
4073       _c_un_s(Src0R, Src1R);
4074     } else {
4075       _c_un_d(Src0R, Src1R);
4076     }
4077     _addiu(DestR, Zero, 1);
4078     _movt(DestR, Zero, FCC0);
4079     _mov(Dest, DestR);
4080     break;
4081   }
4082   case InstFcmp::Ueq: {
4083     if (Src0Ty == IceType_f32) {
4084       _c_ueq_s(Src0R, Src1R);
4085     } else {
4086       _c_ueq_d(Src0R, Src1R);
4087     }
4088     _addiu(DestR, Zero, 1);
4089     _movf(DestR, Zero, FCC0);
4090     _mov(Dest, DestR);
4091     break;
4092   }
4093   case InstFcmp::Ugt: {
4094     if (Src0Ty == IceType_f32) {
4095       _c_ole_s(Src0R, Src1R);
4096     } else {
4097       _c_ole_d(Src0R, Src1R);
4098     }
4099     _addiu(DestR, Zero, 1);
4100     _movt(DestR, Zero, FCC0);
4101     _mov(Dest, DestR);
4102     break;
4103   }
4104   case InstFcmp::Uge: {
4105     if (Src0Ty == IceType_f32) {
4106       _c_olt_s(Src0R, Src1R);
4107     } else {
4108       _c_olt_d(Src0R, Src1R);
4109     }
4110     _addiu(DestR, Zero, 1);
4111     _movt(DestR, Zero, FCC0);
4112     _mov(Dest, DestR);
4113     break;
4114   }
4115   case InstFcmp::Ult: {
4116     if (Src0Ty == IceType_f32) {
4117       _c_ult_s(Src0R, Src1R);
4118     } else {
4119       _c_ult_d(Src0R, Src1R);
4120     }
4121     _addiu(DestR, Zero, 1);
4122     _movf(DestR, Zero, FCC0);
4123     _mov(Dest, DestR);
4124     break;
4125   }
4126   case InstFcmp::Ule: {
4127     if (Src0Ty == IceType_f32) {
4128       _c_ule_s(Src0R, Src1R);
4129     } else {
4130       _c_ule_d(Src0R, Src1R);
4131     }
4132     _addiu(DestR, Zero, 1);
4133     _movf(DestR, Zero, FCC0);
4134     _mov(Dest, DestR);
4135     break;
4136   }
4137   case InstFcmp::Une: {
4138     if (Src0Ty == IceType_f32) {
4139       _c_eq_s(Src0R, Src1R);
4140     } else {
4141       _c_eq_d(Src0R, Src1R);
4142     }
4143     _addiu(DestR, Zero, 1);
4144     _movt(DestR, Zero, FCC0);
4145     _mov(Dest, DestR);
4146     break;
4147   }
4148   case InstFcmp::Uno: {
4149     if (Src0Ty == IceType_f32) {
4150       _c_un_s(Src0R, Src1R);
4151     } else {
4152       _c_un_d(Src0R, Src1R);
4153     }
4154     _addiu(DestR, Zero, 1);
4155     _movf(DestR, Zero, FCC0);
4156     _mov(Dest, DestR);
4157     break;
4158   }
4159   case InstFcmp::True: {
4160     Context.insert<InstFakeUse>(Src0R);
4161     Context.insert<InstFakeUse>(Src1R);
4162     _addiu(DestR, Zero, 1);
4163     _mov(Dest, DestR);
4164     break;
4165   }
4166   }
4167 }
4168 
lower64Icmp(const InstIcmp * Instr)4169 void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
4170   Operand *Src0 = legalize(Instr->getSrc(0));
4171   Operand *Src1 = legalize(Instr->getSrc(1));
4172   Variable *Dest = Instr->getDest();
4173   InstIcmp::ICond Condition = Instr->getCondition();
4174 
4175   Variable *Src0LoR = legalizeToReg(loOperand(Src0));
4176   Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
4177   Variable *Src1LoR = legalizeToReg(loOperand(Src1));
4178   Variable *Src1HiR = legalizeToReg(hiOperand(Src1));
4179 
4180   switch (Condition) {
4181   default:
4182     llvm_unreachable("unexpected condition");
4183     return;
4184   case InstIcmp::Eq: {
4185     auto *T1 = I32Reg();
4186     auto *T2 = I32Reg();
4187     auto *T3 = I32Reg();
4188     auto *T4 = I32Reg();
4189     _xor(T1, Src0HiR, Src1HiR);
4190     _xor(T2, Src0LoR, Src1LoR);
4191     _or(T3, T1, T2);
4192     _sltiu(T4, T3, 1);
4193     _mov(Dest, T4);
4194     return;
4195   }
4196   case InstIcmp::Ne: {
4197     auto *T1 = I32Reg();
4198     auto *T2 = I32Reg();
4199     auto *T3 = I32Reg();
4200     auto *T4 = I32Reg();
4201     _xor(T1, Src0HiR, Src1HiR);
4202     _xor(T2, Src0LoR, Src1LoR);
4203     _or(T3, T1, T2);
4204     _sltu(T4, getZero(), T3);
4205     _mov(Dest, T4);
4206     return;
4207   }
4208   case InstIcmp::Sgt: {
4209     auto *T1 = I32Reg();
4210     auto *T2 = I32Reg();
4211     auto *T3 = I32Reg();
4212     _xor(T1, Src0HiR, Src1HiR);
4213     _slt(T2, Src1HiR, Src0HiR);
4214     _sltu(T3, Src1LoR, Src0LoR);
4215     _movz(T2, T3, T1);
4216     _mov(Dest, T2);
4217     return;
4218   }
4219   case InstIcmp::Ugt: {
4220     auto *T1 = I32Reg();
4221     auto *T2 = I32Reg();
4222     auto *T3 = I32Reg();
4223     _xor(T1, Src0HiR, Src1HiR);
4224     _sltu(T2, Src1HiR, Src0HiR);
4225     _sltu(T3, Src1LoR, Src0LoR);
4226     _movz(T2, T3, T1);
4227     _mov(Dest, T2);
4228     return;
4229   }
4230   case InstIcmp::Sge: {
4231     auto *T1 = I32Reg();
4232     auto *T2 = I32Reg();
4233     auto *T3 = I32Reg();
4234     auto *T4 = I32Reg();
4235     auto *T5 = I32Reg();
4236     _xor(T1, Src0HiR, Src1HiR);
4237     _slt(T2, Src0HiR, Src1HiR);
4238     _xori(T3, T2, 1);
4239     _sltu(T4, Src0LoR, Src1LoR);
4240     _xori(T5, T4, 1);
4241     _movz(T3, T5, T1);
4242     _mov(Dest, T3);
4243     return;
4244   }
4245   case InstIcmp::Uge: {
4246     auto *T1 = I32Reg();
4247     auto *T2 = I32Reg();
4248     auto *T3 = I32Reg();
4249     auto *T4 = I32Reg();
4250     auto *T5 = I32Reg();
4251     _xor(T1, Src0HiR, Src1HiR);
4252     _sltu(T2, Src0HiR, Src1HiR);
4253     _xori(T3, T2, 1);
4254     _sltu(T4, Src0LoR, Src1LoR);
4255     _xori(T5, T4, 1);
4256     _movz(T3, T5, T1);
4257     _mov(Dest, T3);
4258     return;
4259   }
4260   case InstIcmp::Slt: {
4261     auto *T1 = I32Reg();
4262     auto *T2 = I32Reg();
4263     auto *T3 = I32Reg();
4264     _xor(T1, Src0HiR, Src1HiR);
4265     _slt(T2, Src0HiR, Src1HiR);
4266     _sltu(T3, Src0LoR, Src1LoR);
4267     _movz(T2, T3, T1);
4268     _mov(Dest, T2);
4269     return;
4270   }
4271   case InstIcmp::Ult: {
4272     auto *T1 = I32Reg();
4273     auto *T2 = I32Reg();
4274     auto *T3 = I32Reg();
4275     _xor(T1, Src0HiR, Src1HiR);
4276     _sltu(T2, Src0HiR, Src1HiR);
4277     _sltu(T3, Src0LoR, Src1LoR);
4278     _movz(T2, T3, T1);
4279     _mov(Dest, T2);
4280     return;
4281   }
4282   case InstIcmp::Sle: {
4283     auto *T1 = I32Reg();
4284     auto *T2 = I32Reg();
4285     auto *T3 = I32Reg();
4286     auto *T4 = I32Reg();
4287     auto *T5 = I32Reg();
4288     _xor(T1, Src0HiR, Src1HiR);
4289     _slt(T2, Src1HiR, Src0HiR);
4290     _xori(T3, T2, 1);
4291     _sltu(T4, Src1LoR, Src0LoR);
4292     _xori(T5, T4, 1);
4293     _movz(T3, T5, T1);
4294     _mov(Dest, T3);
4295     return;
4296   }
4297   case InstIcmp::Ule: {
4298     auto *T1 = I32Reg();
4299     auto *T2 = I32Reg();
4300     auto *T3 = I32Reg();
4301     auto *T4 = I32Reg();
4302     auto *T5 = I32Reg();
4303     _xor(T1, Src0HiR, Src1HiR);
4304     _sltu(T2, Src1HiR, Src0HiR);
4305     _xori(T3, T2, 1);
4306     _sltu(T4, Src1LoR, Src0LoR);
4307     _xori(T5, T4, 1);
4308     _movz(T3, T5, T1);
4309     _mov(Dest, T3);
4310     return;
4311   }
4312   }
4313 }
4314 
lowerIcmp(const InstIcmp * Instr)4315 void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
4316   auto *Src0 = Instr->getSrc(0);
4317   auto *Src1 = Instr->getSrc(1);
4318   if (Src0->getType() == IceType_i64) {
4319     lower64Icmp(Instr);
4320     return;
4321   }
4322   Variable *Dest = Instr->getDest();
4323   if (isVectorType(Dest->getType())) {
4324     llvm::report_fatal_error("Icmp: Destination type is vector");
4325     return;
4326   }
4327   InstIcmp::ICond Cond = Instr->getCondition();
4328   auto *Src0R = legalizeToReg(Src0);
4329   auto *Src1R = legalizeToReg(Src1);
4330   const Type Src0Ty = Src0R->getType();
4331   const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
4332   Variable *Src0RT = I32Reg();
4333   Variable *Src1RT = I32Reg();
4334 
4335   if (Src0Ty != IceType_i32) {
4336     _sll(Src0RT, Src0R, ShAmt);
4337     _sll(Src1RT, Src1R, ShAmt);
4338   } else {
4339     _mov(Src0RT, Src0R);
4340     _mov(Src1RT, Src1R);
4341   }
4342 
4343   switch (Cond) {
4344   case InstIcmp::Eq: {
4345     auto *DestT = I32Reg();
4346     auto *T = I32Reg();
4347     _xor(T, Src0RT, Src1RT);
4348     _sltiu(DestT, T, 1);
4349     _mov(Dest, DestT);
4350     return;
4351   }
4352   case InstIcmp::Ne: {
4353     auto *DestT = I32Reg();
4354     auto *T = I32Reg();
4355     auto *Zero = getZero();
4356     _xor(T, Src0RT, Src1RT);
4357     _sltu(DestT, Zero, T);
4358     _mov(Dest, DestT);
4359     return;
4360   }
4361   case InstIcmp::Ugt: {
4362     auto *DestT = I32Reg();
4363     _sltu(DestT, Src1RT, Src0RT);
4364     _mov(Dest, DestT);
4365     return;
4366   }
4367   case InstIcmp::Uge: {
4368     auto *DestT = I32Reg();
4369     auto *T = I32Reg();
4370     _sltu(T, Src0RT, Src1RT);
4371     _xori(DestT, T, 1);
4372     _mov(Dest, DestT);
4373     return;
4374   }
4375   case InstIcmp::Ult: {
4376     auto *DestT = I32Reg();
4377     _sltu(DestT, Src0RT, Src1RT);
4378     _mov(Dest, DestT);
4379     return;
4380   }
4381   case InstIcmp::Ule: {
4382     auto *DestT = I32Reg();
4383     auto *T = I32Reg();
4384     _sltu(T, Src1RT, Src0RT);
4385     _xori(DestT, T, 1);
4386     _mov(Dest, DestT);
4387     return;
4388   }
4389   case InstIcmp::Sgt: {
4390     auto *DestT = I32Reg();
4391     _slt(DestT, Src1RT, Src0RT);
4392     _mov(Dest, DestT);
4393     return;
4394   }
4395   case InstIcmp::Sge: {
4396     auto *DestT = I32Reg();
4397     auto *T = I32Reg();
4398     _slt(T, Src0RT, Src1RT);
4399     _xori(DestT, T, 1);
4400     _mov(Dest, DestT);
4401     return;
4402   }
4403   case InstIcmp::Slt: {
4404     auto *DestT = I32Reg();
4405     _slt(DestT, Src0RT, Src1RT);
4406     _mov(Dest, DestT);
4407     return;
4408   }
4409   case InstIcmp::Sle: {
4410     auto *DestT = I32Reg();
4411     auto *T = I32Reg();
4412     _slt(T, Src1RT, Src0RT);
4413     _xori(DestT, T, 1);
4414     _mov(Dest, DestT);
4415     return;
4416   }
4417   default:
4418     llvm_unreachable("Invalid ICmp operator");
4419     return;
4420   }
4421 }
4422 
lowerInsertElement(const InstInsertElement * Instr)4423 void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
4424   Variable *Dest = Instr->getDest();
4425   const Type DestTy = Dest->getType();
4426   Operand *Src2 = Instr->getSrc(2);
4427   if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4428     const uint32_t Index = Imm->getValue();
4429     // Vector to insert in
4430     auto *Src0 = legalizeUndef(Instr->getSrc(0));
4431     auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
4432     // Number of elements in each container
4433     uint32_t ElemPerCont =
4434         typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
4435     // Source Element
4436     auto *Src = Src0R->getContainers()[Index / ElemPerCont];
4437     auto *SrcE = Src;
4438     if (ElemPerCont > 1)
4439       SrcE = legalizeToReg(Src);
4440     // Dest is a vector
4441     auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
4442     VDest->initVecElement(Func);
4443     // Temp vector variable
4444     auto *TDest = makeReg(DestTy);
4445     auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
4446     TVDest->initVecElement(Func);
4447     // Destination element
4448     auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
4449     // Element to insert
4450     auto *Src1R = legalizeToReg(Instr->getSrc(1));
4451     auto *TReg1 = makeReg(IceType_i32);
4452     auto *TReg2 = makeReg(IceType_i32);
4453     auto *TReg3 = makeReg(IceType_i32);
4454     auto *TReg4 = makeReg(IceType_i32);
4455     auto *TReg5 = makeReg(IceType_i32);
4456     auto *TDReg = makeReg(IceType_i32);
4457     // Position of the element in the container
4458     uint32_t PosInCont = Index % ElemPerCont;
4459     // Load source vector in a temporary vector
4460     for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
4461       auto *DCont = TVDest->getContainers()[i];
4462       // Do not define DstE as we are going to redefine it
4463       if (DCont == DstE)
4464         continue;
4465       auto *SCont = Src0R->getContainers()[i];
4466       auto *TReg = makeReg(IceType_i32);
4467       _mov(TReg, SCont);
4468       _mov(DCont, TReg);
4469     }
4470     // Insert the element
4471     if (ElemPerCont == 1) {
4472       _mov(DstE, Src1R);
4473     } else if (ElemPerCont == 2) {
4474       switch (PosInCont) {
4475       case 0:
4476         _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
4477         _srl(TReg2, SrcE, 16);
4478         _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
4479         _or(TDReg, TReg1, TReg3);
4480         _mov(DstE, TDReg);
4481         break;
4482       case 1:
4483         _sll(TReg1, Src1R, 16); // Clear lower 16-bits  of source
4484         _sll(TReg2, SrcE, 16);
4485         _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
4486         _or(TDReg, TReg1, TReg3);
4487         _mov(DstE, TDReg);
4488         break;
4489       default:
4490         llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4491         break;
4492       }
4493     } else if (ElemPerCont == 4) {
4494       switch (PosInCont) {
4495       case 0:
4496         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4497         _srl(TReg2, SrcE, 8);
4498         _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
4499         _or(TDReg, TReg1, TReg3);
4500         _mov(DstE, TDReg);
4501         break;
4502       case 1:
4503         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4504         _sll(TReg5, TReg1, 8);     // Position in the destination
4505         _lui(TReg2, Ctx->getConstantInt32(0xffff));
4506         _ori(TReg3, TReg2, 0x00ff);
4507         _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4508         _or(TDReg, TReg5, TReg4);
4509         _mov(DstE, TDReg);
4510         break;
4511       case 2:
4512         _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4513         _sll(TReg5, TReg1, 16);    // Position in the destination
4514         _lui(TReg2, Ctx->getConstantInt32(0xff00));
4515         _ori(TReg3, TReg2, 0xffff);
4516         _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4517         _or(TDReg, TReg5, TReg4);
4518         _mov(DstE, TDReg);
4519         break;
4520       case 3:
4521         _sll(TReg1, Src1R, 24); // Position in the destination
4522         _sll(TReg2, SrcE, 8);
4523         _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
4524         _or(TDReg, TReg1, TReg3);
4525         _mov(DstE, TDReg);
4526         break;
4527       default:
4528         llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4529         break;
4530       }
4531     }
4532     // Write back temporary vector to the destination
4533     auto *Assign = InstAssign::create(Func, Dest, TDest);
4534     lowerAssign(Assign);
4535     return;
4536   }
4537   llvm::report_fatal_error("InsertElement requires a constant index");
4538 }
4539 
createArithInst(Intrinsics::AtomicRMWOperation Operation,Variable * Dest,Variable * Src0,Variable * Src1)4540 void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
4541                                    Variable *Dest, Variable *Src0,
4542                                    Variable *Src1) {
4543   switch (Operation) {
4544   default:
4545     llvm::report_fatal_error("Unknown AtomicRMW operation");
4546   case Intrinsics::AtomicExchange:
4547     llvm::report_fatal_error("Can't handle Atomic xchg operation");
4548   case Intrinsics::AtomicAdd:
4549     _addu(Dest, Src0, Src1);
4550     break;
4551   case Intrinsics::AtomicAnd:
4552     _and(Dest, Src0, Src1);
4553     break;
4554   case Intrinsics::AtomicSub:
4555     _subu(Dest, Src0, Src1);
4556     break;
4557   case Intrinsics::AtomicOr:
4558     _or(Dest, Src0, Src1);
4559     break;
4560   case Intrinsics::AtomicXor:
4561     _xor(Dest, Src0, Src1);
4562     break;
4563   }
4564 }
4565 
lowerIntrinsicCall(const InstIntrinsicCall * Instr)4566 void TargetMIPS32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
4567   Variable *Dest = Instr->getDest();
4568   Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();
4569 
4570   Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
4571   switch (ID) {
4572   case Intrinsics::AtomicLoad: {
4573     assert(isScalarIntegerType(DestTy));
4574     // We require the memory address to be naturally aligned. Given that is the
4575     // case, then normal loads are atomic.
4576     if (!Intrinsics::isMemoryOrderValid(
4577             ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4578       Func->setError("Unexpected memory ordering for AtomicLoad");
4579       return;
4580     }
4581     if (DestTy == IceType_i64) {
4582       llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
4583       return;
4584     } else if (DestTy == IceType_i32) {
4585       auto *T1 = makeReg(DestTy);
4586       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4587       auto *Base = legalizeToReg(Instr->getArg(0));
4588       auto *Addr = formMemoryOperand(Base, DestTy);
4589       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4590       InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4591       constexpr CfgNode *NoTarget = nullptr;
4592       _sync();
4593       Context.insert(Retry);
4594       Sandboxer(this).ll(T1, Addr);
4595       _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
4596       _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4597       Sandboxer(this).sc(RegAt, Addr);
4598       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4599       Context.insert(Exit);
4600       _sync();
4601       _mov(Dest, T1);
4602       Context.insert<InstFakeUse>(T1);
4603     } else {
4604       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4605       auto *Base = legalizeToReg(Instr->getArg(0));
4606       auto *T1 = makeReg(IceType_i32);
4607       auto *T2 = makeReg(IceType_i32);
4608       auto *T3 = makeReg(IceType_i32);
4609       auto *T4 = makeReg(IceType_i32);
4610       auto *T5 = makeReg(IceType_i32);
4611       auto *T6 = makeReg(IceType_i32);
4612       auto *SrcMask = makeReg(IceType_i32);
4613       auto *Tdest = makeReg(IceType_i32);
4614       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4615       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4616       InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4617       constexpr CfgNode *NoTarget = nullptr;
4618       _sync();
4619       _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
4620       _andi(T2, Base, 3);        // Last two bits of the address
4621       _and(T3, Base, T1);        // Align the address
4622       _sll(T4, T2, 3);
4623       _ori(T5, getZero(), Mask);
4624       _sllv(SrcMask, T5, T4); // Source mask
4625       auto *Addr = formMemoryOperand(T3, IceType_i32);
4626       Context.insert(Retry);
4627       Sandboxer(this).ll(T6, Addr);
4628       _and(Tdest, T6, SrcMask);
4629       _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
4630       _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4631       Sandboxer(this).sc(RegAt, Addr);
4632       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4633       Context.insert(Exit);
4634       auto *T7 = makeReg(IceType_i32);
4635       auto *T8 = makeReg(IceType_i32);
4636       _srlv(T7, Tdest, T4);
4637       _andi(T8, T7, Mask);
4638       _sync();
4639       _mov(Dest, T8);
4640       Context.insert<InstFakeUse>(T6);
4641       Context.insert<InstFakeUse>(SrcMask);
4642     }
4643     return;
4644   }
4645   case Intrinsics::AtomicStore: {
4646     // We require the memory address to be naturally aligned. Given that is the
4647     // case, then normal stores are atomic.
4648     if (!Intrinsics::isMemoryOrderValid(
4649             ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4650       Func->setError("Unexpected memory ordering for AtomicStore");
4651       return;
4652     }
4653     auto *Val = Instr->getArg(0);
4654     auto Ty = Val->getType();
4655     if (Ty == IceType_i64) {
4656       llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
4657       return;
4658     } else if (Ty == IceType_i32) {
4659       auto *Val = legalizeToReg(Instr->getArg(0));
4660       auto *Base = legalizeToReg(Instr->getArg(1));
4661       auto *Addr = formMemoryOperand(Base, Ty);
4662       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4663       constexpr CfgNode *NoTarget = nullptr;
4664       auto *T1 = makeReg(IceType_i32);
4665       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4666       _sync();
4667       Context.insert(Retry);
4668       Sandboxer(this).ll(T1, Addr);
4669       _mov(RegAt, Val);
4670       Sandboxer(this).sc(RegAt, Addr);
4671       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4672       Context.insert<InstFakeUse>(T1); // To keep LL alive
4673       _sync();
4674     } else {
4675       auto *Val = legalizeToReg(Instr->getArg(0));
4676       auto *Base = legalizeToReg(Instr->getArg(1));
4677       InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4678       constexpr CfgNode *NoTarget = nullptr;
4679       auto *T1 = makeReg(IceType_i32);
4680       auto *T2 = makeReg(IceType_i32);
4681       auto *T3 = makeReg(IceType_i32);
4682       auto *T4 = makeReg(IceType_i32);
4683       auto *T5 = makeReg(IceType_i32);
4684       auto *T6 = makeReg(IceType_i32);
4685       auto *T7 = makeReg(IceType_i32);
4686       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4687       auto *SrcMask = makeReg(IceType_i32);
4688       auto *DstMask = makeReg(IceType_i32);
4689       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
4690       _sync();
4691       _addiu(T1, getZero(), -4);
4692       _and(T7, Base, T1);
4693       auto *Addr = formMemoryOperand(T7, Ty);
4694       _andi(T2, Base, 3);
4695       _sll(T3, T2, 3);
4696       _ori(T4, getZero(), Mask);
4697       _sllv(T5, T4, T3);
4698       _sllv(T6, Val, T3);
4699       _nor(SrcMask, getZero(), T5);
4700       _and(DstMask, T6, T5);
4701       Context.insert(Retry);
4702       Sandboxer(this).ll(RegAt, Addr);
4703       _and(RegAt, RegAt, SrcMask);
4704       _or(RegAt, RegAt, DstMask);
4705       Sandboxer(this).sc(RegAt, Addr);
4706       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4707       Context.insert<InstFakeUse>(SrcMask);
4708       Context.insert<InstFakeUse>(DstMask);
4709       _sync();
4710     }
4711     return;
4712   }
4713   case Intrinsics::AtomicCmpxchg: {
4714     assert(isScalarIntegerType(DestTy));
4715     // We require the memory address to be naturally aligned. Given that is the
4716     // case, then normal loads are atomic.
4717     if (!Intrinsics::isMemoryOrderValid(
4718             ID, getConstantMemoryOrder(Instr->getArg(3)),
4719             getConstantMemoryOrder(Instr->getArg(4)))) {
4720       Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4721       return;
4722     }
4723 
4724     InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4725     InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4726     constexpr CfgNode *NoTarget = nullptr;
4727     auto *New = Instr->getArg(2);
4728     auto *Expected = Instr->getArg(1);
4729     auto *ActualAddress = Instr->getArg(0);
4730 
4731     if (DestTy == IceType_i64) {
4732       llvm::report_fatal_error(
4733           "AtomicCmpxchg.i64 should have been prelowered.");
4734       return;
4735     } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4736       auto *NewR = legalizeToReg(New);
4737       auto *ExpectedR = legalizeToReg(Expected);
4738       auto *ActualAddressR = legalizeToReg(ActualAddress);
4739       const uint32_t ShiftAmount =
4740           (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
4741       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4742       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4743       auto *T1 = I32Reg();
4744       auto *T2 = I32Reg();
4745       auto *T3 = I32Reg();
4746       auto *T4 = I32Reg();
4747       auto *T5 = I32Reg();
4748       auto *T6 = I32Reg();
4749       auto *T7 = I32Reg();
4750       auto *T8 = I32Reg();
4751       auto *T9 = I32Reg();
4752       _addiu(RegAt, getZero(), -4);
4753       _and(T1, ActualAddressR, RegAt);
4754       auto *Addr = formMemoryOperand(T1, DestTy);
4755       _andi(RegAt, ActualAddressR, 3);
4756       _sll(T2, RegAt, 3);
4757       _ori(RegAt, getZero(), Mask);
4758       _sllv(T3, RegAt, T2);
4759       _nor(T4, getZero(), T3);
4760       _andi(RegAt, ExpectedR, Mask);
4761       _sllv(T5, RegAt, T2);
4762       _andi(RegAt, NewR, Mask);
4763       _sllv(T6, RegAt, T2);
4764       _sync();
4765       Context.insert(Retry);
4766       Sandboxer(this).ll(T7, Addr);
4767       _and(T8, T7, T3);
4768       _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
4769       _and(RegAt, T7, T4);
4770       _or(T9, RegAt, T6);
4771       Sandboxer(this).sc(T9, Addr);
4772       _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
4773       Context.insert<InstFakeUse>(getZero());
4774       Context.insert(Exit);
4775       _srlv(RegAt, T8, T2);
4776       _sll(RegAt, RegAt, ShiftAmount);
4777       _sra(RegAt, RegAt, ShiftAmount);
4778       _mov(Dest, RegAt);
4779       _sync();
4780       Context.insert<InstFakeUse>(T3);
4781       Context.insert<InstFakeUse>(T4);
4782       Context.insert<InstFakeUse>(T5);
4783       Context.insert<InstFakeUse>(T6);
4784       Context.insert<InstFakeUse>(T8);
4785       Context.insert<InstFakeUse>(ExpectedR);
4786       Context.insert<InstFakeUse>(NewR);
4787     } else {
4788       auto *T1 = I32Reg();
4789       auto *T2 = I32Reg();
4790       auto *NewR = legalizeToReg(New);
4791       auto *ExpectedR = legalizeToReg(Expected);
4792       auto *ActualAddressR = legalizeToReg(ActualAddress);
4793       _sync();
4794       Context.insert(Retry);
4795       Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4796       _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
4797       _mov(T2, NewR);
4798       Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4799       _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4800       Context.insert<InstFakeUse>(getZero());
4801       Context.insert(Exit);
4802       _mov(Dest, T1);
4803       _sync();
4804       Context.insert<InstFakeUse>(ExpectedR);
4805       Context.insert<InstFakeUse>(NewR);
4806     }
4807     return;
4808   }
4809   case Intrinsics::AtomicRMW: {
4810     assert(isScalarIntegerType(DestTy));
4811     // We require the memory address to be naturally aligned. Given that is the
4812     // case, then normal loads are atomic.
4813     if (!Intrinsics::isMemoryOrderValid(
4814             ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4815       Func->setError("Unexpected memory ordering for AtomicRMW");
4816       return;
4817     }
4818 
4819     constexpr CfgNode *NoTarget = nullptr;
4820     InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4821     auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
4822         llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
4823     auto *New = Instr->getArg(2);
4824     auto *ActualAddress = Instr->getArg(1);
4825 
4826     if (DestTy == IceType_i64) {
4827       llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
4828       return;
4829     } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4830       const uint32_t ShiftAmount =
4831           INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
4832       const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4833       auto *NewR = legalizeToReg(New);
4834       auto *ActualAddressR = legalizeToReg(ActualAddress);
4835       auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4836       auto *T1 = I32Reg();
4837       auto *T2 = I32Reg();
4838       auto *T3 = I32Reg();
4839       auto *T4 = I32Reg();
4840       auto *T5 = I32Reg();
4841       auto *T6 = I32Reg();
4842       auto *T7 = I32Reg();
4843       _sync();
4844       _addiu(RegAt, getZero(), -4);
4845       _and(T1, ActualAddressR, RegAt);
4846       _andi(RegAt, ActualAddressR, 3);
4847       _sll(T2, RegAt, 3);
4848       _ori(RegAt, getZero(), Mask);
4849       _sllv(T3, RegAt, T2);
4850       _nor(T4, getZero(), T3);
4851       _sllv(T5, NewR, T2);
4852       Context.insert(Retry);
4853       Sandboxer(this).ll(T6, formMemoryOperand(T1, DestTy));
4854       if (Operation != Intrinsics::AtomicExchange) {
4855         createArithInst(Operation, RegAt, T6, T5);
4856         _and(RegAt, RegAt, T3);
4857       }
4858       _and(T7, T6, T4);
4859       if (Operation == Intrinsics::AtomicExchange) {
4860         _or(RegAt, T7, T5);
4861       } else {
4862         _or(RegAt, T7, RegAt);
4863       }
4864       Sandboxer(this).sc(RegAt, formMemoryOperand(T1, DestTy));
4865       _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4866       Context.insert<InstFakeUse>(getZero());
4867       _and(RegAt, T6, T3);
4868       _srlv(RegAt, RegAt, T2);
4869       _sll(RegAt, RegAt, ShiftAmount);
4870       _sra(RegAt, RegAt, ShiftAmount);
4871       _mov(Dest, RegAt);
4872       _sync();
4873       Context.insert<InstFakeUse>(NewR);
4874       Context.insert<InstFakeUse>(Dest);
4875     } else {
4876       auto *T1 = I32Reg();
4877       auto *T2 = I32Reg();
4878       auto *NewR = legalizeToReg(New);
4879       auto *ActualAddressR = legalizeToReg(ActualAddress);
4880       _sync();
4881       Context.insert(Retry);
4882       Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4883       if (Operation == Intrinsics::AtomicExchange) {
4884         _mov(T2, NewR);
4885       } else {
4886         createArithInst(Operation, T2, T1, NewR);
4887       }
4888       Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4889       _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4890       Context.insert<InstFakeUse>(getZero());
4891       _mov(Dest, T1);
4892       _sync();
4893       Context.insert<InstFakeUse>(NewR);
4894       Context.insert<InstFakeUse>(Dest);
4895     }
4896     return;
4897   }
4898   case Intrinsics::AtomicFence:
4899   case Intrinsics::AtomicFenceAll:
4900     assert(Dest == nullptr);
4901     _sync();
4902     return;
4903   case Intrinsics::AtomicIsLockFree: {
4904     Operand *ByteSize = Instr->getArg(0);
4905     auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4906     auto *T = I32Reg();
4907     if (CI == nullptr) {
4908       // The PNaCl ABI requires the byte size to be a compile-time constant.
4909       Func->setError("AtomicIsLockFree byte size should be compile-time const");
4910       return;
4911     }
4912     static constexpr int32_t NotLockFree = 0;
4913     static constexpr int32_t LockFree = 1;
4914     int32_t Result = NotLockFree;
4915     switch (CI->getValue()) {
4916     case 1:
4917     case 2:
4918     case 4:
4919       Result = LockFree;
4920       break;
4921     }
4922     _addiu(T, getZero(), Result);
4923     _mov(Dest, T);
4924     return;
4925   }
4926   case Intrinsics::Bswap: {
4927     auto *Src = Instr->getArg(0);
4928     const Type SrcTy = Src->getType();
4929     assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
4930            SrcTy == IceType_i64);
4931     switch (SrcTy) {
4932     case IceType_i16: {
4933       auto *T1 = I32Reg();
4934       auto *T2 = I32Reg();
4935       auto *T3 = I32Reg();
4936       auto *T4 = I32Reg();
4937       auto *SrcR = legalizeToReg(Src);
4938       _sll(T1, SrcR, 8);
4939       _lui(T2, Ctx->getConstantInt32(255));
4940       _and(T1, T1, T2);
4941       _sll(T3, SrcR, 24);
4942       _or(T1, T3, T1);
4943       _srl(T4, T1, 16);
4944       _mov(Dest, T4);
4945       return;
4946     }
4947     case IceType_i32: {
4948       auto *T1 = I32Reg();
4949       auto *T2 = I32Reg();
4950       auto *T3 = I32Reg();
4951       auto *T4 = I32Reg();
4952       auto *T5 = I32Reg();
4953       auto *SrcR = legalizeToReg(Src);
4954       _srl(T1, SrcR, 24);
4955       _srl(T2, SrcR, 8);
4956       _andi(T2, T2, 0xFF00);
4957       _or(T1, T2, T1);
4958       _sll(T4, SrcR, 8);
4959       _lui(T3, Ctx->getConstantInt32(255));
4960       _and(T4, T4, T3);
4961       _sll(T5, SrcR, 24);
4962       _or(T4, T5, T4);
4963       _or(T4, T4, T1);
4964       _mov(Dest, T4);
4965       return;
4966     }
4967     case IceType_i64: {
4968       auto *T1 = I32Reg();
4969       auto *T2 = I32Reg();
4970       auto *T3 = I32Reg();
4971       auto *T4 = I32Reg();
4972       auto *T5 = I32Reg();
4973       auto *T6 = I32Reg();
4974       auto *T7 = I32Reg();
4975       auto *T8 = I32Reg();
4976       auto *T9 = I32Reg();
4977       auto *T10 = I32Reg();
4978       auto *T11 = I32Reg();
4979       auto *T12 = I32Reg();
4980       auto *T13 = I32Reg();
4981       auto *T14 = I32Reg();
4982       auto *T15 = I32Reg();
4983       auto *T16 = I32Reg();
4984       auto *T17 = I32Reg();
4985       auto *T18 = I32Reg();
4986       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4987       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4988       Src = legalizeUndef(Src);
4989       auto *SrcLoR = legalizeToReg(loOperand(Src));
4990       auto *SrcHiR = legalizeToReg(hiOperand(Src));
4991       _sll(T1, SrcHiR, 8);
4992       _srl(T2, SrcHiR, 24);
4993       _srl(T3, SrcHiR, 8);
4994       _andi(T3, T3, 0xFF00);
4995       _lui(T4, Ctx->getConstantInt32(255));
4996       _or(T5, T3, T2);
4997       _and(T6, T1, T4);
4998       _sll(T7, SrcHiR, 24);
4999       _or(T8, T7, T6);
5000       _srl(T9, SrcLoR, 24);
5001       _srl(T10, SrcLoR, 8);
5002       _andi(T11, T10, 0xFF00);
5003       _or(T12, T8, T5);
5004       _or(T13, T11, T9);
5005       _sll(T14, SrcLoR, 8);
5006       _and(T15, T14, T4);
5007       _sll(T16, SrcLoR, 24);
5008       _or(T17, T16, T15);
5009       _or(T18, T17, T13);
5010       _mov(DestLo, T12);
5011       _mov(DestHi, T18);
5012       return;
5013     }
5014     default:
5015       llvm::report_fatal_error("Control flow should never have reached here.");
5016     }
5017     return;
5018   }
5019   case Intrinsics::Ctpop: {
5020     llvm::report_fatal_error("Ctpop should have been prelowered.");
5021     return;
5022   }
5023   case Intrinsics::Ctlz: {
5024     auto *Src = Instr->getArg(0);
5025     const Type SrcTy = Src->getType();
5026     assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5027     switch (SrcTy) {
5028     case IceType_i32: {
5029       auto *T = I32Reg();
5030       auto *SrcR = legalizeToReg(Src);
5031       _clz(T, SrcR);
5032       _mov(Dest, T);
5033       break;
5034     }
5035     case IceType_i64: {
5036       auto *T1 = I32Reg();
5037       auto *T2 = I32Reg();
5038       auto *T3 = I32Reg();
5039       auto *T4 = I32Reg();
5040       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5041       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5042       Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5043       Variable *SrcLoR = legalizeToReg(loOperand(Src));
5044       _clz(T1, SrcHiR);
5045       _clz(T2, SrcLoR);
5046       _addiu(T3, T2, 32);
5047       _movn(T3, T1, SrcHiR);
5048       _addiu(T4, getZero(), 0);
5049       _mov(DestHi, T4);
5050       _mov(DestLo, T3);
5051       break;
5052     }
5053     default:
5054       llvm::report_fatal_error("Control flow should never have reached here.");
5055     }
5056     break;
5057   }
5058   case Intrinsics::Cttz: {
5059     auto *Src = Instr->getArg(0);
5060     const Type SrcTy = Src->getType();
5061     assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5062     switch (SrcTy) {
5063     case IceType_i32: {
5064       auto *T1 = I32Reg();
5065       auto *T2 = I32Reg();
5066       auto *T3 = I32Reg();
5067       auto *T4 = I32Reg();
5068       auto *T5 = I32Reg();
5069       auto *T6 = I32Reg();
5070       auto *SrcR = legalizeToReg(Src);
5071       _addiu(T1, SrcR, -1);
5072       _not(T2, SrcR);
5073       _and(T3, T2, T1);
5074       _clz(T4, T3);
5075       _addiu(T5, getZero(), 32);
5076       _subu(T6, T5, T4);
5077       _mov(Dest, T6);
5078       break;
5079     }
5080     case IceType_i64: {
5081       auto *THi1 = I32Reg();
5082       auto *THi2 = I32Reg();
5083       auto *THi3 = I32Reg();
5084       auto *THi4 = I32Reg();
5085       auto *THi5 = I32Reg();
5086       auto *THi6 = I32Reg();
5087       auto *TLo1 = I32Reg();
5088       auto *TLo2 = I32Reg();
5089       auto *TLo3 = I32Reg();
5090       auto *TLo4 = I32Reg();
5091       auto *TLo5 = I32Reg();
5092       auto *TLo6 = I32Reg();
5093       auto *TResHi = I32Reg();
5094       auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5095       auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5096       Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5097       Variable *SrcLoR = legalizeToReg(loOperand(Src));
5098       _addiu(THi1, SrcHiR, -1);
5099       _not(THi2, SrcHiR);
5100       _and(THi3, THi2, THi1);
5101       _clz(THi4, THi3);
5102       _addiu(THi5, getZero(), 64);
5103       _subu(THi6, THi5, THi4);
5104       _addiu(TLo1, SrcLoR, -1);
5105       _not(TLo2, SrcLoR);
5106       _and(TLo3, TLo2, TLo1);
5107       _clz(TLo4, TLo3);
5108       _addiu(TLo5, getZero(), 32);
5109       _subu(TLo6, TLo5, TLo4);
5110       _movn(THi6, TLo6, SrcLoR);
5111       _addiu(TResHi, getZero(), 0);
5112       _mov(DestHi, TResHi);
5113       _mov(DestLo, THi6);
5114       break;
5115     }
5116     default:
5117       llvm::report_fatal_error("Control flow should never have reached here.");
5118     }
5119     return;
5120   }
5121   case Intrinsics::Fabs: {
5122     if (isScalarFloatingType(DestTy)) {
5123       Variable *T = makeReg(DestTy);
5124       if (DestTy == IceType_f32) {
5125         _abs_s(T, legalizeToReg(Instr->getArg(0)));
5126       } else {
5127         _abs_d(T, legalizeToReg(Instr->getArg(0)));
5128       }
5129       _mov(Dest, T);
5130     }
5131     return;
5132   }
5133   case Intrinsics::Longjmp: {
5134     llvm::report_fatal_error("longjmp should have been prelowered.");
5135     return;
5136   }
5137   case Intrinsics::Memcpy: {
5138     llvm::report_fatal_error("memcpy should have been prelowered.");
5139     return;
5140   }
5141   case Intrinsics::Memmove: {
5142     llvm::report_fatal_error("memmove should have been prelowered.");
5143     return;
5144   }
5145   case Intrinsics::Memset: {
5146     llvm::report_fatal_error("memset should have been prelowered.");
5147     return;
5148   }
5149   case Intrinsics::NaClReadTP: {
5150     if (SandboxingType != ST_NaCl)
5151       llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5152     else {
5153       auto *T8 = makeReg(IceType_i32, RegMIPS32::Reg_T8);
5154       Context.insert<InstFakeDef>(T8);
5155       Variable *TP = legalizeToReg(OperandMIPS32Mem::create(
5156           Func, getPointerType(), T8,
5157           llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5158       _mov(Dest, TP);
5159     }
5160     return;
5161   }
5162   case Intrinsics::Setjmp: {
5163     llvm::report_fatal_error("setjmp should have been prelowered.");
5164     return;
5165   }
5166   case Intrinsics::Sqrt: {
5167     if (isScalarFloatingType(DestTy)) {
5168       Variable *T = makeReg(DestTy);
5169       if (DestTy == IceType_f32) {
5170         _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
5171       } else {
5172         _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
5173       }
5174       _mov(Dest, T);
5175     } else {
5176       assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5177       UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5178     }
5179     return;
5180   }
5181   case Intrinsics::Stacksave: {
5182     Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
5183     _mov(Dest, SP);
5184     return;
5185   }
5186   case Intrinsics::Stackrestore: {
5187     Variable *Val = legalizeToReg(Instr->getArg(0));
5188     Sandboxer(this).reset_sp(Val);
5189     return;
5190   }
5191   case Intrinsics::Trap: {
5192     const uint32_t TrapCodeZero = 0;
5193     _teq(getZero(), getZero(), TrapCodeZero);
5194     return;
5195   }
5196   case Intrinsics::LoadSubVector: {
5197     UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5198     return;
5199   }
5200   case Intrinsics::StoreSubVector: {
5201     UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5202     return;
5203   }
5204   default: // UnknownIntrinsic
5205     Func->setError("Unexpected intrinsic");
5206     return;
5207   }
5208   return;
5209 }
5210 
lowerLoad(const InstLoad * Instr)5211 void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
5212   // A Load instruction can be treated the same as an Assign instruction, after
5213   // the source operand is transformed into an OperandMIPS32Mem operand.
5214   Type Ty = Instr->getDest()->getType();
5215   Operand *Src0 = formMemoryOperand(Instr->getSourceAddress(), Ty);
5216   Variable *DestLoad = Instr->getDest();
5217   auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5218   lowerAssign(Assign);
5219 }
5220 
5221 namespace {
dumpAddressOpt(const Cfg * Func,const Variable * Base,int32_t Offset,const Inst * Reason)5222 void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
5223                     const Inst *Reason) {
5224   if (!BuildDefs::dump())
5225     return;
5226   if (!Func->isVerbose(IceV_AddrOpt))
5227     return;
5228   OstreamLocker _(Func->getContext());
5229   Ostream &Str = Func->getContext()->getStrDump();
5230   Str << "Instruction: ";
5231   Reason->dumpDecorated(Func);
5232   Str << "  results in Base=";
5233   if (Base)
5234     Base->dump(Func);
5235   else
5236     Str << "<null>";
5237   Str << ", Offset=" << Offset << "\n";
5238 }
5239 
matchAssign(const VariablesMetadata * VMetadata,Variable ** Var,int32_t * Offset,const Inst ** Reason)5240 bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
5241                  int32_t *Offset, const Inst **Reason) {
5242   // Var originates from Var=SrcVar ==> set Var:=SrcVar
5243   if (*Var == nullptr)
5244     return false;
5245   const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5246   if (!VarAssign)
5247     return false;
5248   assert(!VMetadata->isMultiDef(*Var));
5249   if (!llvm::isa<InstAssign>(VarAssign))
5250     return false;
5251 
5252   Operand *SrcOp = VarAssign->getSrc(0);
5253   bool Optimized = false;
5254   if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5255     if (!VMetadata->isMultiDef(SrcVar) ||
5256         // TODO: ensure SrcVar stays single-BB
5257         false) {
5258       Optimized = true;
5259       *Var = SrcVar;
5260     } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
5261       int32_t MoreOffset = Const->getValue();
5262       int32_t NewOffset = MoreOffset + *Offset;
5263       if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
5264         return false;
5265       *Var = nullptr;
5266       *Offset += NewOffset;
5267       Optimized = true;
5268     }
5269   }
5270 
5271   if (Optimized) {
5272     *Reason = VarAssign;
5273   }
5274 
5275   return Optimized;
5276 }
5277 
isAddOrSub(const Inst * Instr,InstArithmetic::OpKind * Kind)5278 bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5279   if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5280     switch (Arith->getOp()) {
5281     default:
5282       return false;
5283     case InstArithmetic::Add:
5284     case InstArithmetic::Sub:
5285       *Kind = Arith->getOp();
5286       return true;
5287     }
5288   }
5289   return false;
5290 }
5291 
bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
                     int32_t *Offset, const Inst **Reason) {
  // Base is Base=Var+Const || Base is Base=Const+Var ==>
  //   set Base=Var, Offset+=Const
  // Base is Base=Var-Const ==>
  //   set Base=Var, Offset-=Const
  // Returns true (and records the defining instruction in *Reason) when the
  // fold is applied; *Base and *Offset are only written on success.
  if (*Base == nullptr)
    return false;
  const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
  if (BaseInst == nullptr) {
    return false;
  }
  assert(!VMetadata->isMultiDef(*Base));

  auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
  if (ArithInst == nullptr)
    return false;
  InstArithmetic::OpKind Kind;
  if (!isAddOrSub(ArithInst, &Kind))
    return false;
  bool IsAdd = Kind == InstArithmetic::Add;
  Operand *Src0 = ArithInst->getSrc(0);
  Operand *Src1 = ArithInst->getSrc(1);
  auto *Var0 = llvm::dyn_cast<Variable>(Src0);
  auto *Var1 = llvm::dyn_cast<Variable>(Src1);
  auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
  auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
  Variable *NewBase = nullptr;
  int32_t NewOffset = *Offset;

  // An operand that is neither a Variable nor a plain i32 constant (i.e. a
  // relocatable constant) cannot be folded into the immediate offset.
  if (Var0 == nullptr && Const0 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src0));
    return false;
  }

  if (Var1 == nullptr && Const1 == nullptr) {
    assert(llvm::isa<ConstantRelocatable>(Src1));
    return false;
  }

  if (Var0 && Var1)
    // TODO(jpp): merge base/index splitting into here.
    return false;
  // Var-Const folds, but Const-Var does not (the variable would be negated).
  if (!IsAdd && Var1)
    return false;
  if (Var0)
    NewBase = Var0;
  else if (Var1)
    NewBase = Var1;
  // Compute the updated constant offset.
  if (Const0) {
    int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }
  if (Const1) {
    int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
    if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
      return false;
    NewOffset += MoreOffset;
  }

  // Update the computed address parameters once we are sure optimization
  // is valid.
  *Base = NewBase;
  *Offset = NewOffset;
  *Reason = BaseInst;
  return true;
}
5362 } // end of anonymous namespace
5363 
OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
                                                   const Inst *LdSt,
                                                   Operand *Base) {
  // Build a base+offset MIPS32 memory operand for the load/store LdSt by
  // repeatedly folding assignments and add/sub-of-constant definitions of the
  // base variable into an immediate offset. Returns nullptr when no memory
  // operand can be formed (vector types, or a non-variable base).
  assert(Base != nullptr);
  int32_t OffsetImm = 0;

  Func->resetCurrentNode();
  if (Func->isVerbose(IceV_AddrOpt)) {
    OstreamLocker _(Func->getContext());
    Ostream &Str = Func->getContext()->getStrDump();
    Str << "\nAddress mode formation:\t";
    LdSt->dumpDecorated(Func);
  }

  if (isVectorType(Ty)) {
    return nullptr;
  }

  auto *BaseVar = llvm::dyn_cast<Variable>(Base);
  if (BaseVar == nullptr)
    return nullptr;

  const VariablesMetadata *VMetadata = Func->getVMetadata();
  const Inst *Reason = nullptr;

  // Iterate to a fixed point: each successful match may expose further
  // folding opportunities. Reason doubles as the progress flag and as the
  // instruction reported by dumpAddressOpt.
  do {
    if (Reason != nullptr) {
      dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
      Reason = nullptr;
    }

    if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }

    if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
      continue;
    }
  } while (Reason);

  if (BaseVar == nullptr) {
    // We need base register rather than just OffsetImm. Move the OffsetImm to
    // BaseVar and form 0(BaseVar) addressing.
    const Type PointerType = getPointerType();
    BaseVar = makeReg(PointerType);
    Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
    OffsetImm = 0;
  } else if (OffsetImm != 0) {
    // If the OffsetImm is more than signed 16-bit value then add it in the
    // BaseVar and form 0(BaseVar) addressing.
    const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
    const InstArithmetic::OpKind Op =
        OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
    constexpr bool ZeroExt = false;
    if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
      const Type PointerType = getPointerType();
      Variable *T = makeReg(PointerType);
      Context.insert<InstArithmetic>(Op, T, BaseVar,
                                     Ctx->getConstantInt32(PositiveOffset));
      BaseVar = T;
      OffsetImm = 0;
    }
  }

  assert(BaseVar != nullptr);
  // Whatever offset remains must fit the 16-bit immediate field checked here.
  assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
                       : (OffsetImm & 0x0000ffff) == OffsetImm);

  return OperandMIPS32Mem::create(
      Func, Ty, BaseVar,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
}
5436 
doAddressOptLoad()5437 void TargetMIPS32::doAddressOptLoad() {
5438   Inst *Instr = iteratorToInst(Context.getCur());
5439   assert(llvm::isa<InstLoad>(Instr));
5440   Variable *Dest = Instr->getDest();
5441   Operand *Addr = Instr->getSrc(0);
5442   if (OperandMIPS32Mem *Mem =
5443           formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5444     Instr->setDeleted();
5445     Context.insert<InstLoad>(Dest, Mem);
5446   }
5447 }
5448 
randomlyInsertNop(float Probability,RandomNumberGenerator & RNG)5449 void TargetMIPS32::randomlyInsertNop(float Probability,
5450                                      RandomNumberGenerator &RNG) {
5451   RandomNumberGeneratorWrapper RNGW(RNG);
5452   if (RNGW.getTrueWithProbability(Probability)) {
5453     _nop();
5454   }
5455 }
5456 
void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
  // Phis are handled by prelowerPhis before regular lowering; encountering
  // one here indicates a pass-ordering error.
  Func->setError("Phi found in regular instruction list");
}
5460 
void TargetMIPS32::lowerRet(const InstRet *Instr) {
  // Move the return value (if any) into the return registers selected below
  // (v0/v1 for integers, f0 for floats, v0/v1/a0/a1 or an implicit buffer for
  // vectors), then emit the return through $ra. Reg is threaded into _ret so
  // liveness analysis keeps the return-value register alive.
  Variable *Reg = nullptr;
  if (Instr->hasRetValue()) {
    Operand *Src0 = Instr->getRetValue();
    switch (Src0->getType()) {
    case IceType_f32: {
      // f32 result is returned in $f0.
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_f64: {
      // f64 result occupies the $f0/$f1 pair.
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i1:
    case IceType_i8:
    case IceType_i16:
    case IceType_i32: {
      // Integer results up to 32 bits are returned in $v0.
      Operand *Src0F = legalizeToReg(Src0);
      Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
      _mov(Reg, Src0F);
      break;
    }
    case IceType_i64: {
      // i64 is split across $v0 (low word) and $v1 (high word); the fake use
      // keeps $v1 live until the return.
      Src0 = legalizeUndef(Src0);
      Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
      Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
      Reg = R0;
      Context.insert<InstFakeUse>(R1);
      break;
    }
    case IceType_v4i1:
    case IceType_v8i1:
    case IceType_v16i1:
    case IceType_v16i8:
    case IceType_v8i16:
    case IceType_v4i32: {
      // Integer vectors are returned in the four 32-bit containers
      // $v0/$v1/$a0/$a1; fake uses keep the extra registers alive.
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Variable *V0 =
          legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
      Variable *V1 =
          legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
      Variable *A0 =
          legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
      Variable *A1 =
          legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
      Reg = V0;
      Context.insert<InstFakeUse>(V1);
      Context.insert<InstFakeUse>(A0);
      Context.insert<InstFakeUse>(A1);
      break;
    }
    case IceType_v4f32: {
      auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
      Reg = getImplicitRet();
      auto *RegT = legalizeToReg(Reg);
      // Return the vector through buffer in implicit argument a0
      for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
        OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
            Func, IceType_f32, RegT,
            llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
        Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
        _sw(Var, Mem);
      }
      // The buffer address is echoed back to the caller in $v0.
      Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
      _mov(V0, Reg); // move v0,a0
      Context.insert<InstFakeUse>(Reg);
      Context.insert<InstFakeUse>(V0);
      break;
    }
    default:
      llvm::report_fatal_error("Ret: Invalid type.");
      break;
    }
  }
  _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
}
5541 
void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
  // Lower select via conditional moves: movn overwrites the false-value
  // register with the true value when the condition register is nonzero, so
  // the (possibly clobbered) false register always ends up holding the
  // selected result.
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest->getType();

  if (isVectorType(DestTy)) {
    llvm::report_fatal_error("Select: Destination type is vector");
    return;
  }

  Variable *DestR = nullptr;
  Variable *DestHiR = nullptr;
  Variable *SrcTR = nullptr;
  Variable *SrcTHiR = nullptr;
  Variable *SrcFR = nullptr;
  Variable *SrcFHiR = nullptr;

  if (DestTy == IceType_i64) {
    // i64 operands are split into lo/hi 32-bit register pairs.
    DestR = llvm::cast<Variable>(loOperand(Dest));
    DestHiR = llvm::cast<Variable>(hiOperand(Dest));
    SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
    SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
    SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
  } else {
    SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
    SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
  }

  Variable *ConditionR = legalizeToReg(Instr->getCondition());

  assert(Instr->getCondition()->getType() == IceType_i1);

  switch (DestTy) {
  case IceType_i1:
  case IceType_i8:
  case IceType_i16:
  case IceType_i32:
    _movn(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_i64:
    // Both halves are selected under the same condition.
    _movn(SrcFR, SrcTR, ConditionR);
    _movn(SrcFHiR, SrcTHiR, ConditionR);
    _mov(DestR, SrcFR);
    _mov(DestHiR, SrcFHiR);
    break;
  case IceType_f32:
    _movn_s(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  case IceType_f64:
    _movn_d(SrcFR, SrcTR, ConditionR);
    _mov(Dest, SrcFR);
    break;
  default:
    llvm::report_fatal_error("Select: Invalid type.");
  }
}
5600 
void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
  // Vector shuffles are not yet implemented for the MIPS32 target.
  UnimplementedLoweringError(this, Instr);
}
5604 
lowerStore(const InstStore * Instr)5605 void TargetMIPS32::lowerStore(const InstStore *Instr) {
5606   Operand *Value = Instr->getData();
5607   Operand *Addr = Instr->getAddr();
5608   OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5609   Type Ty = NewAddr->getType();
5610 
5611   if (Ty == IceType_i64) {
5612     Value = legalizeUndef(Value);
5613     Variable *ValueHi = legalizeToReg(hiOperand(Value));
5614     Variable *ValueLo = legalizeToReg(loOperand(Value));
5615     _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
5616     _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
5617   } else if (isVectorType(Value->getType())) {
5618     auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
5619     for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
5620       auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
5621       auto *MCont = llvm::cast<OperandMIPS32Mem>(
5622           getOperandAtIndex(NewAddr, IceType_i32, i));
5623       _sw(DCont, MCont);
5624     }
5625   } else {
5626     Variable *ValueR = legalizeToReg(Value);
5627     _sw(ValueR, NewAddr);
5628   }
5629 }
5630 
doAddressOptStore()5631 void TargetMIPS32::doAddressOptStore() {
5632   Inst *Instr = iteratorToInst(Context.getCur());
5633   assert(llvm::isa<InstStore>(Instr));
5634   Operand *Src = Instr->getSrc(0);
5635   Operand *Addr = Instr->getSrc(1);
5636   if (OperandMIPS32Mem *Mem =
5637           formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5638     Instr->setDeleted();
5639     Context.insert<InstStore>(Src, Mem);
5640   }
5641 }
5642 
void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
  // Lower a switch as a linear chain of compare-and-branch instructions, one
  // per case, followed by an unconditional branch to the default label.
  Operand *Src = Instr->getComparison();
  SizeT NumCases = Instr->getNumCases();
  if (Src->getType() == IceType_i64) {
    Src = legalizeUndef(Src);
    Variable *Src0Lo = legalizeToReg(loOperand(Src));
    Variable *Src0Hi = legalizeToReg(hiOperand(Src));
    for (SizeT I = 0; I < NumCases; ++I) {
      Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
      Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
      CfgNode *TargetTrue = Instr->getLabel(I);
      constexpr CfgNode *NoTarget = nullptr;
      ValueHi = legalizeToReg(ValueHi);
      // An i64 case matches only when both halves match: a mismatching high
      // word branches past the low-word compare via the intra-block label.
      InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
      _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
          CondMIPS32::Cond::NE);
      ValueLo = legalizeToReg(ValueLo);
      _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
      Context.insert(IntraLabel);
    }
    _br(Instr->getLabelDefault());
    return;
  }
  // 32-bit (or narrower) comparison: one equality branch per case.
  Variable *SrcVar = legalizeToReg(Src);
  assert(SrcVar->mustHaveReg());
  for (SizeT I = 0; I < NumCases; ++I) {
    Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
    CfgNode *TargetTrue = Instr->getLabel(I);
    constexpr CfgNode *NoTargetFalse = nullptr;
    Value = legalizeToReg(Value);
    _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
  }
  _br(Instr->getLabelDefault());
}
5677 
// Breakpoint lowering is not implemented for MIPS32; report it via the
// standard unimplemented-lowering path.
void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
  UnimplementedLoweringError(this, Instr);
}
5681 
lowerUnreachable(const InstUnreachable *)5682 void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
5683   const uint32_t TrapCodeZero = 0;
5684   _teq(getZero(), getZero(), TrapCodeZero);
5685 }
5686 
lowerOther(const Inst * Instr)5687 void TargetMIPS32::lowerOther(const Inst *Instr) {
5688   if (llvm::isa<InstMIPS32Sync>(Instr)) {
5689     _sync();
5690   } else {
5691     TargetLowering::lowerOther(Instr);
5692   }
5693 }
5694 
5695 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
5696 // integrity of liveness analysis. Undef values are also turned into zeroes,
5697 // since loOperand() and hiOperand() don't expect Undef input.
// Delegate to the shared 32-bit phi prelowering helper (see the comment
// above: splits i64 phis and zeroes out undef inputs).
void TargetMIPS32::prelowerPhis() {
  PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
}
5701 
postLower()5702 void TargetMIPS32::postLower() {
5703   if (Func->getOptLevel() == Opt_m1)
5704     return;
5705   markRedefinitions();
5706   Context.availabilityUpdate();
5707 }
5708 
// Randomized register allocation order is not implemented for MIPS32.
void TargetMIPS32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
  (void)Permutation;
  (void)ExcludeRegisters;
  (void)Salt;
  UnimplementedError(getFlags());
}
5717 
5718 /* TODO(jvoung): avoid duplicate symbols with multiple targets.
5719 void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
5720   llvm_unreachable("Not expecting to emitWithoutDollar undef");
5721 }
5722 
5723 void ConstantUndef::emit(GlobalContext *) const {
5724   llvm_unreachable("undef value encountered by emitter.");
5725 }
5726 */
5727 
// Data lowering for MIPS32 simply forwards the context to the base class.
TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}
5730 
5731 // Generate .MIPS.abiflags section. This section contains a versioned data
5732 // structure with essential information required for loader to determine the
5733 // requirements of the application.
emitTargetRODataSections()5734 void TargetDataMIPS32::emitTargetRODataSections() {
5735   struct MipsABIFlagsSection Flags;
5736   ELFObjectWriter *Writer = Ctx->getObjectWriter();
5737   const std::string Name = ".MIPS.abiflags";
5738   const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
5739   const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
5740   const llvm::ELF::Elf64_Xword ShAddralign = 8;
5741   const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
5742   Writer->writeTargetRODataSection(
5743       Name, ShType, ShFlags, ShAddralign, ShEntsize,
5744       llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
5745 }
5746 
lowerGlobals(const VariableDeclarationList & Vars,const std::string & SectionSuffix)5747 void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
5748                                     const std::string &SectionSuffix) {
5749   const bool IsPIC = getFlags().getUseNonsfi();
5750   switch (getFlags().getOutFileType()) {
5751   case FT_Elf: {
5752     ELFObjectWriter *Writer = Ctx->getObjectWriter();
5753     Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
5754   } break;
5755   case FT_Asm:
5756   case FT_Iasm: {
5757     OstreamLocker L(Ctx);
5758     for (const VariableDeclaration *Var : Vars) {
5759       if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
5760         emitGlobal(*Var, SectionSuffix);
5761       }
5762     }
5763   } break;
5764   }
5765 }
5766 
5767 namespace {
5768 template <typename T> struct ConstantPoolEmitterTraits;
5769 
5770 static_assert(sizeof(uint64_t) == 8,
5771               "uint64_t is supposed to be 8 bytes wide.");
5772 
5773 // TODO(jaydeep.patil): implement the following when implementing constant
5774 // randomization:
5775 //  * template <> struct ConstantPoolEmitterTraits<uint8_t>
5776 //  * template <> struct ConstantPoolEmitterTraits<uint16_t>
5777 //  * template <> struct ConstantPoolEmitterTraits<uint32_t>
// Traits for emitting pooled f32 constants: a float is emitted as a .word
// holding its 32-bit bit pattern (zero-extended to the uint64_t interface).
template <> struct ConstantPoolEmitterTraits<float> {
  using ConstantType = ConstantFloat;
  static constexpr Type IceType = IceType_f32;
  // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
  // about them being constexpr.
  static const char AsmTag[];
  static const char TypeName[];
  // Reinterpret the float's bits as an integer (no value conversion).
  static uint64_t bitcastToUint64(float Value) {
    static_assert(sizeof(Value) == sizeof(uint32_t),
                  "Float should be 4 bytes.");
    const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
    return static_cast<uint64_t>(IntValue);
  }
};
const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
5794 
// Traits for emitting pooled f64 constants: a double is emitted as a .quad
// holding its 64-bit bit pattern.
template <> struct ConstantPoolEmitterTraits<double> {
  using ConstantType = ConstantDouble;
  static constexpr Type IceType = IceType_f64;
  static const char AsmTag[];
  static const char TypeName[];
  // Reinterpret the double's bits as an integer (no value conversion).
  static uint64_t bitcastToUint64(double Value) {
    static_assert(sizeof(double) == sizeof(uint64_t),
                  "Double should be 8 bytes.");
    return Utils::bitCopy<uint64_t>(Value);
  }
};
const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
5808 
// Emit one pooled constant as assembly text in the form:
//   <label>:
//       <.word|.quad>  0x<hex bits>  /* <type> <value> */
template <typename T>
void emitConstant(
    Ostream &Str,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
  if (!BuildDefs::dump())
    return;
  using Traits = ConstantPoolEmitterTraits<T>;
  Str << Const->getLabelName();
  T Value = Const->getValue();
  Str << ":\n\t" << Traits::AsmTag << "\t0x";
  Str.write_hex(Traits::bitcastToUint64(Value));
  Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
}
5822 
// Emit the whole constant pool for type T as an assembly .rodata.cstN
// mergeable section, one entry per pooled constant.
template <typename T> void emitConstantPool(GlobalContext *Ctx) {
  if (!BuildDefs::dump())
    return;
  using Traits = ConstantPoolEmitterTraits<T>;
  static constexpr size_t MinimumAlignment = 4;
  SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
  assert((Align % 4) == 0 && "Constants should be aligned");
  Ostream &Str = Ctx->getStrEmit();
  ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
  // .align takes a power-of-two exponent: 2 for 4-byte, 3 for 8-byte.
  Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
      << "\n"
      << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
  if (getFlags().getReorderPooledConstants()) {
    // TODO(jaydeep.patil): add constant pooling.
    UnimplementedError(getFlags());
  }
  for (Constant *C : Pool) {
    // Constants such as float/double zero are materialized inline instead.
    if (!C->getShouldBePooled()) {
      continue;
    }
    emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
  }
}
5846 } // end of anonymous namespace
5847 
lowerConstants()5848 void TargetDataMIPS32::lowerConstants() {
5849   if (getFlags().getDisableTranslation())
5850     return;
5851   switch (getFlags().getOutFileType()) {
5852   case FT_Elf: {
5853     ELFObjectWriter *Writer = Ctx->getObjectWriter();
5854     Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5855     Writer->writeConstantPool<ConstantDouble>(IceType_f64);
5856   } break;
5857   case FT_Asm:
5858   case FT_Iasm: {
5859     OstreamLocker _(Ctx);
5860     emitConstantPool<float>(Ctx);
5861     emitConstantPool<double>(Ctx);
5862     break;
5863   }
5864   }
5865 }
5866 
// Jump tables require no data emission for MIPS32 at present; this is a
// no-op beyond honoring the disable-translation flag.
void TargetDataMIPS32::lowerJumpTables() {
  if (getFlags().getDisableTranslation())
    return;
}
5871 
5872 // Helper for legalize() to emit the right code to lower an operand to a
5873 // register of the appropriate type.
copyToReg(Operand * Src,RegNumT RegNum)5874 Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
5875   Type Ty = Src->getType();
5876   Variable *Reg = makeReg(Ty, RegNum);
5877   if (isVectorType(Ty)) {
5878     llvm::report_fatal_error("Invalid copy from vector type.");
5879   } else {
5880     if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
5881       _lw(Reg, Mem);
5882     } else {
5883       _mov(Reg, Src);
5884     }
5885   }
5886   return Reg;
5887 }
5888 
// Convert From into an operand kind permitted by the Allowed mask,
// optionally forcing the result into physical register RegNum. Handles the
// three operand families in turn: OperandMIPS32Mem, Constant, Variable.
Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
                                RegNumT RegNum) {
  Type Ty = From->getType();
  // Assert that a physical register is allowed.  To date, all calls
  // to legalize() allow a physical register. Legal_Flex converts
  // registers to the right type OperandMIPS32FlexReg as needed.
  assert(Allowed & Legal_Reg);

  if (RegNum.hasNoValue()) {
    // No specific register was requested, so first try to reuse an
    // equivalent variable recorded by the availability peephole instead of
    // generating a fresh copy.
    if (Variable *Subst = getContext().availabilityGet(From)) {
      // At this point we know there is a potential substitution available.
      if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
          !Subst->hasReg()) {
        // At this point we know the substitution will have a register.
        if (From->getType() == Subst->getType()) {
          // At this point we know the substitution's register is compatible.
          return Subst;
        }
      }
    }
  }

  // Go through the various types of operands:
  // OperandMIPS32Mem, Constant, and Variable.
  // Given the above assertion, if type of operand is not legal
  // (e.g., OperandMIPS32Mem and !Legal_Mem), we can always copy
  // to a register.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
    // Base must be in a physical register.
    Variable *Base = Mem->getBase();
    ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
    Variable *RegBase = nullptr;
    assert(Base);

    RegBase = llvm::cast<Variable>(
        legalize(Base, Legal_Reg | Legal_Rematerializable));

    // A non-zero offset must fit the signed immediate field of the MIPS
    // load/store encoding; otherwise the addressing mode is invalid.
    if (Offset != nullptr && Offset->getValue() != 0) {
      static constexpr bool ZeroExt = false;
      if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
        llvm::report_fatal_error("Invalid memory offset.");
      }
    }

    // Create a new operand if there was a change.
    if (Base != RegBase) {
      Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
                                     Mem->getAddrMode());
    }

    if (Allowed & Legal_Mem) {
      From = Mem;
    } else {
      // Memory operands are not allowed here; load the value to a register.
      Variable *Reg = makeReg(Ty, RegNum);
      _lw(Reg, Mem);
      From = Reg;
    }
    return From;
  }

  if (llvm::isa<Constant>(From)) {
    if (llvm::isa<ConstantUndef>(From)) {
      From = legalizeUndef(From, RegNum);
      if (isVectorType(Ty))
        return From;
    }
    if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
      // Relocatable symbol address: materialize with a lui %hi / addiu %lo
      // pair. NOTE(review): both makeReg calls pass RegNum; the lui
      // temporary likely only needs an arbitrary register -- confirm intent.
      Variable *Reg = makeReg(Ty, RegNum);
      Variable *TReg = makeReg(Ty, RegNum);
      _lui(TReg, C, RO_Hi);
      _addiu(Reg, TReg, C, RO_Lo);
      return Reg;
    } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
      const uint32_t Value = C32->getValue();
      // Use addiu if the immediate is a 16bit value. Otherwise load it
      // using a lui-ori instructions.
      Variable *Reg = makeReg(Ty, RegNum);
      if (isInt<16>(int32_t(Value))) {
        // The FakeDef lets liveness treat $zero as externally defined.
        Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
        Context.insert<InstFakeDef>(Zero);
        _addiu(Reg, Zero, Value);
      } else {
        uint32_t UpperBits = (Value >> 16) & 0xFFFF;
        uint32_t LowerBits = Value & 0xFFFF;
        if (LowerBits) {
          Variable *TReg = makeReg(Ty, RegNum);
          _lui(TReg, Ctx->getConstantInt32(UpperBits));
          _ori(Reg, TReg, LowerBits);
        } else {
          // Low half is zero, so a single lui suffices.
          _lui(Reg, Ctx->getConstantInt32(UpperBits));
        }
      }
      return Reg;
    } else if (isScalarFloatingType(Ty)) {
      auto *CFrom = llvm::cast<Constant>(From);
      Variable *TReg = makeReg(Ty);
      if (!CFrom->getShouldBePooled()) {
        // Float/Double constant 0 is not pooled.
        Context.insert<InstFakeDef>(TReg);
        _mov(TReg, getZero());
      } else {
        // Load floats/doubles from literal pool.
        Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
        Variable *TReg1 = makeReg(getPointerType());
        _lui(TReg1, Offset, RO_Hi);
        OperandMIPS32Mem *Addr =
            OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
        if (Ty == IceType_f32)
          Sandboxer(this).lwc1(TReg, Addr, RO_Lo);
        else
          Sandboxer(this).ldc1(TReg, Addr, RO_Lo);
      }
      return copyToReg(TReg, RegNum);
    }
  }

  if (auto *Var = llvm::dyn_cast<Variable>(From)) {
    if (Var->isRematerializable()) {
      if (Allowed & Legal_Rematerializable) {
        return From;
      }

      // Rematerializable but not allowed as such here: force into a reg.
      Variable *T = makeReg(Var->getType(), RegNum);
      _mov(T, Var);
      return T;
    }
    // Check if the variable is guaranteed a physical register.  This
    // can happen either when the variable is pre-colored or when it is
    // assigned infinite weight.
    bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
    // We need a new physical register for the operand if:
    //   Mem is not allowed and Var isn't guaranteed a physical
    //   register, or
    //   RegNum is required and Var->getRegNum() doesn't match.
    if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
        (RegNum.hasValue() && RegNum != Var->getRegNum())) {
      From = copyToReg(From, RegNum);
    }
    return From;
  }
  return From;
}
6031 
namespace BoolFolding {
// TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
// and isValidConsumer()

// Only icmp results are currently tracked as foldable bool producers.
bool shouldTrackProducer(const Inst &Instr) {
  return Instr.getKind() == Inst::Icmp;
}

// Only a branch may consume a tracked bool producer for folding.
bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
} // end of namespace BoolFolding
6041 
// Record i1-producing instructions in Node (currently icmp only, per
// BoolFolding::shouldTrackProducer) whose single, block-local consumer is a
// branch, so lowering can fold the compare into the branch. Producers that
// fail any condition are dropped from KnownComputations; survivors are
// marked dead (not deleted) so later peepholes can still inspect them.
void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
  for (Inst &Instr : Node->getInsts()) {
    if (Instr.isDeleted())
      continue;
    // Check whether Instr is a valid producer.
    Variable *Dest = Instr.getDest();
    if (Dest // only consider instructions with an actual dest var; and
        && Dest->getType() == IceType_i1 // only bool-type dest vars; and
        && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
      KnownComputations.emplace(Dest->getIndex(),
                                ComputationEntry(&Instr, IceType_i1));
    }
    // Check each src variable against the map.
    FOREACH_VAR_IN_INST(Var, Instr) {
      SizeT VarNum = Var->getIndex();
      auto ComputationIter = KnownComputations.find(VarNum);
      if (ComputationIter == KnownComputations.end()) {
        continue;
      }

      // Every use of a tracked producer must be a valid consumer, otherwise
      // the producer is no longer foldable and is dropped.
      ++ComputationIter->second.NumUses;
      switch (ComputationIter->second.ComputationType) {
      default:
        KnownComputations.erase(VarNum);
        continue;
      case IceType_i1:
        if (!BoolFolding::isValidConsumer(Instr)) {
          KnownComputations.erase(VarNum);
          continue;
        }
        break;
      }

      if (Instr.isLastUse(Var)) {
        ComputationIter->second.IsLiveOut = false;
      }
    }
  }

  for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
       Iter != End;) {
    // Disable the folding if its dest may be live beyond this block.
    if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
      Iter = KnownComputations.erase(Iter);
      continue;
    }

    // Mark as "dead" rather than outright deleting. This is so that other
    // peephole style optimizations during or before lowering have access to
    // this instruction in undeleted form. See for example
    // tryOptimizedCmpxchgCmpBr().
    Iter->second.Instr->setDead();
    ++Iter;
  }
}
6097 
// Header lowering for MIPS32 simply forwards the context to the base class.
TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}
6100 
// Emit the assembly-file prologue: assembler mode directives (disable
// microMIPS/MIPS16 encodings and $at usage), plus bundle alignment when
// sandboxing is enabled.
void TargetHeaderMIPS32::lower() {
  if (!BuildDefs::dump())
    return;
  OstreamLocker L(Ctx);
  Ostream &Str = Ctx->getStrEmit();
  Str << "\t.set\t"
      << "nomicromips\n";
  Str << "\t.set\t"
      << "nomips16\n";
  Str << "\t.set\t"
      << "noat\n";
  // 2^4 = 16-byte bundles for the sandboxed instruction stream.
  if (getFlags().getUseSandboxing())
    Str << "\t.bundle_align_mode 4\n";
}
6115 
// Storage for the static register-set tables (presumably populated by
// TargetMIPS32::staticInit() -- confirm against that definition).
SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
6119 
// A Sandboxer wraps a target and a bundle-lock option; the bundle itself is
// only created on demand by createAutoBundle().
TargetMIPS32::Sandboxer::Sandboxer(TargetMIPS32 *Target,
                                   InstBundleLock::Option BundleOption)
    : Target(Target), BundleOption(BundleOption) {}
6123 
// Destructor closes any AutoBundle opened via createAutoBundle() (RAII).
TargetMIPS32::Sandboxer::~Sandboxer() {}
6125 
// Open a bundle lock so the following masked-access instruction pair cannot
// be split across a bundle boundary.
void TargetMIPS32::Sandboxer::createAutoBundle() {
  Bundler = makeUnique<AutoBundle>(Target, BundleOption);
}
6129 
// Adjust SP by StackOffset. Under sandboxing the new SP is masked with T7
// (presumably the data-address sandbox mask -- see the NaCl MIPS sandbox
// model) inside the same bundle; the InstFakeDef models T7 as externally
// defined for liveness.
void TargetMIPS32::Sandboxer::addiu_sp(uint32_t StackOffset) {
  Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
  if (!Target->NeedSandboxing) {
    Target->_addiu(SP, SP, StackOffset);
    return;
  }
  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
  Target->Context.insert<InstFakeDef>(T7);
  createAutoBundle();
  Target->_addiu(SP, SP, StackOffset);
  Target->_and(SP, SP, T7);
}
6142 
// Sandboxed load: mask the base address with T7 inside the same bundle as
// the lw. SP-based accesses are trusted; a T8 base is also skipped
// (presumably already masked by the caller -- TODO confirm, the other
// accessors don't make this exception). A load that writes SP re-masks the
// new SP value afterwards.
void TargetMIPS32::Sandboxer::lw(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum()) &&
      (RegMIPS32::Reg_T8 != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_lw(Dest, Mem);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6159 
// Sandboxed load-linked: mask the base with T7 in the same bundle as the ll
// (SP bases trusted); re-mask the result if it lands in SP.
void TargetMIPS32::Sandboxer::ll(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_ll(Dest, Mem);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6175 
// Sandboxed store-conditional: mask the base with T7 in the same bundle as
// the sc (SP bases trusted). Dest carries the value/success register.
void TargetMIPS32::Sandboxer::sc(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_sc(Dest, Mem);
}
6186 
// Sandboxed store: mask the base with T7 in the same bundle as the sw (SP
// bases trusted). Despite the name, Dest is the value being stored.
void TargetMIPS32::Sandboxer::sw(Variable *Dest, OperandMIPS32Mem *Mem) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_sw(Dest, Mem);
}
6197 
// Sandboxed FP single-precision load: mask the base with T7 in the same
// bundle as the lwc1 (SP bases trusted); re-mask the result if it writes SP
// (defensive -- lwc1 targets FP registers).
void TargetMIPS32::Sandboxer::lwc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_lwc1(Dest, Mem, Reloc);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6214 
// Sandboxed FP double-precision load: mask the base with T7 in the same
// bundle as the ldc1 (SP bases trusted); re-mask the result if it writes SP
// (defensive -- ldc1 targets FP registers).
void TargetMIPS32::Sandboxer::ldc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
  Variable *Base = Mem->getBase();
  if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    createAutoBundle();
    Target->_and(Base, Base, T7);
  }
  Target->_ldc1(Dest, Mem, Reloc);
  if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
    auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
    Target->Context.insert<InstFakeDef>(T7);
    Target->_and(Dest, Dest, T7);
  }
}
6231 
ret(Variable * RetAddr,Variable * RetValue)6232 void TargetMIPS32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
6233   if (!Target->NeedSandboxing) {
6234     Target->_ret(RetAddr, RetValue);
6235   }
6236   auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6237   Target->Context.insert<InstFakeDef>(T6);
6238   createAutoBundle();
6239   Target->_and(RetAddr, RetAddr, T6);
6240   Target->_ret(RetAddr, RetValue);
6241 }
6242 
// Replace SP with Src. Under sandboxing the new SP is masked with T7 inside
// the same bundle as the move; the trailing InstFakeUse keeps the masked SP
// definition from being dead-code eliminated.
void TargetMIPS32::Sandboxer::reset_sp(Variable *Src) {
  Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
  if (!Target->NeedSandboxing) {
    Target->_mov(SP, Src);
    return;
  }
  auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
  Target->Context.insert<InstFakeDef>(T7);
  createAutoBundle();
  Target->_mov(SP, Src);
  Target->_and(SP, SP, T7);
  Target->getContext().insert<InstFakeUse>(SP);
}
6256 
// Emit a call. Under sandboxing, an indirect call target (a register) is
// masked with T6 inside the same bundle as the call; direct (non-register)
// targets need no masking. Returns the inserted call instruction.
InstMIPS32Call *TargetMIPS32::Sandboxer::jal(Variable *ReturnReg,
                                             Operand *CallTarget) {
  if (Target->NeedSandboxing) {
    createAutoBundle();
    if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
      auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
      Target->Context.insert<InstFakeDef>(T6);
      Target->_and(CallTargetR, CallTargetR, T6);
    }
  }
  return Target->Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
}
6269 
6270 } // end of namespace MIPS32
6271 } // end of namespace Ice
6272