//
// The Subzero Code Generator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief Implements the TargetLoweringMIPS32 class, which consists almost
/// entirely of the lowering sequence for each high-level instruction.
///
//===----------------------------------------------------------------------===//

#include "IceTargetLoweringMIPS32.h"

#include "IceCfg.h"
#include "IceCfgNode.h"
#include "IceClFlags.h"
#include "IceDefs.h"
#include "IceELFObjectWriter.h"
#include "IceGlobalInits.h"
#include "IceInstMIPS32.h"
#include "IceInstVarIter.h"
#include "IceLiveness.h"
#include "IceOperand.h"
#include "IcePhiLoweringImpl.h"
#include "IceRegistersMIPS32.h"
#include "IceTargetLoweringMIPS32.def"
#include "IceUtils.h"
#include "llvm/Support/MathExtras.h"

namespace MIPS32 {
std::unique_ptr<::Ice::TargetLowering> createTargetLowering(::Ice::Cfg *Func) {
  return ::Ice::MIPS32::TargetMIPS32::create(Func);
}

std::unique_ptr<::Ice::TargetDataLowering>
createTargetDataLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetDataMIPS32::create(Ctx);
}

std::unique_ptr<::Ice::TargetHeaderLowering>
createTargetHeaderLowering(::Ice::GlobalContext *Ctx) {
  return ::Ice::MIPS32::TargetHeaderMIPS32::create(Ctx);
}

void staticInit(::Ice::GlobalContext *Ctx) {
  ::Ice::MIPS32::TargetMIPS32::staticInit(Ctx);
}

bool shouldBePooled(const ::Ice::Constant *C) {
  return ::Ice::MIPS32::TargetMIPS32::shouldBePooled(C);
}

::Ice::Type getPointerType() {
  return ::Ice::MIPS32::TargetMIPS32::getPointerType();
}

} // end of namespace MIPS32

namespace Ice {
namespace MIPS32 {

using llvm::isInt;

namespace {

// The maximum number of arguments to pass in GPR registers.
constexpr uint32_t MIPS32_MAX_GPR_ARG = 4;

std::array<RegNumT, MIPS32_MAX_GPR_ARG> GPRArgInitializer;
std::array<RegNumT, MIPS32_MAX_GPR_ARG / 2> I64ArgInitializer;

constexpr uint32_t MIPS32_MAX_FP_ARG = 2;

std::array<RegNumT, MIPS32_MAX_FP_ARG> FP32ArgInitializer;
std::array<RegNumT, MIPS32_MAX_FP_ARG> FP64ArgInitializer;

const char *getRegClassName(RegClass C) {
  auto ClassNum = static_cast<RegClassMIPS32>(C);
  assert(ClassNum < RCMIPS32_NUM);
  switch (ClassNum) {
  default:
    assert(C < RC_Target);
    return regClassString(C);
    // Add handling of new register classes below.
  }
}

// Stack alignment
constexpr uint32_t MIPS32_STACK_ALIGNMENT_BYTES = 16;

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment required for the given type.
uint32_t applyStackAlignmentTy(uint32_t Value, Type Ty) {
  size_t typeAlignInBytes = typeWidthInBytes(Ty);
  // Vectors are stored on the stack with the same alignment as the i64 type.
  if (isVectorType(Ty))
    typeAlignInBytes = typeWidthInBytes(IceType_i64);
  return Utils::applyAlignment(Value, typeAlignInBytes);
}

// Value is in bytes. Return Value adjusted to the next highest multiple of the
// stack alignment.
uint32_t applyStackAlignment(uint32_t Value) {
  return Utils::applyAlignment(Value, MIPS32_STACK_ALIGNMENT_BYTES);
}
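
// A worked example for the two helpers above (a sketch; it assumes that
// Utils::applyAlignment rounds Value up to the next multiple of the given
// alignment):
//   applyStackAlignment(20)                 == 32 // next multiple of 16
//   applyStackAlignmentTy(6, IceType_i32)   == 8  // i32 aligns to 4 bytes
//   applyStackAlignmentTy(4, IceType_v4i32) == 8  // vectors align as i64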

} // end of anonymous namespace

TargetMIPS32::TargetMIPS32(Cfg *Func)
    : TargetLowering(Func), NeedSandboxing(SandboxingType == ST_NaCl) {}

void TargetMIPS32::assignVarStackSlots(VarList &SortedSpilledVariables,
                                       size_t SpillAreaPaddingBytes,
                                       size_t SpillAreaSizeBytes,
                                       size_t GlobalsAndSubsequentPaddingSize) {
  const VariablesMetadata *VMetadata = Func->getVMetadata();
  size_t GlobalsSpaceUsed = SpillAreaPaddingBytes;
  size_t NextStackOffset = SpillAreaPaddingBytes;
  CfgVector<size_t> LocalsSize(Func->getNumNodes());
  const bool SimpleCoalescing = !callsReturnsTwice();
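  // With simple coalescing, multi-block variables are stacked up in the
  // globals area, while single-block variables accumulate per-node in
  // LocalsSize; each node's locals area starts from the same base offset, so
  // locals from different nodes can share stack slots.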
  for (Variable *Var : SortedSpilledVariables) {
    size_t Increment = typeWidthInBytesOnStack(Var->getType());
    if (SimpleCoalescing && VMetadata->isTracked(Var)) {
      if (VMetadata->isMultiBlock(Var)) {
        GlobalsSpaceUsed += Increment;
        NextStackOffset = GlobalsSpaceUsed;
      } else {
        SizeT NodeIndex = VMetadata->getLocalUseNode(Var)->getIndex();
        LocalsSize[NodeIndex] += Increment;
        NextStackOffset = SpillAreaPaddingBytes +
                          GlobalsAndSubsequentPaddingSize +
                          LocalsSize[NodeIndex];
      }
    } else {
      NextStackOffset += Increment;
    }
    Var->setStackOffset(SpillAreaSizeBytes - NextStackOffset);
  }
}

void TargetMIPS32::staticInit(GlobalContext *Ctx) {
  (void)Ctx;
  RegNumT::setLimit(RegMIPS32::Reg_NUM);
  SmallBitVector IntegerRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector I64PairRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector Float32Registers(RegMIPS32::Reg_NUM);
  SmallBitVector Float64Registers(RegMIPS32::Reg_NUM);
  SmallBitVector VectorRegisters(RegMIPS32::Reg_NUM);
  SmallBitVector InvalidRegisters(RegMIPS32::Reg_NUM);
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  IntegerRegisters[RegMIPS32::val] = isInt;                                    \
  I64PairRegisters[RegMIPS32::val] = isI64Pair;                                \
  Float32Registers[RegMIPS32::val] = isFP32;                                   \
  Float64Registers[RegMIPS32::val] = isFP64;                                   \
  VectorRegisters[RegMIPS32::val] = isVec128;                                  \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  for (SizeT RegAlias : alias_init) {                                          \
    assert(!RegisterAliases[RegMIPS32::val][RegAlias] &&                       \
           "Duplicate alias for " #val);                                       \
    RegisterAliases[RegMIPS32::val].set(RegAlias);                             \
  }                                                                            \
  RegisterAliases[RegMIPS32::val].resize(RegMIPS32::Reg_NUM);                  \
  assert(RegisterAliases[RegMIPS32::val][RegMIPS32::val]);
  REGMIPS32_TABLE;
#undef X

  // TODO(mohit.bhakkad): Change these initializations once we provide an
  // argument-related field in the register tables.
  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG; i++)
    GPRArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0 + i);

  for (size_t i = 0; i < MIPS32_MAX_GPR_ARG / 2; i++)
    I64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_A0A1 + i);

  for (size_t i = 0; i < MIPS32_MAX_FP_ARG; i++) {
    FP32ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12 + i * 2);
    FP64ArgInitializer[i] = RegNumT::fixme(RegMIPS32::Reg_F12F13 + i);
  }

  TypeToRegisterSet[IceType_void] = InvalidRegisters;
  TypeToRegisterSet[IceType_i1] = IntegerRegisters;
  TypeToRegisterSet[IceType_i8] = IntegerRegisters;
  TypeToRegisterSet[IceType_i16] = IntegerRegisters;
  TypeToRegisterSet[IceType_i32] = IntegerRegisters;
  TypeToRegisterSet[IceType_i64] = IntegerRegisters;
  TypeToRegisterSet[IceType_f32] = Float32Registers;
  TypeToRegisterSet[IceType_f64] = Float64Registers;
  TypeToRegisterSet[IceType_v4i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i1] = VectorRegisters;
  TypeToRegisterSet[IceType_v16i8] = VectorRegisters;
  TypeToRegisterSet[IceType_v8i16] = VectorRegisters;
  TypeToRegisterSet[IceType_v4i32] = VectorRegisters;
  TypeToRegisterSet[IceType_v4f32] = VectorRegisters;

  for (size_t i = 0; i < llvm::array_lengthof(TypeToRegisterSet); ++i)
    TypeToRegisterSetUnfiltered[i] = TypeToRegisterSet[i];

  filterTypeToRegisterSet(Ctx, RegMIPS32::Reg_NUM, TypeToRegisterSet,
                          llvm::array_lengthof(TypeToRegisterSet),
                          RegMIPS32::getRegName, getRegClassName);
}

void TargetMIPS32::unsetIfNonLeafFunc() {
  for (CfgNode *Node : Func->getNodes()) {
    for (Inst &Instr : Node->getInsts()) {
      if (llvm::isa<InstCall>(&Instr)) {
        // Unset MaybeLeafFunc if a call instruction exists.
        MaybeLeafFunc = false;
        return;
      }
    }
  }
}

uint32_t TargetMIPS32::getStackAlignment() const {
  return MIPS32_STACK_ALIGNMENT_BYTES;
}

uint32_t TargetMIPS32::getCallStackArgumentsSizeBytes(const InstCall *Call) {
  TargetMIPS32::CallingConv CC;
  RegNumT DummyReg;
  size_t OutArgsSizeBytes = 0;
  Variable *Dest = Call->getDest();
  bool PartialOnStack = false;
  if (Dest != nullptr && isVectorFloatingType(Dest->getType())) {
    CC.discardReg(RegMIPS32::Reg_A0);
    // The next vector is partially on the stack.
    PartialOnStack = true;
  }
  for (SizeT i = 0, NumArgs = Call->getNumArgs(); i < NumArgs; ++i) {
    Operand *Arg = legalizeUndef(Call->getArg(i));
    const Type Ty = Arg->getType();
    RegNumT RegNum;
    if (CC.argInReg(Ty, i, &RegNum)) {
      // If PartialOnStack is true and this is a vector type, then the last
      // two elements are passed on the stack.
      if (PartialOnStack && isVectorType(Ty)) {
        OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, IceType_i64);
        OutArgsSizeBytes += typeWidthInBytesOnStack(IceType_i32) * 2;
      }
      continue;
    }
    OutArgsSizeBytes = applyStackAlignmentTy(OutArgsSizeBytes, Ty);
    OutArgsSizeBytes += typeWidthInBytesOnStack(Ty);
  }
  // Add the size of the argument save area.
  constexpr int BytesPerStackArg = 4;
  OutArgsSizeBytes += MIPS32_MAX_GPR_ARG * BytesPerStackArg;
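  // For example (a sketch): a call taking five i32 arguments passes four in
  // $a0-$a3 and one on the stack, so OutArgsSizeBytes is 4 plus the 16-byte
  // save area, i.e. 20, which applyStackAlignment below rounds up to 32.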
  return applyStackAlignment(OutArgsSizeBytes);
}

namespace {
inline uint64_t getConstantMemoryOrder(Operand *Opnd) {
  if (auto *Integer = llvm::dyn_cast<ConstantInteger32>(Opnd))
    return Integer->getValue();
  return Intrinsics::MemoryOrderInvalid;
}
} // namespace

void TargetMIPS32::genTargetHelperCallFor(Inst *Instr) {
  constexpr bool NoTailCall = false;
  constexpr bool IsTargetHelperCall = true;
  Variable *Dest = Instr->getDest();
  const Type DestTy = Dest ? Dest->getType() : IceType_void;

  switch (Instr->getKind()) {
  default:
    return;
  case Inst::Select: {
    if (isVectorType(DestTy)) {
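      // Scalarize the vector select: extract each lane of the condition and
      // of both sources, select per lane, and re-insert the result, threading
      // a fresh temporary vector through each insertelement.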
      Operand *SrcT = llvm::cast<InstSelect>(Instr)->getTrueOperand();
      Operand *SrcF = llvm::cast<InstSelect>(Instr)->getFalseOperand();
      Operand *Cond = llvm::cast<InstSelect>(Instr)->getCondition();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *OpC = Func->makeVariable(typeElementType(Cond->getType()));
        Context.insert<InstExtractElement>(OpC, Cond, Index);
        auto *OpT = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpT, SrcT, Index);
        auto *OpF = Func->makeVariable(typeElementType(DestTy));
        Context.insert<InstExtractElement>(OpF, SrcF, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstSelect>(Dst, OpC, OpT, OpF);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Fcmp: {
    if (isVectorType(DestTy)) {
      InstFcmp::FCond Cond = llvm::cast<InstFcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(IceType_v4f32); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        Context.insert<InstFcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Icmp: {
    if (isVectorType(DestTy)) {
      InstIcmp::ICond Cond = llvm::cast<InstIcmp>(Instr)->getCondition();
      Operand *Src0 = Instr->getSrc(0);
      Operand *Src1 = Instr->getSrc(1);
      const Type SrcType = Src0->getType();
      Variable *T = Func->makeVariable(DestTy);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      for (SizeT I = 0; I < typeNumElements(SrcType); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op0 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op0, Src0, Index);
        auto *Op1 = Func->makeVariable(typeElementType(SrcType));
        Context.insert<InstExtractElement>(Op1, Src1, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstIcmp>(Cond, Dst, Op0, Op1);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
    }
    return;
  }
  case Inst::Arithmetic: {
    const InstArithmetic::OpKind Op =
        llvm::cast<InstArithmetic>(Instr)->getOp();
    if (isVectorType(DestTy)) {
      scalarizeArithmetic(Op, Dest, Instr->getSrc(0), Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    switch (DestTy) {
    default:
      return;
    case IceType_i64: {
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (Op) {
      default:
        return;
      case InstArithmetic::Udiv:
        HelperID = RuntimeHelper::H_udiv_i64;
        break;
      case InstArithmetic::Sdiv:
        HelperID = RuntimeHelper::H_sdiv_i64;
        break;
      case InstArithmetic::Urem:
        HelperID = RuntimeHelper::H_urem_i64;
        break;
      case InstArithmetic::Srem:
        HelperID = RuntimeHelper::H_srem_i64;
        break;
      }

      if (HelperID == RuntimeHelper::H_Num) {
        return;
      }

      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(HelperID);
      constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    case IceType_f32:
    case IceType_f64: {
      if (Op != InstArithmetic::Frem) {
        return;
      }
      constexpr SizeT MaxArgs = 2;
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(
          DestTy == IceType_f32 ? RuntimeHelper::H_frem_f32
                                : RuntimeHelper::H_frem_f64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Instr->getSrc(0));
      Call->addArg(Instr->getSrc(1));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::Cast: {
    Operand *Src0 = Instr->getSrc(0);
    const Type SrcTy = Src0->getType();
    auto *CastInstr = llvm::cast<InstCast>(Instr);
    const InstCast::OpKind CastKind = CastInstr->getCastKind();

    if (isVectorType(DestTy)) {
      Variable *T = Func->makeVariable(DestTy);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);
      auto *Undef = ConstantUndef::create(Ctx, DestTy);
      Context.insert<InstAssign>(T, Undef);
      for (SizeT I = 0; I < typeNumElements(DestTy); ++I) {
        auto *Index = Ctx->getConstantInt32(I);
        auto *Op = Func->makeVariable(typeElementType(SrcTy));
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Dst = Func->makeVariable(typeElementType(DestTy));
        Variable *DestT = Func->makeVariable(DestTy);
        Context.insert<InstCast>(CastKind, Dst, Op);
        Context.insert<InstInsertElement>(DestT, T, Dst, Index);
        T = DestT;
      }
      Context.insert<InstAssign>(Dest, T);
      Instr->setDeleted();
      return;
    }

    switch (CastKind) {
    default:
      return;
    case InstCast::Fptosi:
    case InstCast::Fptoui: {
      if ((DestTy != IceType_i32) && (DestTy != IceType_i64)) {
        return;
      }
      const bool DestIs32 = DestTy == IceType_i32;
      const bool DestIsSigned = CastKind == InstCast::Fptosi;
      const bool Src0IsF32 = isFloat32Asserting32Or64(SrcTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (DestIsSigned) {
        if (DestIs32) {
          return;
        }
        RTHFunc = Src0IsF32 ? RuntimeHelper::H_fptosi_f32_i64
                            : RuntimeHelper::H_fptosi_f64_i64;
      } else {
        RTHFunc = Src0IsF32 ? (DestIs32 ? RuntimeHelper::H_fptoui_f32_i32
                                        : RuntimeHelper::H_fptoui_f32_i64)
                            : (DestIs32 ? RuntimeHelper::H_fptoui_f64_i32
                                        : RuntimeHelper::H_fptoui_f64_i64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Sitofp:
    case InstCast::Uitofp: {
      if ((SrcTy != IceType_i32) && (SrcTy != IceType_i64)) {
        return;
      }
      const bool SourceIs32 = SrcTy == IceType_i32;
      const bool SourceIsSigned = CastKind == InstCast::Sitofp;
      const bool DestIsF32 = isFloat32Asserting32Or64(DestTy);
      RuntimeHelper RTHFunc = RuntimeHelper::H_Num;
      if (SourceIsSigned) {
        if (SourceIs32) {
          return;
        }
        RTHFunc = DestIsF32 ? RuntimeHelper::H_sitofp_i64_f32
                            : RuntimeHelper::H_sitofp_i64_f64;
      } else {
        RTHFunc = DestIsF32 ? (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f32
                                          : RuntimeHelper::H_uitofp_i64_f32)
                            : (SourceIs32 ? RuntimeHelper::H_uitofp_i32_f64
                                          : RuntimeHelper::H_uitofp_i64_f64);
      }
      Operand *TargetHelper = Ctx->getRuntimeHelperFunc(RTHFunc);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case InstCast::Bitcast: {
      if (DestTy == SrcTy) {
        return;
      }
      Variable *CallDest = Dest;
      RuntimeHelper HelperID = RuntimeHelper::H_Num;
      switch (DestTy) {
      default:
        return;
      case IceType_i8:
        assert(SrcTy == IceType_v8i1);
        HelperID = RuntimeHelper::H_bitcast_8xi1_i8;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_i16:
        assert(SrcTy == IceType_v16i1);
        HelperID = RuntimeHelper::H_bitcast_16xi1_i16;
        CallDest = Func->makeVariable(IceType_i32);
        break;
      case IceType_v8i1: {
        assert(SrcTy == IceType_i8);
        HelperID = RuntimeHelper::H_bitcast_i8_8xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      case IceType_v16i1: {
        assert(SrcTy == IceType_i16);
        HelperID = RuntimeHelper::H_bitcast_i16_16xi1;
        Variable *Src0AsI32 = Func->makeVariable(stackSlotType());
        // Arguments to functions are required to be at least 32 bits wide.
        Context.insert<InstCast>(InstCast::Zext, Src0AsI32, Src0);
        Src0 = Src0AsI32;
      } break;
      }
      constexpr SizeT MaxSrcs = 1;
      InstCall *Call = makeHelperCall(HelperID, CallDest, MaxSrcs);
      Call->addArg(Src0);
      Context.insert(Call);
      // The PNaCl ABI disallows i8/i16 return types, so truncate the helper
      // call result to the appropriate type as necessary.
      if (CallDest->getType() != DestTy)
        Context.insert<InstCast>(InstCast::Trunc, Dest, CallDest);
      Instr->setDeleted();
      return;
    }
    case InstCast::Trunc: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(SrcTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(DestTy) == IceType_i1);
      assert(isVectorIntegerType(SrcTy));
      return;
    }
    case InstCast::Sext:
    case InstCast::Zext: {
      if (DestTy == SrcTy) {
        return;
      }
      if (!isVectorType(DestTy)) {
        return;
      }
      assert(typeNumElements(DestTy) == typeNumElements(SrcTy));
      assert(typeElementType(SrcTy) == IceType_i1);
      assert(isVectorIntegerType(DestTy));
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  case Inst::IntrinsicCall: {
    auto *IntrinsicCall = llvm::cast<InstIntrinsicCall>(Instr);
    Intrinsics::IntrinsicID ID = IntrinsicCall->getIntrinsicInfo().ID;
    if (isVectorType(DestTy) && ID == Intrinsics::Fabs) {
      Operand *Src0 = IntrinsicCall->getArg(0);
      GlobalString FabsFloat = Ctx->getGlobalString("llvm.fabs.f32");
      Operand *CallTarget = Ctx->getConstantExternSym(FabsFloat);
      GlobalString FabsVec = Ctx->getGlobalString("llvm.fabs.v4f32");
      bool BadIntrinsic = false;
      const Intrinsics::FullIntrinsicInfo *FullInfo =
          Ctx->getIntrinsicsInfo().find(FabsVec, BadIntrinsic);
      Intrinsics::IntrinsicInfo Info = FullInfo->Info;

      Variable *T = Func->makeVariable(IceType_v4f32);
      auto *Undef = ConstantUndef::create(Ctx, IceType_v4f32);
      Context.insert<InstAssign>(T, Undef);
      auto *VarVecOn32 = llvm::cast<VariableVecOn32>(T);
      VarVecOn32->initVecElement(Func);

      for (SizeT i = 0; i < typeNumElements(IceType_v4f32); ++i) {
        auto *Index = Ctx->getConstantInt32(i);
        auto *Op = Func->makeVariable(IceType_f32);
        Context.insert<InstExtractElement>(Op, Src0, Index);
        auto *Res = Func->makeVariable(IceType_f32);
        Variable *DestT = Func->makeVariable(IceType_v4f32);
        auto *Call =
            Context.insert<InstIntrinsicCall>(1, Res, CallTarget, Info);
        Call->addArg(Op);
        Context.insert<InstInsertElement>(DestT, T, Res, Index);
        T = DestT;
      }

      Context.insert<InstAssign>(Dest, T);

      Instr->setDeleted();
      return;
    }
    switch (ID) {
    default:
      return;
    case Intrinsics::AtomicLoad: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(IntrinsicCall->getArg(1)))) {
        Func->setError("Unexpected memory ordering for AtomicLoad");
        return;
      }
      Operand *Addr = IntrinsicCall->getArg(0);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
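      // Emulate the 64-bit atomic load with a compare-and-swap: comparing
      // and swapping zero with zero leaves memory unchanged while returning
      // its current value.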
      static constexpr SizeT MaxArgs = 3;
      auto *_0 = Ctx->getConstantZero(IceType_i64);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(_0);
      Call->addArg(_0);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicStore: {
      Operand *Val = IntrinsicCall->getArg(0);
      if (Val->getType() != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(IntrinsicCall->getArg(2)))) {
        Func->setError("Unexpected memory ordering for AtomicStore");
        return;
      }
      Operand *Addr = IntrinsicCall->getArg(1);
      Variable *NoDest = nullptr;
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_lock_test_and_set_8"));
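      // Emulate the 64-bit atomic store with an atomic exchange whose
      // old-value result is simply discarded (NoDest).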
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Val);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicCmpxchg: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)),
              getConstantMemoryOrder(IntrinsicCall->getArg(4)))) {
        Func->setError("Unexpected memory ordering for AtomicCmpxchg");
        return;
      }
      Operand *Addr = IntrinsicCall->getArg(0);
      Operand *Oldval = IntrinsicCall->getArg(1);
      Operand *Newval = IntrinsicCall->getArg(2);
      Operand *TargetHelper = Ctx->getConstantExternSym(
          Ctx->getGlobalString("__sync_val_compare_and_swap_8"));
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 3;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Oldval);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::AtomicRMW: {
      if (DestTy != IceType_i64)
        return;
      if (!Intrinsics::isMemoryOrderValid(
              ID, getConstantMemoryOrder(IntrinsicCall->getArg(3)))) {
        Func->setError("Unexpected memory ordering for AtomicRMW");
        return;
      }
      auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
          llvm::cast<ConstantInteger32>(IntrinsicCall->getArg(0))->getValue());
      auto *Addr = IntrinsicCall->getArg(1);
      auto *Newval = IntrinsicCall->getArg(2);
      Operand *TargetHelper;
      switch (Operation) {
      case Intrinsics::AtomicAdd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_add_8"));
        break;
      case Intrinsics::AtomicSub:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_sub_8"));
        break;
      case Intrinsics::AtomicOr:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_or_8"));
        break;
      case Intrinsics::AtomicAnd:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_and_8"));
        break;
      case Intrinsics::AtomicXor:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_fetch_and_xor_8"));
        break;
      case Intrinsics::AtomicExchange:
        TargetHelper = Ctx->getConstantExternSym(
            Ctx->getGlobalString("__sync_lock_test_and_set_8"));
        break;
      default:
        llvm::report_fatal_error("Unknown AtomicRMW operation");
        return;
      }
      Context.insert<InstMIPS32Sync>();
      static constexpr SizeT MaxArgs = 2;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Addr);
      Call->addArg(Newval);
      Context.insert<InstMIPS32Sync>();
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Ctpop: {
      Operand *Src0 = IntrinsicCall->getArg(0);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(isInt32Asserting32Or64(Src0->getType())
                                        ? RuntimeHelper::H_call_ctpop_i32
                                        : RuntimeHelper::H_call_ctpop_i64);
      static constexpr SizeT MaxArgs = 1;
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(Src0);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Longjmp: {
      static constexpr SizeT MaxArgs = 2;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_longjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memcpy: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memcpy);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memmove: {
      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memmove);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(IntrinsicCall->getArg(1));
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Memset: {
      Operand *ValOp = IntrinsicCall->getArg(1);
      assert(ValOp->getType() == IceType_i8);
      Variable *ValExt = Func->makeVariable(stackSlotType());
      Context.insert<InstCast>(InstCast::Zext, ValExt, ValOp);

      static constexpr SizeT MaxArgs = 3;
      static constexpr Variable *NoDest = nullptr;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_memset);
      auto *Call = Context.insert<InstCall>(MaxArgs, NoDest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Call->addArg(ValExt);
      Call->addArg(IntrinsicCall->getArg(2));
      Instr->setDeleted();
      return;
    }
    case Intrinsics::NaClReadTP: {
      if (SandboxingType == ST_NaCl) {
        return;
      }
      static constexpr SizeT MaxArgs = 0;
      assert(SandboxingType != ST_Nonsfi);
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_read_tp);
      Context.insert<InstCall>(MaxArgs, Dest, TargetHelper, NoTailCall,
                               IsTargetHelperCall);
      Instr->setDeleted();
      return;
    }
    case Intrinsics::Setjmp: {
      static constexpr SizeT MaxArgs = 1;
      Operand *TargetHelper =
          Ctx->getRuntimeHelperFunc(RuntimeHelper::H_call_setjmp);
      auto *Call = Context.insert<InstCall>(MaxArgs, Dest, TargetHelper,
                                            NoTailCall, IsTargetHelperCall);
      Call->addArg(IntrinsicCall->getArg(0));
      Instr->setDeleted();
      return;
    }
    }
    llvm::report_fatal_error("Control flow should never have reached here.");
  }
  }
}

void TargetMIPS32::findMaxStackOutArgsSize() {
  // MinNeededOutArgsBytes should be updated if the Target ever creates a
  // high-level InstCall that requires more stack bytes.
  size_t MinNeededOutArgsBytes = 0;
  if (!MaybeLeafFunc)
    MinNeededOutArgsBytes = MIPS32_MAX_GPR_ARG * 4;
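  // (Under the O32 calling convention, a non-leaf function reserves at least
  // the 16-byte argument save area, i.e. MIPS32_MAX_GPR_ARG 4-byte words,
  // for the calls it makes.)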
  MaxOutArgsSizeBytes = MinNeededOutArgsBytes;
  for (CfgNode *Node : Func->getNodes()) {
    Context.init(Node);
    while (!Context.atEnd()) {
      PostIncrLoweringContext PostIncrement(Context);
      Inst *CurInstr = iteratorToInst(Context.getCur());
      if (auto *Call = llvm::dyn_cast<InstCall>(CurInstr)) {
        SizeT OutArgsSizeBytes = getCallStackArgumentsSizeBytes(Call);
        MaxOutArgsSizeBytes = std::max(MaxOutArgsSizeBytes, OutArgsSizeBytes);
      }
    }
  }
  CurrentAllocaOffset = MaxOutArgsSizeBytes;
}

void TargetMIPS32::translateO2() {
  TimerMarker T(TimerStack::TT_O2, Func);

  // TODO(stichnot): share passes with X86?
  // https://code.google.com/p/nativeclient/issues/detail?id=4094
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = true;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  if (!getFlags().getEnablePhiEdgeSplit()) {
    // Lower Phi instructions.
    Func->placePhiLoads();
    if (Func->hasError())
      return;
    Func->placePhiStores();
    if (Func->hasError())
      return;
    Func->deletePhis();
    if (Func->hasError())
      return;
    Func->dump("After Phi lowering");
  }

  // Address mode optimization.
  Func->getVMetadata()->init(VMK_SingleDefs);
  Func->doAddressOpt();

  // Argument lowering
  Func->doArgLowering();

  // Target lowering. This requires liveness analysis for some parts of the
  // lowering decisions, such as compare/branch fusing. If non-lightweight
  // liveness analysis is used, the instructions need to be renumbered first.
  // TODO: This renumbering should only be necessary if we're actually
  // calculating live intervals, which we only do for register allocation.
  Func->renumberInstructions();
  if (Func->hasError())
    return;

  // TODO: It should be sufficient to use the fastest liveness calculation,
  // i.e. livenessLightweight(). However, for some reason that slows down the
  // rest of the translation. Investigate.
  Func->liveness(Liveness_Basic);
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 address mode opt");

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After MIPS32 codegen");

  // Register allocation. This requires instruction renumbering and full
  // liveness analysis.
  Func->renumberInstructions();
  if (Func->hasError())
    return;
  Func->liveness(Liveness_Intervals);
  if (Func->hasError())
    return;
  // The post-codegen dump is done here, after liveness analysis and associated
  // cleanup, to make the dump cleaner and more useful.
  Func->dump("After initial MIPS32 codegen");
  // Validate the live range computations. The expensive validation call is
  // deliberately only made when assertions are enabled.
  assert(Func->validateLiveness());
  Func->getVMetadata()->init(VMK_All);
  regAlloc(RAK_Global);
  if (Func->hasError())
    return;
  Func->dump("After linear scan regalloc");

  if (getFlags().getEnablePhiEdgeSplit()) {
    Func->advancedPhiLowering();
    Func->dump("After advanced Phi lowering");
  }

  // Stack frame mapping.
  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  Func->contractEmptyNodes();
  Func->reorderNodes();

  // Branch optimization. This needs to be done just before code emission. In
  // particular, no transformations that insert or reorder CfgNodes should be
  // done after branch optimization. We go ahead and do it before nop insertion
  // to reduce the amount of work needed for searching for opportunities.
  Func->doBranchOpt();
  Func->dump("After branch optimization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

void TargetMIPS32::translateOm1() {
  TimerMarker T(TimerStack::TT_Om1, Func);

  // TODO: share passes with X86?
  genTargetHelperCalls();

  unsetIfNonLeafFunc();

  findMaxStackOutArgsSize();

  // Do not merge Alloca instructions, and lay out the stack.
  static constexpr bool SortAndCombineAllocas = false;
  Func->processAllocas(SortAndCombineAllocas);
  Func->dump("After Alloca processing");

  Func->placePhiLoads();
  if (Func->hasError())
    return;
  Func->placePhiStores();
  if (Func->hasError())
    return;
  Func->deletePhis();
  if (Func->hasError())
    return;
  Func->dump("After Phi lowering");

  Func->doArgLowering();

  Func->genCode();
  if (Func->hasError())
    return;
  Func->dump("After initial MIPS32 codegen");

  regAlloc(RAK_InfOnly);
  if (Func->hasError())
    return;
  Func->dump("After regalloc of infinite-weight variables");

  Func->genFrame();
  if (Func->hasError())
    return;
  Func->dump("After stack frame mapping");

  postLowerLegalization();
  if (Func->hasError())
    return;
  Func->dump("After postLowerLegalization");

  // Nop insertion
  if (getFlags().getShouldDoNopInsertion()) {
    Func->doNopInsertion();
  }
}

bool TargetMIPS32::doBranchOpt(Inst *Instr, const CfgNode *NextNode) {
  if (auto *Br = llvm::dyn_cast<InstMIPS32Br>(Instr)) {
    return Br->optimizeBranch(NextNode);
  }
  return false;
}

namespace {

const char *RegNames[RegMIPS32::Reg_NUM] = {
#define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt,    \
          isI64Pair, isFP32, isFP64, isVec128, alias_init)                     \
  name,
    REGMIPS32_TABLE
#undef X
};

} // end of anonymous namespace

const char *RegMIPS32::getRegName(RegNumT RegNum) {
  RegNum.assertIsValid();
  return RegNames[RegNum];
}

const char *TargetMIPS32::getRegName(RegNumT RegNum, Type Ty) const {
  (void)Ty;
  return RegMIPS32::getRegName(RegNum);
}

Variable *TargetMIPS32::getPhysicalRegister(RegNumT RegNum, Type Ty) {
  if (Ty == IceType_void)
    Ty = IceType_i32;
  if (PhysicalRegisters[Ty].empty())
    PhysicalRegisters[Ty].resize(RegMIPS32::Reg_NUM);
  RegNum.assertIsValid();
  Variable *Reg = PhysicalRegisters[Ty][RegNum];
  if (Reg == nullptr) {
    Reg = Func->makeVariable(Ty);
    Reg->setRegNum(RegNum);
    PhysicalRegisters[Ty][RegNum] = Reg;
    // Specially mark a named physical register as an "argument" so that it is
    // considered live upon function entry. Otherwise it's possible to get
    // liveness validation errors for saving callee-save registers.
    Func->addImplicitArg(Reg);
    // Don't bother tracking the live range of a named physical register.
    Reg->setIgnoreLiveness();
  }
  return Reg;
}

void TargetMIPS32::emitJumpTable(const Cfg *Func,
                                 const InstJumpTable *JumpTable) const {
  (void)Func;
  (void)JumpTable;
  UnimplementedError(getFlags());
}

/// Provide a trivial wrapper to legalize() for this common usage.
Variable *TargetMIPS32::legalizeToReg(Operand *From, RegNumT RegNum) {
  return llvm::cast<Variable>(legalize(From, Legal_Reg, RegNum));
}

/// Legalize undef values to concrete values.
Operand *TargetMIPS32::legalizeUndef(Operand *From, RegNumT RegNum) {
  (void)RegNum;
  Type Ty = From->getType();
  if (llvm::isa<ConstantUndef>(From)) {
    // Lower undefs to zero. Another option is to lower undefs to an
    // uninitialized register; however, using an uninitialized register
    // results in less predictable code.
    //
    // If in the future the implementation is changed to lower undef
    // values to uninitialized registers, a FakeDef will be needed:
    //   Context.insert(InstFakeDef::create(Func, Reg));
    // This is in order to ensure that the live range of Reg is not
    // overestimated. If the constant being lowered is a 64 bit value,
    // then the result should be split and the lo and hi components will
    // need to go in uninitialized registers.
    if (isVectorType(Ty)) {
      Variable *Var = makeReg(Ty, RegNum);
      auto *Reg = llvm::cast<VariableVecOn32>(Var);
      Reg->initVecElement(Func);
      auto *Zero = getZero();
      for (Variable *Var : Reg->getContainers()) {
        _mov(Var, Zero);
      }
      return Reg;
    }
    return Ctx->getConstantZero(Ty);
  }
  return From;
}

Variable *TargetMIPS32::makeReg(Type Type, RegNumT RegNum) {
  // There aren't any 64-bit integer registers for MIPS32.
  assert(Type != IceType_i64);
  Variable *Reg = Func->makeVariable(Type);
  if (RegNum.hasValue())
    Reg->setRegNum(RegNum);
  else
    Reg->setMustHaveReg();
  return Reg;
}

OperandMIPS32Mem *TargetMIPS32::formMemoryOperand(Operand *Operand, Type Ty) {
  // It may be the case that address mode optimization already creates an
  // OperandMIPS32Mem, so in that case it wouldn't need another level of
  // transformation.
  if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
    return llvm::cast<OperandMIPS32Mem>(legalize(Mem));
  }

  // If we didn't do address mode optimization, then we only have a base/offset
  // to work with. MIPS always requires a base register, so just use that to
  // hold the operand.
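  // For instance (a sketch of the common case), a rematerializable stack
  // variable at offset 16 from its base register would end up emitted as the
  // memory operand 16($base).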
  auto *Base = llvm::cast<Variable>(
      legalize(Operand, Legal_Reg | Legal_Rematerializable));
  const int32_t Offset = Base->hasStackOffset() ? Base->getStackOffset() : 0;
  return OperandMIPS32Mem::create(
      Func, Ty, Base,
      llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(Offset)));
}

void TargetMIPS32::emitVariable(const Variable *Var) const {
  if (!BuildDefs::dump())
    return;
  Ostream &Str = Ctx->getStrEmit();
  const Type FrameSPTy = IceType_i32;
  if (Var->hasReg()) {
    Str << '$' << getRegName(Var->getRegNum(), Var->getType());
    return;
  }
  if (Var->mustHaveReg()) {
    llvm::report_fatal_error("Infinite-weight Variable (" + Var->getName() +
                             ") has no register assigned - function " +
                             Func->getFunctionName());
  }
  const int32_t Offset = Var->getStackOffset();
  Str << Offset;
  Str << "($" << getRegName(getFrameOrStackReg(), FrameSPTy);
  Str << ")";
}

TargetMIPS32::CallingConv::CallingConv()
    : GPRegsUsed(RegMIPS32::Reg_NUM),
      GPRArgs(GPRArgInitializer.rbegin(), GPRArgInitializer.rend()),
      I64Args(I64ArgInitializer.rbegin(), I64ArgInitializer.rend()),
      VFPRegsUsed(RegMIPS32::Reg_NUM),
      FP32Args(FP32ArgInitializer.rbegin(), FP32ArgInitializer.rend()),
      FP64Args(FP64ArgInitializer.rbegin(), FP64ArgInitializer.rend()) {}

// In the MIPS O32 ABI, FP argument registers can be used only if the first
// argument is of type float/double. The UseFPRegs flag takes care of that.
// Also, FP argument registers can be used only for the first two arguments,
// so we require the argument number to make register allocation decisions.
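// For example, under O32 (a sketch of the cases handled below):
//   f(float, float) -> $f12, $f14
//   f(float, int)   -> $f12, $a1
//   f(int, float)   -> $a0, $a1  (FPRs stay unused; the first arg is int)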
bool TargetMIPS32::CallingConv::argInReg(Type Ty, uint32_t ArgNo,
                                         RegNumT *Reg) {
  if (isScalarIntegerType(Ty) || isVectorType(Ty))
    return argInGPR(Ty, Reg);
  if (isScalarFloatingType(Ty)) {
    if (ArgNo == 0) {
      UseFPRegs = true;
      return argInVFP(Ty, Reg);
    }
    if (UseFPRegs && ArgNo == 1) {
      UseFPRegs = false;
      return argInVFP(Ty, Reg);
    }
    return argInGPR(Ty, Reg);
  }
  llvm::report_fatal_error("argInReg: Invalid type.");
  return false;
}

bool TargetMIPS32::CallingConv::argInGPR(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInGPR: Invalid type.");
    return false;
  } break;
  case IceType_v4i1:
  case IceType_v8i1:
  case IceType_v16i1:
  case IceType_v16i8:
  case IceType_v8i16:
  case IceType_v4i32:
  case IceType_v4f32:
  case IceType_i32:
  case IceType_f32: {
    Source = &GPRArgs;
  } break;
  case IceType_i64:
  case IceType_f64: {
    Source = &I64Args;
  } break;
  }

  discardUnavailableGPRsAndTheirAliases(Source);

  // If $4 is used for any scalar type (or for returning v4f32), then the next
  // vector type is passed in $6:$7:stack:stack.
  if (isVectorType(Ty)) {
    alignGPR(Source);
  }

  if (Source->empty()) {
    GPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  // Note that we don't Source->pop_back() here. This is intentional. Notice
  // how we mark all of Reg's aliases as used. So, for the next argument,
  // Source->back() is marked as unavailable, and it is thus implicitly popped
  // from the stack.
  GPRegsUsed |= RegisterAliases[*Reg];

  // All vector arguments, irrespective of their base type, are passed in GP
  // registers. The first vector argument is passed in $4:$5:$6:$7 and the
  // second in $6:$7:stack:stack. If this is the first argument, discard
  // $4:$5:$6:$7; otherwise discard $6:$7 only.
  if (isVectorType(Ty)) {
    if (((unsigned)*Reg) == RegMIPS32::Reg_A0) {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A1];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A2];
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    } else {
      GPRegsUsed |= RegisterAliases[RegMIPS32::Reg_A3];
    }
  }

  return true;
}

inline void TargetMIPS32::CallingConv::discardNextGPRAndItsAliases(
    CfgVector<RegNumT> *Regs) {
  GPRegsUsed |= RegisterAliases[Regs->back()];
  Regs->pop_back();
}

inline void TargetMIPS32::CallingConv::alignGPR(CfgVector<RegNumT> *Regs) {
  if (Regs->back() == RegMIPS32::Reg_A1 || Regs->back() == RegMIPS32::Reg_A3)
    discardNextGPRAndItsAliases(Regs);
}

// GPRs are not packed when passing parameters. Thus, a function foo(i32, i64,
// i32) will have the first argument in a0, the second in a2-a3, and the third
// on the stack. To model this behavior, whenever we pop a register from Regs,
// we remove all of its aliases from the pool of available GPRs. This has the
// effect of computing the "closure" of the GPR registers.
void TargetMIPS32::CallingConv::discardUnavailableGPRsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && GPRegsUsed[Regs->back()]) {
    discardNextGPRAndItsAliases(Regs);
  }
}

bool TargetMIPS32::CallingConv::argInVFP(Type Ty, RegNumT *Reg) {
  CfgVector<RegNumT> *Source;

  switch (Ty) {
  default: {
    llvm::report_fatal_error("argInVFP: Invalid type.");
    return false;
  } break;
  case IceType_f32: {
    Source = &FP32Args;
  } break;
  case IceType_f64: {
    Source = &FP64Args;
  } break;
  }

  discardUnavailableVFPRegsAndTheirAliases(Source);

  if (Source->empty()) {
    VFPRegsUsed.set();
    return false;
  }

  *Reg = Source->back();
  VFPRegsUsed |= RegisterAliases[*Reg];

  // In the MIPS O32 ABI, if the function arguments are (f32, i32), then one
  // cannot use reg_a0 for the second argument even though it's free: the f32
  // arg goes in reg_f12 and the i32 arg goes in reg_a1. Similarly, if the
  // arguments are (f64, i32), the second argument goes in reg_a3 and a0, a1
  // are not used.
  Source = &GPRArgs;
  // Discard one GPR reg for f32 (4 bytes), two for f64 (4 + 4 bytes).
  if (Ty == IceType_f64) {
    // In the MIPS O32 ABI, when we use GPR argument pairs to store f64 values,
    // the pair must be aligned at an even register. Similarly, when we discard
    // GPR registers because some arguments from the first 16 bytes go in FPRs,
    // we must take care of alignment. For example, if the function arguments
    // are (f32, f64, f32), we discard a0 for the first f32; then, for the f64
    // argument, which will go in F14F15, we must first align the GPR vector to
    // an even register by discarding a1, and then discard the two GPRs a2 and
    // a3. The last f32 argument then goes on the stack.
    alignGPR(Source);
    discardNextGPRAndItsAliases(Source);
  }
  discardNextGPRAndItsAliases(Source);
  return true;
}

void TargetMIPS32::CallingConv::discardUnavailableVFPRegsAndTheirAliases(
    CfgVector<RegNumT> *Regs) {
  while (!Regs->empty() && VFPRegsUsed[Regs->back()]) {
    Regs->pop_back();
  }
}

void TargetMIPS32::lowerArguments() {
  VarList &Args = Func->getArgs();
  TargetMIPS32::CallingConv CC;

  // For each register argument, replace Arg in the argument list with the home
  // register. Then generate an instruction in the prolog to copy the home
  // register to the assigned location of Arg.
  Context.init(Func->getEntryNode());
  Context.setInsertPoint(Context.getCur());

  // v4f32 is returned through the stack: $4 is set up by the caller and passed
  // implicitly as the first argument. The callee then copies the return vector
  // to the address in $4.
  Variable *ImplicitRetVec = nullptr;
  if (isVectorFloatingType(Func->getReturnType())) {
    ImplicitRetVec = Func->makeVariable(IceType_i32);
    ImplicitRetVec->setName(Func, "ImplicitRet_v4f32");
    ImplicitRetVec->setIsArg();
    Args.insert(Args.begin(), ImplicitRetVec);
    setImplicitRet(ImplicitRetVec);
  }

  for (SizeT i = 0, E = Args.size(); i < E; ++i) {
    Variable *Arg = Args[i];
    Type Ty = Arg->getType();
    RegNumT RegNum;
    if (!CC.argInReg(Ty, i, &RegNum)) {
      continue;
    }
    Variable *RegisterArg = Func->makeVariable(Ty);
    if (BuildDefs::dump()) {
      RegisterArg->setName(Func, "home_reg:" + Arg->getName());
    }
    RegisterArg->setIsArg();
    Arg->setIsArg(false);
    Args[i] = RegisterArg;

    if (isVectorType(Ty)) {
      auto *RegisterArgVec = llvm::cast<VariableVecOn32>(RegisterArg);
      RegisterArgVec->initVecElement(Func);
      RegisterArgVec->getContainers()[0]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 0));
      RegisterArgVec->getContainers()[1]->setRegNum(
          RegNumT::fixme((unsigned)RegNum + 1));
      // The first two elements of the second vector argument are passed in
      // $6:$7 and the remaining two on the stack. Do not assign registers to
      // the last two elements if this is the second vector argument.
      if (i == 0) {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 2));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme((unsigned)RegNum + 3));
      } else {
        RegisterArgVec->getContainers()[2]->setRegNum(
            RegNumT::fixme(RegNumT()));
        RegisterArgVec->getContainers()[3]->setRegNum(
            RegNumT::fixme(RegNumT()));
      }
    } else {
      switch (Ty) {
      default: {
        RegisterArg->setRegNum(RegNum);
      } break;
      case IceType_i64: {
        auto *RegisterArg64 = llvm::cast<Variable64On32>(RegisterArg);
        RegisterArg64->initHiLo(Func);
        RegisterArg64->getLo()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairFirstRegNum(RegNum)));
        RegisterArg64->getHi()->setRegNum(
            RegNumT::fixme(RegMIPS32::get64PairSecondRegNum(RegNum)));
      } break;
      }
    }
    Context.insert<InstAssign>(Arg, RegisterArg);
  }

  // Insert a fake use of ImplicitRet_v4f32 to keep it live.
  if (ImplicitRetVec) {
    for (CfgNode *Node : Func->getNodes()) {
      for (Inst &Instr : Node->getInsts()) {
        if (llvm::isa<InstRet>(&Instr)) {
          Context.setInsertPoint(instToIterator(&Instr));
          Context.insert<InstFakeUse>(ImplicitRetVec);
          break;
        }
      }
    }
  }
}

Type TargetMIPS32::stackSlotType() { return IceType_i32; }

// Helper function for addProlog().
//
// This assumes Arg is an argument passed on the stack. This sets the frame
// offset for Arg and updates InArgsSizeBytes according to Arg's width. For an
// I64 arg that has been split into Lo and Hi components, it calls itself
// recursively on the components, taking care to handle Lo first because of the
// little-endian architecture. Lastly, this function generates an instruction
// to copy Arg into its assigned register if applicable.
void TargetMIPS32::finishArgumentLowering(Variable *Arg, bool PartialOnStack,
                                          Variable *FramePtr,
                                          size_t BasicFrameOffset,
                                          size_t *InArgsSizeBytes) {
  const Type Ty = Arg->getType();
  *InArgsSizeBytes = applyStackAlignmentTy(*InArgsSizeBytes, Ty);

  // If $4 is used for any scalar type (or for returning v4f32), then the next
  // vector type is passed in $6:$7:stack:stack. Load the 3rd and 4th elements
  // from the argument stack.
  if (auto *ArgVecOn32 = llvm::dyn_cast<VariableVecOn32>(Arg)) {
    if (PartialOnStack == false) {
      auto *Elem0 = ArgVecOn32->getContainers()[0];
      auto *Elem1 = ArgVecOn32->getContainers()[1];
      finishArgumentLowering(Elem0, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
      finishArgumentLowering(Elem1, PartialOnStack, FramePtr, BasicFrameOffset,
                             InArgsSizeBytes);
    }
    auto *Elem2 = ArgVecOn32->getContainers()[2];
    auto *Elem3 = ArgVecOn32->getContainers()[3];
    finishArgumentLowering(Elem2, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Elem3, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  if (auto *Arg64On32 = llvm::dyn_cast<Variable64On32>(Arg)) {
    Variable *const Lo = Arg64On32->getLo();
    Variable *const Hi = Arg64On32->getHi();
    finishArgumentLowering(Lo, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    finishArgumentLowering(Hi, PartialOnStack, FramePtr, BasicFrameOffset,
                           InArgsSizeBytes);
    return;
  }

  assert(Ty != IceType_i64);
  assert(!isVectorType(Ty));

  const int32_t ArgStackOffset = BasicFrameOffset + *InArgsSizeBytes;
  *InArgsSizeBytes += typeWidthInBytesOnStack(Ty);

  if (!Arg->hasReg()) {
    Arg->setStackOffset(ArgStackOffset);
    return;
  }

  // If the argument variable has been assigned a register, we need to copy the
  // value from the stack slot.
  Variable *Parameter = Func->makeVariable(Ty);
  Parameter->setMustNotHaveReg();
  Parameter->setStackOffset(ArgStackOffset);
  _mov(Arg, Parameter);
}

void TargetMIPS32::addProlog(CfgNode *Node) {
  // Stack frame layout:
  //
  // +------------------------+
  // | 1. preserved registers |
  // +------------------------+
  // | 2. padding             |
  // +------------------------+
  // | 3. global spill area   |
  // +------------------------+
  // | 4. padding             |
  // +------------------------+
  // | 5. local spill area    |
  // +------------------------+
  // | 6. padding             |
  // +------------------------+
  // | 7. allocas             |
  // +------------------------+
  // | 8. padding             |
  // +------------------------+
  // | 9. out args            |
  // +------------------------+ <--- StackPointer
  //
  // The following variables record the size in bytes of the given areas:
  //  * PreservedRegsSizeBytes: area 1
  //  * SpillAreaPaddingBytes: area 2
  //  * GlobalsSize: area 3
  //  * GlobalsAndSubsequentPaddingSize: areas 3 - 4
  //  * LocalsSpillAreaSize: area 5
  //  * SpillAreaSizeBytes: areas 2 - 9
  //  * maxOutArgsSizeBytes(): area 9

  Context.init(Node);
  Context.setInsertPoint(Context.getCur());

  SmallBitVector CalleeSaves = getRegisterSet(RegSet_CalleeSave, RegSet_None);
  RegsUsed = SmallBitVector(CalleeSaves.size());

  VarList SortedSpilledVariables;

  size_t GlobalsSize = 0;
  // If there is a separate locals area, this represents that area. Otherwise
  // it counts any variable not counted by GlobalsSize.
  SpillAreaSizeBytes = 0;
  // If there is a separate locals area, this specifies the alignment for it.
  uint32_t LocalsSlotsAlignmentBytes = 0;
  // The entire spill locations area gets aligned to the largest natural
  // alignment of the variables that have a spill slot.
  uint32_t SpillAreaAlignmentBytes = 0;
  // For now, we don't have target-specific variables that need special
  // treatment (no stack-slot-linked SpillVariable type).
  std::function<bool(Variable *)> TargetVarHook = [](Variable *Var) {
    static constexpr bool AssignStackSlot = false;
    static constexpr bool DontAssignStackSlot = !AssignStackSlot;
    if (llvm::isa<Variable64On32>(Var)) {
      return DontAssignStackSlot;
    }
    return AssignStackSlot;
  };
1557
1558 // Compute the list of spilled variables and bounds for GlobalsSize, etc.
1559 getVarStackSlotParams(SortedSpilledVariables, RegsUsed, &GlobalsSize,
1560 &SpillAreaSizeBytes, &SpillAreaAlignmentBytes,
1561 &LocalsSlotsAlignmentBytes, TargetVarHook);
1562 uint32_t LocalsSpillAreaSize = SpillAreaSizeBytes;
1563 SpillAreaSizeBytes += GlobalsSize;
1564
1565 PreservedGPRs.reserve(CalleeSaves.size());
1566
1567 // Consider FP and RA as callee-save / used as needed.
1568 if (UsesFramePointer) {
1569 if (RegsUsed[RegMIPS32::Reg_FP]) {
1570 llvm::report_fatal_error("Frame pointer has been used.");
1571 }
1572 CalleeSaves[RegMIPS32::Reg_FP] = true;
1573 RegsUsed[RegMIPS32::Reg_FP] = true;
1574 }
1575 if (!MaybeLeafFunc) {
1576 CalleeSaves[RegMIPS32::Reg_RA] = true;
1577 RegsUsed[RegMIPS32::Reg_RA] = true;
1578 }
1579
1580 // Make two passes over the used registers. The first pass records all the
1581 // used registers -- and their aliases. Then, we figure out which GPR
1582 // registers should be saved.
1583 SmallBitVector ToPreserve(RegMIPS32::Reg_NUM);
1584 for (SizeT i = 0; i < CalleeSaves.size(); ++i) {
1585 if (CalleeSaves[i] && RegsUsed[i]) {
1586 ToPreserve |= RegisterAliases[i];
1587 }
1588 }
1589
1590 uint32_t NumCallee = 0;
1591
1592 // RegClasses is a tuple of
1593 //
1594 // <First Register in Class, Last Register in Class, Vector of Save Registers>
1595 //
1596 // We use this tuple to figure out which registers we should save and
1597 // restore during the prolog/epilog.
1599 using RegClassType = std::tuple<uint32_t, uint32_t, VarList *>;
1600 const RegClassType RegClass = RegClassType(
1601 RegMIPS32::Reg_GPR_First, RegMIPS32::Reg_FPR_Last, &PreservedGPRs);
1602 const uint32_t FirstRegInClass = std::get<0>(RegClass);
1603 const uint32_t LastRegInClass = std::get<1>(RegClass);
1604 VarList *const PreservedRegsInClass = std::get<2>(RegClass);
1605 for (uint32_t Reg = LastRegInClass; Reg > FirstRegInClass; Reg--) {
1606 if (!ToPreserve[Reg]) {
1607 continue;
1608 }
1609 ++NumCallee;
1610 Variable *PhysicalRegister = getPhysicalRegister(RegNumT::fromInt(Reg));
1611 PreservedRegsSizeBytes +=
1612 typeWidthInBytesOnStack(PhysicalRegister->getType());
1613 PreservedRegsInClass->push_back(PhysicalRegister);
1614 }
1615
1616 Ctx->statsUpdateRegistersSaved(NumCallee);
1617
1618 // Align the variables area. SpillAreaPaddingBytes is the size of the region
1619 // after the preserved registers and before the spill areas.
1620 // LocalsSlotsPaddingBytes is the amount of padding between the globals and
1621 // locals area if they are separate.
1622 assert(SpillAreaAlignmentBytes <= MIPS32_STACK_ALIGNMENT_BYTES);
1623 (void)MIPS32_STACK_ALIGNMENT_BYTES;
1624 assert(LocalsSlotsAlignmentBytes <= SpillAreaAlignmentBytes);
1625 uint32_t SpillAreaPaddingBytes = 0;
1626 uint32_t LocalsSlotsPaddingBytes = 0;
1627 alignStackSpillAreas(PreservedRegsSizeBytes, SpillAreaAlignmentBytes,
1628 GlobalsSize, LocalsSlotsAlignmentBytes,
1629 &SpillAreaPaddingBytes, &LocalsSlotsPaddingBytes);
1630 SpillAreaSizeBytes += SpillAreaPaddingBytes + LocalsSlotsPaddingBytes;
1631 uint32_t GlobalsAndSubsequentPaddingSize =
1632 GlobalsSize + LocalsSlotsPaddingBytes;
1633
1634 // Add the out-args space to the stack, and align SP if necessary.
1635 if (!NeedsStackAlignment) {
1636 SpillAreaSizeBytes += MaxOutArgsSizeBytes * (VariableAllocaUsed ? 0 : 1);
1637 } else {
1638 SpillAreaSizeBytes = applyStackAlignment(
1639 SpillAreaSizeBytes +
1640 (VariableAllocaUsed ? VariableAllocaAlignBytes : MaxOutArgsSizeBytes));
1641 }
1642
1643 // Combine fixed alloca with SpillAreaSize.
1644 SpillAreaSizeBytes += FixedAllocaSizeBytes;
1645
1646 TotalStackSizeBytes =
1647 applyStackAlignment(PreservedRegsSizeBytes + SpillAreaSizeBytes);
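// For example (illustrative numbers): PreservedRegsSizeBytes = 8 and
// SpillAreaSizeBytes = 40 give 48, already a multiple of the 16-byte MIPS32
// stack alignment, while 8 + 36 = 44 would round up to 48.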
1648
1649 // Generate "addiu sp, sp, -TotalStackSizeBytes"
1650 if (TotalStackSizeBytes) {
1651 // Use the scratch register if needed to legalize the immediate.
1652 Sandboxer(this).addiu_sp(-TotalStackSizeBytes);
1653 }
1654
1655 Ctx->statsUpdateFrameBytes(TotalStackSizeBytes);
1656
1657 if (!PreservedGPRs.empty()) {
1658 uint32_t StackOffset = TotalStackSizeBytes;
1659 for (Variable *Var : *PreservedRegsInClass) {
1660 Type RegType;
1661 if (RegMIPS32::isFPRReg(Var->getRegNum()))
1662 RegType = IceType_f32;
1663 else
1664 RegType = IceType_i32;
1665 auto *PhysicalRegister = makeReg(RegType, Var->getRegNum());
1666 StackOffset -= typeWidthInBytesOnStack(RegType);
1667 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1668 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1669 Func, RegType, SP,
1670 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1671 Sandboxer(this).sw(PhysicalRegister, MemoryLocation);
1672 }
1673 }
1674
1675 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1676
1677 // Generate "mov FP, SP" if needed.
1678 if (UsesFramePointer) {
1679 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1680 _mov(FP, SP);
1681 // Keep FP live for late-stage liveness analysis (e.g. asm-verbose mode).
1682 Context.insert<InstFakeUse>(FP);
1683 }
1684
1685 // Fill in stack offsets for stack args, and copy args into registers for
1686 // those that were register-allocated. Args are pushed right to left, so
1687 // Arg[0] is closest to the stack/frame pointer.
1688 const VarList &Args = Func->getArgs();
1689 size_t InArgsSizeBytes = MIPS32_MAX_GPR_ARG * 4;
1690 TargetMIPS32::CallingConv CC;
1691 uint32_t ArgNo = 0;
1692
1693 for (Variable *Arg : Args) {
1694 RegNumT DummyReg;
1695 const Type Ty = Arg->getType();
1696 bool PartialOnStack;
1697 // Skip arguments passed in registers.
1698 if (CC.argInReg(Ty, ArgNo, &DummyReg)) {
1699 // Load argument from stack:
1700 // 1. If this is the first vector argument and the return type is v4f32.
1701 // In this case $4 is implicitly used to pass the stack address, and the
1702 // 3rd and 4th elements of the vector argument are passed on the stack.
1703 // 2. If this is the second vector argument.
1704 if (ArgNo != 0 && isVectorType(Ty)) {
1705 PartialOnStack = true;
1706 finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1707 &InArgsSizeBytes);
1708 }
1709 } else {
1710 PartialOnStack = false;
1711 finishArgumentLowering(Arg, PartialOnStack, FP, TotalStackSizeBytes,
1712 &InArgsSizeBytes);
1713 }
1714 ++ArgNo;
1715 }
1716
1717 // Fill in stack offsets for locals.
1718 assignVarStackSlots(SortedSpilledVariables, SpillAreaPaddingBytes,
1719 SpillAreaSizeBytes, GlobalsAndSubsequentPaddingSize);
1720 this->HasComputedFrame = true;
1721
1722 if (BuildDefs::dump() && Func->isVerbose(IceV_Frame)) {
1723 OstreamLocker _(Func->getContext());
1724 Ostream &Str = Func->getContext()->getStrDump();
1725
1726 Str << "Stack layout:\n";
1727 uint32_t SPAdjustmentPaddingSize =
1728 SpillAreaSizeBytes - LocalsSpillAreaSize -
1729 GlobalsAndSubsequentPaddingSize - SpillAreaPaddingBytes -
1730 MaxOutArgsSizeBytes;
1731 Str << " in-args = " << InArgsSizeBytes << " bytes\n"
1732 << " preserved registers = " << PreservedRegsSizeBytes << " bytes\n"
1733 << " spill area padding = " << SpillAreaPaddingBytes << " bytes\n"
1734 << " globals spill area = " << GlobalsSize << " bytes\n"
1735 << " globals-locals spill areas intermediate padding = "
1736 << GlobalsAndSubsequentPaddingSize - GlobalsSize << " bytes\n"
1737 << " locals spill area = " << LocalsSpillAreaSize << " bytes\n"
1738 << " SP alignment padding = " << SPAdjustmentPaddingSize << " bytes\n";
1739
1740 Str << "Stack details:\n"
1741 << " SP adjustment = " << SpillAreaSizeBytes << " bytes\n"
1742 << " spill area alignment = " << SpillAreaAlignmentBytes << " bytes\n"
1743 << " outgoing args size = " << MaxOutArgsSizeBytes << " bytes\n"
1744 << " locals spill area alignment = " << LocalsSlotsAlignmentBytes
1745 << " bytes\n"
1746 << " is FP based = " << 1 << "\n";
1747 }
1748 return;
1749 }
1750
1751 void TargetMIPS32::addEpilog(CfgNode *Node) {
1752 InstList &Insts = Node->getInsts();
1753 InstList::reverse_iterator RI, E;
1754 for (RI = Insts.rbegin(), E = Insts.rend(); RI != E; ++RI) {
1755 if (llvm::isa<InstMIPS32Ret>(*RI))
1756 break;
1757 }
1758 if (RI == E)
1759 return;
1760
1761 // Convert the reverse_iterator position into its corresponding (forward)
1762 // iterator position.
1763 InstList::iterator InsertPoint = reverseToForwardIterator(RI);
1764 --InsertPoint;
1765 Context.init(Node);
1766 Context.setInsertPoint(InsertPoint);
1767
1768 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1769 if (UsesFramePointer) {
1770 Variable *FP = getPhysicalRegister(RegMIPS32::Reg_FP);
1771 // For late-stage liveness analysis (e.g. asm-verbose mode), adding a fake
1772 // use of SP before the assignment of SP=FP keeps previous SP adjustments
1773 // from being dead-code eliminated.
1774 Context.insert<InstFakeUse>(SP);
1775 Sandboxer(this).reset_sp(FP);
1776 }
1777
1778 VarList::reverse_iterator RIter, END;
1779
1780 if (!PreservedGPRs.empty()) {
1781 uint32_t StackOffset = TotalStackSizeBytes - PreservedRegsSizeBytes;
1782 for (RIter = PreservedGPRs.rbegin(), END = PreservedGPRs.rend();
1783 RIter != END; ++RIter) {
1784 Type RegType;
1785 if (RegMIPS32::isFPRReg((*RIter)->getRegNum()))
1786 RegType = IceType_f32;
1787 else
1788 RegType = IceType_i32;
1789 auto *PhysicalRegister = makeReg(RegType, (*RIter)->getRegNum());
1790 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
1791 OperandMIPS32Mem *MemoryLocation = OperandMIPS32Mem::create(
1792 Func, RegType, SP,
1793 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackOffset)));
1794 _lw(PhysicalRegister, MemoryLocation);
1795 StackOffset += typeWidthInBytesOnStack(PhysicalRegister->getType());
1796 }
1797 }
1798
1799 if (TotalStackSizeBytes) {
1800 Sandboxer(this).addiu_sp(TotalStackSizeBytes);
1801 }
1802 if (!getFlags().getUseSandboxing())
1803 return;
1804
1805 Variable *RA = getPhysicalRegister(RegMIPS32::Reg_RA);
1806 Variable *RetValue = nullptr;
1807 if (RI->getSrcSize())
1808 RetValue = llvm::cast<Variable>(RI->getSrc(0));
1809
1810 Sandboxer(this).ret(RA, RetValue);
1811
1812 RI->setDeleted();
1813 }
1814
1815 Variable *TargetMIPS32::PostLoweringLegalizer::newBaseRegister(
1816 Variable *Base, int32_t Offset, RegNumT ScratchRegNum) {
1817 // Legalizing the offset will likely need a lui/ori combination, but if the
1818 // negated offset fits in 16 bits, a single addi of it can be used instead.
1819 const bool ShouldSub = Offset != 0 && (-Offset & 0xFFFF0000) == 0;
1820 Variable *ScratchReg = Target->makeReg(IceType_i32, ScratchRegNum);
1821 if (ShouldSub) {
1822 Target->_addi(ScratchReg, Base, -Offset);
1823 } else {
1824 constexpr bool SignExt = true;
1825 if (!OperandMIPS32Mem::canHoldOffset(Base->getType(), SignExt, Offset)) {
1826 const uint32_t UpperBits = (Offset >> 16) & 0xFFFF;
1827 const uint32_t LowerBits = Offset & 0xFFFF;
1828 Target->_lui(ScratchReg, Target->Ctx->getConstantInt32(UpperBits));
1829 if (LowerBits)
1830 Target->_ori(ScratchReg, ScratchReg, LowerBits);
1831 Target->_addu(ScratchReg, ScratchReg, Base);
1832 } else {
1833 Target->_addiu(ScratchReg, Base, Offset);
1834 }
1835 }
1836
1837 return ScratchReg;
1838 }
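// Illustrative expansion for an offset that does not fit the signed 16-bit
// immediate field (hypothetical values): Offset = 0x12345 with Base = $sp
// goes through the lui/ori path above as
//   lui  scratch, 0x1              # upper 16 bits of the offset
//   ori  scratch, scratch, 0x2345  # lower 16 bits, skipped when zero
//   addu scratch, scratch, $sp     # scratch now holds the new base address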
1839
1840 void TargetMIPS32::PostLoweringLegalizer::legalizeMovFp(
1841 InstMIPS32MovFP64ToI64 *MovInstr) {
1842 Variable *Dest = MovInstr->getDest();
1843 Operand *Src = MovInstr->getSrc(0);
1844 const Type SrcTy = Src->getType();
1845
1846 if (Dest != nullptr && SrcTy == IceType_f64) {
1847 int32_t Offset = Dest->getStackOffset();
1848 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1849 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1850 Target->Func, IceType_f32, Base,
1851 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1852 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1853 auto *SrcV = llvm::cast<Variable>(Src);
1854 Variable *SrcR;
1855 if (MovInstr->getInt64Part() == Int64_Lo) {
1856 SrcR = Target->makeReg(
1857 IceType_f32, RegMIPS32::get64PairFirstRegNum(SrcV->getRegNum()));
1858 } else {
1859 SrcR = Target->makeReg(
1860 IceType_f32, RegMIPS32::get64PairSecondRegNum(SrcV->getRegNum()));
1861 }
1862 Sandboxer(Target).sw(SrcR, Addr);
1863 if (MovInstr->isDestRedefined()) {
1864 Target->_set_dest_redefined();
1865 }
1866 MovInstr->setDeleted();
1867 return;
1868 }
1869
1870 llvm::report_fatal_error("legalizeMovFp: Invalid operands");
1871 }
1872
1873 void TargetMIPS32::PostLoweringLegalizer::legalizeMov(InstMIPS32Mov *MovInstr) {
1874 Variable *Dest = MovInstr->getDest();
1875 assert(Dest != nullptr);
1876 const Type DestTy = Dest->getType();
1877 assert(DestTy != IceType_i64);
1878
1879 Operand *Src = MovInstr->getSrc(0);
1880 const Type SrcTy = Src->getType();
1881 (void)SrcTy;
1882 assert(SrcTy != IceType_i64);
1883
1884 bool Legalized = false;
1885 auto *SrcR = llvm::cast<Variable>(Src);
1886 if (Dest->hasReg() && SrcR->hasReg()) {
1887 // This might be a GP to/from FP move generated due to argument passing.
1888 // Use mtc1/mfc1 instead of mov.[s/d] if the src and dst registers live in
1889 // different register files.
1890 const bool IsDstGPR = RegMIPS32::isGPRReg(Dest->getRegNum());
1891 const bool IsSrcGPR = RegMIPS32::isGPRReg(SrcR->getRegNum());
1892 const RegNumT SRegNum = SrcR->getRegNum();
1893 const RegNumT DRegNum = Dest->getRegNum();
1894 if (IsDstGPR != IsSrcGPR) {
1895 if (IsDstGPR) {
1896 // Dest is GPR and SrcR is FPR. Use mfc1.
1897 int32_t TypeWidth = typeWidthInBytes(DestTy);
1898 if (MovInstr->getDestHi() != nullptr)
1899 TypeWidth += typeWidthInBytes(MovInstr->getDestHi()->getType());
1900 if (TypeWidth == 8) {
1901 // Split it into two mfc1 instructions
1902 Variable *SrcGPRHi = Target->makeReg(
1903 IceType_f32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1904 Variable *SrcGPRLo = Target->makeReg(
1905 IceType_f32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1906 Variable *DstFPRHi, *DstFPRLo;
1907 if (MovInstr->getDestHi() != nullptr && Dest != nullptr) {
1908 DstFPRHi = Target->makeReg(IceType_i32,
1909 MovInstr->getDestHi()->getRegNum());
1910 DstFPRLo = Target->makeReg(IceType_i32, Dest->getRegNum());
1911 } else {
1912 DstFPRHi = Target->makeReg(
1913 IceType_i32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1914 DstFPRLo = Target->makeReg(
1915 IceType_i32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1916 }
1917 Target->_mov(DstFPRHi, SrcGPRHi);
1918 Target->_mov(DstFPRLo, SrcGPRLo);
1919 Legalized = true;
1920 } else {
1921 Variable *SrcGPR = Target->makeReg(IceType_f32, SRegNum);
1922 Variable *DstFPR = Target->makeReg(IceType_i32, DRegNum);
1923 Target->_mov(DstFPR, SrcGPR);
1924 Legalized = true;
1925 }
1926 } else {
1927 // Dest is FPR and SrcR is GPR. Use mtc1.
1928 if (typeWidthInBytes(Dest->getType()) == 8) {
1929 Variable *SrcGPRHi, *SrcGPRLo;
1930 // SrcR could be $zero which is i32
1931 if (SRegNum == RegMIPS32::Reg_ZERO) {
1932 SrcGPRHi = Target->makeReg(IceType_i32, SRegNum);
1933 SrcGPRLo = SrcGPRHi;
1934 } else {
1935 // Split it into two mtc1 instructions
1936 if (MovInstr->getSrcSize() == 2) {
1937 const auto FirstReg =
1938 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
1939 const auto SecondReg =
1940 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
1941 SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
1942 SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
1943 } else {
1944 SrcGPRLo = Target->makeReg(
1945 IceType_i32, RegMIPS32::get64PairFirstRegNum(SRegNum));
1946 SrcGPRHi = Target->makeReg(
1947 IceType_i32, RegMIPS32::get64PairSecondRegNum(SRegNum));
1948 }
1949 }
1950 Variable *DstFPRHi = Target->makeReg(
1951 IceType_f32, RegMIPS32::get64PairFirstRegNum(DRegNum));
1952 Variable *DstFPRLo = Target->makeReg(
1953 IceType_f32, RegMIPS32::get64PairSecondRegNum(DRegNum));
1954 Target->_mov(DstFPRHi, SrcGPRLo);
1955 Target->_mov(DstFPRLo, SrcGPRHi);
1956 Legalized = true;
1957 } else {
1958 Variable *SrcGPR = Target->makeReg(IceType_i32, SRegNum);
1959 Variable *DstFPR = Target->makeReg(IceType_f32, DRegNum);
1960 Target->_mov(DstFPR, SrcGPR);
1961 Legalized = true;
1962 }
1963 }
1964 }
1965 if (Legalized) {
1966 if (MovInstr->isDestRedefined()) {
1967 Target->_set_dest_redefined();
1968 }
1969 MovInstr->setDeleted();
1970 return;
1971 }
1972 }
1973
1974 if (!Dest->hasReg()) {
1975 auto *SrcR = llvm::cast<Variable>(Src);
1976 assert(SrcR->hasReg());
1977 assert(!SrcR->isRematerializable());
1978 int32_t Offset = Dest->getStackOffset();
1979
1980 // This is a _mov(Mem(), Variable), i.e., a store.
1981 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
1982
1983 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
1984 Target->Func, DestTy, Base,
1985 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
1986 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
1987 Target->Func, DestTy, Base,
1988 llvm::cast<ConstantInteger32>(
1989 Target->Ctx->getConstantInt32(Offset + 4)));
1990 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
1991
1992 // FP arguments are passed in GP registers if the first argument is in a GP
1993 // register. In that case the type of SrcR is still FP, so we must explicitly
1994 // generate sw instead of swc1.
1995 const RegNumT RegNum = SrcR->getRegNum();
1996 const bool IsSrcGPReg = RegMIPS32::isGPRReg(SrcR->getRegNum());
1997 if (SrcTy == IceType_f32 && IsSrcGPReg) {
1998 Variable *SrcGPR = Target->makeReg(IceType_i32, RegNum);
1999 Sandboxer(Target).sw(SrcGPR, Addr);
2000 } else if (SrcTy == IceType_f64 && IsSrcGPReg) {
2001 Variable *SrcGPRHi =
2002 Target->makeReg(IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2003 Variable *SrcGPRLo = Target->makeReg(
2004 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2005 Sandboxer(Target).sw(SrcGPRHi, Addr);
2006 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2007 Sandboxer(Target).sw(SrcGPRLo, AddrHi);
2008 } else if (DestTy == IceType_f64 && IsSrcGPReg) {
2009 const auto FirstReg =
2010 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2011 const auto SecondReg =
2012 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2013 Variable *SrcGPRHi = Target->makeReg(IceType_i32, FirstReg);
2014 Variable *SrcGPRLo = Target->makeReg(IceType_i32, SecondReg);
2015 Sandboxer(Target).sw(SrcGPRLo, Addr);
2016 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2017 Sandboxer(Target).sw(SrcGPRHi, AddrHi);
2018 } else {
2019 Sandboxer(Target).sw(SrcR, Addr);
2020 }
2021
2022 Target->Context.insert<InstFakeDef>(Dest);
2023 Legalized = true;
2024 } else if (auto *Var = llvm::dyn_cast<Variable>(Src)) {
2025 if (Var->isRematerializable()) {
2026 // This is equivalent to an x86 _lea(RematOffset(%esp/%ebp), Variable).
2027
2028 // ExtraOffset is only needed for stack-pointer based frames as we have
2029 // to account for spill storage.
2030 const int32_t ExtraOffset =
2031 (Var->getRegNum() == Target->getFrameOrStackReg())
2032 ? Target->getFrameFixedAllocaOffset()
2033 : 0;
2034
2035 const int32_t Offset = Var->getStackOffset() + ExtraOffset;
2036 Variable *Base = Target->getPhysicalRegister(Var->getRegNum());
2037 Variable *T = newBaseRegister(Base, Offset, Dest->getRegNum());
2038 Target->_mov(Dest, T);
2039 Legalized = true;
2040 } else {
2041 if (!Var->hasReg()) {
2042 // This is a _mov(Variable, Mem()), i.e., a load.
2043 const int32_t Offset = Var->getStackOffset();
2044 auto *Base = Target->getPhysicalRegister(Target->getFrameOrStackReg());
2045 const RegNumT RegNum = Dest->getRegNum();
2046 const bool IsDstGPReg = RegMIPS32::isGPRReg(Dest->getRegNum());
2047 // If we are moving an i64 to a double through the stack, the address may
2048 // not be aligned to an 8-byte boundary, since we split the i64 into Hi-Lo
2049 // parts and store them individually with 4-byte alignment. Load the Hi-Lo
2050 // parts into TmpReg and move them to the dest using mtc1.
2051 if (DestTy == IceType_f64 && !Utils::IsAligned(Offset, 8) &&
2052 !IsDstGPReg) {
2053 auto *Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2054 const RegNumT RegNum = Dest->getRegNum();
2055 Variable *DestLo = Target->makeReg(
2056 IceType_f32, RegMIPS32::get64PairFirstRegNum(RegNum));
2057 Variable *DestHi = Target->makeReg(
2058 IceType_f32, RegMIPS32::get64PairSecondRegNum(RegNum));
2059 OperandMIPS32Mem *AddrLo = OperandMIPS32Mem::create(
2060 Target->Func, IceType_i32, Base,
2061 llvm::cast<ConstantInteger32>(
2062 Target->Ctx->getConstantInt32(Offset)));
2063 OperandMIPS32Mem *AddrHi = OperandMIPS32Mem::create(
2064 Target->Func, IceType_i32, Base,
2065 llvm::cast<ConstantInteger32>(
2066 Target->Ctx->getConstantInt32(Offset + 4)));
2067 Sandboxer(Target).lw(Reg, AddrLo);
2068 Target->_mov(DestLo, Reg);
2069 Sandboxer(Target).lw(Reg, AddrHi);
2070 Target->_mov(DestHi, Reg);
2071 } else {
2072 OperandMIPS32Mem *TAddr = OperandMIPS32Mem::create(
2073 Target->Func, DestTy, Base,
2074 llvm::cast<ConstantInteger32>(
2075 Target->Ctx->getConstantInt32(Offset)));
2076 OperandMIPS32Mem *Addr = legalizeMemOperand(TAddr);
2077 OperandMIPS32Mem *TAddrHi = OperandMIPS32Mem::create(
2078 Target->Func, DestTy, Base,
2079 llvm::cast<ConstantInteger32>(
2080 Target->Ctx->getConstantInt32(Offset + 4)));
2081 // FP arguments are passed in GP registers if the first argument is in a
2082 // GP register. In that case the type of the Dest is still FP, so we must
2083 // explicitly generate lw instead of lwc1.
2084 if (DestTy == IceType_f32 && IsDstGPReg) {
2085 Variable *DstGPR = Target->makeReg(IceType_i32, RegNum);
2086 Sandboxer(Target).lw(DstGPR, Addr);
2087 } else if (DestTy == IceType_f64 && IsDstGPReg) {
2088 Variable *DstGPRHi = Target->makeReg(
2089 IceType_i32, RegMIPS32::get64PairFirstRegNum(RegNum));
2090 Variable *DstGPRLo = Target->makeReg(
2091 IceType_i32, RegMIPS32::get64PairSecondRegNum(RegNum));
2092 Sandboxer(Target).lw(DstGPRHi, Addr);
2093 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2094 Sandboxer(Target).lw(DstGPRLo, AddrHi);
2095 } else if (DestTy == IceType_f64 && IsDstGPReg) {
// Note: this condition repeats the branch above, so this path is
// unreachable as written.
2096 const auto FirstReg =
2097 (llvm::cast<Variable>(MovInstr->getSrc(0)))->getRegNum();
2098 const auto SecondReg =
2099 (llvm::cast<Variable>(MovInstr->getSrc(1)))->getRegNum();
2100 Variable *DstGPRHi = Target->makeReg(IceType_i32, FirstReg);
2101 Variable *DstGPRLo = Target->makeReg(IceType_i32, SecondReg);
2102 Sandboxer(Target).lw(DstGPRLo, Addr);
2103 OperandMIPS32Mem *AddrHi = legalizeMemOperand(TAddrHi);
2104 Sandboxer(Target).lw(DstGPRHi, AddrHi);
2105 } else {
2106 Sandboxer(Target).lw(Dest, Addr);
2107 }
2108 }
2109 Legalized = true;
2110 }
2111 }
2112 }
2113
2114 if (Legalized) {
2115 if (MovInstr->isDestRedefined()) {
2116 Target->_set_dest_redefined();
2117 }
2118 MovInstr->setDeleted();
2119 }
2120 }
2121
2122 OperandMIPS32Mem *
2123 TargetMIPS32::PostLoweringLegalizer::legalizeMemOperand(OperandMIPS32Mem *Mem) {
2124 if (llvm::isa<ConstantRelocatable>(Mem->getOffset())) {
2125 return nullptr;
2126 }
2127 Variable *Base = Mem->getBase();
2128 auto *Ci32 = llvm::cast<ConstantInteger32>(Mem->getOffset());
2129 int32_t Offset = Ci32->getValue();
2130
2131 if (Base->isRematerializable()) {
2132 const int32_t ExtraOffset =
2133 (Base->getRegNum() == Target->getFrameOrStackReg())
2134 ? Target->getFrameFixedAllocaOffset()
2135 : 0;
2136 Offset += Base->getStackOffset() + ExtraOffset;
2137 Base = Target->getPhysicalRegister(Base->getRegNum());
2138 }
2139
2140 constexpr bool SignExt = true;
2141 if (!OperandMIPS32Mem::canHoldOffset(Mem->getType(), SignExt, Offset)) {
2142 Base = newBaseRegister(Base, Offset, Target->getReservedTmpReg());
2143 Offset = 0;
2144 }
2145
2146 return OperandMIPS32Mem::create(
2147 Target->Func, Mem->getType(), Base,
2148 llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)));
2149 }
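// Net effect, sketched with a hypothetical operand: a load from [sp + 70000]
// cannot encode 70000 in the signed 16-bit offset field, so the operand is
// rewritten via newBaseRegister() as scratch = sp + 70000 followed by a load
// from [scratch + 0].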
2150
2151 Variable *TargetMIPS32::PostLoweringLegalizer::legalizeImmediate(int32_t Imm) {
2152 Variable *Reg = nullptr;
2153 if (!((std::numeric_limits<int16_t>::min() <= Imm) &&
2154 (Imm <= std::numeric_limits<int16_t>::max()))) {
2155 const uint32_t UpperBits = (Imm >> 16) & 0xFFFF;
2156 const uint32_t LowerBits = Imm & 0xFFFF;
2157 Variable *TReg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2158 Reg = Target->makeReg(IceType_i32, Target->getReservedTmpReg());
2159 if (LowerBits) {
2160 Target->_lui(TReg, Target->Ctx->getConstantInt32(UpperBits));
2161 Target->_ori(Reg, TReg, LowerBits);
2162 } else {
2163 Target->_lui(Reg, Target->Ctx->getConstantInt32(UpperBits));
2164 }
2165 }
2166 return Reg;
2167 }
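// For instance (illustrative values): Imm = 0x00050000 fails the int16_t
// range check and LowerBits == 0, so it is materialized with a single
// "lui reg, 0x5"; Imm = 0x00054321 needs "lui tmp, 0x5" then
// "ori reg, tmp, 0x4321". In-range immediates return nullptr and the caller
// keeps the original instruction.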
2168
2169 void TargetMIPS32::postLowerLegalization() {
2170 Func->dump("Before postLowerLegalization");
2171 assert(hasComputedFrame());
2172 for (CfgNode *Node : Func->getNodes()) {
2173 Context.init(Node);
2174 PostLoweringLegalizer Legalizer(this);
2175 while (!Context.atEnd()) {
2176 PostIncrLoweringContext PostIncrement(Context);
2177 Inst *CurInstr = iteratorToInst(Context.getCur());
2178 const SizeT NumSrcs = CurInstr->getSrcSize();
2179 Operand *Src0 = NumSrcs < 1 ? nullptr : CurInstr->getSrc(0);
2180 Operand *Src1 = NumSrcs < 2 ? nullptr : CurInstr->getSrc(1);
2181 auto *Src0V = llvm::dyn_cast_or_null<Variable>(Src0);
2182 auto *Src0M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src0);
2183 auto *Src1M = llvm::dyn_cast_or_null<OperandMIPS32Mem>(Src1);
2184 Variable *Dst = CurInstr->getDest();
2185 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32Mov>(CurInstr)) {
2186 Legalizer.legalizeMov(MovInstr);
2187 continue;
2188 }
2189 if (auto *MovInstr = llvm::dyn_cast<InstMIPS32MovFP64ToI64>(CurInstr)) {
2190 Legalizer.legalizeMovFp(MovInstr);
2191 continue;
2192 }
2193 if (llvm::isa<InstMIPS32Sw>(CurInstr)) {
2194 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2195 Sandboxer(this).sw(Src0V, LegalMem);
2196 CurInstr->setDeleted();
2197 }
2198 continue;
2199 }
2200 if (llvm::isa<InstMIPS32Swc1>(CurInstr)) {
2201 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2202 _swc1(Src0V, LegalMem);
2203 CurInstr->setDeleted();
2204 }
2205 continue;
2206 }
2207 if (llvm::isa<InstMIPS32Sdc1>(CurInstr)) {
2208 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src1M)) {
2209 _sdc1(Src0V, LegalMem);
2210 CurInstr->setDeleted();
2211 }
2212 continue;
2213 }
2214 if (llvm::isa<InstMIPS32Lw>(CurInstr)) {
2215 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2216 Sandboxer(this).lw(Dst, LegalMem);
2217 CurInstr->setDeleted();
2218 }
2219 continue;
2220 }
2221 if (llvm::isa<InstMIPS32Lwc1>(CurInstr)) {
2222 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2223 _lwc1(Dst, LegalMem);
2224 CurInstr->setDeleted();
2225 }
2226 continue;
2227 }
2228 if (llvm::isa<InstMIPS32Ldc1>(CurInstr)) {
2229 if (auto *LegalMem = Legalizer.legalizeMemOperand(Src0M)) {
2230 _ldc1(Dst, LegalMem);
2231 CurInstr->setDeleted();
2232 }
2233 continue;
2234 }
2235 if (auto *AddiuInstr = llvm::dyn_cast<InstMIPS32Addiu>(CurInstr)) {
2236 if (auto *LegalImm = Legalizer.legalizeImmediate(
2237 static_cast<int32_t>(AddiuInstr->getImmediateValue()))) {
2238 _addu(Dst, Src0V, LegalImm);
2239 CurInstr->setDeleted();
2240 }
2241 continue;
2242 }
2243 }
2244 }
2245 }
2246
2247 Operand *TargetMIPS32::loOperand(Operand *Operand) {
2248 assert(Operand->getType() == IceType_i64);
2249 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2250 return Var64On32->getLo();
2251 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2252 return Ctx->getConstantInt32(static_cast<uint32_t>(Const->getValue()));
2253 }
2254 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2255 // Conservatively disallow memory operands with side-effects (pre/post
2256 // increment) in case of duplication.
2257 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2258 return OperandMIPS32Mem::create(Func, IceType_i32, Mem->getBase(),
2259 Mem->getOffset(), Mem->getAddrMode());
2260 }
2261 llvm_unreachable("Unsupported operand type");
2262 return nullptr;
2263 }
2264
2265 Operand *TargetMIPS32::getOperandAtIndex(Operand *Operand, Type BaseType,
2266 uint32_t Index) {
2267 if (!isVectorType(Operand->getType())) {
2268 llvm::report_fatal_error("getOperandAtIndex: Operand is not vector");
2269 return nullptr;
2270 }
2271
2272 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2273 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2274 Variable *Base = Mem->getBase();
2275 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2276 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2277 int32_t NextOffsetVal =
2278 Offset->getValue() + (Index * typeWidthInBytes(BaseType));
2279 constexpr bool NoSignExt = false;
2280 if (!OperandMIPS32Mem::canHoldOffset(BaseType, NoSignExt, NextOffsetVal)) {
2281 Constant *_4 = Ctx->getConstantInt32(4);
2282 Variable *NewBase = Func->makeVariable(Base->getType());
2283 lowerArithmetic(
2284 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, Base, _4));
2285 Base = NewBase;
2286 } else {
2287 Offset =
2288 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2289 }
2290 return OperandMIPS32Mem::create(Func, BaseType, Base, Offset,
2291 Mem->getAddrMode());
2292 }
2293
2294 if (auto *VarVecOn32 = llvm::dyn_cast<VariableVecOn32>(Operand))
2295 return VarVecOn32->getContainers()[Index];
2296
2297 llvm_unreachable("Unsupported operand type");
2298 return nullptr;
2299 }
2300
2301 Operand *TargetMIPS32::hiOperand(Operand *Operand) {
2302 assert(Operand->getType() == IceType_i64);
2303 if (Operand->getType() != IceType_i64)
2304 return Operand;
2305 if (auto *Var64On32 = llvm::dyn_cast<Variable64On32>(Operand))
2306 return Var64On32->getHi();
2307 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Operand)) {
2308 return Ctx->getConstantInt32(
2309 static_cast<uint32_t>(Const->getValue() >> 32));
2310 }
2311 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Operand)) {
2312 // Conservatively disallow memory operands with side-effects
2313 // in case of duplication.
2314 assert(Mem->getAddrMode() == OperandMIPS32Mem::Offset);
2315 const Type SplitType = IceType_i32;
2316 Variable *Base = Mem->getBase();
2317 auto *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
2318 assert(!Utils::WouldOverflowAdd(Offset->getValue(), 4));
2319 int32_t NextOffsetVal = Offset->getValue() + 4;
2320 constexpr bool SignExt = false;
2321 if (!OperandMIPS32Mem::canHoldOffset(SplitType, SignExt, NextOffsetVal)) {
2322 // We have to make a temp variable and add 4 to either Base or Offset.
2323 // If we add 4 to Offset, this will convert a non-RegReg addressing
2324 // mode into a RegReg addressing mode. Since NaCl sandboxing disallows
2325 // RegReg addressing modes, prefer adding to base and replacing instead.
2326 // Thus we leave the old offset alone.
2327 Constant *Four = Ctx->getConstantInt32(4);
2328 Variable *NewBase = Func->makeVariable(Base->getType());
2329 lowerArithmetic(InstArithmetic::create(Func, InstArithmetic::Add, NewBase,
2330 Base, Four));
2331 Base = NewBase;
2332 } else {
2333 Offset =
2334 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(NextOffsetVal));
2335 }
2336 return OperandMIPS32Mem::create(Func, SplitType, Base, Offset,
2337 Mem->getAddrMode());
2338 }
2339 llvm_unreachable("Unsupported operand type");
2340 return nullptr;
2341 }
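// Concrete illustration (hypothetical constant): for the i64 constant
// 0x1122334455667788, loOperand() yields 0x55667788 and hiOperand() yields
// 0x11223344; for a stack operand at [base + off], the two halves live at
// [base + off] and [base + off + 4], assuming the little-endian (mipsel)
// configuration this target generates code for.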
2342
2343 SmallBitVector TargetMIPS32::getRegisterSet(RegSetMask Include,
2344 RegSetMask Exclude) const {
2345 SmallBitVector Registers(RegMIPS32::Reg_NUM);
2346
2347 #define X(val, encode, name, scratch, preserved, stackptr, frameptr, isInt, \
2348 isI64Pair, isFP32, isFP64, isVec128, alias_init) \
2349 if (scratch && (Include & RegSet_CallerSave)) \
2350 Registers[RegMIPS32::val] = true; \
2351 if (preserved && (Include & RegSet_CalleeSave)) \
2352 Registers[RegMIPS32::val] = true; \
2353 if (stackptr && (Include & RegSet_StackPointer)) \
2354 Registers[RegMIPS32::val] = true; \
2355 if (frameptr && (Include & RegSet_FramePointer)) \
2356 Registers[RegMIPS32::val] = true; \
2357 if (scratch && (Exclude & RegSet_CallerSave)) \
2358 Registers[RegMIPS32::val] = false; \
2359 if (preserved && (Exclude & RegSet_CalleeSave)) \
2360 Registers[RegMIPS32::val] = false; \
2361 if (stackptr && (Exclude & RegSet_StackPointer)) \
2362 Registers[RegMIPS32::val] = false; \
2363 if (frameptr && (Exclude & RegSet_FramePointer)) \
2364 Registers[RegMIPS32::val] = false;
2365
2366 REGMIPS32_TABLE
2367
2368 #undef X
2369
2370 if (NeedSandboxing) {
2371 Registers[RegMIPS32::Reg_T6] = false;
2372 Registers[RegMIPS32::Reg_T7] = false;
2373 Registers[RegMIPS32::Reg_T8] = false;
2374 }
2375 return Registers;
2376 }
2377
2378 void TargetMIPS32::lowerAlloca(const InstAlloca *Instr) {
2379 // Conservatively require the stack to be aligned. Some stack adjustment
2380 // operations implemented below assume that the stack is aligned before the
2381 // alloca. All the alloca code ensures that the stack alignment is preserved
2382 // after the alloca. The stack alignment restriction can be relaxed in some
2383 // cases.
2384 NeedsStackAlignment = true;
2385
2386 // For default align=0, set it to the real value 1, to avoid any
2387 // bit-manipulation problems below.
2388 const uint32_t AlignmentParam = std::max(1u, Instr->getAlignInBytes());
2389
2390 // LLVM enforces power of 2 alignment.
2391 assert(llvm::isPowerOf2_32(AlignmentParam));
2392 assert(llvm::isPowerOf2_32(MIPS32_STACK_ALIGNMENT_BYTES));
2393
2394 const uint32_t Alignment =
2395 std::max(AlignmentParam, MIPS32_STACK_ALIGNMENT_BYTES);
2396 const bool OverAligned = Alignment > MIPS32_STACK_ALIGNMENT_BYTES;
2397 const bool OptM1 = Func->getOptLevel() == Opt_m1;
2398 const bool AllocaWithKnownOffset = Instr->getKnownFrameOffset();
2399 const bool UseFramePointer =
2400 hasFramePointer() || OverAligned || !AllocaWithKnownOffset || OptM1;
2401
2402 if (UseFramePointer)
2403 setHasFramePointer();
2404
2405 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
2406
2407 Variable *Dest = Instr->getDest();
2408 Operand *TotalSize = Instr->getSizeInBytes();
2409
2410 if (const auto *ConstantTotalSize =
2411 llvm::dyn_cast<ConstantInteger32>(TotalSize)) {
2412 const uint32_t Value =
2413 Utils::applyAlignment(ConstantTotalSize->getValue(), Alignment);
2414 FixedAllocaSizeBytes += Value;
2415 // Constant size alloca.
2416 if (!UseFramePointer) {
2417 // If we don't need a Frame Pointer, this alloca has a known offset to the
2418 // stack pointer. We don't need to adjust the stack pointer, nor assign any
2419 // value to Dest, as Dest is rematerializable.
2420 assert(Dest->isRematerializable());
2421 Context.insert<InstFakeDef>(Dest);
2422 return;
2423 }
2424
2425 if (Alignment > MIPS32_STACK_ALIGNMENT_BYTES) {
2426 CurrentAllocaOffset =
2427 Utils::applyAlignment(CurrentAllocaOffset, Alignment);
2428 }
2429 auto *T = I32Reg();
2430 _addiu(T, SP, CurrentAllocaOffset);
2431 _mov(Dest, T);
2432 CurrentAllocaOffset += Value;
2433 return;
2434
2435 } else {
2436 // Non-constant sizes need to be adjusted to the next highest multiple of
2437 // the required alignment at runtime.
2438 VariableAllocaUsed = true;
2439 VariableAllocaAlignBytes = AlignmentParam;
2440 Variable *AlignAmount;
2441 auto *TotalSizeR = legalizeToReg(TotalSize, Legal_Reg);
2442 auto *T1 = I32Reg();
2443 auto *T2 = I32Reg();
2444 auto *T3 = I32Reg();
2445 auto *T4 = I32Reg();
2446 auto *T5 = I32Reg();
2447 _addiu(T1, TotalSizeR, MIPS32_STACK_ALIGNMENT_BYTES - 1);
2448 _addiu(T2, getZero(), -MIPS32_STACK_ALIGNMENT_BYTES);
2449 _and(T3, T1, T2);
2450 _subu(T4, SP, T3);
2451 if (Instr->getAlignInBytes()) {
2452 AlignAmount =
2453 legalizeToReg(Ctx->getConstantInt32(-AlignmentParam), Legal_Reg);
2454 _and(T5, T4, AlignAmount);
2455 _mov(Dest, T5);
2456 } else {
2457 _mov(Dest, T4);
2458 }
2459 if (OptM1)
2460 _mov(SP, Dest);
2461 else
2462 Sandboxer(this).reset_sp(Dest);
2463 return;
2464 }
2465 }
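// Sketch of the runtime rounding in the non-constant path above, assuming
// the 16-byte MIPS32 stack alignment and TotalSize = 20 (hypothetical):
//   T1 = 20 + 15 = 35
//   T2 = -16 (mask 0xFFFFFFF0)
//   T3 = 35 & 0xFFFFFFF0 = 32   # size rounded up to a multiple of 16
//   T4 = SP - 32                # new allocation base
// and, when an explicit alignment was requested, T5 = T4 & -AlignmentParam
// over-aligns the resulting pointer.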
2466
2467 void TargetMIPS32::lowerInt64Arithmetic(const InstArithmetic *Instr,
2468 Variable *Dest, Operand *Src0,
2469 Operand *Src1) {
2470 InstArithmetic::OpKind Op = Instr->getOp();
2471 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
2472 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
2473 Variable *Src0LoR = nullptr;
2474 Variable *Src1LoR = nullptr;
2475 Variable *Src0HiR = nullptr;
2476 Variable *Src1HiR = nullptr;
2477
2478 switch (Op) {
2479 case InstArithmetic::_num:
2480 llvm::report_fatal_error("Unknown arithmetic operator");
2481 return;
2482 case InstArithmetic::Add: {
2483 Src0LoR = legalizeToReg(loOperand(Src0));
2484 Src1LoR = legalizeToReg(loOperand(Src1));
2485 Src0HiR = legalizeToReg(hiOperand(Src0));
2486 Src1HiR = legalizeToReg(hiOperand(Src1));
2487 auto *T_Carry = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2488 *T_Hi2 = I32Reg();
2489 _addu(T_Lo, Src0LoR, Src1LoR);
2490 _mov(DestLo, T_Lo);
2491 _sltu(T_Carry, T_Lo, Src0LoR);
2492 _addu(T_Hi, T_Carry, Src0HiR);
2493 _addu(T_Hi2, Src1HiR, T_Hi);
2494 _mov(DestHi, T_Hi2);
2495 return;
2496 }
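// Why the sltu above detects the carry (arithmetic sketch): addu wraps
// modulo 2^32, so the low-word sum is smaller than an operand exactly when
// a carry occurred. E.g. 0xFFFFFFFF + 0x00000002 = 0x00000001, which is
// less than 0xFFFFFFFF, so T_Carry = 1 and the high words get the extra 1.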
2497 case InstArithmetic::And: {
2498 Src0LoR = legalizeToReg(loOperand(Src0));
2499 Src1LoR = legalizeToReg(loOperand(Src1));
2500 Src0HiR = legalizeToReg(hiOperand(Src0));
2501 Src1HiR = legalizeToReg(hiOperand(Src1));
2502 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2503 _and(T_Lo, Src0LoR, Src1LoR);
2504 _mov(DestLo, T_Lo);
2505 _and(T_Hi, Src0HiR, Src1HiR);
2506 _mov(DestHi, T_Hi);
2507 return;
2508 }
2509 case InstArithmetic::Sub: {
2510 Src0LoR = legalizeToReg(loOperand(Src0));
2511 Src1LoR = legalizeToReg(loOperand(Src1));
2512 Src0HiR = legalizeToReg(hiOperand(Src0));
2513 Src1HiR = legalizeToReg(hiOperand(Src1));
2514 auto *T_Borrow = I32Reg(), *T_Lo = I32Reg(), *T_Hi = I32Reg(),
2515 *T_Hi2 = I32Reg();
2516 _subu(T_Lo, Src0LoR, Src1LoR);
2517 _mov(DestLo, T_Lo);
2518 _sltu(T_Borrow, Src0LoR, Src1LoR);
2519 _addu(T_Hi, T_Borrow, Src1HiR);
2520 _subu(T_Hi2, Src0HiR, T_Hi);
2521 _mov(DestHi, T_Hi2);
2522 return;
2523 }
2524 case InstArithmetic::Or: {
2525 Src0LoR = legalizeToReg(loOperand(Src0));
2526 Src1LoR = legalizeToReg(loOperand(Src1));
2527 Src0HiR = legalizeToReg(hiOperand(Src0));
2528 Src1HiR = legalizeToReg(hiOperand(Src1));
2529 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2530 _or(T_Lo, Src0LoR, Src1LoR);
2531 _mov(DestLo, T_Lo);
2532 _or(T_Hi, Src0HiR, Src1HiR);
2533 _mov(DestHi, T_Hi);
2534 return;
2535 }
2536 case InstArithmetic::Xor: {
2537 Src0LoR = legalizeToReg(loOperand(Src0));
2538 Src1LoR = legalizeToReg(loOperand(Src1));
2539 Src0HiR = legalizeToReg(hiOperand(Src0));
2540 Src1HiR = legalizeToReg(hiOperand(Src1));
2541 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
2542 _xor(T_Lo, Src0LoR, Src1LoR);
2543 _mov(DestLo, T_Lo);
2544 _xor(T_Hi, Src0HiR, Src1HiR);
2545 _mov(DestHi, T_Hi);
2546 return;
2547 }
2548 case InstArithmetic::Mul: {
2549 // TODO(rkotler): Make sure that mul has the side effect of clobbering
2550 // LO, HI. Check for any other LO, HI quirkiness in this section.
2551 Src0LoR = legalizeToReg(loOperand(Src0));
2552 Src1LoR = legalizeToReg(loOperand(Src1));
2553 Src0HiR = legalizeToReg(hiOperand(Src0));
2554 Src1HiR = legalizeToReg(hiOperand(Src1));
2555 auto *T_Lo = I32Reg(RegMIPS32::Reg_LO), *T_Hi = I32Reg(RegMIPS32::Reg_HI);
2556 auto *T1 = I32Reg(), *T2 = I32Reg();
2557 auto *TM1 = I32Reg(), *TM2 = I32Reg(), *TM3 = I32Reg(), *TM4 = I32Reg();
2558 _multu(T_Lo, Src0LoR, Src1LoR);
2559 Context.insert<InstFakeDef>(T_Hi, T_Lo);
2560 _mflo(T1, T_Lo);
2561 _mfhi(T2, T_Hi);
2562 _mov(DestLo, T1);
2563 _mul(TM1, Src0HiR, Src1LoR);
2564 _mul(TM2, Src0LoR, Src1HiR);
2565 _addu(TM3, TM1, T2);
2566 _addu(TM4, TM3, TM2);
2567 _mov(DestHi, TM4);
2568 return;
2569 }
2570 case InstArithmetic::Shl: {
2571 auto *T_Lo = I32Reg();
2572 auto *T_Hi = I32Reg();
2573 auto *T1_Lo = I32Reg();
2574 auto *T1_Hi = I32Reg();
2575 auto *T1 = I32Reg();
2576 auto *T2 = I32Reg();
2577 auto *T3 = I32Reg();
2578 auto *T4 = I32Reg();
2579 auto *T5 = I32Reg();
2580
2581 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2582 Src0LoR = legalizeToReg(loOperand(Src0));
2583 int64_t ShiftAmount = Const->getValue();
2584 if (ShiftAmount == 1) {
2585 Src0HiR = legalizeToReg(hiOperand(Src0));
2586 _addu(T_Lo, Src0LoR, Src0LoR);
2587 _sltu(T1, T_Lo, Src0LoR);
2588 _addu(T2, T1, Src0HiR);
2589 _addu(T_Hi, Src0HiR, T2);
2590 } else if (ShiftAmount < INT32_BITS) {
2591 Src0HiR = legalizeToReg(hiOperand(Src0));
2592 _srl(T1, Src0LoR, INT32_BITS - ShiftAmount);
2593 _sll(T2, Src0HiR, ShiftAmount);
2594 _or(T_Hi, T1, T2);
2595 _sll(T_Lo, Src0LoR, ShiftAmount);
2596 } else if (ShiftAmount == INT32_BITS) {
2597 _addiu(T_Lo, getZero(), 0);
2598 _mov(T_Hi, Src0LoR);
2599 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2600 _sll(T_Hi, Src0LoR, ShiftAmount - INT32_BITS);
2601 _addiu(T_Lo, getZero(), 0);
2602 }
2603 _mov(DestLo, T_Lo);
2604 _mov(DestHi, T_Hi);
2605 return;
2606 }
2607
2608 Src0LoR = legalizeToReg(loOperand(Src0));
2609 Src1LoR = legalizeToReg(loOperand(Src1));
2610 Src0HiR = legalizeToReg(hiOperand(Src0));
2611
2612 _sllv(T1, Src0HiR, Src1LoR);
2613 _not(T2, Src1LoR);
2614 _srl(T3, Src0LoR, 1);
2615 _srlv(T4, T3, T2);
2616 _or(T_Hi, T1, T4);
2617 _sllv(T_Lo, Src0LoR, Src1LoR);
2618
2619 _mov(T1_Hi, T_Hi);
2620 _mov(T1_Lo, T_Lo);
2621 _andi(T5, Src1LoR, INT32_BITS);
2622 _movn(T1_Hi, T_Lo, T5);
2623 _movn(T1_Lo, getZero(), T5);
2624 _mov(DestHi, T1_Hi);
2625 _mov(DestLo, T1_Lo);
2626 return;
2627 }
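// Sketch of the variable-shift sequence above: for a shift amount sa in
// [0, 31] the result pair is Hi' = (Hi << sa) | (Lo >> (32 - sa)) and
// Lo' = Lo << sa, where Lo >> (32 - sa) is computed as (Lo >> 1) >> (31 - sa)
// to keep each hardware shift in range. The final andi/movn pair tests bit 5
// of sa and, for sa >= 32, selects Hi' = Lo << (sa - 32) and Lo' = 0.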
2628 case InstArithmetic::Lshr: {
2629
2630 auto *T_Lo = I32Reg();
2631 auto *T_Hi = I32Reg();
2632 auto *T1_Lo = I32Reg();
2633 auto *T1_Hi = I32Reg();
2634 auto *T1 = I32Reg();
2635 auto *T2 = I32Reg();
2636 auto *T3 = I32Reg();
2637 auto *T4 = I32Reg();
2638 auto *T5 = I32Reg();
2639
2640 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2641 Src0HiR = legalizeToReg(hiOperand(Src0));
2642 int64_t ShiftAmount = Const->getValue();
2643 if (ShiftAmount < INT32_BITS) {
2644 Src0LoR = legalizeToReg(loOperand(Src0));
2645 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2646 _srl(T2, Src0LoR, ShiftAmount);
2647 _or(T_Lo, T1, T2);
2648 _srl(T_Hi, Src0HiR, ShiftAmount);
2649 } else if (ShiftAmount == INT32_BITS) {
2650 _mov(T_Lo, Src0HiR);
2651 _addiu(T_Hi, getZero(), 0);
2652 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2653 _srl(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2654 _addiu(T_Hi, getZero(), 0);
2655 }
2656 _mov(DestLo, T_Lo);
2657 _mov(DestHi, T_Hi);
2658 return;
2659 }
2660
2661 Src0LoR = legalizeToReg(loOperand(Src0));
2662 Src1LoR = legalizeToReg(loOperand(Src1));
2663 Src0HiR = legalizeToReg(hiOperand(Src0));
2664
2665 _srlv(T1, Src0LoR, Src1LoR);
2666 _not(T2, Src1LoR);
2667 _sll(T3, Src0HiR, 1);
2668 _sllv(T4, T3, T2);
2669 _or(T_Lo, T1, T4);
2670 _srlv(T_Hi, Src0HiR, Src1LoR);
2671
2672 _mov(T1_Hi, T_Hi);
2673 _mov(T1_Lo, T_Lo);
2674 _andi(T5, Src1LoR, INT32_BITS);
2675 _movn(T1_Lo, T_Hi, T5);
2676 _movn(T1_Hi, getZero(), T5);
2677 _mov(DestHi, T1_Hi);
2678 _mov(DestLo, T1_Lo);
2679 return;
2680 }
2681 case InstArithmetic::Ashr: {
2682
2683 auto *T_Lo = I32Reg();
2684 auto *T_Hi = I32Reg();
2685 auto *T1_Lo = I32Reg();
2686 auto *T1_Hi = I32Reg();
2687 auto *T1 = I32Reg();
2688 auto *T2 = I32Reg();
2689 auto *T3 = I32Reg();
2690 auto *T4 = I32Reg();
2691 auto *T5 = I32Reg();
2692 auto *T6 = I32Reg();
2693
2694 if (auto *Const = llvm::dyn_cast<ConstantInteger64>(Src1)) {
2695 Src0HiR = legalizeToReg(hiOperand(Src0));
2696 int64_t ShiftAmount = Const->getValue();
2697 if (ShiftAmount < INT32_BITS) {
2698 Src0LoR = legalizeToReg(loOperand(Src0));
2699 _sll(T1, Src0HiR, INT32_BITS - ShiftAmount);
2700 _srl(T2, Src0LoR, ShiftAmount);
2701 _or(T_Lo, T1, T2);
2702 _sra(T_Hi, Src0HiR, ShiftAmount);
2703 } else if (ShiftAmount == INT32_BITS) {
2704 _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2705 _mov(T_Lo, Src0HiR);
2706 } else if (ShiftAmount > INT32_BITS && ShiftAmount < 64) {
2707 _sra(T_Lo, Src0HiR, ShiftAmount - INT32_BITS);
2708 _sra(T_Hi, Src0HiR, INT32_BITS - 1);
2709 }
2710 _mov(DestLo, T_Lo);
2711 _mov(DestHi, T_Hi);
2712 return;
2713 }
2714
2715 Src0LoR = legalizeToReg(loOperand(Src0));
2716 Src1LoR = legalizeToReg(loOperand(Src1));
2717 Src0HiR = legalizeToReg(hiOperand(Src0));
2718
2719 _srlv(T1, Src0LoR, Src1LoR);
2720 _not(T2, Src1LoR);
2721 _sll(T3, Src0HiR, 1);
2722 _sllv(T4, T3, T2);
2723 _or(T_Lo, T1, T4);
2724 _srav(T_Hi, Src0HiR, Src1LoR);
2725
2726 _mov(T1_Hi, T_Hi);
2727 _mov(T1_Lo, T_Lo);
2728 _andi(T5, Src1LoR, INT32_BITS);
2729 _movn(T1_Lo, T_Hi, T5);
2730 _sra(T6, Src0HiR, INT32_BITS - 1);
2731 _movn(T1_Hi, T6, T5);
2732 _mov(DestHi, T1_Hi);
2733 _mov(DestLo, T1_Lo);
2734 return;
2735 }
2736 case InstArithmetic::Fadd:
2737 case InstArithmetic::Fsub:
2738 case InstArithmetic::Fmul:
2739 case InstArithmetic::Fdiv:
2740 case InstArithmetic::Frem:
2741 llvm::report_fatal_error("FP instruction with i64 type");
2742 return;
2743 case InstArithmetic::Udiv:
2744 case InstArithmetic::Sdiv:
2745 case InstArithmetic::Urem:
2746 case InstArithmetic::Srem:
2747 llvm::report_fatal_error("64-bit div and rem should have been prelowered");
2748 return;
2749 }
2750 }
2751
2752 void TargetMIPS32::lowerArithmetic(const InstArithmetic *Instr) {
2753 Variable *Dest = Instr->getDest();
2754
2755 if (Dest->isRematerializable()) {
2756 Context.insert<InstFakeDef>(Dest);
2757 return;
2758 }
2759
2760 // We need to signal all the UnimplementedLoweringError errors before any
2761 // legalization into new variables, otherwise Om1 register allocation may fail
2762 // when it sees variables that are defined but not used.
2763 Type DestTy = Dest->getType();
2764 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
2765 Operand *Src1 = legalizeUndef(Instr->getSrc(1));
2766 if (DestTy == IceType_i64) {
2767 lowerInt64Arithmetic(Instr, Instr->getDest(), Src0, Src1);
2768 return;
2769 }
2770 if (isVectorType(Dest->getType())) {
2771 llvm::report_fatal_error("Arithmetic: Destination type is vector");
2772 return;
2773 }
2774
2775 Variable *T = makeReg(Dest->getType());
2776 Variable *Src0R = legalizeToReg(Src0);
2777 Variable *Src1R = nullptr;
2778 uint32_t Value = 0;
2779 bool IsSrc1Imm16 = false;
2780
2781 switch (Instr->getOp()) {
2782 case InstArithmetic::Add:
2783 case InstArithmetic::Sub: {
2784 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2785 if (Const32 != nullptr && isInt<16>(int32_t(Const32->getValue()))) {
2786 IsSrc1Imm16 = true;
2787 Value = Const32->getValue();
2788 } else {
2789 Src1R = legalizeToReg(Src1);
2790 }
2791 break;
2792 }
2793 case InstArithmetic::And:
2794 case InstArithmetic::Or:
2795 case InstArithmetic::Xor:
2796 case InstArithmetic::Shl:
2797 case InstArithmetic::Lshr:
2798 case InstArithmetic::Ashr: {
2799 auto *Const32 = llvm::dyn_cast<ConstantInteger32>(Src1);
2800 if (Const32 != nullptr && llvm::isUInt<16>(uint32_t(Const32->getValue()))) {
2801 IsSrc1Imm16 = true;
2802 Value = Const32->getValue();
2803 } else {
2804 Src1R = legalizeToReg(Src1);
2805 }
2806 break;
2807 }
2808 default:
2809 Src1R = legalizeToReg(Src1);
2810 break;
2811 }
2812 constexpr uint32_t DivideByZeroTrapCode = 7;
2813
2814 switch (Instr->getOp()) {
2815 case InstArithmetic::_num:
2816 break;
2817 case InstArithmetic::Add: {
2818 auto *T0R = Src0R;
2819 auto *T1R = Src1R;
2820 if (Dest->getType() != IceType_i32) {
2821 T0R = makeReg(IceType_i32);
2822 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2823 if (!IsSrc1Imm16) {
2824 T1R = makeReg(IceType_i32);
2825 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2826 }
2827 }
2828 if (IsSrc1Imm16) {
2829 _addiu(T, T0R, Value);
2830 } else {
2831 _addu(T, T0R, T1R);
2832 }
2833 _mov(Dest, T);
2834 return;
2835 }
2836 case InstArithmetic::And:
2837 if (IsSrc1Imm16) {
2838 _andi(T, Src0R, Value);
2839 } else {
2840 _and(T, Src0R, Src1R);
2841 }
2842 _mov(Dest, T);
2843 return;
2844 case InstArithmetic::Or:
2845 if (IsSrc1Imm16) {
2846 _ori(T, Src0R, Value);
2847 } else {
2848 _or(T, Src0R, Src1R);
2849 }
2850 _mov(Dest, T);
2851 return;
2852 case InstArithmetic::Xor:
2853 if (IsSrc1Imm16) {
2854 _xori(T, Src0R, Value);
2855 } else {
2856 _xor(T, Src0R, Src1R);
2857 }
2858 _mov(Dest, T);
2859 return;
2860 case InstArithmetic::Sub: {
2861 auto *T0R = Src0R;
2862 auto *T1R = Src1R;
2863 if (Dest->getType() != IceType_i32) {
2864 T0R = makeReg(IceType_i32);
2865 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2866 if (!IsSrc1Imm16) {
2867 T1R = makeReg(IceType_i32);
2868 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2869 }
2870 }
2871 if (IsSrc1Imm16) {
2872 _addiu(T, T0R, -Value);
2873 } else {
2874 _subu(T, T0R, T1R);
2875 }
2876 _mov(Dest, T);
2877 return;
2878 }
2879 case InstArithmetic::Mul: {
2880 _mul(T, Src0R, Src1R);
2881 _mov(Dest, T);
2882 return;
2883 }
2884 case InstArithmetic::Shl: {
2885 if (IsSrc1Imm16) {
2886 _sll(T, Src0R, Value);
2887 } else {
2888 _sllv(T, Src0R, Src1R);
2889 }
2890 _mov(Dest, T);
2891 return;
2892 }
2893 case InstArithmetic::Lshr: {
2894 auto *T0R = Src0R;
2895 auto *T1R = Src1R;
2896 if (Dest->getType() != IceType_i32) {
2897 T0R = makeReg(IceType_i32);
2898 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2899 if (!IsSrc1Imm16) {
2900 T1R = makeReg(IceType_i32);
2901 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2902 }
2903 }
2904 if (IsSrc1Imm16) {
2905 _srl(T, T0R, Value);
2906 } else {
2907 _srlv(T, T0R, T1R);
2908 }
2909 _mov(Dest, T);
2910 return;
2911 }
2912 case InstArithmetic::Ashr: {
2913 auto *T0R = Src0R;
2914 auto *T1R = Src1R;
2915 if (Dest->getType() != IceType_i32) {
2916 T0R = makeReg(IceType_i32);
2917 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2918 if (!IsSrc1Imm16) {
2919 T1R = makeReg(IceType_i32);
2920 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2921 }
2922 }
2923 if (IsSrc1Imm16) {
2924 _sra(T, T0R, Value);
2925 } else {
2926 _srav(T, T0R, T1R);
2927 }
2928 _mov(Dest, T);
2929 return;
2930 }
2931 case InstArithmetic::Udiv: {
2932 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2933 auto *T0R = Src0R;
2934 auto *T1R = Src1R;
2935 if (Dest->getType() != IceType_i32) {
2936 T0R = makeReg(IceType_i32);
2937 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2938 T1R = makeReg(IceType_i32);
2939 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2940 }
2941 _divu(T_Zero, T0R, T1R);
2942 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2943 _mflo(T, T_Zero);
2944 _mov(Dest, T);
2945 return;
2946 }
2947 case InstArithmetic::Sdiv: {
2948 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2949 auto *T0R = Src0R;
2950 auto *T1R = Src1R;
2951 if (Dest->getType() != IceType_i32) {
2952 T0R = makeReg(IceType_i32);
2953 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2954 T1R = makeReg(IceType_i32);
2955 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2956 }
2957 _div(T_Zero, T0R, T1R);
2958 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2959 _mflo(T, T_Zero);
2960 _mov(Dest, T);
2961 return;
2962 }
2963 case InstArithmetic::Urem: {
2964 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2965 auto *T0R = Src0R;
2966 auto *T1R = Src1R;
2967 if (Dest->getType() != IceType_i32) {
2968 T0R = makeReg(IceType_i32);
2969 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
2970 T1R = makeReg(IceType_i32);
2971 lowerCast(InstCast::create(Func, InstCast::Zext, T1R, Src1R));
2972 }
2973 _divu(T_Zero, T0R, T1R);
2974 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2975 _mfhi(T, T_Zero);
2976 _mov(Dest, T);
2977 return;
2978 }
2979 case InstArithmetic::Srem: {
2980 auto *T_Zero = I32Reg(RegMIPS32::Reg_ZERO);
2981 auto *T0R = Src0R;
2982 auto *T1R = Src1R;
2983 if (Dest->getType() != IceType_i32) {
2984 T0R = makeReg(IceType_i32);
2985 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
2986 T1R = makeReg(IceType_i32);
2987 lowerCast(InstCast::create(Func, InstCast::Sext, T1R, Src1R));
2988 }
2989 _div(T_Zero, T0R, T1R);
2990 _teq(T1R, T_Zero, DivideByZeroTrapCode); // Trap if divide-by-zero
2991 _mfhi(T, T_Zero);
2992 _mov(Dest, T);
2993 return;
2994 }
2995 case InstArithmetic::Fadd: {
2996 if (DestTy == IceType_f32) {
2997 _add_s(T, Src0R, Src1R);
2998 _mov(Dest, T);
2999 return;
3000 }
3001 if (DestTy == IceType_f64) {
3002 _add_d(T, Src0R, Src1R);
3003 _mov(Dest, T);
3004 return;
3005 }
3006 break;
3007 }
3008 case InstArithmetic::Fsub:
3009 if (DestTy == IceType_f32) {
3010 _sub_s(T, Src0R, Src1R);
3011 _mov(Dest, T);
3012 return;
3013 }
3014 if (DestTy == IceType_f64) {
3015 _sub_d(T, Src0R, Src1R);
3016 _mov(Dest, T);
3017 return;
3018 }
3019 break;
3020 case InstArithmetic::Fmul:
3021 if (DestTy == IceType_f32) {
3022 _mul_s(T, Src0R, Src1R);
3023 _mov(Dest, T);
3024 return;
3025 }
3026 if (DestTy == IceType_f64) {
3027 _mul_d(T, Src0R, Src1R);
3028 _mov(Dest, T);
3029 return;
3030 }
3031 break;
3032 case InstArithmetic::Fdiv:
3033 if (DestTy == IceType_f32) {
3034 _div_s(T, Src0R, Src1R);
3035 _mov(Dest, T);
3036 return;
3037 }
3038 if (DestTy == IceType_f64) {
3039 _div_d(T, Src0R, Src1R);
3040 _mov(Dest, T);
3041 return;
3042 }
3043 break;
3044 case InstArithmetic::Frem:
3045 llvm::report_fatal_error("frem should have been prelowered.");
3046 break;
3047 }
3048 llvm::report_fatal_error("Unknown arithmetic operator");
3049 }
3050
3051 void TargetMIPS32::lowerAssign(const InstAssign *Instr) {
3052 Variable *Dest = Instr->getDest();
3053
3054 if (Dest->isRematerializable()) {
3055 Context.insert<InstFakeDef>(Dest);
3056 return;
3057 }
3058
3059 // The source type may not be the same as the destination type.
3060 if (isVectorType(Dest->getType())) {
3061 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3062 auto *DstVec = llvm::dyn_cast<VariableVecOn32>(Dest);
3063 for (SizeT i = 0; i < DstVec->ContainersPerVector; ++i) {
3064 auto *DCont = DstVec->getContainers()[i];
3065 auto *SCont =
3066 legalize(getOperandAtIndex(Src0, IceType_i32, i), Legal_Reg);
3067 auto *TReg = makeReg(IceType_i32);
3068 _mov(TReg, SCont);
3069 _mov(DCont, TReg);
3070 }
3071 return;
3072 }
3073 Operand *Src0 = Instr->getSrc(0);
3074 assert(Dest->getType() == Src0->getType());
3075 if (Dest->getType() == IceType_i64) {
3076 Src0 = legalizeUndef(Src0);
3077 Operand *Src0Lo = legalize(loOperand(Src0), Legal_Reg);
3078 Operand *Src0Hi = legalize(hiOperand(Src0), Legal_Reg);
3079 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3080 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3081 auto *T_Lo = I32Reg(), *T_Hi = I32Reg();
3082 _mov(T_Lo, Src0Lo);
3083 _mov(DestLo, T_Lo);
3084 _mov(T_Hi, Src0Hi);
3085 _mov(DestHi, T_Hi);
3086 return;
3087 }
3088 Operand *SrcR;
3089 if (Dest->hasReg()) {
3090 // If Dest already has a physical register, then legalize the Src operand
3091 // into a Variable with the same register assignment. In particular, this
3092 // allows the use of Flex operands.
3093 SrcR = legalize(Src0, Legal_Reg, Dest->getRegNum());
3094 } else {
3095 // Dest could be a stack operand. Since we could potentially need
3096 // to do a Store (and store can only have Register operands),
3097 // legalize this to a register.
3098 SrcR = legalize(Src0, Legal_Reg);
3099 }
3100 _mov(Dest, SrcR);
3101 }
3102
3103 void TargetMIPS32::lowerBr(const InstBr *Instr) {
3104 if (Instr->isUnconditional()) {
3105 _br(Instr->getTargetUnconditional());
3106 return;
3107 }
3108 CfgNode *TargetTrue = Instr->getTargetTrue();
3109 CfgNode *TargetFalse = Instr->getTargetFalse();
3110 Operand *Boolean = Instr->getCondition();
3111 const Inst *Producer = Computations.getProducerOf(Boolean);
3112 if (Producer == nullptr) {
3113 // Since we don't know the producer of this boolean, assume it is kept in
3114 // positive logic and just emit beqz with this Boolean as an operand.
3116 auto *BooleanR = legalizeToReg(Boolean);
3117 _br(TargetTrue, TargetFalse, BooleanR, CondMIPS32::Cond::EQZ);
3118 return;
3119 }
3120 if (Producer->getKind() == Inst::Icmp) {
3121 const InstIcmp *CompareInst = llvm::cast<InstIcmp>(Producer);
3122 Operand *Src0 = CompareInst->getSrc(0);
3123 Operand *Src1 = CompareInst->getSrc(1);
3124 const Type Src0Ty = Src0->getType();
3125 assert(Src0Ty == Src1->getType());
3126
3127 Variable *Src0R = nullptr;
3128 Variable *Src1R = nullptr;
3129 Variable *Src0HiR = nullptr;
3130 Variable *Src1HiR = nullptr;
3131 if (Src0Ty == IceType_i64) {
3132 Src0R = legalizeToReg(loOperand(Src0));
3133 Src1R = legalizeToReg(loOperand(Src1));
3134 Src0HiR = legalizeToReg(hiOperand(Src0));
3135 Src1HiR = legalizeToReg(hiOperand(Src1));
3136 } else {
3137 auto *Src0RT = legalizeToReg(Src0);
3138 auto *Src1RT = legalizeToReg(Src1);
3139       // Sign- or zero-extend the source operands as the comparison requires.
3140 if (Src0Ty != IceType_i32) {
3141 InstCast::OpKind CastKind;
3142 switch (CompareInst->getCondition()) {
3143 case InstIcmp::Eq:
3144 case InstIcmp::Ne:
3145 case InstIcmp::Sgt:
3146 case InstIcmp::Sge:
3147 case InstIcmp::Slt:
3148 case InstIcmp::Sle:
3149 CastKind = InstCast::Sext;
3150 break;
3151 default:
3152 CastKind = InstCast::Zext;
3153 break;
3154 }
3155 Src0R = makeReg(IceType_i32);
3156 Src1R = makeReg(IceType_i32);
3157 lowerCast(InstCast::create(Func, CastKind, Src0R, Src0RT));
3158 lowerCast(InstCast::create(Func, CastKind, Src1R, Src1RT));
3159 } else {
3160 Src0R = Src0RT;
3161 Src1R = Src1RT;
3162 }
3163 }
3164 auto *DestT = makeReg(IceType_i32);
3165
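    // Each case below branches on the negation of its predicate (e.g. Eq
    // emits a branch on NE), so the conditional branch is taken to
    // TargetFalse and the fall-through path reaches TargetTrue. For i64
    // operands the high words are compared first, and movz substitutes the
    // low-word comparison when the high words are equal (T1 == 0).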
3166 switch (CompareInst->getCondition()) {
3167 default:
3168 llvm_unreachable("unexpected condition");
3169 return;
3170 case InstIcmp::Eq: {
3171 if (Src0Ty == IceType_i64) {
3172 auto *T1 = I32Reg();
3173 auto *T2 = I32Reg();
3174 auto *T3 = I32Reg();
3175 _xor(T1, Src0HiR, Src1HiR);
3176 _xor(T2, Src0R, Src1R);
3177 _or(T3, T1, T2);
3178 _mov(DestT, T3);
3179 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3180 } else {
3181 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::NE);
3182 }
3183 return;
3184 }
3185 case InstIcmp::Ne: {
3186 if (Src0Ty == IceType_i64) {
3187 auto *T1 = I32Reg();
3188 auto *T2 = I32Reg();
3189 auto *T3 = I32Reg();
3190 _xor(T1, Src0HiR, Src1HiR);
3191 _xor(T2, Src0R, Src1R);
3192 _or(T3, T1, T2);
3193 _mov(DestT, T3);
3194 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3195 } else {
3196 _br(TargetTrue, TargetFalse, Src0R, Src1R, CondMIPS32::Cond::EQ);
3197 }
3198 return;
3199 }
3200 case InstIcmp::Ugt: {
3201 if (Src0Ty == IceType_i64) {
3202 auto *T1 = I32Reg();
3203 auto *T2 = I32Reg();
3204 auto *T3 = I32Reg();
3205 auto *T4 = I32Reg();
3206 auto *T5 = I32Reg();
3207 _xor(T1, Src0HiR, Src1HiR);
3208 _sltu(T2, Src1HiR, Src0HiR);
3209 _xori(T3, T2, 1);
3210 _sltu(T4, Src1R, Src0R);
3211 _xori(T5, T4, 1);
3212 _movz(T3, T5, T1);
3213 _mov(DestT, T3);
3214 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3215 } else {
3216 _sltu(DestT, Src1R, Src0R);
3217 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3218 }
3219 return;
3220 }
3221 case InstIcmp::Uge: {
3222 if (Src0Ty == IceType_i64) {
3223 auto *T1 = I32Reg();
3224 auto *T2 = I32Reg();
3225 auto *T3 = I32Reg();
3226 _xor(T1, Src0HiR, Src1HiR);
3227 _sltu(T2, Src0HiR, Src1HiR);
3228 _sltu(T3, Src0R, Src1R);
3229 _movz(T2, T3, T1);
3230 _mov(DestT, T2);
3231 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3232 } else {
3233 _sltu(DestT, Src0R, Src1R);
3234 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3235 }
3236 return;
3237 }
3238 case InstIcmp::Ult: {
3239 if (Src0Ty == IceType_i64) {
3240 auto *T1 = I32Reg();
3241 auto *T2 = I32Reg();
3242 auto *T3 = I32Reg();
3243 auto *T4 = I32Reg();
3244 auto *T5 = I32Reg();
3245 _xor(T1, Src0HiR, Src1HiR);
3246 _sltu(T2, Src0HiR, Src1HiR);
3247 _xori(T3, T2, 1);
3248 _sltu(T4, Src0R, Src1R);
3249 _xori(T5, T4, 1);
3250 _movz(T3, T5, T1);
3251 _mov(DestT, T3);
3252 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3253 } else {
3254 _sltu(DestT, Src0R, Src1R);
3255 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3256 }
3257 return;
3258 }
3259 case InstIcmp::Ule: {
3260 if (Src0Ty == IceType_i64) {
3261 auto *T1 = I32Reg();
3262 auto *T2 = I32Reg();
3263 auto *T3 = I32Reg();
3264 _xor(T1, Src0HiR, Src1HiR);
3265 _sltu(T2, Src1HiR, Src0HiR);
3266 _sltu(T3, Src1R, Src0R);
3267 _movz(T2, T3, T1);
3268 _mov(DestT, T2);
3269 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3270 } else {
3271 _sltu(DestT, Src1R, Src0R);
3272 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3273 }
3274 return;
3275 }
3276 case InstIcmp::Sgt: {
3277 if (Src0Ty == IceType_i64) {
3278 auto *T1 = I32Reg();
3279 auto *T2 = I32Reg();
3280 auto *T3 = I32Reg();
3281 auto *T4 = I32Reg();
3282 auto *T5 = I32Reg();
3283 _xor(T1, Src0HiR, Src1HiR);
3284 _slt(T2, Src1HiR, Src0HiR);
3285 _xori(T3, T2, 1);
3286 _sltu(T4, Src1R, Src0R);
3287 _xori(T5, T4, 1);
3288 _movz(T3, T5, T1);
3289 _mov(DestT, T3);
3290 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3291 } else {
3292 _slt(DestT, Src1R, Src0R);
3293 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3294 }
3295 return;
3296 }
3297 case InstIcmp::Sge: {
3298 if (Src0Ty == IceType_i64) {
3299 auto *T1 = I32Reg();
3300 auto *T2 = I32Reg();
3301 auto *T3 = I32Reg();
3302 _xor(T1, Src0HiR, Src1HiR);
3303 _slt(T2, Src0HiR, Src1HiR);
3304 _sltu(T3, Src0R, Src1R);
3305 _movz(T2, T3, T1);
3306 _mov(DestT, T2);
3307 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3308 } else {
3309 _slt(DestT, Src0R, Src1R);
3310 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3311 }
3312 return;
3313 }
3314 case InstIcmp::Slt: {
3315 if (Src0Ty == IceType_i64) {
3316 auto *T1 = I32Reg();
3317 auto *T2 = I32Reg();
3318 auto *T3 = I32Reg();
3319 auto *T4 = I32Reg();
3320 auto *T5 = I32Reg();
3321 _xor(T1, Src0HiR, Src1HiR);
3322 _slt(T2, Src0HiR, Src1HiR);
3323 _xori(T3, T2, 1);
3324 _sltu(T4, Src0R, Src1R);
3325 _xori(T5, T4, 1);
3326 _movz(T3, T5, T1);
3327 _mov(DestT, T3);
3328 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3329 } else {
3330 _slt(DestT, Src0R, Src1R);
3331 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::EQZ);
3332 }
3333 return;
3334 }
3335 case InstIcmp::Sle: {
3336 if (Src0Ty == IceType_i64) {
3337 auto *T1 = I32Reg();
3338 auto *T2 = I32Reg();
3339 auto *T3 = I32Reg();
3340 _xor(T1, Src0HiR, Src1HiR);
3341 _slt(T2, Src1HiR, Src0HiR);
3342 _sltu(T3, Src1R, Src0R);
3343 _movz(T2, T3, T1);
3344 _mov(DestT, T2);
3345 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3346 } else {
3347 _slt(DestT, Src1R, Src0R);
3348 _br(TargetTrue, TargetFalse, DestT, CondMIPS32::Cond::NEZ);
3349 }
3350 return;
3351 }
3352 }
3353 }
3354 }
3355
3356 void TargetMIPS32::lowerCall(const InstCall *Instr) {
3357 CfgVector<Variable *> RegArgs;
3358 NeedsStackAlignment = true;
3359
3360   // Assign arguments to registers or stack slots, and reserve the stack space.
3361 TargetMIPS32::CallingConv CC;
3362
3363 // Pair of Arg Operand -> GPR number assignments.
3364 llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_GPR_ARG> GPRArgs;
3365 llvm::SmallVector<std::pair<Operand *, RegNumT>, MIPS32_MAX_FP_ARG> FPArgs;
3366 // Pair of Arg Operand -> stack offset.
3367 llvm::SmallVector<std::pair<Operand *, int32_t>, 8> StackArgs;
3368 size_t ParameterAreaSizeBytes = 16;
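  // The o32 ABI requires the caller to reserve 16 bytes of outgoing-argument
  // space for $a0-$a3 even when all arguments are passed in registers, so the
  // parameter area starts at 16 bytes.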
3369
3370 // Classify each argument operand according to the location where the
3371 // argument is passed.
3372
3373   // v4f32 is returned through the stack: the caller sets up $4 as an implicit
3374   // first argument, and the callee writes the return vector through it.
3375 SizeT ArgNum = 0;
3376 Variable *Dest = Instr->getDest();
3377 Variable *RetVecFloat = nullptr;
3378 if (Dest && isVectorFloatingType(Dest->getType())) {
3379 ArgNum = 1;
3380 CC.discardReg(RegMIPS32::Reg_A0);
3381 RetVecFloat = Func->makeVariable(IceType_i32);
3382 auto *ByteCount = ConstantInteger32::create(Ctx, IceType_i32, 16);
3383 constexpr SizeT Alignment = 4;
3384 lowerAlloca(InstAlloca::create(Func, RetVecFloat, ByteCount, Alignment));
3385 RegArgs.emplace_back(
3386 legalizeToReg(RetVecFloat, RegNumT::fixme(RegMIPS32::Reg_A0)));
3387 }
3388
3389 for (SizeT i = 0, NumArgs = Instr->getNumArgs(); i < NumArgs; ++i) {
3390 Operand *Arg = legalizeUndef(Instr->getArg(i));
3391 const Type Ty = Arg->getType();
3392 bool InReg = false;
3393 RegNumT Reg;
3394
3395 InReg = CC.argInReg(Ty, i, &Reg);
3396
3397 if (!InReg) {
3398 if (isVectorType(Ty)) {
3399 auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3400 ParameterAreaSizeBytes =
3401 applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3402 for (Variable *Elem : ArgVec->getContainers()) {
3403 StackArgs.push_back(std::make_pair(Elem, ParameterAreaSizeBytes));
3404 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3405 }
3406 } else {
3407 ParameterAreaSizeBytes =
3408 applyStackAlignmentTy(ParameterAreaSizeBytes, Ty);
3409 StackArgs.push_back(std::make_pair(Arg, ParameterAreaSizeBytes));
3410 ParameterAreaSizeBytes += typeWidthInBytesOnStack(Ty);
3411 }
3412 ++ArgNum;
3413 continue;
3414 }
3415
3416 if (isVectorType(Ty)) {
3417 auto *ArgVec = llvm::cast<VariableVecOn32>(Arg);
3418 Operand *Elem0 = ArgVec->getContainers()[0];
3419 Operand *Elem1 = ArgVec->getContainers()[1];
3420 GPRArgs.push_back(
3421 std::make_pair(Elem0, RegNumT::fixme((unsigned)Reg + 0)));
3422 GPRArgs.push_back(
3423 std::make_pair(Elem1, RegNumT::fixme((unsigned)Reg + 1)));
3424 Operand *Elem2 = ArgVec->getContainers()[2];
3425 Operand *Elem3 = ArgVec->getContainers()[3];
3426       // A first vector argument is passed in $4:$5:$6:$7; any later vector
3427       // argument is passed in $6:$7 with its last two words on the stack.
3428 if (ArgNum == 0) {
3429 GPRArgs.push_back(
3430 std::make_pair(Elem2, RegNumT::fixme((unsigned)Reg + 2)));
3431 GPRArgs.push_back(
3432 std::make_pair(Elem3, RegNumT::fixme((unsigned)Reg + 3)));
3433 } else {
3434 ParameterAreaSizeBytes =
3435 applyStackAlignmentTy(ParameterAreaSizeBytes, IceType_i64);
3436 StackArgs.push_back(std::make_pair(Elem2, ParameterAreaSizeBytes));
3437 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3438 StackArgs.push_back(std::make_pair(Elem3, ParameterAreaSizeBytes));
3439 ParameterAreaSizeBytes += typeWidthInBytesOnStack(IceType_i32);
3440 }
3441 } else if (Ty == IceType_i64) {
3442 Operand *Lo = loOperand(Arg);
3443 Operand *Hi = hiOperand(Arg);
3444 GPRArgs.push_back(
3445 std::make_pair(Lo, RegMIPS32::get64PairFirstRegNum(Reg)));
3446 GPRArgs.push_back(
3447 std::make_pair(Hi, RegMIPS32::get64PairSecondRegNum(Reg)));
3448 } else if (isScalarIntegerType(Ty)) {
3449 GPRArgs.push_back(std::make_pair(Arg, Reg));
3450 } else {
3451 FPArgs.push_back(std::make_pair(Arg, Reg));
3452 }
3453 ++ArgNum;
3454 }
3455
3456 // Adjust the parameter area so that the stack is aligned. It is assumed that
3457 // the stack is already aligned at the start of the calling sequence.
3458 ParameterAreaSizeBytes = applyStackAlignment(ParameterAreaSizeBytes);
3459
3460 // Copy arguments that are passed on the stack to the appropriate stack
3461 // locations.
3462 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
3463 for (auto &StackArg : StackArgs) {
3464 ConstantInteger32 *Loc =
3465 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(StackArg.second));
3466 Type Ty = StackArg.first->getType();
3467 OperandMIPS32Mem *Addr;
3468 constexpr bool SignExt = false;
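    // MIPS32 load/store instructions encode a signed 16-bit immediate offset.
    // If the slot's offset fits, address it directly off SP; otherwise
    // materialize SP + offset into a temporary base register first.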
3469 if (OperandMIPS32Mem::canHoldOffset(Ty, SignExt, StackArg.second)) {
3470 Addr = OperandMIPS32Mem::create(Func, Ty, SP, Loc);
3471 } else {
3472 Variable *NewBase = Func->makeVariable(SP->getType());
3473 lowerArithmetic(
3474 InstArithmetic::create(Func, InstArithmetic::Add, NewBase, SP, Loc));
3475 Addr = formMemoryOperand(NewBase, Ty);
3476 }
3477 lowerStore(InstStore::create(Func, StackArg.first, Addr));
3478 }
3479
3480 // Generate the call instruction. Assign its result to a temporary with high
3481 // register allocation weight.
3482
3483 // ReturnReg doubles as ReturnRegLo as necessary.
3484 Variable *ReturnReg = nullptr;
3485 Variable *ReturnRegHi = nullptr;
3486 if (Dest) {
3487 switch (Dest->getType()) {
3488 case IceType_NUM:
3489 llvm_unreachable("Invalid Call dest type");
3490 return;
3491 case IceType_void:
3492 break;
3493 case IceType_i1:
3494 case IceType_i8:
3495 case IceType_i16:
3496 case IceType_i32:
3497 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3498 break;
3499 case IceType_i64:
3500 ReturnReg = I32Reg(RegMIPS32::Reg_V0);
3501 ReturnRegHi = I32Reg(RegMIPS32::Reg_V1);
3502 break;
3503 case IceType_f32:
3504 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_F0);
3505 break;
3506 case IceType_f64:
3507 ReturnReg = makeReg(IceType_f64, RegMIPS32::Reg_F0);
3508 break;
3509 case IceType_v4i1:
3510 case IceType_v8i1:
3511 case IceType_v16i1:
3512 case IceType_v16i8:
3513 case IceType_v8i16:
3514 case IceType_v4i32: {
3515 ReturnReg = makeReg(Dest->getType(), RegMIPS32::Reg_V0);
3516 auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg);
3517 RetVec->initVecElement(Func);
3518 for (SizeT i = 0; i < RetVec->ContainersPerVector; ++i) {
3519 auto *Var = RetVec->getContainers()[i];
3520 Var->setRegNum(RegNumT::fixme(RegMIPS32::Reg_V0 + i));
3521 }
3522 break;
3523 }
3524 case IceType_v4f32:
3525 ReturnReg = makeReg(IceType_i32, RegMIPS32::Reg_V0);
3526 break;
3527 }
3528 }
3529 Operand *CallTarget = Instr->getCallTarget();
3530 // Allow ConstantRelocatable to be left alone as a direct call,
3531 // but force other constants like ConstantInteger32 to be in
3532 // a register and make it an indirect call.
3533 if (!llvm::isa<ConstantRelocatable>(CallTarget)) {
3534 CallTarget = legalize(CallTarget, Legal_Reg);
3535 }
3536
3537 // Copy arguments to be passed in registers to the appropriate registers.
3538 for (auto &FPArg : FPArgs) {
3539 RegArgs.emplace_back(legalizeToReg(FPArg.first, FPArg.second));
3540 }
3541 for (auto &GPRArg : GPRArgs) {
3542 RegArgs.emplace_back(legalizeToReg(GPRArg.first, GPRArg.second));
3543 }
3544
3545 // Generate a FakeUse of register arguments so that they do not get dead code
3546 // eliminated as a result of the FakeKill of scratch registers after the call.
3547   // These fake-uses must come after the legalizeToReg() calls above so that
3548   // the argument registers are not reused while arguments are materialized.
3549 for (auto *RegArg : RegArgs) {
3550 Context.insert<InstFakeUse>(RegArg);
3551 }
3552
3553   // If a variable-sized alloca is used, the extra stack space for the argument
3554   // build area is allocated just before the call and released right after it.
3555 if (VariableAllocaUsed)
3556 Sandboxer(this).addiu_sp(-MaxOutArgsSizeBytes);
3557
3558 Inst *NewCall;
3559
3560   // We don't need the call to define the return register when it is a vector;
3561   // fake defs of its containers are inserted just after the call instead.
3562 if (ReturnReg && isVectorIntegerType(ReturnReg->getType())) {
3563 Variable *RetReg = nullptr;
3564 NewCall = InstMIPS32Call::create(Func, RetReg, CallTarget);
3565 Context.insert(NewCall);
3566 } else {
3567 NewCall = Sandboxer(this, InstBundleLock::Opt_AlignToEnd)
3568 .jal(ReturnReg, CallTarget);
3569 }
3570
3571 if (VariableAllocaUsed)
3572 Sandboxer(this).addiu_sp(MaxOutArgsSizeBytes);
3573
3574   // Insert a fake use of the stack pointer to prevent dead code elimination
3575   // of the addiu instructions that adjust it.
3576 Context.insert<InstFakeUse>(SP);
3577
3578 if (ReturnRegHi)
3579 Context.insert(InstFakeDef::create(Func, ReturnRegHi));
3580
3581 if (ReturnReg) {
3582 if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3583 for (Variable *Var : RetVec->getContainers()) {
3584 Context.insert(InstFakeDef::create(Func, Var));
3585 }
3586 }
3587 }
3588
3589 // Insert a register-kill pseudo instruction.
3590 Context.insert(InstFakeKill::create(Func, NewCall));
3591
3592 // Generate a FakeUse to keep the call live if necessary.
3593 if (Instr->hasSideEffects() && ReturnReg) {
3594 if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3595 for (Variable *Var : RetVec->getContainers()) {
3596 Context.insert<InstFakeUse>(Var);
3597 }
3598 } else {
3599 Context.insert<InstFakeUse>(ReturnReg);
3600 }
3601 }
3602
3603 if (Dest == nullptr)
3604 return;
3605
3606 // Assign the result of the call to Dest.
3607 if (ReturnReg) {
3608 if (RetVecFloat) {
3609 auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3610 auto *TBase = legalizeToReg(RetVecFloat);
3611 for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3612 auto *Var = DestVecOn32->getContainers()[i];
3613 auto *TVar = makeReg(IceType_i32);
3614 OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
3615 Func, IceType_i32, TBase,
3616 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
3617 _lw(TVar, Mem);
3618 _mov(Var, TVar);
3619 }
3620 } else if (auto *RetVec = llvm::dyn_cast<VariableVecOn32>(ReturnReg)) {
3621 auto *DestVecOn32 = llvm::cast<VariableVecOn32>(Dest);
3622 for (SizeT i = 0; i < DestVecOn32->ContainersPerVector; ++i) {
3623 _mov(DestVecOn32->getContainers()[i], RetVec->getContainers()[i]);
3624 }
3625 } else if (ReturnRegHi) {
3626 assert(Dest->getType() == IceType_i64);
3627 auto *Dest64On32 = llvm::cast<Variable64On32>(Dest);
3628 Variable *DestLo = Dest64On32->getLo();
3629 Variable *DestHi = Dest64On32->getHi();
3630 _mov(DestLo, ReturnReg);
3631 _mov(DestHi, ReturnRegHi);
3632 } else {
3633 assert(Dest->getType() == IceType_i32 || Dest->getType() == IceType_i16 ||
3634 Dest->getType() == IceType_i8 || Dest->getType() == IceType_i1 ||
3635 isScalarFloatingType(Dest->getType()) ||
3636 isVectorType(Dest->getType()));
3637 _mov(Dest, ReturnReg);
3638 }
3639 }
3640 }
3641
3642 void TargetMIPS32::lowerCast(const InstCast *Instr) {
3643 InstCast::OpKind CastKind = Instr->getCastKind();
3644 Variable *Dest = Instr->getDest();
3645 Operand *Src0 = legalizeUndef(Instr->getSrc(0));
3646 const Type DestTy = Dest->getType();
3647 const Type Src0Ty = Src0->getType();
3648 const uint32_t ShiftAmount =
3649 (Src0Ty == IceType_i1
3650 ? INT32_BITS - 1
3651 : INT32_BITS - (CHAR_BITS * typeWidthInBytes(Src0Ty)));
3652 const uint32_t Mask =
3653 (Src0Ty == IceType_i1
3654 ? 1
3655 : (1 << (CHAR_BITS * typeWidthInBytes(Src0Ty))) - 1);
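  // ShiftAmount drives the sll/sra pairs that sign-extend a narrow value held
  // in a 32-bit register, and Mask (all ones in the value's low bits) drives
  // the andi used for zero-extension; i1 is special since only bit 0 matters.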
3656
3657 if (isVectorType(DestTy)) {
3658 llvm::report_fatal_error("Cast: Destination type is vector");
3659 return;
3660 }
3661 switch (CastKind) {
3662 default:
3663 Func->setError("Cast type not supported");
3664 return;
3665 case InstCast::Sext: {
3666 if (DestTy == IceType_i64) {
3667 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3668 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3669 Variable *Src0R = legalizeToReg(Src0);
3670 Variable *T1_Lo = I32Reg();
3671 Variable *T2_Lo = I32Reg();
3672 Variable *T_Hi = I32Reg();
3673 if (Src0Ty == IceType_i1) {
3674 _sll(T1_Lo, Src0R, INT32_BITS - 1);
3675 _sra(T2_Lo, T1_Lo, INT32_BITS - 1);
3676 _mov(DestHi, T2_Lo);
3677 _mov(DestLo, T2_Lo);
3678 } else if (Src0Ty == IceType_i8 || Src0Ty == IceType_i16) {
3679 _sll(T1_Lo, Src0R, ShiftAmount);
3680 _sra(T2_Lo, T1_Lo, ShiftAmount);
3681 _sra(T_Hi, T2_Lo, INT32_BITS - 1);
3682 _mov(DestHi, T_Hi);
3683 _mov(DestLo, T2_Lo);
3684 } else if (Src0Ty == IceType_i32) {
3685 _mov(T1_Lo, Src0R);
3686 _sra(T_Hi, T1_Lo, INT32_BITS - 1);
3687 _mov(DestHi, T_Hi);
3688 _mov(DestLo, T1_Lo);
3689 }
3690 } else {
3691 Variable *Src0R = legalizeToReg(Src0);
3692 Variable *T1 = makeReg(DestTy);
3693 Variable *T2 = makeReg(DestTy);
3694 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3695 Src0Ty == IceType_i16) {
3696 _sll(T1, Src0R, ShiftAmount);
3697 _sra(T2, T1, ShiftAmount);
3698 _mov(Dest, T2);
3699 }
3700 }
3701 break;
3702 }
3703 case InstCast::Zext: {
3704 if (DestTy == IceType_i64) {
3705 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
3706 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
3707 Variable *Src0R = legalizeToReg(Src0);
3708 Variable *T_Lo = I32Reg();
3709 Variable *T_Hi = I32Reg();
3710
3711 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 || Src0Ty == IceType_i16)
3712 _andi(T_Lo, Src0R, Mask);
3713 else if (Src0Ty == IceType_i32)
3714 _mov(T_Lo, Src0R);
3715 else
3716 assert(Src0Ty != IceType_i64);
3717 _mov(DestLo, T_Lo);
3718
3719 auto *Zero = getZero();
3720 _addiu(T_Hi, Zero, 0);
3721 _mov(DestHi, T_Hi);
3722 } else {
3723 Variable *Src0R = legalizeToReg(Src0);
3724 Variable *T = makeReg(DestTy);
3725 if (Src0Ty == IceType_i1 || Src0Ty == IceType_i8 ||
3726 Src0Ty == IceType_i16) {
3727 _andi(T, Src0R, Mask);
3728 _mov(Dest, T);
3729 }
3730 }
3731 break;
3732 }
3733 case InstCast::Trunc: {
3734 if (Src0Ty == IceType_i64)
3735 Src0 = loOperand(Src0);
3736 Variable *Src0R = legalizeToReg(Src0);
3737 Variable *T = makeReg(DestTy);
3738 switch (DestTy) {
3739 case IceType_i1:
3740 _andi(T, Src0R, 0x1);
3741 break;
3742 case IceType_i8:
3743 _andi(T, Src0R, 0xff);
3744 break;
3745 case IceType_i16:
3746 _andi(T, Src0R, 0xffff);
3747 break;
3748 default:
3749 _mov(T, Src0R);
3750 break;
3751 }
3752 _mov(Dest, T);
3753 break;
3754 }
3755 case InstCast::Fptrunc: {
3756 assert(Dest->getType() == IceType_f32);
3757 assert(Src0->getType() == IceType_f64);
3758 auto *DestR = legalizeToReg(Dest);
3759 auto *Src0R = legalizeToReg(Src0);
3760 _cvt_s_d(DestR, Src0R);
3761 _mov(Dest, DestR);
3762 break;
3763 }
3764 case InstCast::Fpext: {
3765 assert(Dest->getType() == IceType_f64);
3766 assert(Src0->getType() == IceType_f32);
3767 auto *DestR = legalizeToReg(Dest);
3768 auto *Src0R = legalizeToReg(Src0);
3769 _cvt_d_s(DestR, Src0R);
3770 _mov(Dest, DestR);
3771 break;
3772 }
3773 case InstCast::Fptosi:
3774 case InstCast::Fptoui: {
3775 if (llvm::isa<Variable64On32>(Dest)) {
3776 llvm::report_fatal_error("fp-to-i64 should have been prelowered.");
3777 return;
3778 }
3779 if (DestTy != IceType_i64) {
3780 if (Src0Ty == IceType_f32 && isScalarIntegerType(DestTy)) {
3781 Variable *Src0R = legalizeToReg(Src0);
3782 Variable *FTmp = makeReg(IceType_f32);
3783 _trunc_w_s(FTmp, Src0R);
3784 _mov(Dest, FTmp);
3785 return;
3786 }
3787 if (Src0Ty == IceType_f64 && isScalarIntegerType(DestTy)) {
3788 Variable *Src0R = legalizeToReg(Src0);
3789 Variable *FTmp = makeReg(IceType_f64);
3790 _trunc_w_d(FTmp, Src0R);
3791 _mov(Dest, FTmp);
3792 return;
3793 }
3794 }
3795 llvm::report_fatal_error("Destination is i64 in fp-to-i32");
3796 break;
3797 }
3798 case InstCast::Sitofp:
3799 case InstCast::Uitofp: {
3800 if (llvm::isa<Variable64On32>(Dest)) {
3801 llvm::report_fatal_error("i64-to-fp should have been prelowered.");
3802 return;
3803 }
3804 if (Src0Ty != IceType_i64) {
3805 Variable *Src0R = legalizeToReg(Src0);
3806 auto *T0R = Src0R;
3807 if (Src0Ty != IceType_i32) {
3808 T0R = makeReg(IceType_i32);
3809 if (CastKind == InstCast::Uitofp)
3810 lowerCast(InstCast::create(Func, InstCast::Zext, T0R, Src0R));
3811 else
3812 lowerCast(InstCast::create(Func, InstCast::Sext, T0R, Src0R));
3813 }
3814 if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f32) {
3815 Variable *FTmp1 = makeReg(IceType_f32);
3816 Variable *FTmp2 = makeReg(IceType_f32);
3817 _mtc1(FTmp1, T0R);
3818 _cvt_s_w(FTmp2, FTmp1);
3819 _mov(Dest, FTmp2);
3820 return;
3821 }
3822 if (isScalarIntegerType(Src0Ty) && DestTy == IceType_f64) {
3823 Variable *FTmp1 = makeReg(IceType_f64);
3824 Variable *FTmp2 = makeReg(IceType_f64);
3825 _mtc1(FTmp1, T0R);
3826 _cvt_d_w(FTmp2, FTmp1);
3827 _mov(Dest, FTmp2);
3828 return;
3829 }
3830 }
3831 llvm::report_fatal_error("Source is i64 in i32-to-fp");
3832 break;
3833 }
3834 case InstCast::Bitcast: {
3835 Operand *Src0 = Instr->getSrc(0);
3836 if (DestTy == Src0->getType()) {
3837 auto *Assign = InstAssign::create(Func, Dest, Src0);
3838 lowerAssign(Assign);
3839 return;
3840 }
3841 if (isVectorType(DestTy) || isVectorType(Src0->getType())) {
3842 llvm::report_fatal_error(
3843 "Bitcast: vector type should have been prelowered.");
3844 return;
3845 }
3846 switch (DestTy) {
3847 case IceType_NUM:
3848 case IceType_void:
3849 llvm::report_fatal_error("Unexpected bitcast.");
3850 case IceType_i1:
3851 UnimplementedLoweringError(this, Instr);
3852 break;
3853 case IceType_i8:
3854 assert(Src0->getType() == IceType_v8i1);
3855 llvm::report_fatal_error(
3856         "v8i1 to i8 conversion should have been prelowered.");
3857 break;
3858 case IceType_i16:
3859 assert(Src0->getType() == IceType_v16i1);
3860 llvm::report_fatal_error(
3861         "v16i1 to i16 conversion should have been prelowered.");
3862 break;
3863 case IceType_i32:
3864 case IceType_f32: {
3865 Variable *Src0R = legalizeToReg(Src0);
3866 _mov(Dest, Src0R);
3867 break;
3868 }
3869 case IceType_i64: {
3870 assert(Src0->getType() == IceType_f64);
3871 Variable *Src0R = legalizeToReg(Src0);
3872 auto *T = llvm::cast<Variable64On32>(Func->makeVariable(IceType_i64));
3873 T->initHiLo(Func);
3874 T->getHi()->setMustNotHaveReg();
3875 T->getLo()->setMustNotHaveReg();
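      // The two moves below each write only half of T, so fake defs are
      // inserted first to give both halves an initial definition for the
      // benefit of liveness analysis.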
3876 Context.insert<InstFakeDef>(T->getHi());
3877 Context.insert<InstFakeDef>(T->getLo());
3878 _mov_fp64_to_i64(T->getHi(), Src0R, Int64_Hi);
3879 _mov_fp64_to_i64(T->getLo(), Src0R, Int64_Lo);
3880 lowerAssign(InstAssign::create(Func, Dest, T));
3881 break;
3882 }
3883 case IceType_f64: {
3884 assert(Src0->getType() == IceType_i64);
3885 const uint32_t Mask = 0xFFFFFFFF;
3886 if (auto *C64 = llvm::dyn_cast<ConstantInteger64>(Src0)) {
3887 Variable *RegHi, *RegLo;
3888 const uint64_t Value = C64->getValue();
3889 uint64_t Upper32Bits = (Value >> INT32_BITS) & Mask;
3890 uint64_t Lower32Bits = Value & Mask;
3891 RegLo = legalizeToReg(Ctx->getConstantInt32(Lower32Bits));
3892 RegHi = legalizeToReg(Ctx->getConstantInt32(Upper32Bits));
3893 _mov(Dest, RegHi, RegLo);
3894 } else {
3895 auto *Var64On32 = llvm::cast<Variable64On32>(Src0);
3896 auto *RegLo = legalizeToReg(loOperand(Var64On32));
3897 auto *RegHi = legalizeToReg(hiOperand(Var64On32));
3898 _mov(Dest, RegHi, RegLo);
3899 }
3900 break;
3901 }
3902 default:
3903 llvm::report_fatal_error("Unexpected bitcast.");
3904 }
3905 break;
3906 }
3907 }
3908 }
3909
3910 void TargetMIPS32::lowerExtractElement(const InstExtractElement *Instr) {
3911 Variable *Dest = Instr->getDest();
3912 const Type DestTy = Dest->getType();
3913 Operand *Src1 = Instr->getSrc(1);
3914 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src1)) {
3915 const uint32_t Index = Imm->getValue();
3916 Variable *TDest = makeReg(DestTy);
3917 Variable *TReg = makeReg(DestTy);
3918 auto *Src0 = legalizeUndef(Instr->getSrc(0));
3919 auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
3920 // Number of elements in each container
3921 uint32_t ElemPerCont =
3922 typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
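    // Vector elements live in 32-bit containers: one i32/f32 element per
    // container, two i16 elements, or four i8 elements. The element is found
    // by container index (Index / ElemPerCont) and its position within that
    // container (Index % ElemPerCont), then extracted with shifts and masks.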
3923 auto *Src = Src0R->getContainers()[Index / ElemPerCont];
3924 auto *SrcE = legalizeToReg(Src);
3925 // Position of the element in the container
3926 uint32_t PosInCont = Index % ElemPerCont;
3927 if (ElemPerCont == 1) {
3928 _mov(TDest, SrcE);
3929 } else if (ElemPerCont == 2) {
3930 switch (PosInCont) {
3931 case 0:
3932 _andi(TDest, SrcE, 0xffff);
3933 break;
3934 case 1:
3935 _srl(TDest, SrcE, 16);
3936 break;
3937 default:
3938 llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3939 break;
3940 }
3941 } else if (ElemPerCont == 4) {
3942 switch (PosInCont) {
3943 case 0:
3944 _andi(TDest, SrcE, 0xff);
3945 break;
3946 case 1:
3947 _srl(TReg, SrcE, 8);
3948 _andi(TDest, TReg, 0xff);
3949 break;
3950 case 2:
3951 _srl(TReg, SrcE, 16);
3952 _andi(TDest, TReg, 0xff);
3953 break;
3954 case 3:
3955 _srl(TDest, SrcE, 24);
3956 break;
3957 default:
3958 llvm::report_fatal_error("ExtractElement: Invalid PosInCont");
3959 break;
3960 }
3961 }
3962 if (typeElementType(Src0R->getType()) == IceType_i1) {
3963 Variable *TReg1 = makeReg(DestTy);
3964 _andi(TReg1, TDest, 0x1);
3965 _mov(Dest, TReg1);
3966 } else {
3967 _mov(Dest, TDest);
3968 }
3969 return;
3970 }
3971 llvm::report_fatal_error("ExtractElement requires a constant index");
3972 }
3973
3974 void TargetMIPS32::lowerFcmp(const InstFcmp *Instr) {
3975 Variable *Dest = Instr->getDest();
3976 if (isVectorType(Dest->getType())) {
3977 llvm::report_fatal_error("Fcmp: Destination type is vector");
3978 return;
3979 }
3980
3981 auto *Src0 = Instr->getSrc(0);
3982 auto *Src1 = Instr->getSrc(1);
3983 auto *Zero = getZero();
3984
3985 InstFcmp::FCond Cond = Instr->getCondition();
3986 auto *DestR = makeReg(IceType_i32);
3987 auto *Src0R = legalizeToReg(Src0);
3988 auto *Src1R = legalizeToReg(Src1);
3989 const Type Src0Ty = Src0->getType();
3990
3991 Operand *FCC0 = OperandMIPS32FCC::create(getFunc(), OperandMIPS32FCC::FCC0);
3992
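  // Each predicate is lowered as a c.<cond>.{s,d} compare that sets FCC0,
  // with DestR preloaded to 1 and a conditional move from $zero (movf/movt)
  // clearing it when FCC0 contradicts the predicate. Predicates without a
  // direct compare encoding (ogt, oge, one, ord, ugt, uge, une) test the
  // complementary condition and use movt instead of movf.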
3993 switch (Cond) {
3994 default: {
3995 llvm::report_fatal_error("Unhandled fp comparison.");
3996 return;
3997 }
3998 case InstFcmp::False: {
3999 Context.insert<InstFakeUse>(Src0R);
4000 Context.insert<InstFakeUse>(Src1R);
4001 _addiu(DestR, Zero, 0);
4002 _mov(Dest, DestR);
4003 break;
4004 }
4005 case InstFcmp::Oeq: {
4006 if (Src0Ty == IceType_f32) {
4007 _c_eq_s(Src0R, Src1R);
4008 } else {
4009 _c_eq_d(Src0R, Src1R);
4010 }
4011 _addiu(DestR, Zero, 1);
4012 _movf(DestR, Zero, FCC0);
4013 _mov(Dest, DestR);
4014 break;
4015 }
4016 case InstFcmp::Ogt: {
4017 if (Src0Ty == IceType_f32) {
4018 _c_ule_s(Src0R, Src1R);
4019 } else {
4020 _c_ule_d(Src0R, Src1R);
4021 }
4022 _addiu(DestR, Zero, 1);
4023 _movt(DestR, Zero, FCC0);
4024 _mov(Dest, DestR);
4025 break;
4026 }
4027 case InstFcmp::Oge: {
4028 if (Src0Ty == IceType_f32) {
4029 _c_ult_s(Src0R, Src1R);
4030 } else {
4031 _c_ult_d(Src0R, Src1R);
4032 }
4033 _addiu(DestR, Zero, 1);
4034 _movt(DestR, Zero, FCC0);
4035 _mov(Dest, DestR);
4036 break;
4037 }
4038 case InstFcmp::Olt: {
4039 if (Src0Ty == IceType_f32) {
4040 _c_olt_s(Src0R, Src1R);
4041 } else {
4042 _c_olt_d(Src0R, Src1R);
4043 }
4044 _addiu(DestR, Zero, 1);
4045 _movf(DestR, Zero, FCC0);
4046 _mov(Dest, DestR);
4047 break;
4048 }
4049 case InstFcmp::Ole: {
4050 if (Src0Ty == IceType_f32) {
4051 _c_ole_s(Src0R, Src1R);
4052 } else {
4053 _c_ole_d(Src0R, Src1R);
4054 }
4055 _addiu(DestR, Zero, 1);
4056 _movf(DestR, Zero, FCC0);
4057 _mov(Dest, DestR);
4058 break;
4059 }
4060 case InstFcmp::One: {
4061 if (Src0Ty == IceType_f32) {
4062 _c_ueq_s(Src0R, Src1R);
4063 } else {
4064 _c_ueq_d(Src0R, Src1R);
4065 }
4066 _addiu(DestR, Zero, 1);
4067 _movt(DestR, Zero, FCC0);
4068 _mov(Dest, DestR);
4069 break;
4070 }
4071 case InstFcmp::Ord: {
4072 if (Src0Ty == IceType_f32) {
4073 _c_un_s(Src0R, Src1R);
4074 } else {
4075 _c_un_d(Src0R, Src1R);
4076 }
4077 _addiu(DestR, Zero, 1);
4078 _movt(DestR, Zero, FCC0);
4079 _mov(Dest, DestR);
4080 break;
4081 }
4082 case InstFcmp::Ueq: {
4083 if (Src0Ty == IceType_f32) {
4084 _c_ueq_s(Src0R, Src1R);
4085 } else {
4086 _c_ueq_d(Src0R, Src1R);
4087 }
4088 _addiu(DestR, Zero, 1);
4089 _movf(DestR, Zero, FCC0);
4090 _mov(Dest, DestR);
4091 break;
4092 }
4093 case InstFcmp::Ugt: {
4094 if (Src0Ty == IceType_f32) {
4095 _c_ole_s(Src0R, Src1R);
4096 } else {
4097 _c_ole_d(Src0R, Src1R);
4098 }
4099 _addiu(DestR, Zero, 1);
4100 _movt(DestR, Zero, FCC0);
4101 _mov(Dest, DestR);
4102 break;
4103 }
4104 case InstFcmp::Uge: {
4105 if (Src0Ty == IceType_f32) {
4106 _c_olt_s(Src0R, Src1R);
4107 } else {
4108 _c_olt_d(Src0R, Src1R);
4109 }
4110 _addiu(DestR, Zero, 1);
4111 _movt(DestR, Zero, FCC0);
4112 _mov(Dest, DestR);
4113 break;
4114 }
4115 case InstFcmp::Ult: {
4116 if (Src0Ty == IceType_f32) {
4117 _c_ult_s(Src0R, Src1R);
4118 } else {
4119 _c_ult_d(Src0R, Src1R);
4120 }
4121 _addiu(DestR, Zero, 1);
4122 _movf(DestR, Zero, FCC0);
4123 _mov(Dest, DestR);
4124 break;
4125 }
4126 case InstFcmp::Ule: {
4127 if (Src0Ty == IceType_f32) {
4128 _c_ule_s(Src0R, Src1R);
4129 } else {
4130 _c_ule_d(Src0R, Src1R);
4131 }
4132 _addiu(DestR, Zero, 1);
4133 _movf(DestR, Zero, FCC0);
4134 _mov(Dest, DestR);
4135 break;
4136 }
4137 case InstFcmp::Une: {
4138 if (Src0Ty == IceType_f32) {
4139 _c_eq_s(Src0R, Src1R);
4140 } else {
4141 _c_eq_d(Src0R, Src1R);
4142 }
4143 _addiu(DestR, Zero, 1);
4144 _movt(DestR, Zero, FCC0);
4145 _mov(Dest, DestR);
4146 break;
4147 }
4148 case InstFcmp::Uno: {
4149 if (Src0Ty == IceType_f32) {
4150 _c_un_s(Src0R, Src1R);
4151 } else {
4152 _c_un_d(Src0R, Src1R);
4153 }
4154 _addiu(DestR, Zero, 1);
4155 _movf(DestR, Zero, FCC0);
4156 _mov(Dest, DestR);
4157 break;
4158 }
4159 case InstFcmp::True: {
4160 Context.insert<InstFakeUse>(Src0R);
4161 Context.insert<InstFakeUse>(Src1R);
4162 _addiu(DestR, Zero, 1);
4163 _mov(Dest, DestR);
4164 break;
4165 }
4166 }
4167 }
4168
4169 void TargetMIPS32::lower64Icmp(const InstIcmp *Instr) {
4170 Operand *Src0 = legalize(Instr->getSrc(0));
4171 Operand *Src1 = legalize(Instr->getSrc(1));
4172 Variable *Dest = Instr->getDest();
4173 InstIcmp::ICond Condition = Instr->getCondition();
4174
4175 Variable *Src0LoR = legalizeToReg(loOperand(Src0));
4176 Variable *Src0HiR = legalizeToReg(hiOperand(Src0));
4177 Variable *Src1LoR = legalizeToReg(loOperand(Src1));
4178 Variable *Src1HiR = legalizeToReg(hiOperand(Src1));
4179
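  // Each case materializes a 0/1 result in a GPR. Eq/Ne OR together the XORs
  // of both halves and test that against zero. The ordered comparisons flag
  // whether the high words differ (T1), compare the high words, compare the
  // low words, and use movz to substitute the low-word result when the high
  // words are equal; the >= and <= forms compute the strict comparison and
  // invert it with xori ..., 1.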
4180 switch (Condition) {
4181 default:
4182 llvm_unreachable("unexpected condition");
4183 return;
4184 case InstIcmp::Eq: {
4185 auto *T1 = I32Reg();
4186 auto *T2 = I32Reg();
4187 auto *T3 = I32Reg();
4188 auto *T4 = I32Reg();
4189 _xor(T1, Src0HiR, Src1HiR);
4190 _xor(T2, Src0LoR, Src1LoR);
4191 _or(T3, T1, T2);
4192 _sltiu(T4, T3, 1);
4193 _mov(Dest, T4);
4194 return;
4195 }
4196 case InstIcmp::Ne: {
4197 auto *T1 = I32Reg();
4198 auto *T2 = I32Reg();
4199 auto *T3 = I32Reg();
4200 auto *T4 = I32Reg();
4201 _xor(T1, Src0HiR, Src1HiR);
4202 _xor(T2, Src0LoR, Src1LoR);
4203 _or(T3, T1, T2);
4204 _sltu(T4, getZero(), T3);
4205 _mov(Dest, T4);
4206 return;
4207 }
4208 case InstIcmp::Sgt: {
4209 auto *T1 = I32Reg();
4210 auto *T2 = I32Reg();
4211 auto *T3 = I32Reg();
4212 _xor(T1, Src0HiR, Src1HiR);
4213 _slt(T2, Src1HiR, Src0HiR);
4214 _sltu(T3, Src1LoR, Src0LoR);
4215 _movz(T2, T3, T1);
4216 _mov(Dest, T2);
4217 return;
4218 }
4219 case InstIcmp::Ugt: {
4220 auto *T1 = I32Reg();
4221 auto *T2 = I32Reg();
4222 auto *T3 = I32Reg();
4223 _xor(T1, Src0HiR, Src1HiR);
4224 _sltu(T2, Src1HiR, Src0HiR);
4225 _sltu(T3, Src1LoR, Src0LoR);
4226 _movz(T2, T3, T1);
4227 _mov(Dest, T2);
4228 return;
4229 }
4230 case InstIcmp::Sge: {
4231 auto *T1 = I32Reg();
4232 auto *T2 = I32Reg();
4233 auto *T3 = I32Reg();
4234 auto *T4 = I32Reg();
4235 auto *T5 = I32Reg();
4236 _xor(T1, Src0HiR, Src1HiR);
4237 _slt(T2, Src0HiR, Src1HiR);
4238 _xori(T3, T2, 1);
4239 _sltu(T4, Src0LoR, Src1LoR);
4240 _xori(T5, T4, 1);
4241 _movz(T3, T5, T1);
4242 _mov(Dest, T3);
4243 return;
4244 }
4245 case InstIcmp::Uge: {
4246 auto *T1 = I32Reg();
4247 auto *T2 = I32Reg();
4248 auto *T3 = I32Reg();
4249 auto *T4 = I32Reg();
4250 auto *T5 = I32Reg();
4251 _xor(T1, Src0HiR, Src1HiR);
4252 _sltu(T2, Src0HiR, Src1HiR);
4253 _xori(T3, T2, 1);
4254 _sltu(T4, Src0LoR, Src1LoR);
4255 _xori(T5, T4, 1);
4256 _movz(T3, T5, T1);
4257 _mov(Dest, T3);
4258 return;
4259 }
4260 case InstIcmp::Slt: {
4261 auto *T1 = I32Reg();
4262 auto *T2 = I32Reg();
4263 auto *T3 = I32Reg();
4264 _xor(T1, Src0HiR, Src1HiR);
4265 _slt(T2, Src0HiR, Src1HiR);
4266 _sltu(T3, Src0LoR, Src1LoR);
4267 _movz(T2, T3, T1);
4268 _mov(Dest, T2);
4269 return;
4270 }
4271 case InstIcmp::Ult: {
4272 auto *T1 = I32Reg();
4273 auto *T2 = I32Reg();
4274 auto *T3 = I32Reg();
4275 _xor(T1, Src0HiR, Src1HiR);
4276 _sltu(T2, Src0HiR, Src1HiR);
4277 _sltu(T3, Src0LoR, Src1LoR);
4278 _movz(T2, T3, T1);
4279 _mov(Dest, T2);
4280 return;
4281 }
4282 case InstIcmp::Sle: {
4283 auto *T1 = I32Reg();
4284 auto *T2 = I32Reg();
4285 auto *T3 = I32Reg();
4286 auto *T4 = I32Reg();
4287 auto *T5 = I32Reg();
4288 _xor(T1, Src0HiR, Src1HiR);
4289 _slt(T2, Src1HiR, Src0HiR);
4290 _xori(T3, T2, 1);
4291 _sltu(T4, Src1LoR, Src0LoR);
4292 _xori(T5, T4, 1);
4293 _movz(T3, T5, T1);
4294 _mov(Dest, T3);
4295 return;
4296 }
4297 case InstIcmp::Ule: {
4298 auto *T1 = I32Reg();
4299 auto *T2 = I32Reg();
4300 auto *T3 = I32Reg();
4301 auto *T4 = I32Reg();
4302 auto *T5 = I32Reg();
4303 _xor(T1, Src0HiR, Src1HiR);
4304 _sltu(T2, Src1HiR, Src0HiR);
4305 _xori(T3, T2, 1);
4306 _sltu(T4, Src1LoR, Src0LoR);
4307 _xori(T5, T4, 1);
4308 _movz(T3, T5, T1);
4309 _mov(Dest, T3);
4310 return;
4311 }
4312 }
4313 }
4314
4315 void TargetMIPS32::lowerIcmp(const InstIcmp *Instr) {
4316 auto *Src0 = Instr->getSrc(0);
4317 auto *Src1 = Instr->getSrc(1);
4318 if (Src0->getType() == IceType_i64) {
4319 lower64Icmp(Instr);
4320 return;
4321 }
4322 Variable *Dest = Instr->getDest();
4323 if (isVectorType(Dest->getType())) {
4324 llvm::report_fatal_error("Icmp: Destination type is vector");
4325 return;
4326 }
4327 InstIcmp::ICond Cond = Instr->getCondition();
4328 auto *Src0R = legalizeToReg(Src0);
4329 auto *Src1R = legalizeToReg(Src1);
4330 const Type Src0Ty = Src0R->getType();
4331 const uint32_t ShAmt = INT32_BITS - getScalarIntBitWidth(Src0->getType());
4332 Variable *Src0RT = I32Reg();
4333 Variable *Src1RT = I32Reg();
4334
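  // Narrow (i1/i8/i16) operands are shifted left so that the value occupies
  // the most significant bits of the register. This preserves both signed and
  // unsigned ordering, so slt/sltu give correct results without an explicit
  // sign- or zero-extension.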
4335 if (Src0Ty != IceType_i32) {
4336 _sll(Src0RT, Src0R, ShAmt);
4337 _sll(Src1RT, Src1R, ShAmt);
4338 } else {
4339 _mov(Src0RT, Src0R);
4340 _mov(Src1RT, Src1R);
4341 }
4342
4343 switch (Cond) {
4344 case InstIcmp::Eq: {
4345 auto *DestT = I32Reg();
4346 auto *T = I32Reg();
4347 _xor(T, Src0RT, Src1RT);
4348 _sltiu(DestT, T, 1);
4349 _mov(Dest, DestT);
4350 return;
4351 }
4352 case InstIcmp::Ne: {
4353 auto *DestT = I32Reg();
4354 auto *T = I32Reg();
4355 auto *Zero = getZero();
4356 _xor(T, Src0RT, Src1RT);
4357 _sltu(DestT, Zero, T);
4358 _mov(Dest, DestT);
4359 return;
4360 }
4361 case InstIcmp::Ugt: {
4362 auto *DestT = I32Reg();
4363 _sltu(DestT, Src1RT, Src0RT);
4364 _mov(Dest, DestT);
4365 return;
4366 }
4367 case InstIcmp::Uge: {
4368 auto *DestT = I32Reg();
4369 auto *T = I32Reg();
4370 _sltu(T, Src0RT, Src1RT);
4371 _xori(DestT, T, 1);
4372 _mov(Dest, DestT);
4373 return;
4374 }
4375 case InstIcmp::Ult: {
4376 auto *DestT = I32Reg();
4377 _sltu(DestT, Src0RT, Src1RT);
4378 _mov(Dest, DestT);
4379 return;
4380 }
4381 case InstIcmp::Ule: {
4382 auto *DestT = I32Reg();
4383 auto *T = I32Reg();
4384 _sltu(T, Src1RT, Src0RT);
4385 _xori(DestT, T, 1);
4386 _mov(Dest, DestT);
4387 return;
4388 }
4389 case InstIcmp::Sgt: {
4390 auto *DestT = I32Reg();
4391 _slt(DestT, Src1RT, Src0RT);
4392 _mov(Dest, DestT);
4393 return;
4394 }
4395 case InstIcmp::Sge: {
4396 auto *DestT = I32Reg();
4397 auto *T = I32Reg();
4398 _slt(T, Src0RT, Src1RT);
4399 _xori(DestT, T, 1);
4400 _mov(Dest, DestT);
4401 return;
4402 }
4403 case InstIcmp::Slt: {
4404 auto *DestT = I32Reg();
4405 _slt(DestT, Src0RT, Src1RT);
4406 _mov(Dest, DestT);
4407 return;
4408 }
4409 case InstIcmp::Sle: {
4410 auto *DestT = I32Reg();
4411 auto *T = I32Reg();
4412 _slt(T, Src1RT, Src0RT);
4413 _xori(DestT, T, 1);
4414 _mov(Dest, DestT);
4415 return;
4416 }
4417 default:
4418 llvm_unreachable("Invalid ICmp operator");
4419 return;
4420 }
4421 }
4422
4423 void TargetMIPS32::lowerInsertElement(const InstInsertElement *Instr) {
4424 Variable *Dest = Instr->getDest();
4425 const Type DestTy = Dest->getType();
4426 Operand *Src2 = Instr->getSrc(2);
4427 if (const auto *Imm = llvm::dyn_cast<ConstantInteger32>(Src2)) {
4428 const uint32_t Index = Imm->getValue();
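    // Strategy: copy the source vector into a temporary vector, splice the
    // new element into the container that holds it using shift/mask
    // sequences, then assign the temporary vector to Dest.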
4429     // The vector to insert into.
4430 auto *Src0 = legalizeUndef(Instr->getSrc(0));
4431 auto *Src0R = llvm::dyn_cast<VariableVecOn32>(Src0);
4432 // Number of elements in each container
4433 uint32_t ElemPerCont =
4434 typeNumElements(Src0->getType()) / Src0R->ContainersPerVector;
4435 // Source Element
4436 auto *Src = Src0R->getContainers()[Index / ElemPerCont];
4437 auto *SrcE = Src;
4438 if (ElemPerCont > 1)
4439 SrcE = legalizeToReg(Src);
4440 // Dest is a vector
4441 auto *VDest = llvm::dyn_cast<VariableVecOn32>(Dest);
4442 VDest->initVecElement(Func);
4443 // Temp vector variable
4444 auto *TDest = makeReg(DestTy);
4445 auto *TVDest = llvm::dyn_cast<VariableVecOn32>(TDest);
4446 TVDest->initVecElement(Func);
4447 // Destination element
4448 auto *DstE = TVDest->getContainers()[Index / ElemPerCont];
4449 // Element to insert
4450 auto *Src1R = legalizeToReg(Instr->getSrc(1));
4451 auto *TReg1 = makeReg(IceType_i32);
4452 auto *TReg2 = makeReg(IceType_i32);
4453 auto *TReg3 = makeReg(IceType_i32);
4454 auto *TReg4 = makeReg(IceType_i32);
4455 auto *TReg5 = makeReg(IceType_i32);
4456 auto *TDReg = makeReg(IceType_i32);
4457 // Position of the element in the container
4458 uint32_t PosInCont = Index % ElemPerCont;
4459     // Copy the source vector into the temporary vector.
4460 for (SizeT i = 0; i < TVDest->ContainersPerVector; ++i) {
4461 auto *DCont = TVDest->getContainers()[i];
4462 // Do not define DstE as we are going to redefine it
4463 if (DCont == DstE)
4464 continue;
4465 auto *SCont = Src0R->getContainers()[i];
4466 auto *TReg = makeReg(IceType_i32);
4467 _mov(TReg, SCont);
4468 _mov(DCont, TReg);
4469 }
4470 // Insert the element
4471 if (ElemPerCont == 1) {
4472 _mov(DstE, Src1R);
4473 } else if (ElemPerCont == 2) {
4474 switch (PosInCont) {
4475 case 0:
4476 _andi(TReg1, Src1R, 0xffff); // Clear upper 16-bits of source
4477 _srl(TReg2, SrcE, 16);
4478 _sll(TReg3, TReg2, 16); // Clear lower 16-bits of element
4479 _or(TDReg, TReg1, TReg3);
4480 _mov(DstE, TDReg);
4481 break;
4482 case 1:
4483 _sll(TReg1, Src1R, 16); // Clear lower 16-bits of source
4484 _sll(TReg2, SrcE, 16);
4485 _srl(TReg3, TReg2, 16); // Clear upper 16-bits of element
4486 _or(TDReg, TReg1, TReg3);
4487 _mov(DstE, TDReg);
4488 break;
4489 default:
4490 llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4491 break;
4492 }
4493 } else if (ElemPerCont == 4) {
4494 switch (PosInCont) {
4495 case 0:
4496 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4497 _srl(TReg2, SrcE, 8);
4498 _sll(TReg3, TReg2, 8); // Clear bits[7:0] of element
4499 _or(TDReg, TReg1, TReg3);
4500 _mov(DstE, TDReg);
4501 break;
4502 case 1:
4503 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4504 _sll(TReg5, TReg1, 8); // Position in the destination
4505 _lui(TReg2, Ctx->getConstantInt32(0xffff));
4506 _ori(TReg3, TReg2, 0x00ff);
4507 _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4508 _or(TDReg, TReg5, TReg4);
4509 _mov(DstE, TDReg);
4510 break;
4511 case 2:
4512 _andi(TReg1, Src1R, 0xff); // Clear bits[31:8] of source
4513 _sll(TReg5, TReg1, 16); // Position in the destination
4514 _lui(TReg2, Ctx->getConstantInt32(0xff00));
4515 _ori(TReg3, TReg2, 0xffff);
4516 _and(TReg4, SrcE, TReg3); // Clear bits[15:8] of element
4517 _or(TDReg, TReg5, TReg4);
4518 _mov(DstE, TDReg);
4519 break;
4520 case 3:
4521 _sll(TReg1, Src1R, 24); // Position in the destination
4522 _sll(TReg2, SrcE, 8);
4523 _srl(TReg3, TReg2, 8); // Clear bits[31:24] of element
4524 _or(TDReg, TReg1, TReg3);
4525 _mov(DstE, TDReg);
4526 break;
4527 default:
4528 llvm::report_fatal_error("InsertElement: Invalid PosInCont");
4529 break;
4530 }
4531 }
4532 // Write back temporary vector to the destination
4533 auto *Assign = InstAssign::create(Func, Dest, TDest);
4534 lowerAssign(Assign);
4535 return;
4536 }
4537 llvm::report_fatal_error("InsertElement requires a constant index");
4538 }
4539
4540 void TargetMIPS32::createArithInst(Intrinsics::AtomicRMWOperation Operation,
4541 Variable *Dest, Variable *Src0,
4542 Variable *Src1) {
4543 switch (Operation) {
4544 default:
4545 llvm::report_fatal_error("Unknown AtomicRMW operation");
4546 case Intrinsics::AtomicExchange:
4547 llvm::report_fatal_error("Can't handle Atomic xchg operation");
4548 case Intrinsics::AtomicAdd:
4549 _addu(Dest, Src0, Src1);
4550 break;
4551 case Intrinsics::AtomicAnd:
4552 _and(Dest, Src0, Src1);
4553 break;
4554 case Intrinsics::AtomicSub:
4555 _subu(Dest, Src0, Src1);
4556 break;
4557 case Intrinsics::AtomicOr:
4558 _or(Dest, Src0, Src1);
4559 break;
4560 case Intrinsics::AtomicXor:
4561 _xor(Dest, Src0, Src1);
4562 break;
4563 }
4564 }
4565
4566 void TargetMIPS32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
4567 Variable *Dest = Instr->getDest();
4568 Type DestTy = (Dest == nullptr) ? IceType_void : Dest->getType();
4569
4570 Intrinsics::IntrinsicID ID = Instr->getIntrinsicInfo().ID;
4571 switch (ID) {
4572 case Intrinsics::AtomicLoad: {
4573 assert(isScalarIntegerType(DestTy));
4574     // We require the memory address to be naturally aligned; given that,
4575     // normal loads are atomic.
4576 if (!Intrinsics::isMemoryOrderValid(
4577 ID, getConstantMemoryOrder(Instr->getArg(1)))) {
4578 Func->setError("Unexpected memory ordering for AtomicLoad");
4579 return;
4580 }
4581 if (DestTy == IceType_i64) {
4582 llvm::report_fatal_error("AtomicLoad.i64 should have been prelowered.");
4583 return;
4584 } else if (DestTy == IceType_i32) {
4585 auto *T1 = makeReg(DestTy);
4586 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4587 auto *Base = legalizeToReg(Instr->getArg(0));
4588 auto *Addr = formMemoryOperand(Base, DestTy);
4589 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4590 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4591 constexpr CfgNode *NoTarget = nullptr;
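      // Atomic load via ll/sc: if the loaded value is nonzero, exit right
      // away; if it is zero, complete the ll/sc pair by storing zero back,
      // retrying if the store-conditional fails.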
4592 _sync();
4593 Context.insert(Retry);
4594 Sandboxer(this).ll(T1, Addr);
4595 _br(NoTarget, NoTarget, T1, getZero(), Exit, CondMIPS32::Cond::NE);
4596 _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4597 Sandboxer(this).sc(RegAt, Addr);
4598 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4599 Context.insert(Exit);
4600 _sync();
4601 _mov(Dest, T1);
4602 Context.insert<InstFakeUse>(T1);
4603 } else {
4604 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4605 auto *Base = legalizeToReg(Instr->getArg(0));
4606 auto *T1 = makeReg(IceType_i32);
4607 auto *T2 = makeReg(IceType_i32);
4608 auto *T3 = makeReg(IceType_i32);
4609 auto *T4 = makeReg(IceType_i32);
4610 auto *T5 = makeReg(IceType_i32);
4611 auto *T6 = makeReg(IceType_i32);
4612 auto *SrcMask = makeReg(IceType_i32);
4613 auto *Tdest = makeReg(IceType_i32);
4614 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4615 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4616 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4617 constexpr CfgNode *NoTarget = nullptr;
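      // Sub-word atomic load: align the address down to a word, compute the
      // lane's bit offset from the low two address bits, and build a mask for
      // the lanes being read. The containing word is loaded with ll (with the
      // same write-back-zero trick as the i32 case), then the requested lanes
      // are shifted down and masked off into Dest.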
4618 _sync();
4619 _addiu(T1, getZero(), -4); // Address mask 0xFFFFFFFC
4620 _andi(T2, Base, 3); // Last two bits of the address
4621 _and(T3, Base, T1); // Align the address
4622 _sll(T4, T2, 3);
4623 _ori(T5, getZero(), Mask);
4624 _sllv(SrcMask, T5, T4); // Source mask
4625 auto *Addr = formMemoryOperand(T3, IceType_i32);
4626 Context.insert(Retry);
4627 Sandboxer(this).ll(T6, Addr);
4628 _and(Tdest, T6, SrcMask);
4629 _br(NoTarget, NoTarget, T6, getZero(), Exit, CondMIPS32::Cond::NE);
4630 _addiu(RegAt, getZero(), 0); // Loaded value is zero here, writeback zero
4631 Sandboxer(this).sc(RegAt, Addr);
4632 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4633 Context.insert(Exit);
4634 auto *T7 = makeReg(IceType_i32);
4635 auto *T8 = makeReg(IceType_i32);
4636 _srlv(T7, Tdest, T4);
4637 _andi(T8, T7, Mask);
4638 _sync();
4639 _mov(Dest, T8);
4640 Context.insert<InstFakeUse>(T6);
4641 Context.insert<InstFakeUse>(SrcMask);
4642 }
4643 return;
4644 }
4645 case Intrinsics::AtomicStore: {
4646     // We require the memory address to be naturally aligned; given that,
4647     // normal stores are atomic.
4648 if (!Intrinsics::isMemoryOrderValid(
4649 ID, getConstantMemoryOrder(Instr->getArg(2)))) {
4650 Func->setError("Unexpected memory ordering for AtomicStore");
4651 return;
4652 }
4653 auto *Val = Instr->getArg(0);
4654 auto Ty = Val->getType();
4655 if (Ty == IceType_i64) {
4656 llvm::report_fatal_error("AtomicStore.i64 should have been prelowered.");
4657 return;
4658 } else if (Ty == IceType_i32) {
4659 auto *Val = legalizeToReg(Instr->getArg(0));
4660 auto *Base = legalizeToReg(Instr->getArg(1));
4661 auto *Addr = formMemoryOperand(Base, Ty);
4662 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4663 constexpr CfgNode *NoTarget = nullptr;
4664 auto *T1 = makeReg(IceType_i32);
4665 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
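      // Swap loop: ll the old value (kept alive by the FakeUse below), move
      // the new value into place, and sc it, retrying until the
      // store-conditional succeeds.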
4666 _sync();
4667 Context.insert(Retry);
4668 Sandboxer(this).ll(T1, Addr);
4669 _mov(RegAt, Val);
4670 Sandboxer(this).sc(RegAt, Addr);
4671 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4672 Context.insert<InstFakeUse>(T1); // To keep LL alive
4673 _sync();
4674 } else {
4675 auto *Val = legalizeToReg(Instr->getArg(0));
4676 auto *Base = legalizeToReg(Instr->getArg(1));
4677 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4678 constexpr CfgNode *NoTarget = nullptr;
4679 auto *T1 = makeReg(IceType_i32);
4680 auto *T2 = makeReg(IceType_i32);
4681 auto *T3 = makeReg(IceType_i32);
4682 auto *T4 = makeReg(IceType_i32);
4683 auto *T5 = makeReg(IceType_i32);
4684 auto *T6 = makeReg(IceType_i32);
4685 auto *T7 = makeReg(IceType_i32);
4686 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4687 auto *SrcMask = makeReg(IceType_i32);
4688 auto *DstMask = makeReg(IceType_i32);
4689 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(Ty))) - 1;
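      // Sub-word atomic store: align the address down to a word, shift the
      // new value and a lane mask into position, then loop: ll the containing
      // word, clear the target lanes, OR in the new value, and sc, retrying
      // until the store-conditional succeeds.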
4690 _sync();
4691 _addiu(T1, getZero(), -4);
4692 _and(T7, Base, T1);
4693 auto *Addr = formMemoryOperand(T7, Ty);
4694 _andi(T2, Base, 3);
4695 _sll(T3, T2, 3);
4696 _ori(T4, getZero(), Mask);
4697 _sllv(T5, T4, T3);
4698 _sllv(T6, Val, T3);
4699 _nor(SrcMask, getZero(), T5);
4700 _and(DstMask, T6, T5);
4701 Context.insert(Retry);
4702 Sandboxer(this).ll(RegAt, Addr);
4703 _and(RegAt, RegAt, SrcMask);
4704 _or(RegAt, RegAt, DstMask);
4705 Sandboxer(this).sc(RegAt, Addr);
4706 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4707 Context.insert<InstFakeUse>(SrcMask);
4708 Context.insert<InstFakeUse>(DstMask);
4709 _sync();
4710 }
4711 return;
4712 }
4713 case Intrinsics::AtomicCmpxchg: {
4714 assert(isScalarIntegerType(DestTy));
4715     // We require the memory address to be naturally aligned; given that,
4716     // normal loads are atomic.
4717 if (!Intrinsics::isMemoryOrderValid(
4718 ID, getConstantMemoryOrder(Instr->getArg(3)),
4719 getConstantMemoryOrder(Instr->getArg(4)))) {
4720 Func->setError("Unexpected memory ordering for AtomicCmpxchg");
4721 return;
4722 }
4723
4724 InstMIPS32Label *Exit = InstMIPS32Label::create(Func, this);
4725 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4726 constexpr CfgNode *NoTarget = nullptr;
4727 auto *New = Instr->getArg(2);
4728 auto *Expected = Instr->getArg(1);
4729 auto *ActualAddress = Instr->getArg(0);
4730
4731 if (DestTy == IceType_i64) {
4732 llvm::report_fatal_error(
4733 "AtomicCmpxchg.i64 should have been prelowered.");
4734 return;
4735 } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4736 auto *NewR = legalizeToReg(New);
4737 auto *ExpectedR = legalizeToReg(Expected);
4738 auto *ActualAddressR = legalizeToReg(ActualAddress);
4739 const uint32_t ShiftAmount =
4740 (INT32_BITS - CHAR_BITS * typeWidthInBytes(DestTy));
4741 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4742 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4743 auto *T1 = I32Reg();
4744 auto *T2 = I32Reg();
4745 auto *T3 = I32Reg();
4746 auto *T4 = I32Reg();
4747 auto *T5 = I32Reg();
4748 auto *T6 = I32Reg();
4749 auto *T7 = I32Reg();
4750 auto *T8 = I32Reg();
4751 auto *T9 = I32Reg();
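      // Sub-word compare-and-swap on the containing word: T3 is the lane
      // mask, T4 its complement, and T5/T6 the expected/new values shifted
      // into lane position. The loop exits early if the loaded lanes differ
      // from the expected value; otherwise it splices in the new lanes and
      // retries until sc succeeds. The old lanes are then shifted down and
      // sign-extended into Dest.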
4752 _addiu(RegAt, getZero(), -4);
4753 _and(T1, ActualAddressR, RegAt);
4754 auto *Addr = formMemoryOperand(T1, DestTy);
4755 _andi(RegAt, ActualAddressR, 3);
4756 _sll(T2, RegAt, 3);
4757 _ori(RegAt, getZero(), Mask);
4758 _sllv(T3, RegAt, T2);
4759 _nor(T4, getZero(), T3);
4760 _andi(RegAt, ExpectedR, Mask);
4761 _sllv(T5, RegAt, T2);
4762 _andi(RegAt, NewR, Mask);
4763 _sllv(T6, RegAt, T2);
4764 _sync();
4765 Context.insert(Retry);
4766 Sandboxer(this).ll(T7, Addr);
4767 _and(T8, T7, T3);
4768 _br(NoTarget, NoTarget, T8, T5, Exit, CondMIPS32::Cond::NE);
4769 _and(RegAt, T7, T4);
4770 _or(T9, RegAt, T6);
4771 Sandboxer(this).sc(T9, Addr);
4772 _br(NoTarget, NoTarget, getZero(), T9, Retry, CondMIPS32::Cond::EQ);
4773 Context.insert<InstFakeUse>(getZero());
4774 Context.insert(Exit);
4775 _srlv(RegAt, T8, T2);
4776 _sll(RegAt, RegAt, ShiftAmount);
4777 _sra(RegAt, RegAt, ShiftAmount);
4778 _mov(Dest, RegAt);
4779 _sync();
4780 Context.insert<InstFakeUse>(T3);
4781 Context.insert<InstFakeUse>(T4);
4782 Context.insert<InstFakeUse>(T5);
4783 Context.insert<InstFakeUse>(T6);
4784 Context.insert<InstFakeUse>(T8);
4785 Context.insert<InstFakeUse>(ExpectedR);
4786 Context.insert<InstFakeUse>(NewR);
4787 } else {
4788 auto *T1 = I32Reg();
4789 auto *T2 = I32Reg();
4790 auto *NewR = legalizeToReg(New);
4791 auto *ExpectedR = legalizeToReg(Expected);
4792 auto *ActualAddressR = legalizeToReg(ActualAddress);
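      // Word-sized compare-and-swap: ll the current value, exit if it differs
      // from the expected value, otherwise sc the new value, retrying until
      // the store-conditional succeeds. Dest receives the loaded value.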
4793 _sync();
4794 Context.insert(Retry);
4795 Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4796 _br(NoTarget, NoTarget, T1, ExpectedR, Exit, CondMIPS32::Cond::NE);
4797 _mov(T2, NewR);
4798 Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4799 _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4800 Context.insert<InstFakeUse>(getZero());
4801 Context.insert(Exit);
4802 _mov(Dest, T1);
4803 _sync();
4804 Context.insert<InstFakeUse>(ExpectedR);
4805 Context.insert<InstFakeUse>(NewR);
4806 }
4807 return;
4808 }
4809 case Intrinsics::AtomicRMW: {
4810 assert(isScalarIntegerType(DestTy));
4811     // We require the memory address to be naturally aligned; given that,
4812     // normal loads are atomic.
4813 if (!Intrinsics::isMemoryOrderValid(
4814 ID, getConstantMemoryOrder(Instr->getArg(3)))) {
4815 Func->setError("Unexpected memory ordering for AtomicRMW");
4816 return;
4817 }
4818
4819 constexpr CfgNode *NoTarget = nullptr;
4820 InstMIPS32Label *Retry = InstMIPS32Label::create(Func, this);
4821 auto Operation = static_cast<Intrinsics::AtomicRMWOperation>(
4822 llvm::cast<ConstantInteger32>(Instr->getArg(0))->getValue());
4823 auto *New = Instr->getArg(2);
4824 auto *ActualAddress = Instr->getArg(1);
4825
4826 if (DestTy == IceType_i64) {
4827 llvm::report_fatal_error("AtomicRMW.i64 should have been prelowered.");
4828 return;
4829 } else if (DestTy == IceType_i8 || DestTy == IceType_i16) {
4830 const uint32_t ShiftAmount =
4831 INT32_BITS - (CHAR_BITS * typeWidthInBytes(DestTy));
4832 const uint32_t Mask = (1 << (CHAR_BITS * typeWidthInBytes(DestTy))) - 1;
4833 auto *NewR = legalizeToReg(New);
4834 auto *ActualAddressR = legalizeToReg(ActualAddress);
4835 auto *RegAt = getPhysicalRegister(RegMIPS32::Reg_AT);
4836 auto *T1 = I32Reg();
4837 auto *T2 = I32Reg();
4838 auto *T3 = I32Reg();
4839 auto *T4 = I32Reg();
4840 auto *T5 = I32Reg();
4841 auto *T6 = I32Reg();
4842 auto *T7 = I32Reg();
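      // Sub-word read-modify-write: the operation is applied to the whole
      // aligned word, with T3/T4 as the lane mask and its complement and T5
      // as the operand shifted into lane position. After the ll/sc loop the
      // original lanes are extracted from T6 and sign-extended into Dest.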
4843 _sync();
4844 _addiu(RegAt, getZero(), -4);
4845 _and(T1, ActualAddressR, RegAt);
4846 _andi(RegAt, ActualAddressR, 3);
4847 _sll(T2, RegAt, 3);
4848 _ori(RegAt, getZero(), Mask);
4849 _sllv(T3, RegAt, T2);
4850 _nor(T4, getZero(), T3);
4851 _sllv(T5, NewR, T2);
4852 Context.insert(Retry);
4853 Sandboxer(this).ll(T6, formMemoryOperand(T1, DestTy));
4854 if (Operation != Intrinsics::AtomicExchange) {
4855 createArithInst(Operation, RegAt, T6, T5);
4856 _and(RegAt, RegAt, T3);
4857 }
4858 _and(T7, T6, T4);
4859 if (Operation == Intrinsics::AtomicExchange) {
4860 _or(RegAt, T7, T5);
4861 } else {
4862 _or(RegAt, T7, RegAt);
4863 }
4864 Sandboxer(this).sc(RegAt, formMemoryOperand(T1, DestTy));
4865 _br(NoTarget, NoTarget, RegAt, getZero(), Retry, CondMIPS32::Cond::EQ);
4866 Context.insert<InstFakeUse>(getZero());
4867 _and(RegAt, T6, T3);
4868 _srlv(RegAt, RegAt, T2);
4869 _sll(RegAt, RegAt, ShiftAmount);
4870 _sra(RegAt, RegAt, ShiftAmount);
4871 _mov(Dest, RegAt);
4872 _sync();
4873 Context.insert<InstFakeUse>(NewR);
4874 Context.insert<InstFakeUse>(Dest);
4875 } else {
4876 auto *T1 = I32Reg();
4877 auto *T2 = I32Reg();
4878 auto *NewR = legalizeToReg(New);
4879 auto *ActualAddressR = legalizeToReg(ActualAddress);
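      // Word-sized read-modify-write: ll the old value, apply the operation
      // (or substitute the new value for AtomicExchange), and sc the result,
      // retrying until the store-conditional succeeds. Dest receives the old
      // value.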
4880 _sync();
4881 Context.insert(Retry);
4882 Sandboxer(this).ll(T1, formMemoryOperand(ActualAddressR, DestTy));
4883 if (Operation == Intrinsics::AtomicExchange) {
4884 _mov(T2, NewR);
4885 } else {
4886 createArithInst(Operation, T2, T1, NewR);
4887 }
4888 Sandboxer(this).sc(T2, formMemoryOperand(ActualAddressR, DestTy));
4889 _br(NoTarget, NoTarget, T2, getZero(), Retry, CondMIPS32::Cond::EQ);
4890 Context.insert<InstFakeUse>(getZero());
4891 _mov(Dest, T1);
4892 _sync();
4893 Context.insert<InstFakeUse>(NewR);
4894 Context.insert<InstFakeUse>(Dest);
4895 }
4896 return;
4897 }
4898 case Intrinsics::AtomicFence:
4899 case Intrinsics::AtomicFenceAll:
4900 assert(Dest == nullptr);
4901 _sync();
4902 return;
4903 case Intrinsics::AtomicIsLockFree: {
4904 Operand *ByteSize = Instr->getArg(0);
4905 auto *CI = llvm::dyn_cast<ConstantInteger32>(ByteSize);
4906 auto *T = I32Reg();
4907 if (CI == nullptr) {
4908 // The PNaCl ABI requires the byte size to be a compile-time constant.
4909 Func->setError("AtomicIsLockFree byte size should be compile-time const");
4910 return;
4911 }
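    // Byte sizes 1, 2 and 4 are reported lock-free: word-sized operations
    // map directly onto ll/sc, and subword operations are handled with the
    // masked ll/sc loop used by AtomicRMW above. Anything larger falls
    // through and reports NotLockFree.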
4912 static constexpr int32_t NotLockFree = 0;
4913 static constexpr int32_t LockFree = 1;
4914 int32_t Result = NotLockFree;
4915 switch (CI->getValue()) {
4916 case 1:
4917 case 2:
4918 case 4:
4919 Result = LockFree;
4920 break;
4921 }
4922 _addiu(T, getZero(), Result);
4923 _mov(Dest, T);
4924 return;
4925 }
4926 case Intrinsics::Bswap: {
4927 auto *Src = Instr->getArg(0);
4928 const Type SrcTy = Src->getType();
4929 assert(SrcTy == IceType_i16 || SrcTy == IceType_i32 ||
4930 SrcTy == IceType_i64);
4931 switch (SrcTy) {
4932 case IceType_i16: {
4933 auto *T1 = I32Reg();
4934 auto *T2 = I32Reg();
4935 auto *T3 = I32Reg();
4936 auto *T4 = I32Reg();
4937 auto *SrcR = legalizeToReg(Src);
4938 _sll(T1, SrcR, 8);
4939 _lui(T2, Ctx->getConstantInt32(255));
4940 _and(T1, T1, T2);
4941 _sll(T3, SrcR, 24);
4942 _or(T1, T3, T1);
4943 _srl(T4, T1, 16);
4944 _mov(Dest, T4);
4945 return;
4946 }
4947 case IceType_i32: {
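      // Byte-reverse a word using shifts and masks, e.g. 0x11223344 ->
      // 0x44332211: the two high bytes are shifted down into place
      // (srl/andi) and the two low bytes shifted up (sll with the
      // 0x00FF0000 mask produced by lui), then the pieces are or'd.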
4948 auto *T1 = I32Reg();
4949 auto *T2 = I32Reg();
4950 auto *T3 = I32Reg();
4951 auto *T4 = I32Reg();
4952 auto *T5 = I32Reg();
4953 auto *SrcR = legalizeToReg(Src);
4954 _srl(T1, SrcR, 24);
4955 _srl(T2, SrcR, 8);
4956 _andi(T2, T2, 0xFF00);
4957 _or(T1, T2, T1);
4958 _sll(T4, SrcR, 8);
4959 _lui(T3, Ctx->getConstantInt32(255));
4960 _and(T4, T4, T3);
4961 _sll(T5, SrcR, 24);
4962 _or(T4, T5, T4);
4963 _or(T4, T4, T1);
4964 _mov(Dest, T4);
4965 return;
4966 }
4967 case IceType_i64: {
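      // For i64 the bytes are reversed across the whole value: DestLo
      // receives the byte-reversed SrcHi and DestHi the byte-reversed
      // SrcLo, each half using the same shift/mask pattern as the i32 case.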
4968 auto *T1 = I32Reg();
4969 auto *T2 = I32Reg();
4970 auto *T3 = I32Reg();
4971 auto *T4 = I32Reg();
4972 auto *T5 = I32Reg();
4973 auto *T6 = I32Reg();
4974 auto *T7 = I32Reg();
4975 auto *T8 = I32Reg();
4976 auto *T9 = I32Reg();
4977 auto *T10 = I32Reg();
4978 auto *T11 = I32Reg();
4979 auto *T12 = I32Reg();
4980 auto *T13 = I32Reg();
4981 auto *T14 = I32Reg();
4982 auto *T15 = I32Reg();
4983 auto *T16 = I32Reg();
4984 auto *T17 = I32Reg();
4985 auto *T18 = I32Reg();
4986 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
4987 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
4988 Src = legalizeUndef(Src);
4989 auto *SrcLoR = legalizeToReg(loOperand(Src));
4990 auto *SrcHiR = legalizeToReg(hiOperand(Src));
4991 _sll(T1, SrcHiR, 8);
4992 _srl(T2, SrcHiR, 24);
4993 _srl(T3, SrcHiR, 8);
4994 _andi(T3, T3, 0xFF00);
4995 _lui(T4, Ctx->getConstantInt32(255));
4996 _or(T5, T3, T2);
4997 _and(T6, T1, T4);
4998 _sll(T7, SrcHiR, 24);
4999 _or(T8, T7, T6);
5000 _srl(T9, SrcLoR, 24);
5001 _srl(T10, SrcLoR, 8);
5002 _andi(T11, T10, 0xFF00);
5003 _or(T12, T8, T5);
5004 _or(T13, T11, T9);
5005 _sll(T14, SrcLoR, 8);
5006 _and(T15, T14, T4);
5007 _sll(T16, SrcLoR, 24);
5008 _or(T17, T16, T15);
5009 _or(T18, T17, T13);
5010 _mov(DestLo, T12);
5011 _mov(DestHi, T18);
5012 return;
5013 }
5014 default:
5015 llvm::report_fatal_error("Control flow should never have reached here.");
5016 }
5017 return;
5018 }
5019 case Intrinsics::Ctpop: {
5020 llvm::report_fatal_error("Ctpop should have been prelowered.");
5021 return;
5022 }
5023 case Intrinsics::Ctlz: {
5024 auto *Src = Instr->getArg(0);
5025 const Type SrcTy = Src->getType();
5026 assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5027 switch (SrcTy) {
5028 case IceType_i32: {
5029 auto *T = I32Reg();
5030 auto *SrcR = legalizeToReg(Src);
5031 _clz(T, SrcR);
5032 _mov(Dest, T);
5033 break;
5034 }
5035 case IceType_i64: {
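      // clz64(x) is clz32(hi) when the high word is non-zero, otherwise
      // 32 + clz32(lo); the movn selects clz32(hi) when SrcHiR != 0. The
      // high word of the i64 result is always zero.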
5036 auto *T1 = I32Reg();
5037 auto *T2 = I32Reg();
5038 auto *T3 = I32Reg();
5039 auto *T4 = I32Reg();
5040 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5041 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5042 Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5043 Variable *SrcLoR = legalizeToReg(loOperand(Src));
5044 _clz(T1, SrcHiR);
5045 _clz(T2, SrcLoR);
5046 _addiu(T3, T2, 32);
5047 _movn(T3, T1, SrcHiR);
5048 _addiu(T4, getZero(), 0);
5049 _mov(DestHi, T4);
5050 _mov(DestLo, T3);
5051 break;
5052 }
5053 default:
5054 llvm::report_fatal_error("Control flow should never have reached here.");
5055 }
5056 break;
5057 }
5058 case Intrinsics::Cttz: {
5059 auto *Src = Instr->getArg(0);
5060 const Type SrcTy = Src->getType();
5061 assert(SrcTy == IceType_i32 || SrcTy == IceType_i64);
5062 switch (SrcTy) {
5063 case IceType_i32: {
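      // (x - 1) & ~x sets exactly the trailing-zero bits of x, so
      // cttz(x) == 32 - clz((x - 1) & ~x). E.g. for x = 0b1000 the mask is
      // 0b0111, clz gives 29, and 32 - 29 == 3.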
5064 auto *T1 = I32Reg();
5065 auto *T2 = I32Reg();
5066 auto *T3 = I32Reg();
5067 auto *T4 = I32Reg();
5068 auto *T5 = I32Reg();
5069 auto *T6 = I32Reg();
5070 auto *SrcR = legalizeToReg(Src);
5071 _addiu(T1, SrcR, -1);
5072 _not(T2, SrcR);
5073 _and(T3, T2, T1);
5074 _clz(T4, T3);
5075 _addiu(T5, getZero(), 32);
5076 _subu(T6, T5, T4);
5077 _mov(Dest, T6);
5078 break;
5079 }
5080 case IceType_i64: {
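      // The same trick is applied to each half: the result is 32 - clz of
      // the low-half mask when the low word is non-zero (selected via movn
      // on SrcLoR), otherwise 64 - clz of the high-half mask. DestHi is
      // always zero.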
5081 auto *THi1 = I32Reg();
5082 auto *THi2 = I32Reg();
5083 auto *THi3 = I32Reg();
5084 auto *THi4 = I32Reg();
5085 auto *THi5 = I32Reg();
5086 auto *THi6 = I32Reg();
5087 auto *TLo1 = I32Reg();
5088 auto *TLo2 = I32Reg();
5089 auto *TLo3 = I32Reg();
5090 auto *TLo4 = I32Reg();
5091 auto *TLo5 = I32Reg();
5092 auto *TLo6 = I32Reg();
5093 auto *TResHi = I32Reg();
5094 auto *DestLo = llvm::cast<Variable>(loOperand(Dest));
5095 auto *DestHi = llvm::cast<Variable>(hiOperand(Dest));
5096 Variable *SrcHiR = legalizeToReg(hiOperand(Src));
5097 Variable *SrcLoR = legalizeToReg(loOperand(Src));
5098 _addiu(THi1, SrcHiR, -1);
5099 _not(THi2, SrcHiR);
5100 _and(THi3, THi2, THi1);
5101 _clz(THi4, THi3);
5102 _addiu(THi5, getZero(), 64);
5103 _subu(THi6, THi5, THi4);
5104 _addiu(TLo1, SrcLoR, -1);
5105 _not(TLo2, SrcLoR);
5106 _and(TLo3, TLo2, TLo1);
5107 _clz(TLo4, TLo3);
5108 _addiu(TLo5, getZero(), 32);
5109 _subu(TLo6, TLo5, TLo4);
5110 _movn(THi6, TLo6, SrcLoR);
5111 _addiu(TResHi, getZero(), 0);
5112 _mov(DestHi, TResHi);
5113 _mov(DestLo, THi6);
5114 break;
5115 }
5116 default:
5117 llvm::report_fatal_error("Control flow should never have reached here.");
5118 }
5119 return;
5120 }
5121 case Intrinsics::Fabs: {
5122 if (isScalarFloatingType(DestTy)) {
5123 Variable *T = makeReg(DestTy);
5124 if (DestTy == IceType_f32) {
5125 _abs_s(T, legalizeToReg(Instr->getArg(0)));
5126 } else {
5127 _abs_d(T, legalizeToReg(Instr->getArg(0)));
5128 }
5129 _mov(Dest, T);
5130 }
5131 return;
5132 }
5133 case Intrinsics::Longjmp: {
5134 llvm::report_fatal_error("longjmp should have been prelowered.");
5135 return;
5136 }
5137 case Intrinsics::Memcpy: {
5138 llvm::report_fatal_error("memcpy should have been prelowered.");
5139 return;
5140 }
5141 case Intrinsics::Memmove: {
5142 llvm::report_fatal_error("memmove should have been prelowered.");
5143 return;
5144 }
5145 case Intrinsics::Memset: {
5146 llvm::report_fatal_error("memset should have been prelowered.");
5147 return;
5148 }
5149 case Intrinsics::NaClReadTP: {
5150 if (SandboxingType != ST_NaCl)
5151 llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
5152 else {
5153 auto *T8 = makeReg(IceType_i32, RegMIPS32::Reg_T8);
5154 Context.insert<InstFakeDef>(T8);
5155 Variable *TP = legalizeToReg(OperandMIPS32Mem::create(
5156 Func, getPointerType(), T8,
5157 llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
5158 _mov(Dest, TP);
5159 }
5160 return;
5161 }
5162 case Intrinsics::Setjmp: {
5163 llvm::report_fatal_error("setjmp should have been prelowered.");
5164 return;
5165 }
5166 case Intrinsics::Sqrt: {
5167 if (isScalarFloatingType(DestTy)) {
5168 Variable *T = makeReg(DestTy);
5169 if (DestTy == IceType_f32) {
5170 _sqrt_s(T, legalizeToReg(Instr->getArg(0)));
5171 } else {
5172 _sqrt_d(T, legalizeToReg(Instr->getArg(0)));
5173 }
5174 _mov(Dest, T);
5175 } else {
5176 assert(getFlags().getApplicationBinaryInterface() != ::Ice::ABI_PNaCl);
5177 UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5178 }
5179 return;
5180 }
5181 case Intrinsics::Stacksave: {
5182 Variable *SP = getPhysicalRegister(RegMIPS32::Reg_SP);
5183 _mov(Dest, SP);
5184 return;
5185 }
5186 case Intrinsics::Stackrestore: {
5187 Variable *Val = legalizeToReg(Instr->getArg(0));
5188 Sandboxer(this).reset_sp(Val);
5189 return;
5190 }
5191 case Intrinsics::Trap: {
5192 const uint32_t TrapCodeZero = 0;
5193 _teq(getZero(), getZero(), TrapCodeZero);
5194 return;
5195 }
5196 case Intrinsics::LoadSubVector: {
5197 UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5198 return;
5199 }
5200 case Intrinsics::StoreSubVector: {
5201 UnimplementedLoweringError(this, Instr); // Not required for PNaCl
5202 return;
5203 }
5204 default: // UnknownIntrinsic
5205 Func->setError("Unexpected intrinsic");
5206 return;
5207 }
5208 return;
5209 }
5210
void TargetMIPS32::lowerLoad(const InstLoad *Instr) {
5212 // A Load instruction can be treated the same as an Assign instruction, after
5213 // the source operand is transformed into an OperandMIPS32Mem operand.
5214 Type Ty = Instr->getDest()->getType();
5215 Operand *Src0 = formMemoryOperand(Instr->getSourceAddress(), Ty);
5216 Variable *DestLoad = Instr->getDest();
5217 auto *Assign = InstAssign::create(Func, DestLoad, Src0);
5218 lowerAssign(Assign);
5219 }
5220
5221 namespace {
void dumpAddressOpt(const Cfg *Func, const Variable *Base, int32_t Offset,
                    const Inst *Reason) {
5224 if (!BuildDefs::dump())
5225 return;
5226 if (!Func->isVerbose(IceV_AddrOpt))
5227 return;
5228 OstreamLocker _(Func->getContext());
5229 Ostream &Str = Func->getContext()->getStrDump();
5230 Str << "Instruction: ";
5231 Reason->dumpDecorated(Func);
5232 Str << " results in Base=";
5233 if (Base)
5234 Base->dump(Func);
5235 else
5236 Str << "<null>";
5237 Str << ", Offset=" << Offset << "\n";
5238 }
5239
bool matchAssign(const VariablesMetadata *VMetadata, Variable **Var,
                 int32_t *Offset, const Inst **Reason) {
5242 // Var originates from Var=SrcVar ==> set Var:=SrcVar
5243 if (*Var == nullptr)
5244 return false;
5245 const Inst *VarAssign = VMetadata->getSingleDefinition(*Var);
5246 if (!VarAssign)
5247 return false;
5248 assert(!VMetadata->isMultiDef(*Var));
5249 if (!llvm::isa<InstAssign>(VarAssign))
5250 return false;
5251
5252 Operand *SrcOp = VarAssign->getSrc(0);
5253 bool Optimized = false;
5254 if (auto *SrcVar = llvm::dyn_cast<Variable>(SrcOp)) {
5255 if (!VMetadata->isMultiDef(SrcVar) ||
5256 // TODO: ensure SrcVar stays single-BB
5257 false) {
5258 Optimized = true;
5259 *Var = SrcVar;
5260 } else if (auto *Const = llvm::dyn_cast<ConstantInteger32>(SrcOp)) {
      int32_t MoreOffset = Const->getValue();
      if (Utils::WouldOverflowAdd(*Offset, MoreOffset))
        return false;
      *Var = nullptr;
      *Offset += MoreOffset;
5267 Optimized = true;
5268 }
5269 }
5270
5271 if (Optimized) {
5272 *Reason = VarAssign;
5273 }
5274
5275 return Optimized;
5276 }
5277
bool isAddOrSub(const Inst *Instr, InstArithmetic::OpKind *Kind) {
5279 if (const auto *Arith = llvm::dyn_cast<InstArithmetic>(Instr)) {
5280 switch (Arith->getOp()) {
5281 default:
5282 return false;
5283 case InstArithmetic::Add:
5284 case InstArithmetic::Sub:
5285 *Kind = Arith->getOp();
5286 return true;
5287 }
5288 }
5289 return false;
5290 }
5291
bool matchOffsetBase(const VariablesMetadata *VMetadata, Variable **Base,
                     int32_t *Offset, const Inst **Reason) {
5294 // Base is Base=Var+Const || Base is Base=Const+Var ==>
5295 // set Base=Var, Offset+=Const
5296 // Base is Base=Var-Const ==>
5297 // set Base=Var, Offset-=Const
5298 if (*Base == nullptr)
5299 return false;
5300 const Inst *BaseInst = VMetadata->getSingleDefinition(*Base);
5301 if (BaseInst == nullptr) {
5302 return false;
5303 }
5304 assert(!VMetadata->isMultiDef(*Base));
5305
5306 auto *ArithInst = llvm::dyn_cast<const InstArithmetic>(BaseInst);
5307 if (ArithInst == nullptr)
5308 return false;
5309 InstArithmetic::OpKind Kind;
5310 if (!isAddOrSub(ArithInst, &Kind))
5311 return false;
5312 bool IsAdd = Kind == InstArithmetic::Add;
5313 Operand *Src0 = ArithInst->getSrc(0);
5314 Operand *Src1 = ArithInst->getSrc(1);
5315 auto *Var0 = llvm::dyn_cast<Variable>(Src0);
5316 auto *Var1 = llvm::dyn_cast<Variable>(Src1);
5317 auto *Const0 = llvm::dyn_cast<ConstantInteger32>(Src0);
5318 auto *Const1 = llvm::dyn_cast<ConstantInteger32>(Src1);
5319 Variable *NewBase = nullptr;
5320 int32_t NewOffset = *Offset;
5321
5322 if (Var0 == nullptr && Const0 == nullptr) {
5323 assert(llvm::isa<ConstantRelocatable>(Src0));
5324 return false;
5325 }
5326
5327 if (Var1 == nullptr && Const1 == nullptr) {
5328 assert(llvm::isa<ConstantRelocatable>(Src1));
5329 return false;
5330 }
5331
5332 if (Var0 && Var1)
5333 // TODO(jpp): merge base/index splitting into here.
5334 return false;
5335 if (!IsAdd && Var1)
5336 return false;
5337 if (Var0)
5338 NewBase = Var0;
5339 else if (Var1)
5340 NewBase = Var1;
5341 // Compute the updated constant offset.
5342 if (Const0) {
5343 int32_t MoreOffset = IsAdd ? Const0->getValue() : -Const0->getValue();
5344 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5345 return false;
5346 NewOffset += MoreOffset;
5347 }
5348 if (Const1) {
5349 int32_t MoreOffset = IsAdd ? Const1->getValue() : -Const1->getValue();
5350 if (Utils::WouldOverflowAdd(NewOffset, MoreOffset))
5351 return false;
5352 NewOffset += MoreOffset;
5353 }
5354
5355 // Update the computed address parameters once we are sure optimization
5356 // is valid.
5357 *Base = NewBase;
5358 *Offset = NewOffset;
5359 *Reason = BaseInst;
5360 return true;
5361 }
5362 } // end of anonymous namespace
5363
OperandMIPS32Mem *TargetMIPS32::formAddressingMode(Type Ty, Cfg *Func,
                                                   const Inst *LdSt,
                                                   Operand *Base) {
5367 assert(Base != nullptr);
5368 int32_t OffsetImm = 0;
5369
5370 Func->resetCurrentNode();
5371 if (Func->isVerbose(IceV_AddrOpt)) {
5372 OstreamLocker _(Func->getContext());
5373 Ostream &Str = Func->getContext()->getStrDump();
5374 Str << "\nAddress mode formation:\t";
5375 LdSt->dumpDecorated(Func);
5376 }
5377
5378 if (isVectorType(Ty)) {
5379 return nullptr;
5380 }
5381
5382 auto *BaseVar = llvm::dyn_cast<Variable>(Base);
5383 if (BaseVar == nullptr)
5384 return nullptr;
5385
5386 const VariablesMetadata *VMetadata = Func->getVMetadata();
5387 const Inst *Reason = nullptr;
5388
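  // Iteratively fold simple assignments and add/sub-of-constant definitions
  // of the base variable into (BaseVar, OffsetImm). E.g. given a = b + 12
  // and a load from a + 4, the loop ends with BaseVar = b, OffsetImm = 16.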
5389 do {
5390 if (Reason != nullptr) {
5391 dumpAddressOpt(Func, BaseVar, OffsetImm, Reason);
5392 Reason = nullptr;
5393 }
5394
5395 if (matchAssign(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5396 continue;
5397 }
5398
5399 if (matchOffsetBase(VMetadata, &BaseVar, &OffsetImm, &Reason)) {
5400 continue;
5401 }
5402 } while (Reason);
5403
5404 if (BaseVar == nullptr) {
    // We need a base register rather than just OffsetImm. Move OffsetImm
    // into BaseVar and form 0(BaseVar) addressing.
5407 const Type PointerType = getPointerType();
5408 BaseVar = makeReg(PointerType);
5409 Context.insert<InstAssign>(BaseVar, Ctx->getConstantInt32(OffsetImm));
5410 OffsetImm = 0;
5411 } else if (OffsetImm != 0) {
    // If OffsetImm does not fit in a signed 16-bit immediate, fold it into
    // BaseVar and form 0(BaseVar) addressing.
5414 const int32_t PositiveOffset = OffsetImm > 0 ? OffsetImm : -OffsetImm;
5415 const InstArithmetic::OpKind Op =
5416 OffsetImm > 0 ? InstArithmetic::Add : InstArithmetic::Sub;
5417 constexpr bool ZeroExt = false;
5418 if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, OffsetImm)) {
5419 const Type PointerType = getPointerType();
5420 Variable *T = makeReg(PointerType);
5421 Context.insert<InstArithmetic>(Op, T, BaseVar,
5422 Ctx->getConstantInt32(PositiveOffset));
5423 BaseVar = T;
5424 OffsetImm = 0;
5425 }
5426 }
5427
5428 assert(BaseVar != nullptr);
5429 assert(OffsetImm < 0 ? (-OffsetImm & 0x0000ffff) == -OffsetImm
5430 : (OffsetImm & 0x0000ffff) == OffsetImm);
5431
5432 return OperandMIPS32Mem::create(
5433 Func, Ty, BaseVar,
5434 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(OffsetImm)));
5435 }
5436
void TargetMIPS32::doAddressOptLoad() {
5438 Inst *Instr = iteratorToInst(Context.getCur());
5439 assert(llvm::isa<InstLoad>(Instr));
5440 Variable *Dest = Instr->getDest();
5441 Operand *Addr = Instr->getSrc(0);
5442 if (OperandMIPS32Mem *Mem =
5443 formAddressingMode(Dest->getType(), Func, Instr, Addr)) {
5444 Instr->setDeleted();
5445 Context.insert<InstLoad>(Dest, Mem);
5446 }
5447 }
5448
void TargetMIPS32::randomlyInsertNop(float Probability,
                                     RandomNumberGenerator &RNG) {
5451 RandomNumberGeneratorWrapper RNGW(RNG);
5452 if (RNGW.getTrueWithProbability(Probability)) {
5453 _nop();
5454 }
5455 }
5456
void TargetMIPS32::lowerPhi(const InstPhi * /*Instr*/) {
5458 Func->setError("Phi found in regular instruction list");
5459 }
5460
void TargetMIPS32::lowerRet(const InstRet *Instr) {
5462 Variable *Reg = nullptr;
5463 if (Instr->hasRetValue()) {
5464 Operand *Src0 = Instr->getRetValue();
5465 switch (Src0->getType()) {
5466 case IceType_f32: {
5467 Operand *Src0F = legalizeToReg(Src0);
5468 Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0);
5469 _mov(Reg, Src0F);
5470 break;
5471 }
5472 case IceType_f64: {
5473 Operand *Src0F = legalizeToReg(Src0);
5474 Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_F0F1);
5475 _mov(Reg, Src0F);
5476 break;
5477 }
5478 case IceType_i1:
5479 case IceType_i8:
5480 case IceType_i16:
5481 case IceType_i32: {
5482 Operand *Src0F = legalizeToReg(Src0);
5483 Reg = makeReg(Src0F->getType(), RegMIPS32::Reg_V0);
5484 _mov(Reg, Src0F);
5485 break;
5486 }
5487 case IceType_i64: {
5488 Src0 = legalizeUndef(Src0);
5489 Variable *R0 = legalizeToReg(loOperand(Src0), RegMIPS32::Reg_V0);
5490 Variable *R1 = legalizeToReg(hiOperand(Src0), RegMIPS32::Reg_V1);
5491 Reg = R0;
5492 Context.insert<InstFakeUse>(R1);
5493 break;
5494 }
5495 case IceType_v4i1:
5496 case IceType_v8i1:
5497 case IceType_v16i1:
5498 case IceType_v16i8:
5499 case IceType_v8i16:
5500 case IceType_v4i32: {
5501 auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5502 Variable *V0 =
5503 legalizeToReg(SrcVec->getContainers()[0], RegMIPS32::Reg_V0);
5504 Variable *V1 =
5505 legalizeToReg(SrcVec->getContainers()[1], RegMIPS32::Reg_V1);
5506 Variable *A0 =
5507 legalizeToReg(SrcVec->getContainers()[2], RegMIPS32::Reg_A0);
5508 Variable *A1 =
5509 legalizeToReg(SrcVec->getContainers()[3], RegMIPS32::Reg_A1);
5510 Reg = V0;
5511 Context.insert<InstFakeUse>(V1);
5512 Context.insert<InstFakeUse>(A0);
5513 Context.insert<InstFakeUse>(A1);
5514 break;
5515 }
5516 case IceType_v4f32: {
5517 auto *SrcVec = llvm::dyn_cast<VariableVecOn32>(legalizeUndef(Src0));
5518 Reg = getImplicitRet();
5519 auto *RegT = legalizeToReg(Reg);
      // Return the vector through the buffer addressed by the implicit
      // argument a0.
5521 for (SizeT i = 0; i < SrcVec->ContainersPerVector; ++i) {
5522 OperandMIPS32Mem *Mem = OperandMIPS32Mem::create(
5523 Func, IceType_f32, RegT,
5524 llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(i * 4)));
5525 Variable *Var = legalizeToReg(SrcVec->getContainers()[i]);
5526 _sw(Var, Mem);
5527 }
5528 Variable *V0 = makeReg(IceType_i32, RegMIPS32::Reg_V0);
5529 _mov(V0, Reg); // move v0,a0
5530 Context.insert<InstFakeUse>(Reg);
5531 Context.insert<InstFakeUse>(V0);
5532 break;
5533 }
5534 default:
5535 llvm::report_fatal_error("Ret: Invalid type.");
5536 break;
5537 }
5538 }
5539 _ret(getPhysicalRegister(RegMIPS32::Reg_RA), Reg);
5540 }
5541
void TargetMIPS32::lowerSelect(const InstSelect *Instr) {
5543 Variable *Dest = Instr->getDest();
5544 const Type DestTy = Dest->getType();
5545
5546 if (isVectorType(DestTy)) {
5547 llvm::report_fatal_error("Select: Destination type is vector");
5548 return;
5549 }
5550
5551 Variable *DestR = nullptr;
5552 Variable *DestHiR = nullptr;
5553 Variable *SrcTR = nullptr;
5554 Variable *SrcTHiR = nullptr;
5555 Variable *SrcFR = nullptr;
5556 Variable *SrcFHiR = nullptr;
5557
5558 if (DestTy == IceType_i64) {
5559 DestR = llvm::cast<Variable>(loOperand(Dest));
5560 DestHiR = llvm::cast<Variable>(hiOperand(Dest));
5561 SrcTR = legalizeToReg(loOperand(legalizeUndef(Instr->getTrueOperand())));
5562 SrcTHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getTrueOperand())));
5563 SrcFR = legalizeToReg(loOperand(legalizeUndef(Instr->getFalseOperand())));
5564 SrcFHiR = legalizeToReg(hiOperand(legalizeUndef(Instr->getFalseOperand())));
5565 } else {
5566 SrcTR = legalizeToReg(legalizeUndef(Instr->getTrueOperand()));
5567 SrcFR = legalizeToReg(legalizeUndef(Instr->getFalseOperand()));
5568 }
5569
5570 Variable *ConditionR = legalizeToReg(Instr->getCondition());
5571
5572 assert(Instr->getCondition()->getType() == IceType_i1);
5573
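  // Select is lowered to a conditional move: movn overwrites the "false"
  // register with the "true" value when the condition register is non-zero,
  // and the possibly-overwritten "false" register then becomes the result.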
5574 switch (DestTy) {
5575 case IceType_i1:
5576 case IceType_i8:
5577 case IceType_i16:
5578 case IceType_i32:
5579 _movn(SrcFR, SrcTR, ConditionR);
5580 _mov(Dest, SrcFR);
5581 break;
5582 case IceType_i64:
5583 _movn(SrcFR, SrcTR, ConditionR);
5584 _movn(SrcFHiR, SrcTHiR, ConditionR);
5585 _mov(DestR, SrcFR);
5586 _mov(DestHiR, SrcFHiR);
5587 break;
5588 case IceType_f32:
5589 _movn_s(SrcFR, SrcTR, ConditionR);
5590 _mov(Dest, SrcFR);
5591 break;
5592 case IceType_f64:
5593 _movn_d(SrcFR, SrcTR, ConditionR);
5594 _mov(Dest, SrcFR);
5595 break;
5596 default:
5597 llvm::report_fatal_error("Select: Invalid type.");
5598 }
5599 }
5600
void TargetMIPS32::lowerShuffleVector(const InstShuffleVector *Instr) {
5602 UnimplementedLoweringError(this, Instr);
5603 }
5604
void TargetMIPS32::lowerStore(const InstStore *Instr) {
5606 Operand *Value = Instr->getData();
5607 Operand *Addr = Instr->getAddr();
5608 OperandMIPS32Mem *NewAddr = formMemoryOperand(Addr, Value->getType());
5609 Type Ty = NewAddr->getType();
5610
5611 if (Ty == IceType_i64) {
5612 Value = legalizeUndef(Value);
5613 Variable *ValueHi = legalizeToReg(hiOperand(Value));
5614 Variable *ValueLo = legalizeToReg(loOperand(Value));
5615 _sw(ValueHi, llvm::cast<OperandMIPS32Mem>(hiOperand(NewAddr)));
5616 _sw(ValueLo, llvm::cast<OperandMIPS32Mem>(loOperand(NewAddr)));
5617 } else if (isVectorType(Value->getType())) {
5618 auto *DataVec = llvm::dyn_cast<VariableVecOn32>(Value);
5619 for (SizeT i = 0; i < DataVec->ContainersPerVector; ++i) {
5620 auto *DCont = legalizeToReg(DataVec->getContainers()[i]);
5621 auto *MCont = llvm::cast<OperandMIPS32Mem>(
5622 getOperandAtIndex(NewAddr, IceType_i32, i));
5623 _sw(DCont, MCont);
5624 }
5625 } else {
5626 Variable *ValueR = legalizeToReg(Value);
5627 _sw(ValueR, NewAddr);
5628 }
5629 }
5630
void TargetMIPS32::doAddressOptStore() {
5632 Inst *Instr = iteratorToInst(Context.getCur());
5633 assert(llvm::isa<InstStore>(Instr));
5634 Operand *Src = Instr->getSrc(0);
5635 Operand *Addr = Instr->getSrc(1);
5636 if (OperandMIPS32Mem *Mem =
5637 formAddressingMode(Src->getType(), Func, Instr, Addr)) {
5638 Instr->setDeleted();
5639 Context.insert<InstStore>(Src, Mem);
5640 }
5641 }
5642
void TargetMIPS32::lowerSwitch(const InstSwitch *Instr) {
5644 Operand *Src = Instr->getComparison();
5645 SizeT NumCases = Instr->getNumCases();
5646 if (Src->getType() == IceType_i64) {
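    // Each 64-bit case is tested in two steps: skip the case if the high
    // words differ, otherwise branch to the case target if the low words
    // match.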
5647 Src = legalizeUndef(Src);
5648 Variable *Src0Lo = legalizeToReg(loOperand(Src));
5649 Variable *Src0Hi = legalizeToReg(hiOperand(Src));
5650 for (SizeT I = 0; I < NumCases; ++I) {
5651 Operand *ValueLo = Ctx->getConstantInt32(Instr->getValue(I));
5652 Operand *ValueHi = Ctx->getConstantInt32(Instr->getValue(I) >> 32);
5653 CfgNode *TargetTrue = Instr->getLabel(I);
5654 constexpr CfgNode *NoTarget = nullptr;
5655 ValueHi = legalizeToReg(ValueHi);
5656 InstMIPS32Label *IntraLabel = InstMIPS32Label::create(Func, this);
5657 _br(NoTarget, NoTarget, Src0Hi, ValueHi, IntraLabel,
5658 CondMIPS32::Cond::NE);
5659 ValueLo = legalizeToReg(ValueLo);
5660 _br(NoTarget, TargetTrue, Src0Lo, ValueLo, CondMIPS32::Cond::EQ);
5661 Context.insert(IntraLabel);
5662 }
5663 _br(Instr->getLabelDefault());
5664 return;
5665 }
5666 Variable *SrcVar = legalizeToReg(Src);
5667 assert(SrcVar->mustHaveReg());
5668 for (SizeT I = 0; I < NumCases; ++I) {
5669 Operand *Value = Ctx->getConstantInt32(Instr->getValue(I));
5670 CfgNode *TargetTrue = Instr->getLabel(I);
5671 constexpr CfgNode *NoTargetFalse = nullptr;
5672 Value = legalizeToReg(Value);
5673 _br(NoTargetFalse, TargetTrue, SrcVar, Value, CondMIPS32::Cond::EQ);
5674 }
5675 _br(Instr->getLabelDefault());
5676 }
5677
void TargetMIPS32::lowerBreakpoint(const InstBreakpoint *Instr) {
5679 UnimplementedLoweringError(this, Instr);
5680 }
5681
void TargetMIPS32::lowerUnreachable(const InstUnreachable *) {
5683 const uint32_t TrapCodeZero = 0;
5684 _teq(getZero(), getZero(), TrapCodeZero);
5685 }
5686
void TargetMIPS32::lowerOther(const Inst *Instr) {
5688 if (llvm::isa<InstMIPS32Sync>(Instr)) {
5689 _sync();
5690 } else {
5691 TargetLowering::lowerOther(Instr);
5692 }
5693 }
5694
5695 // Turn an i64 Phi instruction into a pair of i32 Phi instructions, to preserve
5696 // integrity of liveness analysis. Undef values are also turned into zeroes,
5697 // since loOperand() and hiOperand() don't expect Undef input.
void TargetMIPS32::prelowerPhis() {
5699 PhiLowering::prelowerPhis32Bit<TargetMIPS32>(this, Context.getNode(), Func);
5700 }
5701
void TargetMIPS32::postLower() {
5703 if (Func->getOptLevel() == Opt_m1)
5704 return;
5705 markRedefinitions();
5706 Context.availabilityUpdate();
5707 }
5708
void TargetMIPS32::makeRandomRegisterPermutation(
    llvm::SmallVectorImpl<RegNumT> &Permutation,
    const SmallBitVector &ExcludeRegisters, uint64_t Salt) const {
5712 (void)Permutation;
5713 (void)ExcludeRegisters;
5714 (void)Salt;
5715 UnimplementedError(getFlags());
5716 }
5717
5718 /* TODO(jvoung): avoid duplicate symbols with multiple targets.
5719 void ConstantUndef::emitWithoutDollar(GlobalContext *) const {
5720 llvm_unreachable("Not expecting to emitWithoutDollar undef");
5721 }
5722
5723 void ConstantUndef::emit(GlobalContext *) const {
5724 llvm_unreachable("undef value encountered by emitter.");
5725 }
5726 */
5727
TargetDataMIPS32::TargetDataMIPS32(GlobalContext *Ctx)
    : TargetDataLowering(Ctx) {}
5730
// Generate the .MIPS.abiflags section. This section contains a versioned
// data structure with essential information required by the loader to
// determine the requirements of the application.
void TargetDataMIPS32::emitTargetRODataSections() {
5735 struct MipsABIFlagsSection Flags;
5736 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5737 const std::string Name = ".MIPS.abiflags";
5738 const llvm::ELF::Elf64_Word ShType = llvm::ELF::SHT_MIPS_ABIFLAGS;
5739 const llvm::ELF::Elf64_Xword ShFlags = llvm::ELF::SHF_ALLOC;
5740 const llvm::ELF::Elf64_Xword ShAddralign = 8;
5741 const llvm::ELF::Elf64_Xword ShEntsize = sizeof(Flags);
5742 Writer->writeTargetRODataSection(
5743 Name, ShType, ShFlags, ShAddralign, ShEntsize,
5744 llvm::StringRef(reinterpret_cast<const char *>(&Flags), sizeof(Flags)));
5745 }
5746
void TargetDataMIPS32::lowerGlobals(const VariableDeclarationList &Vars,
                                    const std::string &SectionSuffix) {
5749 const bool IsPIC = getFlags().getUseNonsfi();
5750 switch (getFlags().getOutFileType()) {
5751 case FT_Elf: {
5752 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5753 Writer->writeDataSection(Vars, llvm::ELF::R_MIPS_32, SectionSuffix, IsPIC);
5754 } break;
5755 case FT_Asm:
5756 case FT_Iasm: {
5757 OstreamLocker L(Ctx);
5758 for (const VariableDeclaration *Var : Vars) {
5759 if (getFlags().matchTranslateOnly(Var->getName(), 0)) {
5760 emitGlobal(*Var, SectionSuffix);
5761 }
5762 }
5763 } break;
5764 }
5765 }
5766
5767 namespace {
5768 template <typename T> struct ConstantPoolEmitterTraits;
5769
5770 static_assert(sizeof(uint64_t) == 8,
5771 "uint64_t is supposed to be 8 bytes wide.");
5772
5773 // TODO(jaydeep.patil): implement the following when implementing constant
5774 // randomization:
5775 // * template <> struct ConstantPoolEmitterTraits<uint8_t>
5776 // * template <> struct ConstantPoolEmitterTraits<uint16_t>
5777 // * template <> struct ConstantPoolEmitterTraits<uint32_t>
5778 template <> struct ConstantPoolEmitterTraits<float> {
5779 using ConstantType = ConstantFloat;
5780 static constexpr Type IceType = IceType_f32;
5781 // AsmTag and TypeName can't be constexpr because llvm::StringRef is unhappy
5782 // about them being constexpr.
5783 static const char AsmTag[];
5784 static const char TypeName[];
  static uint64_t bitcastToUint64(float Value) {
5786 static_assert(sizeof(Value) == sizeof(uint32_t),
5787 "Float should be 4 bytes.");
5788 const uint32_t IntValue = Utils::bitCopy<uint32_t>(Value);
5789 return static_cast<uint64_t>(IntValue);
5790 }
5791 };
5792 const char ConstantPoolEmitterTraits<float>::AsmTag[] = ".word";
5793 const char ConstantPoolEmitterTraits<float>::TypeName[] = "f32";
5794
5795 template <> struct ConstantPoolEmitterTraits<double> {
5796 using ConstantType = ConstantDouble;
5797 static constexpr Type IceType = IceType_f64;
5798 static const char AsmTag[];
5799 static const char TypeName[];
  static uint64_t bitcastToUint64(double Value) {
5801 static_assert(sizeof(double) == sizeof(uint64_t),
5802 "Double should be 8 bytes.");
5803 return Utils::bitCopy<uint64_t>(Value);
5804 }
5805 };
5806 const char ConstantPoolEmitterTraits<double>::AsmTag[] = ".quad";
5807 const char ConstantPoolEmitterTraits<double>::TypeName[] = "f64";
5808
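// For reference, a pooled f32 constant 1.0 would be emitted by
// emitConstant() along these lines (the label name is illustrative):
//   .L$f32$0:
//           .word   0x3f800000      /* f32 1 */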
5809 template <typename T>
void emitConstant(
    Ostream &Str,
    const typename ConstantPoolEmitterTraits<T>::ConstantType *Const) {
5813 if (!BuildDefs::dump())
5814 return;
5815 using Traits = ConstantPoolEmitterTraits<T>;
5816 Str << Const->getLabelName();
5817 T Value = Const->getValue();
5818 Str << ":\n\t" << Traits::AsmTag << "\t0x";
5819 Str.write_hex(Traits::bitcastToUint64(Value));
5820 Str << "\t/* " << Traits::TypeName << " " << Value << " */\n";
5821 }
5822
template <typename T> void emitConstantPool(GlobalContext *Ctx) {
5824 if (!BuildDefs::dump())
5825 return;
5826 using Traits = ConstantPoolEmitterTraits<T>;
5827 static constexpr size_t MinimumAlignment = 4;
5828 SizeT Align = std::max(MinimumAlignment, typeAlignInBytes(Traits::IceType));
5829 assert((Align % 4) == 0 && "Constants should be aligned");
5830 Ostream &Str = Ctx->getStrEmit();
5831 ConstantList Pool = Ctx->getConstantPool(Traits::IceType);
5832 Str << "\t.section\t.rodata.cst" << Align << ",\"aM\",%progbits," << Align
5833 << "\n"
5834 << "\t.align\t" << (Align == 4 ? 2 : 3) << "\n";
5835 if (getFlags().getReorderPooledConstants()) {
5836 // TODO(jaydeep.patil): add constant pooling.
5837 UnimplementedError(getFlags());
5838 }
5839 for (Constant *C : Pool) {
5840 if (!C->getShouldBePooled()) {
5841 continue;
5842 }
5843 emitConstant<T>(Str, llvm::dyn_cast<typename Traits::ConstantType>(C));
5844 }
5845 }
5846 } // end of anonymous namespace
5847
void TargetDataMIPS32::lowerConstants() {
5849 if (getFlags().getDisableTranslation())
5850 return;
5851 switch (getFlags().getOutFileType()) {
5852 case FT_Elf: {
5853 ELFObjectWriter *Writer = Ctx->getObjectWriter();
5854 Writer->writeConstantPool<ConstantFloat>(IceType_f32);
5855 Writer->writeConstantPool<ConstantDouble>(IceType_f64);
5856 } break;
5857 case FT_Asm:
5858 case FT_Iasm: {
5859 OstreamLocker _(Ctx);
5860 emitConstantPool<float>(Ctx);
5861 emitConstantPool<double>(Ctx);
5862 break;
5863 }
5864 }
5865 }
5866
void TargetDataMIPS32::lowerJumpTables() {
5868 if (getFlags().getDisableTranslation())
5869 return;
5870 }
5871
5872 // Helper for legalize() to emit the right code to lower an operand to a
5873 // register of the appropriate type.
Variable *TargetMIPS32::copyToReg(Operand *Src, RegNumT RegNum) {
5875 Type Ty = Src->getType();
5876 Variable *Reg = makeReg(Ty, RegNum);
5877 if (isVectorType(Ty)) {
5878 llvm::report_fatal_error("Invalid copy from vector type.");
5879 } else {
5880 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(Src)) {
5881 _lw(Reg, Mem);
5882 } else {
5883 _mov(Reg, Src);
5884 }
5885 }
5886 return Reg;
5887 }
5888
Operand *TargetMIPS32::legalize(Operand *From, LegalMask Allowed,
                                RegNumT RegNum) {
5891 Type Ty = From->getType();
  // Assert that a physical register is allowed. To date, all calls to
  // legalize() allow a physical register.
5895 assert(Allowed & Legal_Reg);
5896
5897 if (RegNum.hasNoValue()) {
5898 if (Variable *Subst = getContext().availabilityGet(From)) {
5899 // At this point we know there is a potential substitution available.
5900 if (!Subst->isRematerializable() && Subst->mustHaveReg() &&
5901 !Subst->hasReg()) {
5902 // At this point we know the substitution will have a register.
5903 if (From->getType() == Subst->getType()) {
5904 // At this point we know the substitution's register is compatible.
5905 return Subst;
5906 }
5907 }
5908 }
5909 }
5910
5911 // Go through the various types of operands:
5912 // OperandMIPS32Mem, Constant, and Variable.
5913 // Given the above assertion, if type of operand is not legal
5914 // (e.g., OperandMIPS32Mem and !Legal_Mem), we can always copy
5915 // to a register.
5916 if (auto *Mem = llvm::dyn_cast<OperandMIPS32Mem>(From)) {
5917 // Base must be in a physical register.
5918 Variable *Base = Mem->getBase();
5919 ConstantInteger32 *Offset = llvm::cast<ConstantInteger32>(Mem->getOffset());
5920 Variable *RegBase = nullptr;
5921 assert(Base);
5922
5923 RegBase = llvm::cast<Variable>(
5924 legalize(Base, Legal_Reg | Legal_Rematerializable));
5925
5926 if (Offset != nullptr && Offset->getValue() != 0) {
5927 static constexpr bool ZeroExt = false;
5928 if (!OperandMIPS32Mem::canHoldOffset(Ty, ZeroExt, Offset->getValue())) {
5929 llvm::report_fatal_error("Invalid memory offset.");
5930 }
5931 }
5932
5933 // Create a new operand if there was a change.
5934 if (Base != RegBase) {
5935 Mem = OperandMIPS32Mem::create(Func, Ty, RegBase, Offset,
5936 Mem->getAddrMode());
5937 }
5938
5939 if (Allowed & Legal_Mem) {
5940 From = Mem;
5941 } else {
5942 Variable *Reg = makeReg(Ty, RegNum);
5943 _lw(Reg, Mem);
5944 From = Reg;
5945 }
5946 return From;
5947 }
5948
5949 if (llvm::isa<Constant>(From)) {
5950 if (llvm::isa<ConstantUndef>(From)) {
5951 From = legalizeUndef(From, RegNum);
5952 if (isVectorType(Ty))
5953 return From;
5954 }
5955 if (auto *C = llvm::dyn_cast<ConstantRelocatable>(From)) {
5956 Variable *Reg = makeReg(Ty, RegNum);
5957 Variable *TReg = makeReg(Ty, RegNum);
5958 _lui(TReg, C, RO_Hi);
5959 _addiu(Reg, TReg, C, RO_Lo);
5960 return Reg;
5961 } else if (auto *C32 = llvm::dyn_cast<ConstantInteger32>(From)) {
5962 const uint32_t Value = C32->getValue();
      // Use addiu if the immediate fits in 16 bits. Otherwise load it using
      // a lui/ori instruction pair.
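      // E.g. materializing 0x12345678 (illustrative):
      //   lui reg, 0x1234
      //   ori reg, reg, 0x5678
      // while 0xABCD0000 needs only the lui.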
5965 Variable *Reg = makeReg(Ty, RegNum);
5966 if (isInt<16>(int32_t(Value))) {
5967 Variable *Zero = makeReg(Ty, RegMIPS32::Reg_ZERO);
5968 Context.insert<InstFakeDef>(Zero);
5969 _addiu(Reg, Zero, Value);
5970 } else {
5971 uint32_t UpperBits = (Value >> 16) & 0xFFFF;
5972 uint32_t LowerBits = Value & 0xFFFF;
5973 if (LowerBits) {
5974 Variable *TReg = makeReg(Ty, RegNum);
5975 _lui(TReg, Ctx->getConstantInt32(UpperBits));
5976 _ori(Reg, TReg, LowerBits);
5977 } else {
5978 _lui(Reg, Ctx->getConstantInt32(UpperBits));
5979 }
5980 }
5981 return Reg;
5982 } else if (isScalarFloatingType(Ty)) {
5983 auto *CFrom = llvm::cast<Constant>(From);
5984 Variable *TReg = makeReg(Ty);
5985 if (!CFrom->getShouldBePooled()) {
5986 // Float/Double constant 0 is not pooled.
5987 Context.insert<InstFakeDef>(TReg);
5988 _mov(TReg, getZero());
5989 } else {
5990 // Load floats/doubles from literal pool.
5991 Constant *Offset = Ctx->getConstantSym(0, CFrom->getLabelName());
5992 Variable *TReg1 = makeReg(getPointerType());
5993 _lui(TReg1, Offset, RO_Hi);
5994 OperandMIPS32Mem *Addr =
5995 OperandMIPS32Mem::create(Func, Ty, TReg1, Offset);
5996 if (Ty == IceType_f32)
5997 Sandboxer(this).lwc1(TReg, Addr, RO_Lo);
5998 else
5999 Sandboxer(this).ldc1(TReg, Addr, RO_Lo);
6000 }
6001 return copyToReg(TReg, RegNum);
6002 }
6003 }
6004
6005 if (auto *Var = llvm::dyn_cast<Variable>(From)) {
6006 if (Var->isRematerializable()) {
6007 if (Allowed & Legal_Rematerializable) {
6008 return From;
6009 }
6010
6011 Variable *T = makeReg(Var->getType(), RegNum);
6012 _mov(T, Var);
6013 return T;
6014 }
6015 // Check if the variable is guaranteed a physical register. This
6016 // can happen either when the variable is pre-colored or when it is
6017 // assigned infinite weight.
6018 bool MustHaveRegister = (Var->hasReg() || Var->mustHaveReg());
6019 // We need a new physical register for the operand if:
6020 // Mem is not allowed and Var isn't guaranteed a physical
6021 // register, or
6022 // RegNum is required and Var->getRegNum() doesn't match.
6023 if ((!(Allowed & Legal_Mem) && !MustHaveRegister) ||
6024 (RegNum.hasValue() && RegNum != Var->getRegNum())) {
6025 From = copyToReg(From, RegNum);
6026 }
6027 return From;
6028 }
6029 return From;
6030 }
6031
6032 namespace BoolFolding {
6033 // TODO(sagar.thakur): Add remaining instruction kinds to shouldTrackProducer()
6034 // and isValidConsumer()
bool shouldTrackProducer(const Inst &Instr) {
6036 return Instr.getKind() == Inst::Icmp;
6037 }
6038
bool isValidConsumer(const Inst &Instr) { return Instr.getKind() == Inst::Br; }
6040 } // end of namespace BoolFolding
6041
void TargetMIPS32::ComputationTracker::recordProducers(CfgNode *Node) {
6043 for (Inst &Instr : Node->getInsts()) {
6044 if (Instr.isDeleted())
6045 continue;
6046 // Check whether Instr is a valid producer.
6047 Variable *Dest = Instr.getDest();
6048 if (Dest // only consider instructions with an actual dest var; and
6049 && Dest->getType() == IceType_i1 // only bool-type dest vars; and
6050 && BoolFolding::shouldTrackProducer(Instr)) { // white-listed instr.
6051 KnownComputations.emplace(Dest->getIndex(),
6052 ComputationEntry(&Instr, IceType_i1));
6053 }
6054 // Check each src variable against the map.
6055 FOREACH_VAR_IN_INST(Var, Instr) {
6056 SizeT VarNum = Var->getIndex();
6057 auto ComputationIter = KnownComputations.find(VarNum);
6058 if (ComputationIter == KnownComputations.end()) {
6059 continue;
6060 }
6061
6062 ++ComputationIter->second.NumUses;
6063 switch (ComputationIter->second.ComputationType) {
6064 default:
6065 KnownComputations.erase(VarNum);
6066 continue;
6067 case IceType_i1:
6068 if (!BoolFolding::isValidConsumer(Instr)) {
6069 KnownComputations.erase(VarNum);
6070 continue;
6071 }
6072 break;
6073 }
6074
6075 if (Instr.isLastUse(Var)) {
6076 ComputationIter->second.IsLiveOut = false;
6077 }
6078 }
6079 }
6080
6081 for (auto Iter = KnownComputations.begin(), End = KnownComputations.end();
6082 Iter != End;) {
6083 // Disable the folding if its dest may be live beyond this block.
6084 if (Iter->second.IsLiveOut || Iter->second.NumUses > 1) {
6085 Iter = KnownComputations.erase(Iter);
6086 continue;
6087 }
6088
6089 // Mark as "dead" rather than outright deleting. This is so that other
6090 // peephole style optimizations during or before lowering have access to
6091 // this instruction in undeleted form. See for example
6092 // tryOptimizedCmpxchgCmpBr().
6093 Iter->second.Instr->setDead();
6094 ++Iter;
6095 }
6096 }
6097
TargetHeaderMIPS32::TargetHeaderMIPS32(GlobalContext *Ctx)
    : TargetHeaderLowering(Ctx) {}
6100
void TargetHeaderMIPS32::lower() {
6102 if (!BuildDefs::dump())
6103 return;
6104 OstreamLocker L(Ctx);
6105 Ostream &Str = Ctx->getStrEmit();
6106 Str << "\t.set\t"
6107 << "nomicromips\n";
6108 Str << "\t.set\t"
6109 << "nomips16\n";
6110 Str << "\t.set\t"
6111 << "noat\n";
6112 if (getFlags().getUseSandboxing())
6113 Str << "\t.bundle_align_mode 4\n";
6114 }
6115
6116 SmallBitVector TargetMIPS32::TypeToRegisterSet[RCMIPS32_NUM];
6117 SmallBitVector TargetMIPS32::TypeToRegisterSetUnfiltered[RCMIPS32_NUM];
6118 SmallBitVector TargetMIPS32::RegisterAliases[RegMIPS32::Reg_NUM];
6119
TargetMIPS32::Sandboxer::Sandboxer(TargetMIPS32 *Target,
                                   InstBundleLock::Option BundleOption)
    : Target(Target), BundleOption(BundleOption) {}
6123
TargetMIPS32::Sandboxer::~Sandboxer() {}
6125
void TargetMIPS32::Sandboxer::createAutoBundle() {
6127 Bundler = makeUnique<AutoBundle>(Target, BundleOption);
6128 }
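// The helpers below implement NaCl-style sandboxing of memory and control
// flow: the base address (or jump/return target) is and-ed with an address
// mask assumed to be maintained in t7 (t6 for code addresses), and the
// mask-and-use pair is wrapped in a bundle so the two cannot be separated.
// The InstFakeDef on the mask register keeps liveness analysis consistent,
// since that register is maintained outside the generated code.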
6129
void TargetMIPS32::Sandboxer::addiu_sp(uint32_t StackOffset) {
6131 Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
6132 if (!Target->NeedSandboxing) {
6133 Target->_addiu(SP, SP, StackOffset);
6134 return;
6135 }
6136 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6137 Target->Context.insert<InstFakeDef>(T7);
6138 createAutoBundle();
6139 Target->_addiu(SP, SP, StackOffset);
6140 Target->_and(SP, SP, T7);
6141 }
6142
void TargetMIPS32::Sandboxer::lw(Variable *Dest, OperandMIPS32Mem *Mem) {
6144 Variable *Base = Mem->getBase();
6145 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum()) &&
6146 (RegMIPS32::Reg_T8 != Base->getRegNum())) {
6147 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6148 Target->Context.insert<InstFakeDef>(T7);
6149 createAutoBundle();
6150 Target->_and(Base, Base, T7);
6151 }
6152 Target->_lw(Dest, Mem);
6153 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6154 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6155 Target->Context.insert<InstFakeDef>(T7);
6156 Target->_and(Dest, Dest, T7);
6157 }
6158 }
6159
void TargetMIPS32::Sandboxer::ll(Variable *Dest, OperandMIPS32Mem *Mem) {
6161 Variable *Base = Mem->getBase();
6162 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6163 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6164 Target->Context.insert<InstFakeDef>(T7);
6165 createAutoBundle();
6166 Target->_and(Base, Base, T7);
6167 }
6168 Target->_ll(Dest, Mem);
6169 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6170 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6171 Target->Context.insert<InstFakeDef>(T7);
6172 Target->_and(Dest, Dest, T7);
6173 }
6174 }
6175
void TargetMIPS32::Sandboxer::sc(Variable *Dest, OperandMIPS32Mem *Mem) {
6177 Variable *Base = Mem->getBase();
6178 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6179 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6180 Target->Context.insert<InstFakeDef>(T7);
6181 createAutoBundle();
6182 Target->_and(Base, Base, T7);
6183 }
6184 Target->_sc(Dest, Mem);
6185 }
6186
void TargetMIPS32::Sandboxer::sw(Variable *Dest, OperandMIPS32Mem *Mem) {
6188 Variable *Base = Mem->getBase();
6189 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6190 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6191 Target->Context.insert<InstFakeDef>(T7);
6192 createAutoBundle();
6193 Target->_and(Base, Base, T7);
6194 }
6195 Target->_sw(Dest, Mem);
6196 }
6197
void TargetMIPS32::Sandboxer::lwc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
6200 Variable *Base = Mem->getBase();
6201 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6202 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6203 Target->Context.insert<InstFakeDef>(T7);
6204 createAutoBundle();
6205 Target->_and(Base, Base, T7);
6206 }
6207 Target->_lwc1(Dest, Mem, Reloc);
6208 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6209 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6210 Target->Context.insert<InstFakeDef>(T7);
6211 Target->_and(Dest, Dest, T7);
6212 }
6213 }
6214
void TargetMIPS32::Sandboxer::ldc1(Variable *Dest, OperandMIPS32Mem *Mem,
                                   RelocOp Reloc) {
6217 Variable *Base = Mem->getBase();
6218 if (Target->NeedSandboxing && (Target->getStackReg() != Base->getRegNum())) {
6219 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6220 Target->Context.insert<InstFakeDef>(T7);
6221 createAutoBundle();
6222 Target->_and(Base, Base, T7);
6223 }
6224 Target->_ldc1(Dest, Mem, Reloc);
6225 if (Target->NeedSandboxing && (Dest->getRegNum() == Target->getStackReg())) {
6226 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6227 Target->Context.insert<InstFakeDef>(T7);
6228 Target->_and(Dest, Dest, T7);
6229 }
6230 }
6231
void TargetMIPS32::Sandboxer::ret(Variable *RetAddr, Variable *RetValue) {
  // Without sandboxing, emit the ret and return early, as the other
  // Sandboxer helpers do; falling through would emit a second ret.
  if (!Target->NeedSandboxing) {
    Target->_ret(RetAddr, RetValue);
    return;
  }
6236 auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6237 Target->Context.insert<InstFakeDef>(T6);
6238 createAutoBundle();
6239 Target->_and(RetAddr, RetAddr, T6);
6240 Target->_ret(RetAddr, RetValue);
6241 }
6242
void TargetMIPS32::Sandboxer::reset_sp(Variable *Src) {
6244 Variable *SP = Target->getPhysicalRegister(RegMIPS32::Reg_SP);
6245 if (!Target->NeedSandboxing) {
6246 Target->_mov(SP, Src);
6247 return;
6248 }
6249 auto *T7 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T7);
6250 Target->Context.insert<InstFakeDef>(T7);
6251 createAutoBundle();
6252 Target->_mov(SP, Src);
6253 Target->_and(SP, SP, T7);
6254 Target->getContext().insert<InstFakeUse>(SP);
6255 }
6256
InstMIPS32Call *TargetMIPS32::Sandboxer::jal(Variable *ReturnReg,
                                             Operand *CallTarget) {
6259 if (Target->NeedSandboxing) {
6260 createAutoBundle();
6261 if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
6262 auto *T6 = Target->makeReg(IceType_i32, RegMIPS32::Reg_T6);
6263 Target->Context.insert<InstFakeDef>(T6);
6264 Target->_and(CallTargetR, CallTargetR, T6);
6265 }
6266 }
6267 return Target->Context.insert<InstMIPS32Call>(ReturnReg, CallTarget);
6268 }
6269
6270 } // end of namespace MIPS32
6271 } // end of namespace Ice
6272