//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the PowerPC-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// PPCGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/PPCPredicates.h"
#include "PPC.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

//===----------------------------------------------------------------------===//
//
// TBD:
//   fastLowerArguments: Handle simple cases.
//   PPCMaterializeGV: Handle TLS.
//   SelectCall: Handle function pointers.
//   SelectCall: Handle multi-register return values.
//   SelectCall: Optimize away nops for local calls.
//   processCallArgs: Handle bit-converted arguments.
//   finishCall: Handle multi-register return values.
//   PPCComputeAddress: Handle parameter references as FrameIndex's.
//   PPCEmitCmp: Handle immediate as operand 1.
//   SelectCall: Handle small byval arguments.
//   SelectIntrinsicCall: Implement.
//   SelectSelect: Implement.
//   Consider factoring isTypeLegal into the base class.
//   Implement switches and jump tables.
//
//===----------------------------------------------------------------------===//
using namespace llvm;

#define DEBUG_TYPE "ppcfastisel"

namespace {

typedef struct Address {
  enum {
    RegBase,
    FrameIndexBase
  } BaseType;

  union {
    unsigned Reg;
    int FI;
  } Base;

  long Offset;

  // Innocuous defaults for our address.
  Address()
    : BaseType(RegBase), Offset(0) {
    Base.Reg = 0;
  }
} Address;

class PPCFastISel final : public FastISel {

  const TargetMachine &TM;
  const PPCSubtarget *PPCSubTarget;
  const PPCSubtarget *Subtarget;
  PPCFunctionInfo *PPCFuncInfo;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  LLVMContext *Context;

  public:
    explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
                         const TargetLibraryInfo *LibInfo)
        : FastISel(FuncInfo, LibInfo), TM(FuncInfo.MF->getTarget()),
          PPCSubTarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
          Subtarget(&FuncInfo.MF->getSubtarget<PPCSubtarget>()),
          PPCFuncInfo(FuncInfo.MF->getInfo<PPCFunctionInfo>()),
          TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()),
          Context(&FuncInfo.Fn->getContext()) {}

  // Backend specific FastISel code.
  private:
    bool fastSelectInstruction(const Instruction *I) override;
    unsigned fastMaterializeConstant(const Constant *C) override;
    unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
    bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                             const LoadInst *LI) override;
    bool fastLowerArguments() override;
    unsigned fastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
    unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             uint64_t Imm);
    unsigned fastEmitInst_r(unsigned MachineInstOpcode,
                            const TargetRegisterClass *RC,
                            unsigned Op0, bool Op0IsKill);
    unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
                             const TargetRegisterClass *RC,
                             unsigned Op0, bool Op0IsKill,
                             unsigned Op1, bool Op1IsKill);

    bool fastLowerCall(CallLoweringInfo &CLI) override;

  // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectIndirectBr(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectIToFP(const Instruction *I, bool IsSigned);
    bool SelectFPToI(const Instruction *I, bool IsSigned);
    bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

  // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool isValueAvailable(const Value *V) const;
    bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSFRCRegClassID;
    }
    bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
      return RC->getID() == PPC::VSSRCRegClassID;
    }
    unsigned copyRegToRegClass(const TargetRegisterClass *ToRC,
                               unsigned SrcReg, unsigned Flag = 0,
                               unsigned SubReg = 0) {
      unsigned TmpReg = createResultReg(ToRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), TmpReg).addReg(SrcReg, Flag, SubReg);
      return TmpReg;
    }
    bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
                    bool isZExt, unsigned DestReg,
                    const PPC::Predicate Pred);
    bool PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                     const TargetRegisterClass *RC, bool IsZExt = true,
                     unsigned FP64LoadOpc = PPC::LFD);
    bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
    bool PPCComputeAddress(const Value *Obj, Address &Addr);
    void PPCSimplifyAddress(Address &Addr, bool &UseOffset,
                            unsigned &IndexReg);
    bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                       unsigned DestReg, bool IsZExt);
    unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
    unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
    unsigned PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                               bool UseSExt = true);
    unsigned PPCMaterialize32BitInt(int64_t Imm,
                                    const TargetRegisterClass *RC);
    unsigned PPCMaterialize64BitInt(int64_t Imm,
                                    const TargetRegisterClass *RC);
    unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
                             unsigned SrcReg, bool IsSigned);
    unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);

  // Call handling routines.
  private:
    bool processCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes,
                         bool IsVarArg);
    bool finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes);

  private:
  #include "PPCGenFastISel.inc"

};

} // end anonymous namespace

static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
  // These are not representable with any single compare.
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_TRUE:
  // The major concern about the following six cases is the NaN result. The
  // comparison result consists of 4 bits, indicating lt, eq, gt and un
  // (unordered), only one of which will be set. The result is generated by
  // the fcmpu instruction. However, the bc instruction only inspects one of
  // the first 3 bits, so when un is set, bc may jump to an undesired place.
  //
  // More specifically, if we expect an unordered comparison and un is set, we
  // expect the true branch to always be taken; in that case UEQ, UGT and ULT
  // still give false, which is undesired; but UNE, UGE and ULE happen to give
  // true, since they are tested by inspecting !eq, !lt and !gt, respectively.
  //
  // Similarly, for an ordered comparison, when un is set we always expect the
  // result to be false. In that case OGT, OLT and OEQ are good, since they
  // actually test gt, lt and eq respectively, which are false. But OGE, OLE
  // and ONE are tested through !lt, !gt and !eq, and these are true.
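  //
  // For example, fcmpu on (NaN, 1.0) sets only the un bit. FCMP_UEQ should be
  // true for that input, but a bc testing the (clear) eq bit would take the
  // wrong branch, so UEQ cannot be handled with a single compare.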
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UGT:
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ONE:
  default:
    return Optional<PPC::Predicate>();

  case CmpInst::FCMP_OEQ:
  case CmpInst::ICMP_EQ:
    return PPC::PRED_EQ;

  case CmpInst::FCMP_OGT:
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_SGT:
    return PPC::PRED_GT;

  case CmpInst::FCMP_UGE:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_SGE:
    return PPC::PRED_GE;

  case CmpInst::FCMP_OLT:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_SLT:
    return PPC::PRED_LT;

  case CmpInst::FCMP_ULE:
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_SLE:
    return PPC::PRED_LE;

  case CmpInst::FCMP_UNE:
  case CmpInst::ICMP_NE:
    return PPC::PRED_NE;

  case CmpInst::FCMP_ORD:
    return PPC::PRED_NU;

  case CmpInst::FCMP_UNO:
    return PPC::PRED_UN;
  }
}

// Determine whether the type Ty is simple enough to be handled by
// fast-isel, and return its equivalent machine type in VT.
// FIXME: Copied directly from ARM -- factor into base class?
bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT Evt = TLI.getValueType(DL, Ty, true);

  // Only handle simple types.
  if (Evt == MVT::Other || !Evt.isSimple()) return false;
  VT = Evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

// Determine whether the type Ty is simple enough to be handled by
// fast-isel as a load target, and return its equivalent machine type in VT.
bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;
  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
    return true;
  }

  return false;
}

bool PPCFastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return PPCComputeAddress(U->getOperand(0), Addr);
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return PPCComputeAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return PPCComputeAddress(U->getOperand(0), Addr);
    break;
  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    long TmpOffset = Addr.Offset;

    // Iterate through the GEP folding the constants into offsets where
    // we can.
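    // For example, a GEP indexing a struct field that the layout places at
    // byte offset 8 simply adds 8 to TmpOffset here, instead of emitting
    // explicit address arithmetic.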
    gep_type_iterator GTI = gep_type_begin(U);
    for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
         II != IE; ++II, ++GTI) {
      const Value *Op = *II;
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        for (;;) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.Offset = TmpOffset;
    if (PPCComputeAddress(U->getOperand(0), Addr)) return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

    unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.BaseType = Address::FrameIndexBase;
      Addr.Base.FI = SI->second;
      return true;
    }
    break;
  }
  }

  // FIXME: References to parameters fall through to the behavior
  // below. They should be able to reference a frame index since
  // they are stored to the stack, so we can get "ld rx, offset(r1)"
  // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
  // just contain the parameter. Try to handle this with a FI.

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0)
    Addr.Base.Reg = getRegForValue(Obj);

  // Prevent assignment of base register to X0, which is inappropriate
  // for loads and stores alike.
  if (Addr.Base.Reg != 0)
    MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);

  return Addr.Base.Reg != 0;
}

// Fix up some addresses that can't be used directly. For example, if
// an offset won't fit in an instruction field, we may need to move it
// into an index register.
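// (E.g., an offset of 70000 does not fit the signed 16-bit D field of a
// D-form load/store, so it is materialized into a register and the X-form
// (indexed) instruction is used instead.)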
void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
                                     unsigned &IndexReg) {

  // Check whether the offset fits in the instruction field.
  if (!isInt<16>(Addr.Offset))
    UseOffset = false;

  // If the offset must be simplified and this is a frame-index address, put
  // the alloca address into a register, set the base type back to register,
  // and continue. This should almost never happen.
  if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
    unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
            ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  if (!UseOffset) {
    IntegerType *OffsetTy = Type::getInt64Ty(*Context);
    const ConstantInt *Offset =
      ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
    IndexReg = PPCMaterializeInt(Offset, MVT::i64);
    assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
  }
}

// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
// the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, Register &ResultReg, Address &Addr,
                              const TargetRegisterClass *RC,
                              bool IsZExt, unsigned FP64LoadOpc) {
  unsigned Opc;
  bool UseOffset = true;
  bool HasSPE = Subtarget->hasSPE();

  // If ResultReg is given, it determines the register class of the load.
  // Otherwise, RC is the register class to use. If the result of the
  // load isn't anticipated in this block, both may be zero, in which
  // case we must make a conservative guess. In particular, don't assign
  // R0 or X0 to the result register, as the result may be used in a load,
  // store, add-immediate, or isel that won't permit this. (Though
  // perhaps the spill and reload of live-exit values would handle this?)
  const TargetRegisterClass *UseRC =
    (ResultReg ? MRI.getRegClass(ResultReg) :
     (RC ? RC :
      (VT == MVT::f64 ? (HasSPE ? &PPC::SPERCRegClass : &PPC::F8RCRegClass) :
       (VT == MVT::f32 ? (HasSPE ? &PPC::GPRCRegClass : &PPC::F4RCRegClass) :
        (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
         &PPC::GPRC_and_GPRC_NOR0RegClass)))));

  bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
  default: // e.g., vector types not handled
    return false;
  case MVT::i8:
    Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
    break;
  case MVT::i16:
    Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
                  : (Is32BitInt ? PPC::LHA : PPC::LHA8));
    break;
  case MVT::i32:
    Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
                  : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
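    // LWA is a DS-form instruction; its displacement must be a multiple of 4,
    // so fall back to the indexed form for unaligned offsets.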
    if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
      UseOffset = false;
    break;
  case MVT::i64:
    Opc = PPC::LD;
    assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
           "64-bit load with 32-bit target??");
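    // LD is likewise a DS-form instruction, so the offset must be a
    // multiple of 4.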
    UseOffset = ((Addr.Offset & 3) == 0);
    break;
  case MVT::f32:
    Opc = Subtarget->hasSPE() ? PPC::SPELWZ : PPC::LFS;
    break;
  case MVT::f64:
    Opc = FP64LoadOpc;
    break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX load with an offset of 0, a VSX indexed load
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(UseRC);
  bool IsVSFRC = isVSFRCRegClass(UseRC);
  bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
  bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
  if ((Is32VSXLoad || Is64VSXLoad) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  if (ResultReg == 0)
    ResultReg = createResultReg(UseRC);

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed load.
    if (Is32VSXLoad || Is64VSXLoad) return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
    default: llvm_unreachable("Unexpected opcode!");
    case PPC::LBZ: Opc = PPC::LBZX; break;
    case PPC::LBZ8: Opc = PPC::LBZX8; break;
    case PPC::LHZ: Opc = PPC::LHZX; break;
    case PPC::LHZ8: Opc = PPC::LHZX8; break;
    case PPC::LHA: Opc = PPC::LHAX; break;
    case PPC::LHA8: Opc = PPC::LHAX8; break;
    case PPC::LWZ: Opc = PPC::LWZX; break;
    case PPC::LWZ8: Opc = PPC::LWZX8; break;
    case PPC::LWA: Opc = PPC::LWAX; break;
    case PPC::LWA_32: Opc = PPC::LWAX_32; break;
    case PPC::LD: Opc = PPC::LDX; break;
    case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
    case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
    case PPC::EVLDD: Opc = PPC::EVLDDX; break;
    case PPC::SPELWZ: Opc = PPC::SPELWZX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                       ResultReg);

    // If we have an index register defined, we use it in the load; otherwise
    // we use ZERO8 as the base, which makes the instruction use zero in the
    // effective-address computation regardless of the register's contents.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}

// Attempt to fast-select a load instruction.
bool PPCFastISel::SelectLoad(const Instruction *I) {
  // FIXME: No atomic loads are supported.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(I->getOperand(0), Addr))
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. This is necessary
  // to constrain RA from using R0/X0 when this is not legal.
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  Register ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, true,
                   Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
    return false;
  updateValueMap(I, ResultReg);
  return true;
}

// Emit a store instruction to store SrcReg at Addr.
bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
  assert(SrcReg && "Nothing to store!");
  unsigned Opc;
  bool UseOffset = true;

  const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
  bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  switch (VT.SimpleTy) {
  default: // e.g., vector types not handled
    return false;
  case MVT::i8:
    Opc = Is32BitInt ? PPC::STB : PPC::STB8;
    break;
  case MVT::i16:
    Opc = Is32BitInt ? PPC::STH : PPC::STH8;
    break;
  case MVT::i32:
    assert(Is32BitInt && "Not GPRC for i32??");
    Opc = PPC::STW;
    break;
  case MVT::i64:
    Opc = PPC::STD;
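    // STD is a DS-form instruction; its displacement must be a multiple of 4,
    // so fall back to the indexed form for unaligned offsets.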
    UseOffset = ((Addr.Offset & 3) == 0);
    break;
  case MVT::f32:
    Opc = Subtarget->hasSPE() ? PPC::SPESTW : PPC::STFS;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSPE() ? PPC::EVSTDD : PPC::STFD;
    break;
  }

  // If necessary, materialize the offset into a register and use
  // the indexed form. Also handle stack pointers with special needs.
  unsigned IndexReg = 0;
  PPCSimplifyAddress(Addr, UseOffset, IndexReg);

  // If this is a potential VSX store with an offset of 0, a VSX indexed store
  // can be used.
  bool IsVSSRC = isVSSRCRegClass(RC);
  bool IsVSFRC = isVSFRCRegClass(RC);
  bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
  bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
  if ((Is32VSXStore || Is64VSXStore) &&
      (Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
      (Addr.Offset == 0)) {
    UseOffset = false;
  }

  // Note: If we still have a frame index here, we know the offset is
  // in range, as otherwise PPCSimplifyAddress would have converted it
  // into a RegBase.
  if (Addr.BaseType == Address::FrameIndexBase) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore) return false;

    MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, Addr.Base.FI,
                                          Addr.Offset),
        MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
        MFI.getObjectAlign(Addr.Base.FI));

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg)
        .addImm(Addr.Offset)
        .addFrameIndex(Addr.Base.FI)
        .addMemOperand(MMO);

  // Base reg with offset in range.
  } else if (UseOffset) {
    // VSX only provides an indexed store.
    if (Is32VSXStore || Is64VSXStore)
      return false;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);

  // Indexed form.
  } else {
    // Get the RR opcode corresponding to the RI one. FIXME: It would be
    // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
    // is hard to get at.
    switch (Opc) {
    default: llvm_unreachable("Unexpected opcode!");
    case PPC::STB: Opc = PPC::STBX; break;
    case PPC::STH: Opc = PPC::STHX; break;
    case PPC::STW: Opc = PPC::STWX; break;
    case PPC::STB8: Opc = PPC::STBX8; break;
    case PPC::STH8: Opc = PPC::STHX8; break;
    case PPC::STW8: Opc = PPC::STWX8; break;
    case PPC::STD: Opc = PPC::STDX; break;
    case PPC::STFS: Opc = IsVSSRC ? PPC::STXSSPX : PPC::STFSX; break;
    case PPC::STFD: Opc = IsVSFRC ? PPC::STXSDX : PPC::STFDX; break;
    case PPC::EVSTDD: Opc = PPC::EVSTDDX; break;
    case PPC::SPESTW: Opc = PPC::SPESTWX; break;
    }

    auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(SrcReg);

    // If we have an index register defined, we use it in the store; otherwise
    // we use ZERO8 as the base, which makes the instruction use zero in the
    // effective-address computation regardless of the register's contents.
    if (IndexReg)
      MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
    else
      MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
  }

  return true;
}

// Attempt to fast-select a store instruction.
bool PPCFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // FIXME: No atomic stores are supported.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(Op0->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0)
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!PPCEmitStore(VT, SrcReg, Addr))
    return false;

  return true;
}

// Attempt to fast-select a branch instruction.
bool PPCFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *BrBB = FuncInfo.MBB;
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // For now, just try the simplest case where it's fed by a compare.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (isValueAvailable(CI)) {
      Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
      if (!OptPPCPred)
        return false;

      PPC::Predicate PPCPred = OptPPCPred.getValue();

      // Take advantage of fall-through opportunities.
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        PPCPred = PPC::InvertPredicate(PPCPred);
      }

      unsigned CondReg = createResultReg(&PPC::CRRCRegClass);

      if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
                      CondReg, PPCPred))
        return false;

      BuildMI(*BrBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCC))
          .addImm(Subtarget->hasSPE() ? PPC::PRED_SPE : PPCPred)
          .addReg(CondReg)
          .addMBB(TBB);
      finishCondBranch(BI->getParent(), TBB, FBB);
      return true;
    }
  } else if (const ConstantInt *CI =
                 dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    fastEmitBranch(Target, DbgLoc);
    return true;
  }

  // FIXME: ARM looks for a case where the block containing the compare
  // has been split from the block containing the branch. If this happens,
  // there is a vreg available containing the result of the compare. I'm
  // not sure we can do much, as we've lost the predicate information with
  // the compare instruction -- we have a 4-bit CR but don't know which bit
  // to test here.
  return false;
}

// Attempt to emit a compare of the two source values. Signed and unsigned
// comparisons are supported. Return false if we can't handle it.
bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
                             bool IsZExt, unsigned DestReg,
                             const PPC::Predicate Pred) {
  Type *Ty = SrcValue1->getType();
  EVT SrcEVT = TLI.getValueType(DL, Ty, true);
  if (!SrcEVT.isSimple())
    return false;
  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT == MVT::i1 && Subtarget->useCRBits())
    return false;

  // See if operand 2 is an immediate encodeable in the compare.
  // FIXME: Operands are not in canonical order at -O0, so an immediate
  // operand in position 1 is a lost opportunity for now. We are
  // similar to ARM in this regard.
  long Imm = 0;
  bool UseImm = false;
  const bool HasSPE = Subtarget->hasSPE();

  // Only 16-bit integer constants can be represented in compares for
  // PowerPC. Others will be materialized into a register.
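  // (E.g., cmpwi/cmpdi take a signed 16-bit immediate and cmplwi/cmpldi an
  // unsigned one; anything wider goes through PPCMaterializeInt.)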
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
    if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
        SrcVT == MVT::i8 || SrcVT == MVT::i1) {
      const APInt &CIVal = ConstInt->getValue();
      Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
      if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
        UseImm = true;
    }
  }

  unsigned SrcReg1 = getRegForValue(SrcValue1);
  if (SrcReg1 == 0)
    return false;

  unsigned SrcReg2 = 0;
  if (!UseImm) {
    SrcReg2 = getRegForValue(SrcValue2);
    if (SrcReg2 == 0)
      return false;
  }

  unsigned CmpOpc;
  bool NeedsExt = false;

  auto RC1 = MRI.getRegClass(SrcReg1);
  auto RC2 = SrcReg2 != 0 ? MRI.getRegClass(SrcReg2) : nullptr;

  switch (SrcVT.SimpleTy) {
  default: return false;
  case MVT::f32:
    if (HasSPE) {
      switch (Pred) {
      default: return false;
      case PPC::PRED_EQ:
        CmpOpc = PPC::EFSCMPEQ;
        break;
      case PPC::PRED_LT:
        CmpOpc = PPC::EFSCMPLT;
        break;
      case PPC::PRED_GT:
        CmpOpc = PPC::EFSCMPGT;
        break;
      }
    } else {
      CmpOpc = PPC::FCMPUS;
      if (isVSSRCRegClass(RC1))
        SrcReg1 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg1);
      if (RC2 && isVSSRCRegClass(RC2))
        SrcReg2 = copyRegToRegClass(&PPC::F4RCRegClass, SrcReg2);
    }
    break;
  case MVT::f64:
    if (HasSPE) {
      switch (Pred) {
      default: return false;
      case PPC::PRED_EQ:
        CmpOpc = PPC::EFDCMPEQ;
        break;
      case PPC::PRED_LT:
        CmpOpc = PPC::EFDCMPLT;
        break;
      case PPC::PRED_GT:
        CmpOpc = PPC::EFDCMPGT;
        break;
      }
    } else if (isVSFRCRegClass(RC1) || (RC2 && isVSFRCRegClass(RC2))) {
      CmpOpc = PPC::XSCMPUDP;
    } else {
      CmpOpc = PPC::FCMPUD;
    }
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    NeedsExt = true;
    LLVM_FALLTHROUGH;
  case MVT::i32:
    if (!UseImm)
      CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
    else
      CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
    break;
  case MVT::i64:
    if (!UseImm)
      CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
    else
      CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
    break;
  }

  if (NeedsExt) {
    unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
      return false;
    SrcReg1 = ExtReg;

    if (!UseImm) {
      unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
      if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
        return false;
      SrcReg2 = ExtReg;
    }
  }

  if (!UseImm)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addReg(SrcReg2);
  else
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc), DestReg)
      .addReg(SrcReg1).addImm(Imm);

  return true;
}

// Attempt to fast-select a floating-point extend instruction.
bool PPCFastISel::SelectFPExt(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::f32 || DestVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // No code is generated for a FP extend.
  updateValueMap(I, SrcReg);
  return true;
}

// Attempt to fast-select a floating-point truncate instruction.
bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::f64 || DestVT != MVT::f32)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // Round the result to single precision.
  unsigned DestReg;
  auto RC = MRI.getRegClass(SrcReg);
  if (Subtarget->hasSPE()) {
    DestReg = createResultReg(&PPC::GPRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(PPC::EFSCFD), DestReg)
        .addReg(SrcReg);
  } else if (isVSFRCRegClass(RC)) {
    DestReg = createResultReg(&PPC::VSSRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(PPC::XSRSP), DestReg)
        .addReg(SrcReg);
  } else {
    DestReg = createResultReg(&PPC::F4RCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(PPC::FRSP), DestReg)
        .addReg(SrcReg);
  }

  updateValueMap(I, DestReg);
  return true;
}

// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
// case to 8 bytes which produces tighter code but wastes stack space.
unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
                                     bool IsSigned) {

  // If necessary, extend 32-bit int to 64-bit.
  if (SrcVT == MVT::i32) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return 0;
    SrcReg = TmpReg;
  }

  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the GPR.
  if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
    return 0;

  // Load the integer value into an FPR. The kind of load used depends
  // on a number of conditions.
  unsigned LoadOpc = PPC::LFD;

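  // For i32 on big-endian targets, the value occupies the high-address word
  // of the 8-byte slot, so LFIWZX/LFIWAX must load from a 4-byte offset.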
  if (SrcVT == MVT::i32) {
    if (!IsSigned) {
      LoadOpc = PPC::LFIWZX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    } else if (Subtarget->hasLFIWAX()) {
      LoadOpc = PPC::LFIWAX;
      Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;
    }
  }

  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  Register ResultReg = 0;
  if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
    return 0;

  return ResultReg;
}

// Attempt to fast-select an integer-to-floating-point conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
  MVT DstVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::f32 && DstVT != MVT::f64)
    return false;

  Value *Src = I->getOperand(0);
  EVT SrcEVT = TLI.getValueType(DL, Src->getType(), true);
  if (!SrcEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();

  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i32 && SrcVT != MVT::i64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Shortcut for SPE: no store/load is needed, since everything stays in GPRs.
  if (Subtarget->hasSPE()) {
    unsigned Opc;
    if (DstVT == MVT::f32)
      Opc = IsSigned ? PPC::EFSCFSI : PPC::EFSCFUI;
    else
      Opc = IsSigned ? PPC::EFDCFSI : PPC::EFDCFUI;

    unsigned DestReg = createResultReg(&PPC::SPERCRegClass);
    // Generate the convert.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
        .addReg(SrcReg);
    updateValueMap(I, DestReg);
    return true;
  }

  // We can only lower an unsigned convert if we have the newer
  // floating-point conversion operations.
  if (!IsSigned && !Subtarget->hasFPCVT())
    return false;

  // FIXME: For now we require the newer floating-point conversion operations
  // (which are present only on P7 and A2 server models) when converting
  // to single-precision float. Otherwise we have to generate a lot of
  // fiddly code to avoid double rounding. If necessary, the fiddly code
  // can be found in PPCTargetLowering::LowerINT_TO_FP().
  if (DstVT == MVT::f32 && !Subtarget->hasFPCVT())
    return false;

  // Extend the input if necessary.
  if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
    unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
    if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
      return false;
    SrcVT = MVT::i64;
    SrcReg = TmpReg;
  }

  // Move the integer value to an FPR.
  unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
  if (FPReg == 0)
    return false;

  // Determine the opcode for the conversion.
  const TargetRegisterClass *RC = &PPC::F8RCRegClass;
  unsigned DestReg = createResultReg(RC);
  unsigned Opc;

  if (DstVT == MVT::f32)
    Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
  else
    Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(FPReg);

  updateValueMap(I, DestReg);
  return true;
}

// Move the floating-point value in SrcReg into an integer destination
// register, and return the register (or zero if we can't handle it).
// FIXME: When direct register moves are implemented (see PowerISA 2.07),
// those should be used instead of moving via a stack slot when the
// subtarget permits.
unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
                                      unsigned SrcReg, bool IsSigned) {
  // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
  // Note that if we have STFIWX available, we could use a 4-byte stack
  // slot for i32, but this being fast-isel we'll just go with the
  // easiest code gen possible.
  Address Addr;
  Addr.BaseType = Address::FrameIndexBase;
  Addr.Base.FI = MFI.CreateStackObject(8, Align(8), false);

  // Store the value from the FPR.
  if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
    return 0;

  // Reload it into a GPR. If we want an i32 on big endian, modify the
  // address to have a 4-byte offset so we load from the right place.
  if (VT == MVT::i32)
    Addr.Offset = (Subtarget->isLittleEndian()) ? 0 : 4;

  // Look at the currently assigned register for this instruction
  // to determine the required register class.
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;

  Register ResultReg = 0;
  if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
    return 0;

  return ResultReg;
}

// Attempt to fast-select a floating-point-to-integer conversion.
// FIXME: Once fast-isel has better support for VSX, conversions using
// direct moves should be implemented.
bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
  MVT DstVT, SrcVT;
  Type *DstTy = I->getType();
  if (!isTypeLegal(DstTy, DstVT))
    return false;

  if (DstVT != MVT::i32 && DstVT != MVT::i64)
    return false;

  // If we don't have FCTIDUZ, or SPE, and we need it, punt to SelectionDAG.
  if (DstVT == MVT::i64 && !IsSigned && !Subtarget->hasFPCVT() &&
      !Subtarget->hasSPE())
    return false;

  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();
  if (!isTypeLegal(SrcTy, SrcVT))
    return false;

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (SrcReg == 0)
    return false;

  // Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
  // meaningless copy to get the register class right.
  const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
  if (InRC == &PPC::F4RCRegClass)
    SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
  else if (InRC == &PPC::VSSRCRegClass)
    SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);

  // Determine the opcode for the conversion, which takes place
  // entirely within FPRs or VSRs.
  unsigned DestReg;
  unsigned Opc;
  auto RC = MRI.getRegClass(SrcReg);

  if (Subtarget->hasSPE()) {
    DestReg = createResultReg(&PPC::GPRCRegClass);
    if (IsSigned)
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
    else
      Opc = InRC == &PPC::GPRCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
  } else if (isVSFRCRegClass(RC)) {
    DestReg = createResultReg(&PPC::VSFRCRegClass);
    if (DstVT == MVT::i32)
      Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
    else
      Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
  } else {
    DestReg = createResultReg(&PPC::F8RCRegClass);
    if (DstVT == MVT::i32)
      if (IsSigned)
        Opc = PPC::FCTIWZ;
      else
        Opc = Subtarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
    else
      Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
  }

  // Generate the convert.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    .addReg(SrcReg);

  // Now move the integer value from a float register to an integer register.
  unsigned IntReg = Subtarget->hasSPE()
                        ? DestReg
                        : PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);

  if (IntReg == 0)
    return false;

  updateValueMap(I, IntReg);
  return true;
}

// Attempt to fast-select a binary integer operation that isn't already
// handled automatically.
bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  // We can get here in the case when we have a binary operation on a non-legal
  // type and the target independent selector doesn't know how to handle it.
  if (DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  // Look at the currently assigned register for this instruction
  // to determine the required register class. If there is no register,
  // make a conservative choice (don't assign R0).
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
    (AssignedReg ? MRI.getRegClass(AssignedReg) :
     &PPC::GPRC_and_GPRC_NOR0RegClass);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

  unsigned Opc;
  switch (ISDOpcode) {
  default: return false;
  case ISD::ADD:
    Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
    break;
  case ISD::OR:
    Opc = IsGPRC ? PPC::OR : PPC::OR8;
    break;
  case ISD::SUB:
    Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
    break;
  }

  unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
  unsigned SrcReg1 = getRegForValue(I->getOperand(0));
  if (SrcReg1 == 0) return false;

  // Handle case of small immediate operand.
  if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
    const APInt &CIVal = ConstInt->getValue();
    int Imm = (int)CIVal.getSExtValue();
    bool UseImm = true;
    if (isInt<16>(Imm)) {
      switch (Opc) {
      default:
        llvm_unreachable("Missing case!");
      case PPC::ADD4:
        Opc = PPC::ADDI;
        MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
        break;
      case PPC::ADD8:
        Opc = PPC::ADDI8;
        MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
        break;
      case PPC::OR:
        Opc = PPC::ORI;
        break;
      case PPC::OR8:
        Opc = PPC::ORI8;
        break;
      case PPC::SUBF:
        if (Imm == -32768)
          UseImm = false;
        else {
          Opc = PPC::ADDI;
          MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
          Imm = -Imm;
        }
        break;
      case PPC::SUBF8:
        if (Imm == -32768)
          UseImm = false;
        else {
          Opc = PPC::ADDI8;
          MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
          Imm = -Imm;
        }
        break;
      }

      if (UseImm) {
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
                ResultReg)
            .addReg(SrcReg1)
            .addImm(Imm);
        updateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  // Reg-reg case.
  unsigned SrcReg2 = getRegForValue(I->getOperand(1));
  if (SrcReg2 == 0) return false;

  // Reverse operands for subtract-from.
  if (ISDOpcode == ISD::SUB)
    std::swap(SrcReg1, SrcReg2);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
    .addReg(SrcReg1).addReg(SrcReg2);
  updateValueMap(I, ResultReg);
  return true;
}

// Handle arguments to a call that we're attempting to fast-select.
// Return false if the arguments are too complex for us at the moment.
bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes,
                                  bool IsVarArg) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, *Context);

  // Reserve space for the linkage area on the stack.
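  // (This is 48 bytes under the 64-bit ELFv1 ABI and 32 bytes under ELFv2.)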
  unsigned LinkageSize = Subtarget->getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(8));

  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);

  // Bail out if we can't handle any of the arguments.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Skip vector arguments for now, as well as long double and
    // uint128_t, and anything that isn't passed in a register.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 || ArgVT == MVT::i1 ||
        !VA.isRegLoc() || VA.needsCustom())
      return false;

    // Skip bit-converted arguments for now.
    if (VA.getLocInfo() == CCValAssign::BCvt)
      return false;
  }

  // Get a count of how many bytes are to be pushed onto the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // is varargs. Because we cannot tell whether this is needed on the caller
  // side, we have to conservatively assume that it is needed. As such, make
  // sure we have at least enough stack space for the callee to store the
  // 8 GPRs.
  // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
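  // (8 GPRs x 8 bytes each = 64 bytes of parameter save area.)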
  NumBytes = std::max(NumBytes, LinkageSize + 64);

  // Issue CALLSEQ_START.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TII.getCallFrameSetupOpcode()))
    .addImm(NumBytes).addImm(0);

  // Prepare to assign register arguments. Every argument uses up a
  // GPR protocol register even if it's passed in a floating-point
  // register (unless we're using the fast calling convention).
  unsigned NextGPR = PPC::X3;
  unsigned NextFPR = PPC::F1;

  // Process arguments.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle argument promotion and bitcasts.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt: {
      MVT DestVT = VA.getLocVT();
      const TargetRegisterClass *RC =
        (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
      unsigned TmpReg = createResultReg(RC);
      if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
        llvm_unreachable("Failed to emit a sext!");
      ArgVT = DestVT;
      Arg = TmpReg;
      break;
    }
    case CCValAssign::AExt:
    case CCValAssign::ZExt: {
      MVT DestVT = VA.getLocVT();
      const TargetRegisterClass *RC =
        (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
      unsigned TmpReg = createResultReg(RC);
      if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
        llvm_unreachable("Failed to emit a zext!");
      ArgVT = DestVT;
      Arg = TmpReg;
      break;
    }
    case CCValAssign::BCvt: {
      // FIXME: Not yet handled.
      llvm_unreachable("Should have bailed before getting here!");
      break;
    }
    }

    // Copy this argument to the appropriate register.
    unsigned ArgReg;
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
      ArgReg = NextFPR++;
      if (CC != CallingConv::Fast)
        ++NextGPR;
    } else
      ArgReg = NextGPR++;

    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ArgReg).addReg(Arg);
    RegArgs.push_back(ArgReg);
  }

  return true;
}

// For a call that we've determined we can fast-select, finish the
// call sequence and generate a copy to obtain the return value (if any).
bool PPCFastISel::finishCall(MVT RetVT, CallLoweringInfo &CLI, unsigned &NumBytes) {
  CallingConv::ID CC = CLI.CallConv;

  // Issue CALLSEQ_END.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
          TII.get(TII.getCallFrameDestroyOpcode()))
    .addImm(NumBytes).addImm(0);

  // Next, generate a copy to obtain the return value.
  // FIXME: No multi-register return values yet, though I don't foresee
  // any real difficulties there.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
    CCValAssign &VA = RVLocs[0];
    assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
    assert(VA.isRegLoc() && "Can only return in registers!");

    MVT DestVT = VA.getValVT();
    MVT CopyVT = DestVT;

    // Ints smaller than a register still arrive in a full 64-bit
    // register, so make sure we recognize this.
    if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
      CopyVT = MVT::i64;

    unsigned SourcePhysReg = VA.getLocReg();
    unsigned ResultReg = 0;

    if (RetVT == CopyVT) {
      const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
      ResultReg = copyRegToRegClass(CpyRC, SourcePhysReg);

    // If necessary, round the floating result to single precision.
    } else if (CopyVT == MVT::f64) {
      ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::FRSP),
              ResultReg).addReg(SourcePhysReg);

    // If only the low half of a general register is needed, generate
    // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
    // used along the fast-isel path (not lowered), and downstream logic
    // also doesn't like a direct subreg copy on a physical reg.)
    } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
      // Convert physical register from G8RC to GPRC.
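      // This relies on the X (64-bit) and R (32-bit) registers being laid out
      // in parallel in the register enum, so subtracting the distance between
      // X0 and R0 maps Xn to its 32-bit counterpart Rn.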
      SourcePhysReg -= PPC::X0 - PPC::R0;
      ResultReg = copyRegToRegClass(&PPC::GPRCRegClass, SourcePhysReg);
    }

    assert(ResultReg && "ResultReg unset!");
    CLI.InRegs.push_back(SourcePhysReg);
    CLI.ResultReg = ResultReg;
    CLI.NumResultRegs = 1;
  }

  return true;
}

bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
  CallingConv::ID CC = CLI.CallConv;
  bool IsTailCall = CLI.IsTailCall;
  bool IsVarArg = CLI.IsVarArg;
  const Value *Callee = CLI.Callee;
  const MCSymbol *Symbol = CLI.Symbol;

  if (!Callee && !Symbol)
    return false;

  // Allow SelectionDAG isel to handle tail calls.
  if (IsTailCall)
    return false;

  // Let SDISel handle vararg functions.
  if (IsVarArg)
    return false;

  // Handle simple calls for now, with legal return types and
  // those that can be extended.
  Type *RetTy = CLI.RetTy;
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
           RetVT != MVT::i8)
    return false;
  else if (RetVT == MVT::i1 && Subtarget->useCRBits())
    // We can't handle boolean returns when CR bits are in use.
    return false;

  // FIXME: No multi-register return values yet.
  if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
      RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
      RetVT != MVT::f64) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
    if (RVLocs.size() > 1)
      return false;
  }

  // Bail early if more than 8 arguments, as we only currently
  // handle arguments passed in registers.
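  // (The 64-bit ELF ABIs pass the first eight integer arguments in
  // registers r3..r10; anything beyond that would need stack stores.)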
  unsigned NumArgs = CLI.OutVals.size();
  if (NumArgs > 8)
    return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;

  Args.reserve(NumArgs);
  ArgRegs.reserve(NumArgs);
  ArgVTs.reserve(NumArgs);
  ArgFlags.reserve(NumArgs);

  for (unsigned i = 0, ie = NumArgs; i != ie; ++i) {
    // Only handle easy calls for now. It would be reasonably easy
    // to handle <= 8-byte structures passed ByVal in registers, but we
    // have to ensure they are right-justified in the register.
    ISD::ArgFlagsTy Flags = CLI.OutFlags[i];
    if (Flags.isInReg() || Flags.isSRet() || Flags.isNest() || Flags.isByVal())
      return false;

    Value *ArgValue = CLI.OutVals[i];
    Type *ArgTy = ArgValue->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
      return false;

    if (ArgVT.isVector())
      return false;

    unsigned Arg = getRegForValue(ArgValue);
    if (Arg == 0)
      return false;

    Args.push_back(ArgValue);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Process the arguments.
  SmallVector<unsigned, 8> RegArgs;
  unsigned NumBytes;

  if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
                       RegArgs, CC, NumBytes, IsVarArg))
    return false;

  MachineInstrBuilder MIB;
  // FIXME: No handling for function pointers yet. This requires
  // implementing the function descriptor (OPD) setup.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV) {
    // Patchpoints are a special case; they always dispatch to a pointer value.
    // However, we don't actually want to generate the indirect call sequence
    // here (that will be generated, as necessary, during asm printing), and
    // the call we generate here will be erased by FastISel::selectPatchpoint,
    // so don't try very hard...
    if (CLI.IsPatchPoint)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                    TII.get(PPC::NOP));
    else
      return false;
  } else {
    // Build direct call with NOP for TOC restore.
    // FIXME: We can and should optimize away the NOP for local calls.
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                  TII.get(PPC::BL8_NOP));
    // Add callee.
    MIB.addGlobalAddress(GV);
  }

  // Add implicit physical register uses to the call.
  for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
    MIB.addReg(RegArgs[II], RegState::Implicit);

  // Direct calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  PPCFuncInfo->setUsesTOCBasePtr();
  MIB.addReg(PPC::X2, RegState::Implicit);

  // Add a register mask with the call-preserved registers. Proper
  // defs for return values will be added by setPhysRegsDeadExcept().
  MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));

  CLI.Call = MIB;

  // Finish off the call including any return values.
  return finishCall(RetVT, CLI, NumBytes);
}

// Attempt to fast-select a return instruction.
bool PPCFastISel::SelectRet(const Instruction *I) {

  if (!FuncInfo.CanLowerReturn)
    return false;

  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;
  CallingConv::ID CC = F.getCallingConv();

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
    CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
    const Value *RV = Ret->getOperand(0);

    // FIXME: Only one output register for now.
    if (ValLocs.size() > 1)
      return false;

    // Special case for returning a constant integer of any size - materialize
    // the constant as an i64 and copy it to the return register.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RV)) {
      CCValAssign &VA = ValLocs[0];

      Register RetReg = VA.getLocReg();
      // We still need to worry about properly extending the sign. For example,
      // we could have only a single bit or a constant that needs zero
      // extension rather than sign extension. Make sure we pass the return
      // value extension property to integer materialization.
      unsigned SrcReg =
          PPCMaterializeInt(CI, MVT::i64, VA.getLocInfo() != CCValAssign::ZExt);

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), RetReg).addReg(SrcReg);

      RetRegs.push_back(RetReg);

    } else {
      unsigned Reg = getRegForValue(RV);

      if (Reg == 0)
        return false;

      // Copy the result values into the output registers.
      for (unsigned i = 0; i < ValLocs.size(); ++i) {

        CCValAssign &VA = ValLocs[i];
        assert(VA.isRegLoc() && "Can only return in registers!");
        RetRegs.push_back(VA.getLocReg());
        unsigned SrcReg = Reg + VA.getValNo();

        EVT RVEVT = TLI.getValueType(DL, RV->getType());
        if (!RVEVT.isSimple())
          return false;
        MVT RVVT = RVEVT.getSimpleVT();
        MVT DestVT = VA.getLocVT();

        if (RVVT != DestVT && RVVT != MVT::i8 &&
            RVVT != MVT::i16 && RVVT != MVT::i32)
          return false;

        if (RVVT != DestVT) {
          switch (VA.getLocInfo()) {
          default:
            llvm_unreachable("Unknown loc info!");
          case CCValAssign::Full:
            llvm_unreachable("Full value assign but types don't match?");
          case CCValAssign::AExt:
          case CCValAssign::ZExt: {
            const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
            unsigned TmpReg = createResultReg(RC);
            if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
              return false;
            SrcReg = TmpReg;
            break;
          }
          case CCValAssign::SExt: {
            const TargetRegisterClass *RC =
                (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
            unsigned TmpReg = createResultReg(RC);
            if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
              return false;
            SrcReg = TmpReg;
            break;
          }
          }
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(TargetOpcode::COPY), RetRegs[i])
            .addReg(SrcReg);
      }
    }
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(PPC::BLR8));

  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);

  return true;
}

// Attempt to emit an integer extend of SrcReg into DestReg. Both
// signed and zero extensions are supported. Return false if we
// can't handle it.
bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                unsigned DestReg, bool IsZExt) {
  if (DestVT != MVT::i32 && DestVT != MVT::i64)
    return false;
  if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
    return false;

  // Signed extensions use EXTSB, EXTSH, EXTSW.
  if (!IsZExt) {
    unsigned Opc;
    if (SrcVT == MVT::i8)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
    else if (SrcVT == MVT::i16)
      Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
    else {
      assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
      Opc = PPC::EXTSW_32_64;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
        .addReg(SrcReg);

  // Unsigned 32-bit extensions use RLWINM.
  } else if (DestVT == MVT::i32) {
    unsigned MB;
    if (SrcVT == MVT::i8)
      MB = 24;
    else {
      assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
      MB = 16;
    }
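    // rlwinm with SH=0 and ME=31 keeps bits MB..31 (big-endian bit
    // numbering), i.e. the low 32-MB bits: MB=24 preserves the low byte,
    // MB=16 the low halfword.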
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLWINM),
            DestReg)
        .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);

  // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
  } else {
    unsigned MB;
    if (SrcVT == MVT::i8)
      MB = 56;
    else if (SrcVT == MVT::i16)
      MB = 48;
    else
      MB = 32;
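    // rldicl with SH=0 clears the high MB bits and keeps the low 64-MB
    // bits, so MB=56/48/32 zero-extends a byte/halfword/word.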
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(PPC::RLDICL_32_64), DestReg)
        .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
  }

  return true;
}

// Attempt to fast-select an indirect branch instruction.
bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
  unsigned AddrReg = getRegForValue(I->getOperand(0));
  if (AddrReg == 0)
    return false;

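  // Move the target address into the count register and branch through it.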
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::MTCTR8))
      .addReg(AddrReg);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::BCTR8));

  const IndirectBrInst *IB = cast<IndirectBrInst>(I);
  for (const BasicBlock *SuccBB : IB->successors())
    FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[SuccBB]);

  return true;
}

// Attempt to fast-select an integer truncate instruction.
bool PPCFastISel::SelectTrunc(const Instruction *I) {
  Value *Src = I->getOperand(0);
  EVT SrcVT = TLI.getValueType(DL, Src->getType(), true);
  EVT DestVT = TLI.getValueType(DL, I->getType(), true);

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
    return false;

  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg)
    return false;

  // The only interesting case is when we need to switch register classes.
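  // Truncation itself needs no instruction: the truncated value already
  // lives in the low bits. An i64 source just moves to the 32-bit class
  // via its sub_32 subregister.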
  if (SrcVT == MVT::i64)
    SrcReg = copyRegToRegClass(&PPC::GPRCRegClass, SrcReg, 0, PPC::sub_32);

  updateValueMap(I, SrcReg);
  return true;
}

// Attempt to fast-select an integer extend instruction.
bool PPCFastISel::SelectIntExt(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Src = I->getOperand(0);
  Type *SrcTy = Src->getType();

  bool IsZExt = isa<ZExtInst>(I);
  unsigned SrcReg = getRegForValue(Src);
  if (!SrcReg) return false;

  EVT SrcEVT, DestEVT;
  SrcEVT = TLI.getValueType(DL, SrcTy, true);
  DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  // If we know the register class needed for the result of this
  // instruction, use it. Otherwise pick the register class of the
  // correct size that does not contain X0/R0, since we don't know
  // whether downstream uses permit that assignment.
  unsigned AssignedReg = FuncInfo.ValueMap[I];
  const TargetRegisterClass *RC =
      (AssignedReg ? MRI.getRegClass(AssignedReg) :
       (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
        &PPC::GPRC_and_GPRC_NOR0RegClass));
  unsigned ResultReg = createResultReg(RC);

  if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

// Attempt to fast-select an instruction that wasn't handled by
// the table-generated machinery.
bool PPCFastISel::fastSelectInstruction(const Instruction *I) {

  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::IndirectBr:
      return SelectIndirectBr(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectIToFP(I, /*IsSigned*/ true);
    case Instruction::UIToFP:
      return SelectIToFP(I, /*IsSigned*/ false);
    case Instruction::FPToSI:
      return SelectFPToI(I, /*IsSigned*/ true);
    case Instruction::FPToUI:
      return SelectFPToI(I, /*IsSigned*/ false);
    case Instruction::Add:
      return SelectBinaryIntOp(I, ISD::ADD);
    case Instruction::Or:
      return SelectBinaryIntOp(I, ISD::OR);
    case Instruction::Sub:
      return SelectBinaryIntOp(I, ISD::SUB);
    case Instruction::Call:
      // On AIX, call lowering uses the DAG-ISEL path currently so that the
      // callee of the direct function call instruction will be mapped to the
      // symbol for the function's entry point, which is distinct from the
      // function descriptor symbol. The latter is the symbol whose XCOFF
      // symbol name is the C-linkage name of the source level function.
      if (TM.getTargetTriple().isOSAIX())
        break;
      return selectCall(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    // Here add other flavors of Instruction::XXX that automated
    // cases don't catch. For example, switches are terminators
    // that aren't yet handled.
    default:
      break;
  }
  return false;
}

// Materialize a floating-point constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
  // No plans to handle long double here.
  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  // All FP constants are loaded from the constant pool.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  const bool HasSPE = Subtarget->hasSPE();
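  // SPE targets keep f32 values in GPRs and f64 values in 64-bit SPE
  // registers, so both the register class and the load opcode differ
  // from the classic FPR path below.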
  const TargetRegisterClass *RC;
  if (HasSPE)
    RC = ((VT == MVT::f32) ? &PPC::GPRCRegClass : &PPC::SPERCRegClass);
  else
    RC = ((VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass);

  unsigned DestReg = createResultReg(RC);
  CodeModel::Model CModel = TM.getCodeModel();

  MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
      MachinePointerInfo::getConstantPool(*FuncInfo.MF),
      MachineMemOperand::MOLoad, (VT == MVT::f32) ? 4 : 8, Alignment);

  unsigned Opc;

  if (HasSPE)
    Opc = ((VT == MVT::f32) ? PPC::SPELWZ : PPC::EVLDD);
  else
    Opc = ((VT == MVT::f32) ? PPC::LFS : PPC::LFD);

  unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);

  PPCFuncInfo->setUsesTOCBasePtr();
  // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
  if (CModel == CodeModel::Small) {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocCPT),
            TmpReg)
        .addConstantPoolIndex(Idx).addReg(PPC::X2);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
        .addImm(0).addReg(TmpReg).addMemOperand(MMO);
  } else {
    // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA8(X2, Idx)).
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
            TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
    // But for large code model, we must generate a LDtocL followed
    // by the LF[SD].
    if (CModel == CodeModel::Large) {
      unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
              TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
          .addImm(0)
          .addReg(TmpReg2)
          .addMemOperand(MMO);
    } else
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
          .addReg(TmpReg)
          .addMemOperand(MMO);
  }

  return DestReg;
}

// Materialize the address of a global value into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
  assert(VT == MVT::i64 && "Non-address!");
  const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
  unsigned DestReg = createResultReg(RC);

  // Global values may be plain old object addresses, TLS object
  // addresses, constant pool entries, or jump tables. How we generate
  // code for these may depend on small, medium, or large code model.
  CodeModel::Model CModel = TM.getCodeModel();

  // FIXME: Jump tables are not yet required because fast-isel doesn't
  // handle switches; if that changes, we need them as well. For now,
  // what follows assumes everything's a generic (or TLS) global address.

  // FIXME: We don't yet handle the complexity of TLS.
  if (GV->isThreadLocal())
    return 0;

  PPCFuncInfo->setUsesTOCBasePtr();
  // For small code model, generate a simple TOC load.
  if (CModel == CodeModel::Small)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtoc),
            DestReg)
        .addGlobalAddress(GV)
        .addReg(PPC::X2);
  else {
    // If the address is an externally defined symbol, a symbol with common
    // or externally available linkage, a non-local function address, or a
    // jump table address (not yet needed), or if we are generating code
    // for large code model, we generate:
    //   LDtocL(GV, ADDIStocHA8(%x2, GV))
    // Otherwise we generate:
    //   ADDItocL(ADDIStocHA8(%x2, GV), GV)
    // Either way, start with the ADDIStocHA8:
    unsigned HighPartReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA8),
            HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);

    if (Subtarget->isGVIndirectSymbol(GV)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
              DestReg).addGlobalAddress(GV).addReg(HighPartReg);
    } else {
      // Otherwise generate the ADDItocL.
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDItocL),
              DestReg).addReg(HighPartReg).addGlobalAddress(GV);
    }
  }

  return DestReg;
}

// Materialize a 32-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Lo = Imm & 0xFFFF;
  unsigned Hi = (Imm >> 16) & 0xFFFF;

  unsigned ResultReg = createResultReg(RC);
  bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);

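  // For example, 0x12345678 doesn't fit in 16 bits, so it is built as
  // LIS 0x1234 (load-immediate-shifted into the high halfword) followed
  // by ORI 0x5678 to fill in the low halfword.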
  if (isInt<16>(Imm))
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
        .addImm(Imm);
  else if (Lo) {
    // Both Lo and Hi have nonzero bits.
    unsigned TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
        .addImm(Hi);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
        .addReg(TmpReg).addImm(Lo);
  } else
    // Just Hi bits.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
        .addImm(Hi);

  return ResultReg;
}

// Materialize a 64-bit integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
                                             const TargetRegisterClass *RC) {
  unsigned Remainder = 0;
  unsigned Shift = 0;

  // If the value doesn't fit in 32 bits, see if we can shift it
  // so that it fits in 32 bits.
  if (!isInt<32>(Imm)) {
    Shift = countTrailingZeros<uint64_t>(Imm);
    int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;

    if (isInt<32>(ImmSh))
      Imm = ImmSh;
    else {
      Remainder = Imm;
      Shift = 32;
      Imm >>= 32;
    }
  }
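  // At this point Imm holds the (possibly shifted) high part and
  // Remainder any low 32 bits still to be OR'd in. The worst case takes
  // five instructions: LIS8, ORI8, RLDICR, ORIS8, ORI8.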

  // Handle the high-order 32 bits (if shifted) or the whole 32 bits
  // (if not shifted).
  unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
  if (!Shift)
    return TmpReg1;

  // If upper 32 bits were not zero, we've built them and need to shift
  // them into place.
  unsigned TmpReg2;
  if (Imm) {
    TmpReg2 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::RLDICR),
            TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
  } else
    TmpReg2 = TmpReg1;

  unsigned TmpReg3, Hi, Lo;
  if ((Hi = (Remainder >> 16) & 0xFFFF)) {
    TmpReg3 = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORIS8),
            TmpReg3).addReg(TmpReg2).addImm(Hi);
  } else
    TmpReg3 = TmpReg2;

  if ((Lo = Remainder & 0xFFFF)) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ORI8),
            ResultReg).addReg(TmpReg3).addImm(Lo);
    return ResultReg;
  }

  return TmpReg3;
}

// Materialize an integer constant into a register, and return
// the register number (or zero if we failed to handle it).
unsigned PPCFastISel::PPCMaterializeInt(const ConstantInt *CI, MVT VT,
                                        bool UseSExt) {
  // If we're using CR bit registers for i1 values, handle that as a special
  // case first.
  if (VT == MVT::i1 && Subtarget->useCRBits()) {
    unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(CI->isZero() ? PPC::CRUNSET : PPC::CRSET), ImmReg);
    return ImmReg;
  }

  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
      VT != MVT::i1)
    return 0;

  const TargetRegisterClass *RC =
      ((VT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass);
  int64_t Imm = UseSExt ? CI->getSExtValue() : CI->getZExtValue();

  // If the constant is in range, use a load-immediate.
  // Since LI will sign extend the constant we need to make sure that for
  // our zeroext constants the sign-extended constant fits into 16 bits -
  // a range of 0..0x7fff.
  if (isInt<16>(Imm)) {
    unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
    unsigned ImmReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ImmReg)
        .addImm(Imm);
    return ImmReg;
  }

  // Construct the constant piecewise.
  if (VT == MVT::i64)
    return PPCMaterialize64BitInt(Imm, RC);
  else if (VT == MVT::i32)
    return PPCMaterialize32BitInt(Imm, RC);

  return 0;
}

// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple()) return 0;
  MVT VT = CEVT.getSimpleVT();

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return PPCMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return PPCMaterializeGV(GV, VT);
  else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
    // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
    // assumes that constant PHI operands will be zero extended, and failure to
    // match that assumption will cause problems if we sign extend here but
    // some user of a PHI is in a block for which we fall back to full SDAG
    // instruction selection.
    return PPCMaterializeInt(CI, VT, false);

  return 0;
}

// Materialize the address created by an alloca into a register, and
// return the register number (or zero if we failed to handle it).
unsigned PPCFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDI8),
            ResultReg).addFrameIndex(SI->second).addImm(0);
    return ResultReg;
  }

  return 0;
}

// Fold loads into extends when possible.
// FIXME: We can have multiple redundant extend/trunc instructions
// following a load. The folding only picks up one. Extend this
// to check subsequent instructions for the same pattern and remove
// them. Thus ResultReg should be the def reg for the last redundant
// instruction in a chain, and all intervening instructions can be
// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
// to add ELF64-NOT: rldicl to the appropriate tests when this works.
bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                      const LoadInst *LI) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(LI->getType(), VT))
    return false;

  // Combine load followed by zero- or sign-extend.
  bool IsZExt = false;
  switch (MI->getOpcode()) {
  default:
    return false;

  case PPC::RLDICL:
  case PPC::RLDICL_32_64: {
    IsZExt = true;
    unsigned MB = MI->getOperand(3).getImm();
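    // rldicl keeps the low 64-MB bits. A zero-extending load of VT
    // already clears everything above VT's width, so the extend is
    // redundant whenever the mask keeps at least that many low bits.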
    if ((VT == MVT::i8 && MB <= 56) ||
        (VT == MVT::i16 && MB <= 48) ||
        (VT == MVT::i32 && MB <= 32))
      break;
    return false;
  }

  case PPC::RLWINM:
  case PPC::RLWINM8: {
    IsZExt = true;
    unsigned MB = MI->getOperand(3).getImm();
    if ((VT == MVT::i8 && MB <= 24) ||
        (VT == MVT::i16 && MB <= 16))
      break;
    return false;
  }

  case PPC::EXTSB:
  case PPC::EXTSB8:
  case PPC::EXTSB8_32_64:
    /* There is no sign-extending load-byte instruction. */
    return false;

  case PPC::EXTSH:
  case PPC::EXTSH8:
  case PPC::EXTSH8_32_64: {
    if (VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }

  case PPC::EXTSW:
  case PPC::EXTSW_32:
  case PPC::EXTSW_32_64: {
    if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
      return false;
    break;
  }
  }

  // See if we can handle this address.
  Address Addr;
  if (!PPCComputeAddress(LI->getOperand(0), Addr))
    return false;

  Register ResultReg = MI->getOperand(0).getReg();

  if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt,
                   Subtarget->hasSPE() ? PPC::EVLDD : PPC::LFD))
    return false;

  MachineBasicBlock::iterator I(MI);
  removeDeadCode(I, std::next(I));
  return true;
}

// Attempt to lower call arguments in a faster way than done by
// the selection DAG code.
bool PPCFastISel::fastLowerArguments() {
  // Defer to normal argument lowering for now. It's reasonably
  // efficient. Consider doing something like ARM to handle the
  // case where all args fit in registers, no varargs, no float
  // or vector args.
  return false;
}

// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {

  if (Opc != ISD::Constant)
    return 0;

  // If we're using CR bit registers for i1 values, handle that as a special
  // case first.
  if (VT == MVT::i1 && Subtarget->useCRBits()) {
    unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
    return ImmReg;
  }

  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
      VT != MVT::i1)
    return 0;

  const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
                                   &PPC::GPRCRegClass);
  if (VT == MVT::i64)
    return PPCMaterialize64BitInt(Imm, RC);
  else
    return PPCMaterialize32BitInt(Imm, RC);
}

// Override for ADDI and ADDI8 to set the correct register class
// on RHS operand 0. The automatic infrastructure naively assumes
// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
// for these cases. At the moment, none of the other automatically
// generated RI instructions require special treatment. However, once
// SelectSelect is implemented, "isel" requires similar handling.
//
// Also be conservative about the output register class. Avoid
// assigning R0 or X0 to the output register for GPRC and G8RC
// register classes, as any such result could be used in ADDI, etc.,
// where those regs have another meaning.
unsigned PPCFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  if (MachineInstOpcode == PPC::ADDI)
    MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
  else if (MachineInstOpcode == PPC::ADDI8)
    MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);

  const TargetRegisterClass *UseRC =
      (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
       (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));

  return FastISel::fastEmitInst_ri(MachineInstOpcode, UseRC,
                                   Op0, Op0IsKill, Imm);
}

// Override for instructions with one register operand to avoid use of
// R0/X0. The automatic infrastructure isn't aware of the context so
// we must be conservative.
unsigned PPCFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  const TargetRegisterClass *UseRC =
      (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
       (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));

  return FastISel::fastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
}

// Override for instructions with two register operands to avoid use
// of R0/X0. The automatic infrastructure isn't aware of the context
// so we must be conservative.
unsigned PPCFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  const TargetRegisterClass *UseRC =
      (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
       (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));

  return FastISel::fastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
                                   Op1, Op1IsKill);
}

namespace llvm {
  // Create the fast instruction selector for PowerPC64 ELF.
  FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
                                const TargetLibraryInfo *LibInfo) {
    // Only available on 64-bit ELF for now.
    const PPCSubtarget &Subtarget = FuncInfo.MF->getSubtarget<PPCSubtarget>();
    if (Subtarget.is64BitELFABI())
      return new PPCFastISel(FuncInfo, LibInfo);
    return nullptr;
  }
}