1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a printer that converts from our internal representation 10 // of machine-dependent LLVM code to NVPTX assembly language. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "NVPTXAsmPrinter.h" 15 #include "MCTargetDesc/NVPTXBaseInfo.h" 16 #include "MCTargetDesc/NVPTXInstPrinter.h" 17 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 18 #include "MCTargetDesc/NVPTXTargetStreamer.h" 19 #include "NVPTX.h" 20 #include "NVPTXMCExpr.h" 21 #include "NVPTXMachineFunctionInfo.h" 22 #include "NVPTXRegisterInfo.h" 23 #include "NVPTXSubtarget.h" 24 #include "NVPTXTargetMachine.h" 25 #include "NVPTXUtilities.h" 26 #include "TargetInfo/NVPTXTargetInfo.h" 27 #include "cl_common_defines.h" 28 #include "llvm/ADT/APFloat.h" 29 #include "llvm/ADT/APInt.h" 30 #include "llvm/ADT/DenseMap.h" 31 #include "llvm/ADT/DenseSet.h" 32 #include "llvm/ADT/SmallString.h" 33 #include "llvm/ADT/SmallVector.h" 34 #include "llvm/ADT/StringExtras.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/ADT/Triple.h" 37 #include "llvm/ADT/Twine.h" 38 #include "llvm/Analysis/ConstantFolding.h" 39 #include "llvm/CodeGen/Analysis.h" 40 #include "llvm/CodeGen/MachineBasicBlock.h" 41 #include "llvm/CodeGen/MachineFrameInfo.h" 42 #include "llvm/CodeGen/MachineFunction.h" 43 #include "llvm/CodeGen/MachineInstr.h" 44 #include "llvm/CodeGen/MachineLoopInfo.h" 45 #include "llvm/CodeGen/MachineModuleInfo.h" 46 #include "llvm/CodeGen/MachineOperand.h" 47 #include "llvm/CodeGen/MachineRegisterInfo.h" 48 #include "llvm/CodeGen/TargetLowering.h" 49 #include 
"llvm/CodeGen/TargetRegisterInfo.h" 50 #include "llvm/CodeGen/ValueTypes.h" 51 #include "llvm/IR/Attributes.h" 52 #include "llvm/IR/BasicBlock.h" 53 #include "llvm/IR/Constant.h" 54 #include "llvm/IR/Constants.h" 55 #include "llvm/IR/DataLayout.h" 56 #include "llvm/IR/DebugInfo.h" 57 #include "llvm/IR/DebugInfoMetadata.h" 58 #include "llvm/IR/DebugLoc.h" 59 #include "llvm/IR/DerivedTypes.h" 60 #include "llvm/IR/Function.h" 61 #include "llvm/IR/GlobalValue.h" 62 #include "llvm/IR/GlobalVariable.h" 63 #include "llvm/IR/Instruction.h" 64 #include "llvm/IR/LLVMContext.h" 65 #include "llvm/IR/Module.h" 66 #include "llvm/IR/Operator.h" 67 #include "llvm/IR/Type.h" 68 #include "llvm/IR/User.h" 69 #include "llvm/MC/MCExpr.h" 70 #include "llvm/MC/MCInst.h" 71 #include "llvm/MC/MCInstrDesc.h" 72 #include "llvm/MC/MCStreamer.h" 73 #include "llvm/MC/MCSymbol.h" 74 #include "llvm/Support/Casting.h" 75 #include "llvm/Support/CommandLine.h" 76 #include "llvm/Support/ErrorHandling.h" 77 #include "llvm/Support/MachineValueType.h" 78 #include "llvm/Support/Path.h" 79 #include "llvm/Support/TargetRegistry.h" 80 #include "llvm/Support/raw_ostream.h" 81 #include "llvm/Target/TargetLoweringObjectFile.h" 82 #include "llvm/Target/TargetMachine.h" 83 #include "llvm/Transforms/Utils/UnrollLoop.h" 84 #include <cassert> 85 #include <cstdint> 86 #include <cstring> 87 #include <new> 88 #include <string> 89 #include <utility> 90 #include <vector> 91 92 using namespace llvm; 93 94 #define DEPOTNAME "__local_depot" 95 96 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 97 /// depends. 
98 static void 99 DiscoverDependentGlobals(const Value *V, 100 DenseSet<const GlobalVariable *> &Globals) { 101 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 102 Globals.insert(GV); 103 else { 104 if (const User *U = dyn_cast<User>(V)) { 105 for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 106 DiscoverDependentGlobals(U->getOperand(i), Globals); 107 } 108 } 109 } 110 } 111 112 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 113 /// instances to be emitted, but only after any dependents have been added 114 /// first.s 115 static void 116 VisitGlobalVariableForEmission(const GlobalVariable *GV, 117 SmallVectorImpl<const GlobalVariable *> &Order, 118 DenseSet<const GlobalVariable *> &Visited, 119 DenseSet<const GlobalVariable *> &Visiting) { 120 // Have we already visited this one? 121 if (Visited.count(GV)) 122 return; 123 124 // Do we have a circular dependency? 125 if (!Visiting.insert(GV).second) 126 report_fatal_error("Circular dependency found in global variable set"); 127 128 // Make sure we visit all dependents first 129 DenseSet<const GlobalVariable *> Others; 130 for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 131 DiscoverDependentGlobals(GV->getOperand(i), Others); 132 133 for (DenseSet<const GlobalVariable *>::iterator I = Others.begin(), 134 E = Others.end(); 135 I != E; ++I) 136 VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); 137 138 // Now we can visit ourself 139 Order.push_back(GV); 140 Visited.insert(GV); 141 Visiting.erase(GV); 142 } 143 144 void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) { 145 MCInst Inst; 146 lowerToMCInst(MI, Inst); 147 EmitToStreamer(*OutStreamer, Inst); 148 } 149 150 // Handle symbol backtracking for targets that do not support image handles 151 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, 152 unsigned OpNo, MCOperand &MCOp) { 153 const MachineOperand &MO = MI->getOperand(OpNo); 154 const MCInstrDesc &MCID = 
MI->getDesc(); 155 156 if (MCID.TSFlags & NVPTXII::IsTexFlag) { 157 // This is a texture fetch, so operand 4 is a texref and operand 5 is 158 // a samplerref 159 if (OpNo == 4 && MO.isImm()) { 160 lowerImageHandleSymbol(MO.getImm(), MCOp); 161 return true; 162 } 163 if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { 164 lowerImageHandleSymbol(MO.getImm(), MCOp); 165 return true; 166 } 167 168 return false; 169 } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { 170 unsigned VecSize = 171 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); 172 173 // For a surface load of vector size N, the Nth operand will be the surfref 174 if (OpNo == VecSize && MO.isImm()) { 175 lowerImageHandleSymbol(MO.getImm(), MCOp); 176 return true; 177 } 178 179 return false; 180 } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { 181 // This is a surface store, so operand 0 is a surfref 182 if (OpNo == 0 && MO.isImm()) { 183 lowerImageHandleSymbol(MO.getImm(), MCOp); 184 return true; 185 } 186 187 return false; 188 } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { 189 // This is a query, so operand 1 is a surfref/texref 190 if (OpNo == 1 && MO.isImm()) { 191 lowerImageHandleSymbol(MO.getImm(), MCOp); 192 return true; 193 } 194 195 return false; 196 } 197 198 return false; 199 } 200 201 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { 202 // Ewwww 203 LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget()); 204 NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); 205 const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); 206 const char *Sym = MFI->getImageHandleSymbol(Index); 207 std::string *SymNamePtr = 208 nvTM.getManagedStrPool()->getManagedString(Sym); 209 MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr))); 210 } 211 212 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 213 
OutMI.setOpcode(MI->getOpcode()); 214 // Special: Do not mangle symbol operand of CALL_PROTOTYPE 215 if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 216 const MachineOperand &MO = MI->getOperand(0); 217 OutMI.addOperand(GetSymbolRef( 218 OutContext.getOrCreateSymbol(Twine(MO.getSymbolName())))); 219 return; 220 } 221 222 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>(); 223 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 224 const MachineOperand &MO = MI->getOperand(i); 225 226 MCOperand MCOp; 227 if (!STI.hasImageHandles()) { 228 if (lowerImageHandleOperand(MI, i, MCOp)) { 229 OutMI.addOperand(MCOp); 230 continue; 231 } 232 } 233 234 if (lowerOperand(MO, MCOp)) 235 OutMI.addOperand(MCOp); 236 } 237 } 238 239 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 240 MCOperand &MCOp) { 241 switch (MO.getType()) { 242 default: llvm_unreachable("unknown operand type"); 243 case MachineOperand::MO_Register: 244 MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg())); 245 break; 246 case MachineOperand::MO_Immediate: 247 MCOp = MCOperand::createImm(MO.getImm()); 248 break; 249 case MachineOperand::MO_MachineBasicBlock: 250 MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( 251 MO.getMBB()->getSymbol(), OutContext)); 252 break; 253 case MachineOperand::MO_ExternalSymbol: 254 MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); 255 break; 256 case MachineOperand::MO_GlobalAddress: 257 MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); 258 break; 259 case MachineOperand::MO_FPImmediate: { 260 const ConstantFP *Cnt = MO.getFPImm(); 261 const APFloat &Val = Cnt->getValueAPF(); 262 263 switch (Cnt->getType()->getTypeID()) { 264 default: report_fatal_error("Unsupported FP type"); break; 265 case Type::HalfTyID: 266 MCOp = MCOperand::createExpr( 267 NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext)); 268 break; 269 case Type::FloatTyID: 270 MCOp = MCOperand::createExpr( 271 
NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); 272 break; 273 case Type::DoubleTyID: 274 MCOp = MCOperand::createExpr( 275 NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); 276 break; 277 } 278 break; 279 } 280 } 281 return true; 282 } 283 284 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 285 if (Register::isVirtualRegister(Reg)) { 286 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 287 288 DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 289 unsigned RegNum = RegMap[Reg]; 290 291 // Encode the register class in the upper 4 bits 292 // Must be kept in sync with NVPTXInstPrinter::printRegName 293 unsigned Ret = 0; 294 if (RC == &NVPTX::Int1RegsRegClass) { 295 Ret = (1 << 28); 296 } else if (RC == &NVPTX::Int16RegsRegClass) { 297 Ret = (2 << 28); 298 } else if (RC == &NVPTX::Int32RegsRegClass) { 299 Ret = (3 << 28); 300 } else if (RC == &NVPTX::Int64RegsRegClass) { 301 Ret = (4 << 28); 302 } else if (RC == &NVPTX::Float32RegsRegClass) { 303 Ret = (5 << 28); 304 } else if (RC == &NVPTX::Float64RegsRegClass) { 305 Ret = (6 << 28); 306 } else if (RC == &NVPTX::Float16RegsRegClass) { 307 Ret = (7 << 28); 308 } else if (RC == &NVPTX::Float16x2RegsRegClass) { 309 Ret = (8 << 28); 310 } else { 311 report_fatal_error("Bad register class"); 312 } 313 314 // Insert the vreg number 315 Ret |= (RegNum & 0x0FFFFFFF); 316 return Ret; 317 } else { 318 // Some special-use registers are actually physical registers. 319 // Encode this as the register class ID of 0 and the real register ID. 
320 return Reg & 0x0FFFFFFF; 321 } 322 } 323 324 MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { 325 const MCExpr *Expr; 326 Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, 327 OutContext); 328 return MCOperand::createExpr(Expr); 329 } 330 331 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 332 const DataLayout &DL = getDataLayout(); 333 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F); 334 const TargetLowering *TLI = STI.getTargetLowering(); 335 336 Type *Ty = F->getReturnType(); 337 338 bool isABI = (STI.getSmVersion() >= 20); 339 340 if (Ty->getTypeID() == Type::VoidTyID) 341 return; 342 343 O << " ("; 344 345 if (isABI) { 346 if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) { 347 unsigned size = 0; 348 if (auto *ITy = dyn_cast<IntegerType>(Ty)) { 349 size = ITy->getBitWidth(); 350 } else { 351 assert(Ty->isFloatingPointTy() && "Floating point type expected here"); 352 size = Ty->getPrimitiveSizeInBits(); 353 } 354 // PTX ABI requires all scalar return values to be at least 32 355 // bits in size. fp16 normally uses .b16 as its storage type in 356 // PTX, so its size must be adjusted here, too. 
357 if (size < 32) 358 size = 32; 359 360 O << ".param .b" << size << " func_retval0"; 361 } else if (isa<PointerType>(Ty)) { 362 O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() 363 << " func_retval0"; 364 } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { 365 unsigned totalsz = DL.getTypeAllocSize(Ty); 366 unsigned retAlignment = 0; 367 if (!getAlign(*F, 0, retAlignment)) 368 retAlignment = DL.getABITypeAlignment(Ty); 369 O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz 370 << "]"; 371 } else 372 llvm_unreachable("Unknown return type"); 373 } else { 374 SmallVector<EVT, 16> vtparts; 375 ComputeValueVTs(*TLI, DL, Ty, vtparts); 376 unsigned idx = 0; 377 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 378 unsigned elems = 1; 379 EVT elemtype = vtparts[i]; 380 if (vtparts[i].isVector()) { 381 elems = vtparts[i].getVectorNumElements(); 382 elemtype = vtparts[i].getVectorElementType(); 383 } 384 385 for (unsigned j = 0, je = elems; j != je; ++j) { 386 unsigned sz = elemtype.getSizeInBits(); 387 if (elemtype.isInteger() && (sz < 32)) 388 sz = 32; 389 O << ".reg .b" << sz << " func_retval" << idx; 390 if (j < je - 1) 391 O << ", "; 392 ++idx; 393 } 394 if (i < e - 1) 395 O << ", "; 396 } 397 } 398 O << ") "; 399 } 400 401 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 402 raw_ostream &O) { 403 const Function &F = MF.getFunction(); 404 printReturnValStr(&F, O); 405 } 406 407 // Return true if MBB is the header of a loop marked with 408 // llvm.loop.unroll.disable. 409 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll". 410 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( 411 const MachineBasicBlock &MBB) const { 412 MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>(); 413 // We insert .pragma "nounroll" only to the loop header. 414 if (!LI.isLoopHeader(&MBB)) 415 return false; 416 417 // llvm.loop.unroll.disable is marked on the back edges of a loop. 
Therefore, 418 // we iterate through each back edge of the loop with header MBB, and check 419 // whether its metadata contains llvm.loop.unroll.disable. 420 for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) { 421 const MachineBasicBlock *PMBB = *I; 422 if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) { 423 // Edges from other loops to MBB are not back edges. 424 continue; 425 } 426 if (const BasicBlock *PBB = PMBB->getBasicBlock()) { 427 if (MDNode *LoopID = 428 PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) { 429 if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable")) 430 return true; 431 } 432 } 433 } 434 return false; 435 } 436 437 void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { 438 AsmPrinter::emitBasicBlockStart(MBB); 439 if (isLoopHeaderOfNoUnroll(MBB)) 440 OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n")); 441 } 442 443 void NVPTXAsmPrinter::emitFunctionEntryLabel() { 444 SmallString<128> Str; 445 raw_svector_ostream O(Str); 446 447 if (!GlobalsEmitted) { 448 emitGlobals(*MF->getFunction().getParent()); 449 GlobalsEmitted = true; 450 } 451 452 // Set up 453 MRI = &MF->getRegInfo(); 454 F = &MF->getFunction(); 455 emitLinkageDirective(F, O); 456 if (isKernelFunction(*F)) 457 O << ".entry "; 458 else { 459 O << ".func "; 460 printReturnValStr(*MF, O); 461 } 462 463 CurrentFnSym->print(O, MAI); 464 465 emitFunctionParamList(*MF, O); 466 467 if (isKernelFunction(*F)) 468 emitKernelFunctionDirectives(*F, O); 469 470 OutStreamer->emitRawText(O.str()); 471 472 VRegMapping.clear(); 473 // Emit open brace for function body. 474 OutStreamer->emitRawText(StringRef("{\n")); 475 setAndEmitFunctionVirtualRegisters(*MF); 476 // Emit initial .loc debug directive for correct relocation symbol data. 
477 if (MMI && MMI->hasDebugInfo()) 478 emitInitialRawDwarfLocDirective(*MF); 479 } 480 481 bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) { 482 bool Result = AsmPrinter::runOnMachineFunction(F); 483 // Emit closing brace for the body of function F. 484 // The closing brace must be emitted here because we need to emit additional 485 // debug labels/data after the last basic block. 486 // We need to emit the closing brace here because we don't have function that 487 // finished emission of the function body. 488 OutStreamer->emitRawText(StringRef("}\n")); 489 return Result; 490 } 491 492 void NVPTXAsmPrinter::emitFunctionBodyStart() { 493 SmallString<128> Str; 494 raw_svector_ostream O(Str); 495 emitDemotedVars(&MF->getFunction(), O); 496 OutStreamer->emitRawText(O.str()); 497 } 498 499 void NVPTXAsmPrinter::emitFunctionBodyEnd() { 500 VRegMapping.clear(); 501 } 502 503 const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const { 504 SmallString<128> Str; 505 raw_svector_ostream(Str) << DEPOTNAME << getFunctionNumber(); 506 return OutContext.getOrCreateSymbol(Str); 507 } 508 509 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 510 Register RegNo = MI->getOperand(0).getReg(); 511 if (Register::isVirtualRegister(RegNo)) { 512 OutStreamer->AddComment(Twine("implicit-def: ") + 513 getVirtualRegisterName(RegNo)); 514 } else { 515 const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>(); 516 OutStreamer->AddComment(Twine("implicit-def: ") + 517 STI.getRegisterInfo()->getName(RegNo)); 518 } 519 OutStreamer->AddBlankLine(); 520 } 521 522 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F, 523 raw_ostream &O) const { 524 // If the NVVM IR has some of reqntid* specified, then output 525 // the reqntid directive, and set the unspecified ones to 1. 526 // If none of reqntid* is specified, don't output reqntid directive. 
527 unsigned reqntidx, reqntidy, reqntidz; 528 bool specified = false; 529 if (!getReqNTIDx(F, reqntidx)) 530 reqntidx = 1; 531 else 532 specified = true; 533 if (!getReqNTIDy(F, reqntidy)) 534 reqntidy = 1; 535 else 536 specified = true; 537 if (!getReqNTIDz(F, reqntidz)) 538 reqntidz = 1; 539 else 540 specified = true; 541 542 if (specified) 543 O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz 544 << "\n"; 545 546 // If the NVVM IR has some of maxntid* specified, then output 547 // the maxntid directive, and set the unspecified ones to 1. 548 // If none of maxntid* is specified, don't output maxntid directive. 549 unsigned maxntidx, maxntidy, maxntidz; 550 specified = false; 551 if (!getMaxNTIDx(F, maxntidx)) 552 maxntidx = 1; 553 else 554 specified = true; 555 if (!getMaxNTIDy(F, maxntidy)) 556 maxntidy = 1; 557 else 558 specified = true; 559 if (!getMaxNTIDz(F, maxntidz)) 560 maxntidz = 1; 561 else 562 specified = true; 563 564 if (specified) 565 O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz 566 << "\n"; 567 568 unsigned mincta; 569 if (getMinCTASm(F, mincta)) 570 O << ".minnctapersm " << mincta << "\n"; 571 572 unsigned maxnreg; 573 if (getMaxNReg(F, maxnreg)) 574 O << ".maxnreg " << maxnreg << "\n"; 575 } 576 577 std::string 578 NVPTXAsmPrinter::getVirtualRegisterName(unsigned Reg) const { 579 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 580 581 std::string Name; 582 raw_string_ostream NameStr(Name); 583 584 VRegRCMap::const_iterator I = VRegMapping.find(RC); 585 assert(I != VRegMapping.end() && "Bad register class"); 586 const DenseMap<unsigned, unsigned> &RegMap = I->second; 587 588 VRegMap::const_iterator VI = RegMap.find(Reg); 589 assert(VI != RegMap.end() && "Bad virtual register"); 590 unsigned MappedVR = VI->second; 591 592 NameStr << getNVPTXRegClassStr(RC) << MappedVR; 593 594 NameStr.flush(); 595 return Name; 596 } 597 598 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr, 599 raw_ostream 
&O) { 600 O << getVirtualRegisterName(vr); 601 } 602 603 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 604 emitLinkageDirective(F, O); 605 if (isKernelFunction(*F)) 606 O << ".entry "; 607 else 608 O << ".func "; 609 printReturnValStr(F, O); 610 getSymbol(F)->print(O, MAI); 611 O << "\n"; 612 emitFunctionParamList(F, O); 613 O << ";\n"; 614 } 615 616 static bool usedInGlobalVarDef(const Constant *C) { 617 if (!C) 618 return false; 619 620 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 621 return GV->getName() != "llvm.used"; 622 } 623 624 for (const User *U : C->users()) 625 if (const Constant *C = dyn_cast<Constant>(U)) 626 if (usedInGlobalVarDef(C)) 627 return true; 628 629 return false; 630 } 631 632 static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 633 if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 634 if (othergv->getName() == "llvm.used") 635 return true; 636 } 637 638 if (const Instruction *instr = dyn_cast<Instruction>(U)) { 639 if (instr->getParent() && instr->getParent()->getParent()) { 640 const Function *curFunc = instr->getParent()->getParent(); 641 if (oneFunc && (curFunc != oneFunc)) 642 return false; 643 oneFunc = curFunc; 644 return true; 645 } else 646 return false; 647 } 648 649 for (const User *UU : U->users()) 650 if (!usedInOneFunc(UU, oneFunc)) 651 return false; 652 653 return true; 654 } 655 656 /* Find out if a global variable can be demoted to local scope. 657 * Currently, this is valid for CUDA shared variables, which have local 658 * scope and global lifetime. So the conditions to check are : 659 * 1. Is the global variable in shared address space? 660 * 2. Does it have internal linkage? 661 * 3. Is the global variable referenced only in one function? 
662 */ 663 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 664 if (!gv->hasInternalLinkage()) 665 return false; 666 PointerType *Pty = gv->getType(); 667 if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED) 668 return false; 669 670 const Function *oneFunc = nullptr; 671 672 bool flag = usedInOneFunc(gv, oneFunc); 673 if (!flag) 674 return false; 675 if (!oneFunc) 676 return false; 677 f = oneFunc; 678 return true; 679 } 680 681 static bool useFuncSeen(const Constant *C, 682 DenseMap<const Function *, bool> &seenMap) { 683 for (const User *U : C->users()) { 684 if (const Constant *cu = dyn_cast<Constant>(U)) { 685 if (useFuncSeen(cu, seenMap)) 686 return true; 687 } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 688 const BasicBlock *bb = I->getParent(); 689 if (!bb) 690 continue; 691 const Function *caller = bb->getParent(); 692 if (!caller) 693 continue; 694 if (seenMap.find(caller) != seenMap.end()) 695 return true; 696 } 697 } 698 return false; 699 } 700 701 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 702 DenseMap<const Function *, bool> seenMap; 703 for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { 704 const Function *F = &*FI; 705 706 if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) { 707 emitDeclaration(F, O); 708 continue; 709 } 710 711 if (F->isDeclaration()) { 712 if (F->use_empty()) 713 continue; 714 if (F->getIntrinsicID()) 715 continue; 716 emitDeclaration(F, O); 717 continue; 718 } 719 for (const User *U : F->users()) { 720 if (const Constant *C = dyn_cast<Constant>(U)) { 721 if (usedInGlobalVarDef(C)) { 722 // The use is in the initialization of a global variable 723 // that is a function pointer, so print a declaration 724 // for the original function 725 emitDeclaration(F, O); 726 break; 727 } 728 // Emit a declaration of this function if the function that 729 // uses this constant expr has already been seen. 
730 if (useFuncSeen(C, seenMap)) { 731 emitDeclaration(F, O); 732 break; 733 } 734 } 735 736 if (!isa<Instruction>(U)) 737 continue; 738 const Instruction *instr = cast<Instruction>(U); 739 const BasicBlock *bb = instr->getParent(); 740 if (!bb) 741 continue; 742 const Function *caller = bb->getParent(); 743 if (!caller) 744 continue; 745 746 // If a caller has already been seen, then the caller is 747 // appearing in the module before the callee. so print out 748 // a declaration for the callee. 749 if (seenMap.find(caller) != seenMap.end()) { 750 emitDeclaration(F, O); 751 break; 752 } 753 } 754 seenMap[F] = true; 755 } 756 } 757 758 static bool isEmptyXXStructor(GlobalVariable *GV) { 759 if (!GV) return true; 760 const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); 761 if (!InitList) return true; // Not an array; we don't know how to parse. 762 return InitList->getNumOperands() == 0; 763 } 764 765 void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) { 766 // Construct a default subtarget off of the TargetMachine defaults. The 767 // rest of NVPTX isn't friendly to change subtargets per function and 768 // so the default TargetMachine will have all of the options. 769 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 770 const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl()); 771 SmallString<128> Str1; 772 raw_svector_ostream OS1(Str1); 773 774 // Emit header before any dwarf directives are emitted below. 
775 emitHeader(M, OS1, *STI); 776 OutStreamer->emitRawText(OS1.str()); 777 } 778 779 bool NVPTXAsmPrinter::doInitialization(Module &M) { 780 if (M.alias_size()) { 781 report_fatal_error("Module has aliases, which NVPTX does not support."); 782 return true; // error 783 } 784 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) { 785 report_fatal_error( 786 "Module has a nontrivial global ctor, which NVPTX does not support."); 787 return true; // error 788 } 789 if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) { 790 report_fatal_error( 791 "Module has a nontrivial global dtor, which NVPTX does not support."); 792 return true; // error 793 } 794 795 // We need to call the parent's one explicitly. 796 bool Result = AsmPrinter::doInitialization(M); 797 798 GlobalsEmitted = false; 799 800 return Result; 801 } 802 803 void NVPTXAsmPrinter::emitGlobals(const Module &M) { 804 SmallString<128> Str2; 805 raw_svector_ostream OS2(Str2); 806 807 emitDeclarations(M, OS2); 808 809 // As ptxas does not support forward references of globals, we need to first 810 // sort the list of module-level globals in def-use order. We visit each 811 // global variable in order, and ensure that we emit it *after* its dependent 812 // globals. We use a little extra memory maintaining both a set and a list to 813 // have fast searches while maintaining a strict ordering. 
814 SmallVector<const GlobalVariable *, 8> Globals; 815 DenseSet<const GlobalVariable *> GVVisited; 816 DenseSet<const GlobalVariable *> GVVisiting; 817 818 // Visit each global variable, in order 819 for (const GlobalVariable &I : M.globals()) 820 VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting); 821 822 assert(GVVisited.size() == M.getGlobalList().size() && 823 "Missed a global variable"); 824 assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 825 826 // Print out module-level global variables in proper order 827 for (unsigned i = 0, e = Globals.size(); i != e; ++i) 828 printModuleLevelGV(Globals[i], OS2); 829 830 OS2 << '\n'; 831 832 OutStreamer->emitRawText(OS2.str()); 833 } 834 835 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O, 836 const NVPTXSubtarget &STI) { 837 O << "//\n"; 838 O << "// Generated by LLVM NVPTX Back-End\n"; 839 O << "//\n"; 840 O << "\n"; 841 842 unsigned PTXVersion = STI.getPTXVersion(); 843 O << ".version " << (PTXVersion / 10) << "." 
<< (PTXVersion % 10) << "\n"; 844 845 O << ".target "; 846 O << STI.getTargetName(); 847 848 const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 849 if (NTM.getDrvInterface() == NVPTX::NVCL) 850 O << ", texmode_independent"; 851 852 bool HasFullDebugInfo = false; 853 for (DICompileUnit *CU : M.debug_compile_units()) { 854 switch(CU->getEmissionKind()) { 855 case DICompileUnit::NoDebug: 856 case DICompileUnit::DebugDirectivesOnly: 857 break; 858 case DICompileUnit::LineTablesOnly: 859 case DICompileUnit::FullDebug: 860 HasFullDebugInfo = true; 861 break; 862 } 863 if (HasFullDebugInfo) 864 break; 865 } 866 if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo) 867 O << ", debug"; 868 869 O << "\n"; 870 871 O << ".address_size "; 872 if (NTM.is64Bit()) 873 O << "64"; 874 else 875 O << "32"; 876 O << "\n"; 877 878 O << "\n"; 879 } 880 881 bool NVPTXAsmPrinter::doFinalization(Module &M) { 882 bool HasDebugInfo = MMI && MMI->hasDebugInfo(); 883 884 // If we did not emit any functions, then the global declarations have not 885 // yet been emitted. 886 if (!GlobalsEmitted) { 887 emitGlobals(M); 888 GlobalsEmitted = true; 889 } 890 891 // XXX Temproarily remove global variables so that doFinalization() will not 892 // emit them again (global variables are emitted at beginning). 
893 894 Module::GlobalListType &global_list = M.getGlobalList(); 895 int i, n = global_list.size(); 896 GlobalVariable **gv_array = new GlobalVariable *[n]; 897 898 // first, back-up GlobalVariable in gv_array 899 i = 0; 900 for (Module::global_iterator I = global_list.begin(), E = global_list.end(); 901 I != E; ++I) 902 gv_array[i++] = &*I; 903 904 // second, empty global_list 905 while (!global_list.empty()) 906 global_list.remove(global_list.begin()); 907 908 // call doFinalization 909 bool ret = AsmPrinter::doFinalization(M); 910 911 // now we restore global variables 912 for (i = 0; i < n; i++) 913 global_list.insert(global_list.end(), gv_array[i]); 914 915 clearAnnotationCache(&M); 916 917 delete[] gv_array; 918 // Close the last emitted section 919 if (HasDebugInfo) { 920 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer()) 921 ->closeLastSection(); 922 // Emit empty .debug_loc section for better support of the empty files. 923 OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}"); 924 } 925 926 // Output last DWARF .file directives, if any. 927 static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer()) 928 ->outputDwarfFileDirectives(); 929 930 return ret; 931 932 //bool Result = AsmPrinter::doFinalization(M); 933 // Instead of calling the parents doFinalization, we may 934 // clone parents doFinalization and customize here. 935 // Currently, we if NVISA out the EmitGlobals() in 936 // parent's doFinalization, which is too intrusive. 937 // 938 // Same for the doInitialization. 939 //return Result; 940 } 941 942 // This function emits appropriate linkage directives for 943 // functions and global variables. 944 // 945 // extern function declaration -> .extern 946 // extern function definition -> .visible 947 // external global variable with init -> .visible 948 // external without init -> .extern 949 // appending -> not allowed, assert. 
950 // for any linkage other than 951 // internal, private, linker_private, 952 // linker_private_weak, linker_private_weak_def_auto, 953 // we emit -> .weak. 954 955 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 956 raw_ostream &O) { 957 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) { 958 if (V->hasExternalLinkage()) { 959 if (isa<GlobalVariable>(V)) { 960 const GlobalVariable *GVar = cast<GlobalVariable>(V); 961 if (GVar) { 962 if (GVar->hasInitializer()) 963 O << ".visible "; 964 else 965 O << ".extern "; 966 } 967 } else if (V->isDeclaration()) 968 O << ".extern "; 969 else 970 O << ".visible "; 971 } else if (V->hasAppendingLinkage()) { 972 std::string msg; 973 msg.append("Error: "); 974 msg.append("Symbol "); 975 if (V->hasName()) 976 msg.append(std::string(V->getName())); 977 msg.append("has unsupported appending linkage type"); 978 llvm_unreachable(msg.c_str()); 979 } else if (!V->hasInternalLinkage() && 980 !V->hasPrivateLinkage()) { 981 O << ".weak "; 982 } 983 } 984 } 985 986 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 987 raw_ostream &O, 988 bool processDemoted) { 989 // Skip meta data 990 if (GVar->hasSection()) { 991 if (GVar->getSection() == "llvm.metadata") 992 return; 993 } 994 995 // Skip LLVM intrinsic global variables 996 if (GVar->getName().startswith("llvm.") || 997 GVar->getName().startswith("nvvm.")) 998 return; 999 1000 const DataLayout &DL = getDataLayout(); 1001 1002 // GlobalVariables are always constant pointers themselves. 
1003 PointerType *PTy = GVar->getType(); 1004 Type *ETy = GVar->getValueType(); 1005 1006 if (GVar->hasExternalLinkage()) { 1007 if (GVar->hasInitializer()) 1008 O << ".visible "; 1009 else 1010 O << ".extern "; 1011 } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() || 1012 GVar->hasAvailableExternallyLinkage() || 1013 GVar->hasCommonLinkage()) { 1014 O << ".weak "; 1015 } 1016 1017 if (isTexture(*GVar)) { 1018 O << ".global .texref " << getTextureName(*GVar) << ";\n"; 1019 return; 1020 } 1021 1022 if (isSurface(*GVar)) { 1023 O << ".global .surfref " << getSurfaceName(*GVar) << ";\n"; 1024 return; 1025 } 1026 1027 if (GVar->isDeclaration()) { 1028 // (extern) declarations, no definition or initializer 1029 // Currently the only known declaration is for an automatic __local 1030 // (.shared) promoted to global. 1031 emitPTXGlobalVariable(GVar, O); 1032 O << ";\n"; 1033 return; 1034 } 1035 1036 if (isSampler(*GVar)) { 1037 O << ".global .samplerref " << getSamplerName(*GVar); 1038 1039 const Constant *Initializer = nullptr; 1040 if (GVar->hasInitializer()) 1041 Initializer = GVar->getInitializer(); 1042 const ConstantInt *CI = nullptr; 1043 if (Initializer) 1044 CI = dyn_cast<ConstantInt>(Initializer); 1045 if (CI) { 1046 unsigned sample = CI->getZExtValue(); 1047 1048 O << " = { "; 1049 1050 for (int i = 0, 1051 addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 1052 i < 3; i++) { 1053 O << "addr_mode_" << i << " = "; 1054 switch (addr) { 1055 case 0: 1056 O << "wrap"; 1057 break; 1058 case 1: 1059 O << "clamp_to_border"; 1060 break; 1061 case 2: 1062 O << "clamp_to_edge"; 1063 break; 1064 case 3: 1065 O << "wrap"; 1066 break; 1067 case 4: 1068 O << "mirror"; 1069 break; 1070 } 1071 O << ", "; 1072 } 1073 O << "filter_mode = "; 1074 switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 1075 case 0: 1076 O << "nearest"; 1077 break; 1078 case 1: 1079 O << "linear"; 1080 break; 1081 case 2: 1082 llvm_unreachable("Anisotropic filtering 
is not supported"); 1083 default: 1084 O << "nearest"; 1085 break; 1086 } 1087 if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 1088 O << ", force_unnormalized_coords = 1"; 1089 } 1090 O << " }"; 1091 } 1092 1093 O << ";\n"; 1094 return; 1095 } 1096 1097 if (GVar->hasPrivateLinkage()) { 1098 if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0) 1099 return; 1100 1101 // FIXME - need better way (e.g. Metadata) to avoid generating this global 1102 if (strncmp(GVar->getName().data(), "filename", 8) == 0) 1103 return; 1104 if (GVar->use_empty()) 1105 return; 1106 } 1107 1108 const Function *demotedFunc = nullptr; 1109 if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 1110 O << "// " << GVar->getName() << " has been demoted\n"; 1111 if (localDecls.find(demotedFunc) != localDecls.end()) 1112 localDecls[demotedFunc].push_back(GVar); 1113 else { 1114 std::vector<const GlobalVariable *> temp; 1115 temp.push_back(GVar); 1116 localDecls[demotedFunc] = temp; 1117 } 1118 return; 1119 } 1120 1121 O << "."; 1122 emitPTXAddressSpace(PTy->getAddressSpace(), O); 1123 1124 if (isManaged(*GVar)) { 1125 O << " .attribute(.managed)"; 1126 } 1127 1128 if (GVar->getAlignment() == 0) 1129 O << " .align " << (int)DL.getPrefTypeAlignment(ETy); 1130 else 1131 O << " .align " << GVar->getAlignment(); 1132 1133 if (ETy->isFloatingPointTy() || ETy->isPointerTy() || 1134 (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) { 1135 O << " ."; 1136 // Special case: ABI requires that we use .u8 for predicates 1137 if (ETy->isIntegerTy(1)) 1138 O << "u8"; 1139 else 1140 O << getPTXFundamentalTypeStr(ETy, false); 1141 O << " "; 1142 getSymbol(GVar)->print(O, MAI); 1143 1144 // Ptx allows variable initilization only for constant and global state 1145 // spaces. 
1146 if (GVar->hasInitializer()) { 1147 if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) || 1148 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) { 1149 const Constant *Initializer = GVar->getInitializer(); 1150 // 'undef' is treated as there is no value specified. 1151 if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) { 1152 O << " = "; 1153 printScalarConstant(Initializer, O); 1154 } 1155 } else { 1156 // The frontend adds zero-initializer to device and constant variables 1157 // that don't have an initial value, and UndefValue to shared 1158 // variables, so skip warning for this case. 1159 if (!GVar->getInitializer()->isNullValue() && 1160 !isa<UndefValue>(GVar->getInitializer())) { 1161 report_fatal_error("initial value of '" + GVar->getName() + 1162 "' is not allowed in addrspace(" + 1163 Twine(PTy->getAddressSpace()) + ")"); 1164 } 1165 } 1166 } 1167 } else { 1168 unsigned int ElementSize = 0; 1169 1170 // Although PTX has direct support for struct type and array type and 1171 // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 1172 // targets that support these high level field accesses. Structs, arrays 1173 // and vectors are lowered into arrays of bytes. 1174 switch (ETy->getTypeID()) { 1175 case Type::IntegerTyID: // Integers larger than 64 bits 1176 case Type::StructTyID: 1177 case Type::ArrayTyID: 1178 case Type::FixedVectorTyID: 1179 ElementSize = DL.getTypeStoreSize(ETy); 1180 // Ptx allows variable initilization only for constant and 1181 // global state spaces. 
1182 if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) || 1183 (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) && 1184 GVar->hasInitializer()) { 1185 const Constant *Initializer = GVar->getInitializer(); 1186 if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1187 AggBuffer aggBuffer(ElementSize, O, *this); 1188 bufferAggregateConstant(Initializer, &aggBuffer); 1189 if (aggBuffer.numSymbols) { 1190 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) { 1191 O << " .u64 "; 1192 getSymbol(GVar)->print(O, MAI); 1193 O << "["; 1194 O << ElementSize / 8; 1195 } else { 1196 O << " .u32 "; 1197 getSymbol(GVar)->print(O, MAI); 1198 O << "["; 1199 O << ElementSize / 4; 1200 } 1201 O << "]"; 1202 } else { 1203 O << " .b8 "; 1204 getSymbol(GVar)->print(O, MAI); 1205 O << "["; 1206 O << ElementSize; 1207 O << "]"; 1208 } 1209 O << " = {"; 1210 aggBuffer.print(); 1211 O << "}"; 1212 } else { 1213 O << " .b8 "; 1214 getSymbol(GVar)->print(O, MAI); 1215 if (ElementSize) { 1216 O << "["; 1217 O << ElementSize; 1218 O << "]"; 1219 } 1220 } 1221 } else { 1222 O << " .b8 "; 1223 getSymbol(GVar)->print(O, MAI); 1224 if (ElementSize) { 1225 O << "["; 1226 O << ElementSize; 1227 O << "]"; 1228 } 1229 } 1230 break; 1231 default: 1232 llvm_unreachable("type not supported yet"); 1233 } 1234 } 1235 O << ";\n"; 1236 } 1237 1238 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 1239 if (localDecls.find(f) == localDecls.end()) 1240 return; 1241 1242 std::vector<const GlobalVariable *> &gvars = localDecls[f]; 1243 1244 for (unsigned i = 0, e = gvars.size(); i != e; ++i) { 1245 O << "\t// demoted variable\n\t"; 1246 printModuleLevelGV(gvars[i], O, true); 1247 } 1248 } 1249 1250 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 1251 raw_ostream &O) const { 1252 switch (AddressSpace) { 1253 case ADDRESS_SPACE_LOCAL: 1254 O << "local"; 1255 break; 1256 case ADDRESS_SPACE_GLOBAL: 1257 O << "global"; 1258 break; 1259 case 
ADDRESS_SPACE_CONST: 1260 O << "const"; 1261 break; 1262 case ADDRESS_SPACE_SHARED: 1263 O << "shared"; 1264 break; 1265 default: 1266 report_fatal_error("Bad address space found while emitting PTX: " + 1267 llvm::Twine(AddressSpace)); 1268 break; 1269 } 1270 } 1271 1272 std::string 1273 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { 1274 switch (Ty->getTypeID()) { 1275 case Type::IntegerTyID: { 1276 unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 1277 if (NumBits == 1) 1278 return "pred"; 1279 else if (NumBits <= 64) { 1280 std::string name = "u"; 1281 return name + utostr(NumBits); 1282 } else { 1283 llvm_unreachable("Integer too large"); 1284 break; 1285 } 1286 break; 1287 } 1288 case Type::HalfTyID: 1289 // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly. 1290 return "b16"; 1291 case Type::FloatTyID: 1292 return "f32"; 1293 case Type::DoubleTyID: 1294 return "f64"; 1295 case Type::PointerTyID: 1296 if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) 1297 if (useB4PTR) 1298 return "b64"; 1299 else 1300 return "u64"; 1301 else if (useB4PTR) 1302 return "b32"; 1303 else 1304 return "u32"; 1305 default: 1306 break; 1307 } 1308 llvm_unreachable("unexpected type"); 1309 } 1310 1311 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 1312 raw_ostream &O) { 1313 const DataLayout &DL = getDataLayout(); 1314 1315 // GlobalVariables are always constant pointers themselves. 
1316 Type *ETy = GVar->getValueType(); 1317 1318 O << "."; 1319 emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O); 1320 if (GVar->getAlignment() == 0) 1321 O << " .align " << (int)DL.getPrefTypeAlignment(ETy); 1322 else 1323 O << " .align " << GVar->getAlignment(); 1324 1325 // Special case for i128 1326 if (ETy->isIntegerTy(128)) { 1327 O << " .b8 "; 1328 getSymbol(GVar)->print(O, MAI); 1329 O << "[16]"; 1330 return; 1331 } 1332 1333 if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) { 1334 O << " ."; 1335 O << getPTXFundamentalTypeStr(ETy); 1336 O << " "; 1337 getSymbol(GVar)->print(O, MAI); 1338 return; 1339 } 1340 1341 int64_t ElementSize = 0; 1342 1343 // Although PTX has direct support for struct type and array type and LLVM IR 1344 // is very similar to PTX, the LLVM CodeGen does not support for targets that 1345 // support these high level field accesses. Structs and arrays are lowered 1346 // into arrays of bytes. 1347 switch (ETy->getTypeID()) { 1348 case Type::StructTyID: 1349 case Type::ArrayTyID: 1350 case Type::FixedVectorTyID: 1351 ElementSize = DL.getTypeStoreSize(ETy); 1352 O << " .b8 "; 1353 getSymbol(GVar)->print(O, MAI); 1354 O << "["; 1355 if (ElementSize) { 1356 O << ElementSize; 1357 } 1358 O << "]"; 1359 break; 1360 default: 1361 llvm_unreachable("type not supported yet"); 1362 } 1363 } 1364 1365 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) { 1366 if (Ty->isSingleValueType()) 1367 return DL.getPrefTypeAlignment(Ty); 1368 1369 auto *ATy = dyn_cast<ArrayType>(Ty); 1370 if (ATy) 1371 return getOpenCLAlignment(DL, ATy->getElementType()); 1372 1373 auto *STy = dyn_cast<StructType>(Ty); 1374 if (STy) { 1375 unsigned int alignStruct = 1; 1376 // Go through each element of the struct and find the 1377 // largest alignment. 
1378 for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) { 1379 Type *ETy = STy->getElementType(i); 1380 unsigned int align = getOpenCLAlignment(DL, ETy); 1381 if (align > alignStruct) 1382 alignStruct = align; 1383 } 1384 return alignStruct; 1385 } 1386 1387 auto *FTy = dyn_cast<FunctionType>(Ty); 1388 if (FTy) 1389 return DL.getPointerPrefAlignment().value(); 1390 return DL.getPrefTypeAlignment(Ty); 1391 } 1392 1393 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, 1394 int paramIndex, raw_ostream &O) { 1395 getSymbol(I->getParent())->print(O, MAI); 1396 O << "_param_" << paramIndex; 1397 } 1398 1399 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 1400 const DataLayout &DL = getDataLayout(); 1401 const AttributeList &PAL = F->getAttributes(); 1402 const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F); 1403 const TargetLowering *TLI = STI.getTargetLowering(); 1404 Function::const_arg_iterator I, E; 1405 unsigned paramIndex = 0; 1406 bool first = true; 1407 bool isKernelFunc = isKernelFunction(*F); 1408 bool isABI = (STI.getSmVersion() >= 20); 1409 bool hasImageHandles = STI.hasImageHandles(); 1410 MVT thePointerTy = TLI->getPointerTy(DL); 1411 1412 if (F->arg_empty()) { 1413 O << "()\n"; 1414 return; 1415 } 1416 1417 O << "(\n"; 1418 1419 for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 1420 Type *Ty = I->getType(); 1421 1422 if (!first) 1423 O << ",\n"; 1424 1425 first = false; 1426 1427 // Handle image/sampler parameters 1428 if (isKernelFunction(*F)) { 1429 if (isSampler(*I) || isImage(*I)) { 1430 if (isImage(*I)) { 1431 std::string sname = std::string(I->getName()); 1432 if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { 1433 if (hasImageHandles) 1434 O << "\t.param .u64 .ptr .surfref "; 1435 else 1436 O << "\t.param .surfref "; 1437 CurrentFnSym->print(O, MAI); 1438 O << "_param_" << paramIndex; 1439 } 1440 else { // Default image is read_only 1441 if 
(hasImageHandles) 1442 O << "\t.param .u64 .ptr .texref "; 1443 else 1444 O << "\t.param .texref "; 1445 CurrentFnSym->print(O, MAI); 1446 O << "_param_" << paramIndex; 1447 } 1448 } else { 1449 if (hasImageHandles) 1450 O << "\t.param .u64 .ptr .samplerref "; 1451 else 1452 O << "\t.param .samplerref "; 1453 CurrentFnSym->print(O, MAI); 1454 O << "_param_" << paramIndex; 1455 } 1456 continue; 1457 } 1458 } 1459 1460 if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) { 1461 if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) { 1462 // Just print .param .align <a> .b8 .param[size]; 1463 // <a> = PAL.getparamalignment 1464 // size = typeallocsize of element type 1465 const Align align = DL.getValueOrABITypeAlignment( 1466 PAL.getParamAlignment(paramIndex), Ty); 1467 1468 unsigned sz = DL.getTypeAllocSize(Ty); 1469 O << "\t.param .align " << align.value() << " .b8 "; 1470 printParamName(I, paramIndex, O); 1471 O << "[" << sz << "]"; 1472 1473 continue; 1474 } 1475 // Just a scalar 1476 auto *PTy = dyn_cast<PointerType>(Ty); 1477 if (isKernelFunc) { 1478 if (PTy) { 1479 // Special handling for pointer arguments to kernel 1480 O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; 1481 1482 if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() != 1483 NVPTX::CUDA) { 1484 Type *ETy = PTy->getElementType(); 1485 int addrSpace = PTy->getAddressSpace(); 1486 switch (addrSpace) { 1487 default: 1488 O << ".ptr "; 1489 break; 1490 case ADDRESS_SPACE_CONST: 1491 O << ".ptr .const "; 1492 break; 1493 case ADDRESS_SPACE_SHARED: 1494 O << ".ptr .shared "; 1495 break; 1496 case ADDRESS_SPACE_GLOBAL: 1497 O << ".ptr .global "; 1498 break; 1499 } 1500 O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " "; 1501 } 1502 printParamName(I, paramIndex, O); 1503 continue; 1504 } 1505 1506 // non-pointer scalar to kernel func 1507 O << "\t.param ."; 1508 // Special case: predicate operands become .u8 types 1509 if (Ty->isIntegerTy(1)) 1510 O << "u8"; 
1511 else 1512 O << getPTXFundamentalTypeStr(Ty); 1513 O << " "; 1514 printParamName(I, paramIndex, O); 1515 continue; 1516 } 1517 // Non-kernel function, just print .param .b<size> for ABI 1518 // and .reg .b<size> for non-ABI 1519 unsigned sz = 0; 1520 if (isa<IntegerType>(Ty)) { 1521 sz = cast<IntegerType>(Ty)->getBitWidth(); 1522 if (sz < 32) 1523 sz = 32; 1524 } else if (isa<PointerType>(Ty)) 1525 sz = thePointerTy.getSizeInBits(); 1526 else if (Ty->isHalfTy()) 1527 // PTX ABI requires all scalar parameters to be at least 32 1528 // bits in size. fp16 normally uses .b16 as its storage type 1529 // in PTX, so its size must be adjusted here, too. 1530 sz = 32; 1531 else 1532 sz = Ty->getPrimitiveSizeInBits(); 1533 if (isABI) 1534 O << "\t.param .b" << sz << " "; 1535 else 1536 O << "\t.reg .b" << sz << " "; 1537 printParamName(I, paramIndex, O); 1538 continue; 1539 } 1540 1541 // param has byVal attribute. So should be a pointer 1542 auto *PTy = dyn_cast<PointerType>(Ty); 1543 assert(PTy && "Param with byval attribute should be a pointer type"); 1544 Type *ETy = PTy->getElementType(); 1545 1546 if (isABI || isKernelFunc) { 1547 // Just print .param .align <a> .b8 .param[size]; 1548 // <a> = PAL.getparamalignment 1549 // size = typeallocsize of element type 1550 Align align = 1551 DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy); 1552 // Work around a bug in ptxas. When PTX code takes address of 1553 // byval parameter with alignment < 4, ptxas generates code to 1554 // spill argument into memory. Alas on sm_50+ ptxas generates 1555 // SASS code that fails with misaligned access. To work around 1556 // the problem, make sure that we align byval parameters by at 1557 // least 4. Matching change must be made in LowerCall() where we 1558 // prepare parameters for the call. 1559 // 1560 // TODO: this will need to be undone when we get to support multi-TU 1561 // device-side compilation as it breaks ABI compatibility with nvcc. 
1562 // Hopefully ptxas bug is fixed by then. 1563 if (!isKernelFunc && align < Align(4)) 1564 align = Align(4); 1565 unsigned sz = DL.getTypeAllocSize(ETy); 1566 O << "\t.param .align " << align.value() << " .b8 "; 1567 printParamName(I, paramIndex, O); 1568 O << "[" << sz << "]"; 1569 continue; 1570 } else { 1571 // Split the ETy into constituent parts and 1572 // print .param .b<size> <name> for each part. 1573 // Further, if a part is vector, print the above for 1574 // each vector element. 1575 SmallVector<EVT, 16> vtparts; 1576 ComputeValueVTs(*TLI, DL, ETy, vtparts); 1577 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 1578 unsigned elems = 1; 1579 EVT elemtype = vtparts[i]; 1580 if (vtparts[i].isVector()) { 1581 elems = vtparts[i].getVectorNumElements(); 1582 elemtype = vtparts[i].getVectorElementType(); 1583 } 1584 1585 for (unsigned j = 0, je = elems; j != je; ++j) { 1586 unsigned sz = elemtype.getSizeInBits(); 1587 if (elemtype.isInteger() && (sz < 32)) 1588 sz = 32; 1589 O << "\t.reg .b" << sz << " "; 1590 printParamName(I, paramIndex, O); 1591 if (j < je - 1) 1592 O << ",\n"; 1593 ++paramIndex; 1594 } 1595 if (i < e - 1) 1596 O << ",\n"; 1597 } 1598 --paramIndex; 1599 continue; 1600 } 1601 } 1602 1603 O << "\n)\n"; 1604 } 1605 1606 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF, 1607 raw_ostream &O) { 1608 const Function &F = MF.getFunction(); 1609 emitFunctionParamList(&F, O); 1610 } 1611 1612 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 1613 const MachineFunction &MF) { 1614 SmallString<128> Str; 1615 raw_svector_ostream O(Str); 1616 1617 // Map the global virtual register number to a register class specific 1618 // virtual register number starting from 1 with that class. 
1619 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 1620 //unsigned numRegClasses = TRI->getNumRegClasses(); 1621 1622 // Emit the Fake Stack Object 1623 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1624 int NumBytes = (int) MFI.getStackSize(); 1625 if (NumBytes) { 1626 O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t" 1627 << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n"; 1628 if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) { 1629 O << "\t.reg .b64 \t%SP;\n"; 1630 O << "\t.reg .b64 \t%SPL;\n"; 1631 } else { 1632 O << "\t.reg .b32 \t%SP;\n"; 1633 O << "\t.reg .b32 \t%SPL;\n"; 1634 } 1635 } 1636 1637 // Go through all virtual registers to establish the mapping between the 1638 // global virtual 1639 // register number and the per class virtual register number. 1640 // We use the per class virtual register number in the ptx output. 1641 unsigned int numVRs = MRI->getNumVirtRegs(); 1642 for (unsigned i = 0; i < numVRs; i++) { 1643 unsigned int vr = Register::index2VirtReg(i); 1644 const TargetRegisterClass *RC = MRI->getRegClass(vr); 1645 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1646 int n = regmap.size(); 1647 regmap.insert(std::make_pair(vr, n + 1)); 1648 } 1649 1650 // Emit register declarations 1651 // @TODO: Extract out the real register usage 1652 // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 1653 // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 1654 // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 1655 // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 1656 // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n"; 1657 // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 1658 // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n"; 1659 1660 // Emit declaration of the virtual registers or 'physical' registers for 1661 // each register class 1662 for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 1663 const 
TargetRegisterClass *RC = TRI->getRegClass(i); 1664 DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 1665 std::string rcname = getNVPTXRegClassName(RC); 1666 std::string rcStr = getNVPTXRegClassStr(RC); 1667 int n = regmap.size(); 1668 1669 // Only declare those registers that may be used. 1670 if (n) { 1671 O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 1672 << ">;\n"; 1673 } 1674 } 1675 1676 OutStreamer->emitRawText(O.str()); 1677 } 1678 1679 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 1680 APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 1681 bool ignored; 1682 unsigned int numHex; 1683 const char *lead; 1684 1685 if (Fp->getType()->getTypeID() == Type::FloatTyID) { 1686 numHex = 8; 1687 lead = "0f"; 1688 APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored); 1689 } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 1690 numHex = 16; 1691 lead = "0d"; 1692 APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored); 1693 } else 1694 llvm_unreachable("unsupported fp type"); 1695 1696 APInt API = APF.bitcastToAPInt(); 1697 O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true); 1698 } 1699 1700 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 1701 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1702 O << CI->getValue(); 1703 return; 1704 } 1705 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 1706 printFPConstant(CFP, O); 1707 return; 1708 } 1709 if (isa<ConstantPointerNull>(CPV)) { 1710 O << "0"; 1711 return; 1712 } 1713 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1714 bool IsNonGenericPointer = false; 1715 if (GVar->getType()->getAddressSpace() != 0) { 1716 IsNonGenericPointer = true; 1717 } 1718 if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { 1719 O << "generic("; 1720 getSymbol(GVar)->print(O, MAI); 1721 O << ")"; 1722 } else { 1723 
// These utility functions assure we get the right sequence of bytes for a given
// type even for big-endian machines: bytes are always produced least
// significant first, independent of the host byte order.

/// Store the sizeof(T) low-order bytes of the integer \p val into \p p,
/// least significant byte first. \p p must have room for sizeof(T) bytes.
template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
  // Shift an unsigned copy so that no signed right shift is involved.
  uint64_t Bits = static_cast<uint64_t>(val);
  for (unsigned i = 0; i < sizeof(T); ++i) {
    p[i] = static_cast<unsigned char>(Bits);
    Bits >>= 8;
  }
}

/// Store the 4-byte object representation of \p val into \p p, least
/// significant byte first.
static void ConvertFloatToBytes(unsigned char *p, float val) {
  static_assert(sizeof(float) == sizeof(uint32_t), "unexpected float size");
  // memcpy is the well-defined way to read the bits of a float; going through
  // a casted integer pointer violates the strict-aliasing rule.
  uint32_t Bits;
  std::memcpy(&Bits, &val, sizeof(Bits));
  ConvertIntToBytes(p, Bits);
}

/// Store the 8-byte object representation of \p val into \p p, least
/// significant byte first.
static void ConvertDoubleToBytes(unsigned char *p, double val) {
  static_assert(sizeof(double) == sizeof(uint64_t), "unexpected double size");
  uint64_t Bits;
  std::memcpy(&Bits, &val, sizeof(Bits));
  ConvertIntToBytes(p, Bits);
}
(CPV->getType()->getTypeID()) { 1789 1790 case Type::IntegerTyID: { 1791 Type *ETy = CPV->getType(); 1792 if (ETy == Type::getInt8Ty(CPV->getContext())) { 1793 unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue(); 1794 ConvertIntToBytes<>(ptr, c); 1795 aggBuffer->addBytes(ptr, 1, Bytes); 1796 } else if (ETy == Type::getInt16Ty(CPV->getContext())) { 1797 short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue(); 1798 ConvertIntToBytes<>(ptr, int16); 1799 aggBuffer->addBytes(ptr, 2, Bytes); 1800 } else if (ETy == Type::getInt32Ty(CPV->getContext())) { 1801 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1802 int int32 = (int)(constInt->getZExtValue()); 1803 ConvertIntToBytes<>(ptr, int32); 1804 aggBuffer->addBytes(ptr, 4, Bytes); 1805 break; 1806 } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1807 if (const auto *constInt = dyn_cast<ConstantInt>( 1808 ConstantFoldConstant(Cexpr, DL))) { 1809 int int32 = (int)(constInt->getZExtValue()); 1810 ConvertIntToBytes<>(ptr, int32); 1811 aggBuffer->addBytes(ptr, 4, Bytes); 1812 break; 1813 } 1814 if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1815 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1816 aggBuffer->addSymbol(v, Cexpr->getOperand(0)); 1817 aggBuffer->addZeros(4); 1818 break; 1819 } 1820 } 1821 llvm_unreachable("unsupported integer const type"); 1822 } else if (ETy == Type::getInt64Ty(CPV->getContext())) { 1823 if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) { 1824 long long int64 = (long long)(constInt->getZExtValue()); 1825 ConvertIntToBytes<>(ptr, int64); 1826 aggBuffer->addBytes(ptr, 8, Bytes); 1827 break; 1828 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1829 if (const auto *constInt = dyn_cast<ConstantInt>( 1830 ConstantFoldConstant(Cexpr, DL))) { 1831 long long int64 = (long long)(constInt->getZExtValue()); 1832 ConvertIntToBytes<>(ptr, int64); 1833 aggBuffer->addBytes(ptr, 8, Bytes); 1834 break; 1835 } 1836 if 
(Cexpr->getOpcode() == Instruction::PtrToInt) { 1837 Value *v = Cexpr->getOperand(0)->stripPointerCasts(); 1838 aggBuffer->addSymbol(v, Cexpr->getOperand(0)); 1839 aggBuffer->addZeros(8); 1840 break; 1841 } 1842 } 1843 llvm_unreachable("unsupported integer const type"); 1844 } else 1845 llvm_unreachable("unsupported integer const type"); 1846 break; 1847 } 1848 case Type::HalfTyID: 1849 case Type::FloatTyID: 1850 case Type::DoubleTyID: { 1851 const auto *CFP = cast<ConstantFP>(CPV); 1852 Type *Ty = CFP->getType(); 1853 if (Ty == Type::getHalfTy(CPV->getContext())) { 1854 APInt API = CFP->getValueAPF().bitcastToAPInt(); 1855 uint16_t float16 = API.getLoBits(16).getZExtValue(); 1856 ConvertIntToBytes<>(ptr, float16); 1857 aggBuffer->addBytes(ptr, 2, Bytes); 1858 } else if (Ty == Type::getFloatTy(CPV->getContext())) { 1859 float float32 = (float) CFP->getValueAPF().convertToFloat(); 1860 ConvertFloatToBytes(ptr, float32); 1861 aggBuffer->addBytes(ptr, 4, Bytes); 1862 } else if (Ty == Type::getDoubleTy(CPV->getContext())) { 1863 double float64 = CFP->getValueAPF().convertToDouble(); 1864 ConvertDoubleToBytes(ptr, float64); 1865 aggBuffer->addBytes(ptr, 8, Bytes); 1866 } else { 1867 llvm_unreachable("unsupported fp const type"); 1868 } 1869 break; 1870 } 1871 case Type::PointerTyID: { 1872 if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1873 aggBuffer->addSymbol(GVar, GVar); 1874 } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1875 const Value *v = Cexpr->stripPointerCasts(); 1876 aggBuffer->addSymbol(v, Cexpr); 1877 } 1878 unsigned int s = DL.getTypeAllocSize(CPV->getType()); 1879 aggBuffer->addZeros(s); 1880 break; 1881 } 1882 1883 case Type::ArrayTyID: 1884 case Type::FixedVectorTyID: 1885 case Type::StructTyID: { 1886 if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) { 1887 int ElementSize = DL.getTypeAllocSize(CPV->getType()); 1888 bufferAggregateConstant(CPV, aggBuffer); 1889 if (Bytes > ElementSize) 1890 
aggBuffer->addZeros(Bytes - ElementSize); 1891 } else if (isa<ConstantAggregateZero>(CPV)) 1892 aggBuffer->addZeros(Bytes); 1893 else 1894 llvm_unreachable("Unexpected Constant type"); 1895 break; 1896 } 1897 1898 default: 1899 llvm_unreachable("unsupported type"); 1900 } 1901 } 1902 1903 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 1904 AggBuffer *aggBuffer) { 1905 const DataLayout &DL = getDataLayout(); 1906 int Bytes; 1907 1908 // Integers of arbitrary width 1909 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 1910 APInt Val = CI->getValue(); 1911 for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) { 1912 uint8_t Byte = Val.getLoBits(8).getZExtValue(); 1913 aggBuffer->addBytes(&Byte, 1, 1); 1914 Val.lshrInPlace(8); 1915 } 1916 return; 1917 } 1918 1919 // Old constants 1920 if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 1921 if (CPV->getNumOperands()) 1922 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 1923 bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 1924 return; 1925 } 1926 1927 if (const ConstantDataSequential *CDS = 1928 dyn_cast<ConstantDataSequential>(CPV)) { 1929 if (CDS->getNumElements()) 1930 for (unsigned i = 0; i < CDS->getNumElements(); ++i) 1931 bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 1932 aggBuffer); 1933 return; 1934 } 1935 1936 if (isa<ConstantStruct>(CPV)) { 1937 if (CPV->getNumOperands()) { 1938 StructType *ST = cast<StructType>(CPV->getType()); 1939 for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 1940 if (i == (e - 1)) 1941 Bytes = DL.getStructLayout(ST)->getElementOffset(0) + 1942 DL.getTypeAllocSize(ST) - 1943 DL.getStructLayout(ST)->getElementOffset(i); 1944 else 1945 Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) - 1946 DL.getStructLayout(ST)->getElementOffset(i); 1947 bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 1948 } 1949 } 1950 return; 1951 } 1952 
llvm_unreachable("unsupported constant type in printAggregateConstant()"); 1953 } 1954 1955 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly 1956 /// a copy from AsmPrinter::lowerConstant, except customized to only handle 1957 /// expressions that are representable in PTX and create 1958 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions. 1959 const MCExpr * 1960 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) { 1961 MCContext &Ctx = OutContext; 1962 1963 if (CV->isNullValue() || isa<UndefValue>(CV)) 1964 return MCConstantExpr::create(0, Ctx); 1965 1966 if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 1967 return MCConstantExpr::create(CI->getZExtValue(), Ctx); 1968 1969 if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { 1970 const MCSymbolRefExpr *Expr = 1971 MCSymbolRefExpr::create(getSymbol(GV), Ctx); 1972 if (ProcessingGeneric) { 1973 return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx); 1974 } else { 1975 return Expr; 1976 } 1977 } 1978 1979 const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 1980 if (!CE) { 1981 llvm_unreachable("Unknown constant value to lower!"); 1982 } 1983 1984 switch (CE->getOpcode()) { 1985 default: { 1986 // If the code isn't optimized, there may be outstanding folding 1987 // opportunities. Attempt to fold the expression using DataLayout as a 1988 // last resort before giving up. 1989 Constant *C = ConstantFoldConstant(CE, getDataLayout()); 1990 if (C != CE) 1991 return lowerConstantForGV(C, ProcessingGeneric); 1992 1993 // Otherwise report the problem to the user. 1994 std::string S; 1995 raw_string_ostream OS(S); 1996 OS << "Unsupported expression in static initializer: "; 1997 CE->printAsOperand(OS, /*PrintType=*/false, 1998 !MF ? 
nullptr : MF->getFunction().getParent()); 1999 report_fatal_error(OS.str()); 2000 } 2001 2002 case Instruction::AddrSpaceCast: { 2003 // Strip the addrspacecast and pass along the operand 2004 PointerType *DstTy = cast<PointerType>(CE->getType()); 2005 if (DstTy->getAddressSpace() == 0) { 2006 return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true); 2007 } 2008 std::string S; 2009 raw_string_ostream OS(S); 2010 OS << "Unsupported expression in static initializer: "; 2011 CE->printAsOperand(OS, /*PrintType=*/ false, 2012 !MF ? nullptr : MF->getFunction().getParent()); 2013 report_fatal_error(OS.str()); 2014 } 2015 2016 case Instruction::GetElementPtr: { 2017 const DataLayout &DL = getDataLayout(); 2018 2019 // Generate a symbolic expression for the byte address 2020 APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); 2021 cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); 2022 2023 const MCExpr *Base = lowerConstantForGV(CE->getOperand(0), 2024 ProcessingGeneric); 2025 if (!OffsetAI) 2026 return Base; 2027 2028 int64_t Offset = OffsetAI.getSExtValue(); 2029 return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), 2030 Ctx); 2031 } 2032 2033 case Instruction::Trunc: 2034 // We emit the value and depend on the assembler to truncate the generated 2035 // expression properly. This is important for differences between 2036 // blockaddress labels. Since the two labels are in the same function, it 2037 // is reasonable to treat their delta as a 32-bit value. 2038 LLVM_FALLTHROUGH; 2039 case Instruction::BitCast: 2040 return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2041 2042 case Instruction::IntToPtr: { 2043 const DataLayout &DL = getDataLayout(); 2044 2045 // Handle casts to pointers by changing them into casts to the appropriate 2046 // integer type. This promotes constant folding and simplifies this code. 
2047 Constant *Op = CE->getOperand(0); 2048 Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), 2049 false/*ZExt*/); 2050 return lowerConstantForGV(Op, ProcessingGeneric); 2051 } 2052 2053 case Instruction::PtrToInt: { 2054 const DataLayout &DL = getDataLayout(); 2055 2056 // Support only foldable casts to/from pointers that can be eliminated by 2057 // changing the pointer to the appropriately sized integer type. 2058 Constant *Op = CE->getOperand(0); 2059 Type *Ty = CE->getType(); 2060 2061 const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric); 2062 2063 // We can emit the pointer value into this slot if the slot is an 2064 // integer slot equal to the size of the pointer. 2065 if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) 2066 return OpExpr; 2067 2068 // Otherwise the pointer is smaller than the resultant integer, mask off 2069 // the high bits so we are sure to get a proper truncation if the input is 2070 // a constant expr. 2071 unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); 2072 const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); 2073 return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); 2074 } 2075 2076 // The MC library also has a right-shift operator, but it isn't consistently 2077 // signed or unsigned between different targets. 
2078 case Instruction::Add: { 2079 const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 2080 const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric); 2081 switch (CE->getOpcode()) { 2082 default: llvm_unreachable("Unknown binary operator constant cast expr"); 2083 case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); 2084 } 2085 } 2086 } 2087 } 2088 2089 // Copy of MCExpr::print customized for NVPTX 2090 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { 2091 switch (Expr.getKind()) { 2092 case MCExpr::Target: 2093 return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI); 2094 case MCExpr::Constant: 2095 OS << cast<MCConstantExpr>(Expr).getValue(); 2096 return; 2097 2098 case MCExpr::SymbolRef: { 2099 const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr); 2100 const MCSymbol &Sym = SRE.getSymbol(); 2101 Sym.print(OS, MAI); 2102 return; 2103 } 2104 2105 case MCExpr::Unary: { 2106 const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr); 2107 switch (UE.getOpcode()) { 2108 case MCUnaryExpr::LNot: OS << '!'; break; 2109 case MCUnaryExpr::Minus: OS << '-'; break; 2110 case MCUnaryExpr::Not: OS << '~'; break; 2111 case MCUnaryExpr::Plus: OS << '+'; break; 2112 } 2113 printMCExpr(*UE.getSubExpr(), OS); 2114 return; 2115 } 2116 2117 case MCExpr::Binary: { 2118 const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr); 2119 2120 // Only print parens around the LHS if it is non-trivial. 2121 if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) || 2122 isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) { 2123 printMCExpr(*BE.getLHS(), OS); 2124 } else { 2125 OS << '('; 2126 printMCExpr(*BE.getLHS(), OS); 2127 OS<< ')'; 2128 } 2129 2130 switch (BE.getOpcode()) { 2131 case MCBinaryExpr::Add: 2132 // Print "X-42" instead of "X+-42". 
2133 if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) { 2134 if (RHSC->getValue() < 0) { 2135 OS << RHSC->getValue(); 2136 return; 2137 } 2138 } 2139 2140 OS << '+'; 2141 break; 2142 default: llvm_unreachable("Unhandled binary operator"); 2143 } 2144 2145 // Only print parens around the LHS if it is non-trivial. 2146 if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) { 2147 printMCExpr(*BE.getRHS(), OS); 2148 } else { 2149 OS << '('; 2150 printMCExpr(*BE.getRHS(), OS); 2151 OS << ')'; 2152 } 2153 return; 2154 } 2155 } 2156 2157 llvm_unreachable("Invalid expression kind!"); 2158 } 2159 2160 /// PrintAsmOperand - Print out an operand for an inline asm expression. 2161 /// 2162 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 2163 const char *ExtraCode, raw_ostream &O) { 2164 if (ExtraCode && ExtraCode[0]) { 2165 if (ExtraCode[1] != 0) 2166 return true; // Unknown modifier. 2167 2168 switch (ExtraCode[0]) { 2169 default: 2170 // See if this is a generic print operand 2171 return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); 2172 case 'r': 2173 break; 2174 } 2175 } 2176 2177 printOperand(MI, OpNo, O); 2178 2179 return false; 2180 } 2181 2182 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, 2183 unsigned OpNo, 2184 const char *ExtraCode, 2185 raw_ostream &O) { 2186 if (ExtraCode && ExtraCode[0]) 2187 return true; // Unknown modifier 2188 2189 O << '['; 2190 printMemOperand(MI, OpNo, O); 2191 O << ']'; 2192 2193 return false; 2194 } 2195 2196 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, 2197 raw_ostream &O) { 2198 const MachineOperand &MO = MI->getOperand(opNum); 2199 switch (MO.getType()) { 2200 case MachineOperand::MO_Register: 2201 if (Register::isPhysicalRegister(MO.getReg())) { 2202 if (MO.getReg() == NVPTX::VRDepot) 2203 O << DEPOTNAME << getFunctionNumber(); 2204 else 2205 O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 2206 } else { 
2207 emitVirtualRegister(MO.getReg(), O); 2208 } 2209 break; 2210 2211 case MachineOperand::MO_Immediate: 2212 O << MO.getImm(); 2213 break; 2214 2215 case MachineOperand::MO_FPImmediate: 2216 printFPConstant(MO.getFPImm(), O); 2217 break; 2218 2219 case MachineOperand::MO_GlobalAddress: 2220 PrintSymbolOperand(MO, O); 2221 break; 2222 2223 case MachineOperand::MO_MachineBasicBlock: 2224 MO.getMBB()->getSymbol()->print(O, MAI); 2225 break; 2226 2227 default: 2228 llvm_unreachable("Operand type not supported."); 2229 } 2230 } 2231 2232 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum, 2233 raw_ostream &O, const char *Modifier) { 2234 printOperand(MI, opNum, O); 2235 2236 if (Modifier && strcmp(Modifier, "add") == 0) { 2237 O << ", "; 2238 printOperand(MI, opNum + 1, O); 2239 } else { 2240 if (MI->getOperand(opNum + 1).isImm() && 2241 MI->getOperand(opNum + 1).getImm() == 0) 2242 return; // don't print ',0' or '+0' 2243 O << "+"; 2244 printOperand(MI, opNum + 1, O); 2245 } 2246 } 2247 2248 // Force static initialization. 2249 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter() { 2250 RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32()); 2251 RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64()); 2252 } 2253