//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCallingConv.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: Remove this once soft-float is supported.
static cl::opt<bool> DisablePPCFloatInVariadic("disable-ppc-float-in-variadic",
  cl::desc("disable saving float registers for va_start on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
  cl::desc("disable setting the node scheduling preference to ILP on PPC"),
  cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
  cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM)
    : TargetLowering(TM),
      Subtarget(*TM.getSubtargetImpl()) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-increment loads and stores.
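  // (These correspond to the update-form instructions, e.g. lbzu, lhzu,
  // lwzu, ldu and stbu, sthu, stwu, stdu, which write the new effective
  // address back into the base register.)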
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType (ISD::UINT_TO_FP, MVT::i1,
                         isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
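  // (With those expanded as well, the legalizer in effect rewrites a % b as
  // a - (a / b) * b using the plain divide and multiply instructions.)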
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN,    MVT::f64, Expand);
  setOperationAction(ISD::FCOS,    MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM,    MVT::f64, Expand);
  setOperationAction(ISD::FPOW,    MVT::f64, Expand);
  setOperationAction(ISD::FMA,     MVT::f64, Legal);
  setOperationAction(ISD::FSIN,    MVT::f32, Expand);
  setOperationAction(ISD::FCOS,    MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM,    MVT::f32, Expand);
  setOperationAction(ISD::FPOW,    MVT::f32, Expand);
  setOperationAction(ISD::FMA,     MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget.hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
  // handling; they are a light-weight setjmp/longjmp replacement used to
  // support continuations, user-level threading, and the like. As a result,
  // no other SjLj exception interfaces are implemented; please don't build
  // your own exception handling on top of them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress,    MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,     MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,     MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,        MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress,    MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,     MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,     MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,        MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
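      // (For example, a va_arg of i8 is read from a full 8-byte argument
      // slot and then truncated; each scalar vararg occupies one doubleword
      // in the save area.)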
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
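  // (FPCVT refers to the ISA 2.06 conversion instructions, e.g. fcfids,
  // fcfidus, fctiwuz and fctiduz, which directly handle the unsigned and
  // single-precision cases.)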
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType (ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType (ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType (ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType (ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
      setOperationAction(ISD::VSELECT, MVT::v8i16, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
      setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      // VSX v2i64 only supports non-arithmetic operations.
      setOperationAction(ISD::ADD, MVT::v2i64, Expand);
      setOperationAction(ISD::SUB, MVT::v2i64, Expand);

      setOperationAction(ISD::SHL, MVT::v2i64, Expand);
      setOperationAction(ISD::SRA, MVT::v2i64, Expand);
      setOperationAction(ISD::SRL, MVT::v2i64, Expand);

      setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Vector operation legalization checks the result type of
      // SIGN_EXTEND_INREG, overall legalization checks the inner type.
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
      setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Altivec instructions set fields to all zeros or all ones.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
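    // (The default names for these would be the compiler-rt/libgcc helpers
    // __ashlti3, __lshrti3 and __ashrti3, which 32-bit PowerPC runtimes do
    // not provide, so null them out.)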
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits())
    setHasMultipleConditionRegisters();

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  setInsertFencesForAtomic(true);

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
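  // (MaxStoresPerMemcpy and friends are the maximum number of individual
  // store instructions SelectionDAG may emit for a memset/memcpy/memmove
  // before falling back to the library call.)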
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  // Darwin passes everything on a 4-byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary; everything
  // else is passed on an 8-byte boundary on PPC64 and a 4-byte one on PPC32.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return nullptr;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::LOAD:            return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::CALL_TLS:        return "PPCISD::CALL_TLS";
  case PPCISD::CALL_NOP_TLS:    return "PPCISD::CALL_NOP_TLS";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
  case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
  case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  }
}

EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
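/// For example, on a big-endian target the two-input form keeps the low
/// halfword of each source word, i.e. the v16i8 mask
/// {2,3, 6,7, 10,11, 14,15, 18,19, 22,23, 26,27, 30,31}.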
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getSubtarget().getDataLayout()->isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
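/// For example, the big-endian two-input vmrghb pattern interleaves the
/// first eight bytes of the two source vectors, i.e. the v16i8 mask
/// {0,16, 1,17, 2,18, 3,19, 4,20, 5,21, 6,22, 7,23}.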
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getSubtarget().getDataLayout()->isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;

  ShiftAmt -= i;
  bool isLE = DAG.getTarget().getSubtargetImpl()->getDataLayout()->
    isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (ShuffleKind == 2 && isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte
  // element splatted with a v16i8 mask.
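  // (For example, splatting halfword element 3 appears here as the v16i8
  // mask {6,7, 6,7, 6,7, 6,7, 6,7, 6,7, 6,7, 6,7}.)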
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getSubtarget().getDataLayout()->isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, MVT::i32);   // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                                // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal. Continue doing this until we
  // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
        (Value & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match; zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5-bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
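/// For example, 0xFFFFFFFFFFFF8000 (-32768) qualifies, while 0x8000 (32768)
/// does not: sign-extending the low 16 bits reproduces the former value but
/// not the latter.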
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}


/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.computeKnownBits(N.getOperand(0),
                         LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.computeKnownBits(N.getOperand(1),
                           RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64 0, i64* %b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  unsigned Align = MFI->getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.
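    // (For example, a load from absolute address 4096 can be selected as
    // "lwz rD, 4096(0)", because encoding r0 in the base position of a
    // D-form memory access reads as the literal value zero.)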

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0".
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address. This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// getPreIndexedAddressParts - Returns true if the node's address can be
/// legally represented as a pre-indexed load/store address; if so, the base
/// pointer, the offset, and the addressing mode are returned by reference.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

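  // As a sketch of what this enables (registers arbitrary): a loop doing
  // "lwz r5, 4(r4); addi r4, r4, 4" each iteration can instead be selected
  // as the single update-form "lwzu r5, 4(r4)", which loads and advances the
  // base register in one instruction.
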
  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored. Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
                               unsigned &LoOpFlags,
                               const GlobalValue *GV = nullptr) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Don't use the pic base if not in PIC relocation model.
  bool isPIC = TM.getRelocationModel() == Reloc::PIC_;

  if (isPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr,
  // make sure that instruction lowering adds it.
  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }

  return isPIC;
}

static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  SDLoc DL(HiPart);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);

  if (isPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
                                           PPCII::MO_PIC_FLAG);
    SDLoc DL(CP);
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
                       DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
  }

  SDValue CPIHi =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the jump table is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);

  if (isPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    SDLoc DL(GA);
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, PtrVT, GA,
                       DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT));
  }

  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

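  // Block addresses arise from IR such as (illustrative):
  //   %ba = blockaddress(@f, %target)
  // taken by indirectbr; they are materialized like the other symbolic
  // addresses handled below.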
  // 64-bit SVR4 ABI code is always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(BASDN), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}

// Generate a call to __tls_get_addr for the given GOT entry Op.
std::pair<SDValue,SDValue>
PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
                                SelectionDAG &DAG) const {

  Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Op;
  Entry.Ty = IntPtrTy;
  Args.push_back(Entry);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
    .setCallee(CallingConv::C, IntPtrTy,
               DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
               std::move(Args), 0);

  return LowerCallTo(CLI);
}

SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {

  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form. Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy();
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction()->getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                     is64bit ? MVT::i64 : MVT::i32);
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                             PPCII::MO_TLSGD);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::Small)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
                                   GOTPtr, TGA);
    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
    return CallResult.first;
  }

  if (Model == TLSModel::LocalDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                             PPCII::MO_TLSLD);
    SDValue GOTPtr;
    if (is64bit) {
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::Small)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
                                   GOTPtr, TGA);
    std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
    SDValue TLSAddr = CallResult.first;
    SDValue Chain = CallResult.second;
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
                                      Chain, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}

SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

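  // One possible 64-bit SVR4 result for a reference to a global 'g'
  // (illustrative; the register is chosen by allocation):
  //   ld r3, g@toc(r2)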
  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);

  if (isPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i32, GA,
                       DAG.getNode(PPCISD::GlobalBaseReg, DL, MVT::i32));
  }

  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do
  // an extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
                      false, false, false, 0);
  return Ptr;
}

SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons; everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized. FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit. The
  // normal approach here uses sub to do this instead of xor. Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                              Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDValue();
}

SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                                      const PPCSubtarget &Subtarget) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, false, 0);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even.
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, MVT::i32));
    // Align GprIndex to be even if it isn't.
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // The fpr index is 1 byte after the gpr index.
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, false, 0);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, MVT::i32));

  // areas
  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
                                     MachinePointerInfo(), false, false,
                                     false, 0);
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
                                    MachinePointerInfo(), false, false,
                                    false, 0);
  InChain = RegSaveArea.getValue(1);

  // Select overflow_area if index >= 8.
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, MVT::i32), ISD::SETLT);

  // Adjustment constant: gpr_index * 4 (or fpr_index * 8).
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating-point types are 32 bytes into the RegSaveArea.
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, MVT::i32));

  // Increase {f,g}pr_index by 1 (or 2 if VT is i64).
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
                                                   MVT::i32));

  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV),
                              MVT::i8, false, false, 0);

  // Determine whether we should load from reg_save_area or overflow_area.
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg,
                               OverflowArea);

  // Increase overflow_area by 4/8 if the gpr/fpr index is >= 8.
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                          MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
                              OverflowAreaPtr,
                              MachinePointerInfo(),
                              MVT::i32, false, false, 0);

  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(),
                     false, false, false, 0);
}

SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
                                       const PPCSubtarget &Subtarget) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes.
  return DAG.getMemcpy(Op.getOperand(0), Op,
                       Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, MVT::i32), 8, false, true,
                       MachinePointerInfo(), MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  return Op.getOperand(0);
}

SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy =
    DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType(
                                                           *DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg).
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain)
    .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
               DAG.getExternalSymbol("__trampoline_setup", PtrVT),
               std::move(Args), 0);

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}

SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
                                        const PPCSubtarget &Subtarget) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  SDLoc dl(Op);

  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
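    // In C terms (an illustrative sketch): va_list here is a plain pointer,
    // so va_start amounts to "*ap = address_of_argument_area", a single store.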
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV),
                        false, false, 0);
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];


  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32);


  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR,
                                         Op.getOperand(1),
                                         MachinePointerInfo(SV),
                                         MVT::i8, false, false, 0);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
    DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                      MachinePointerInfo(SV, nextOffset), MVT::i8,
                      false, false, 0);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore =
    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                 MachinePointerInfo(SV, nextOffset),
                 false, false, 0);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset),
                      false, false, 0);

}

#include "PPCGenCallingConv.inc"

// Function whose sole purpose is to kill compiler warnings
// stemming from unused functions included from PPCGenCallingConv.inc.
CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
  return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
}

bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  return true;
}

bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                             MVT &LocVT,
                                             CCValAssign::LocInfo &LocInfo,
                                             ISD::ArgFlagsTy &ArgFlags,
                                             CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  const unsigned NumArgRegs = array_lengthof(ArgRegs);

  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);

  // Skip one register if the first unallocated register has an even register
  // number and there are still argument registers available which have not
  // been allocated yet. RegNum is actually an index into ArgRegs, which means
  // we need to skip a register if RegNum is odd.
  if (RegNum != NumArgRegs && RegNum % 2 == 1) {
    State.AllocateReg(ArgRegs[RegNum]);
  }

  // Always return false here, as this function only makes sure that the first
  // unallocated register has an odd register number and does not actually
  // allocate a register for the current argument.
  return false;
}

bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                               MVT &LocVT,
                                               CCValAssign::LocInfo &LocInfo,
                                               ISD::ArgFlagsTy &ArgFlags,
                                               CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8
  };

  const unsigned NumArgRegs = array_lengthof(ArgRegs);

  unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs);

  // If there is only one floating-point register left we need to put both f64
  // values of a split ppc_fp128 value on the stack.
  if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) {
    State.AllocateReg(ArgRegs[RegNum]);
  }

  // Always return false here, as this function only makes sure that the two
  // f64 values a ppc_fp128 value is split into are both passed in registers
  // or both passed on the stack and does not actually allocate a register
  // for the current argument.
  return false;
}

/// GetFPR - Get the set of FP registers that should be allocated for
/// arguments on Darwin.
static const MCPhysReg *GetFPR() {
  static const MCPhysReg FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };

  return FPR;
}

/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                       unsigned PtrByteSize) {
  unsigned ArgSize = ArgVT.getStoreSize();
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();

  // Round up to multiples of the pointer size, except for array members,
  // which are always packed.
  if (!Flags.isInConsecutiveRegs())
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}

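// For example (assuming PtrByteSize == 8 as on PPC64): a lone i32 scalar
// reserves a full 8-byte slot, while the f32 members of a homogeneous float
// aggregate marked InConsecutiveRegs stay packed at 4 bytes each.
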
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
                                            ISD::ArgFlagsTy Flags,
                                            unsigned PtrByteSize) {
  unsigned Align = PtrByteSize;

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
    Align = 16;

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    unsigned BVAlign = Flags.getByValAlign();
    if (BVAlign > PtrByteSize) {
      if (BVAlign % PtrByteSize != 0)
        llvm_unreachable(
          "ByVal alignment is not a multiple of the pointer size");

      Align = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type. (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Align = OrigVT.getStoreSize();
    else
      Align = ArgVT.getStoreSize();
  }

  return Align;
}

/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers). ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize,
                                   unsigned LinkageSize,
                                   unsigned ParamAreaSize,
                                   unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  unsigned Align =
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory).
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}

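// A worked example (illustrative; ELFv2 has LinkageSize == 32 and
// ParamAreaSize == 64 here): eight i64 arguments advance ArgOffset from 32
// to 96 while staying in GPRs; a ninth starts at offset 96, which equals
// LinkageSize + ParamAreaSize, so it lives in memory. For a ninth f64,
// however, CalculateStackSlotUsed still returns false as long as an FPR
// remains available.
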
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const TargetMachine &Target,
                                     unsigned NumBytes) {
  unsigned TargetAlign =
    Target.getSubtargetImpl()->getFrameLowering()->getStackAlignment();
  unsigned AlignMask = TargetAlign - 1;
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  return NumBytes;
}

SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
    else
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
  } else {
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                       dl, DAG, InVals);
  }
}

SDValue
PPCTargetLowering::LowerFormalArguments_32SVR4(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false, false);
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VSHRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual
      // ones.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                            ValVT == MVT::i1 ? MVT::i32 : ValVT);

      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      unsigned ArgSize = VA.getLocVT().getStoreSize();
      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                      isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                   MachinePointerInfo(),
                                   false, false, false, 0));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;
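
  // Illustrative: for 'void f(int a, ...)', r3 already holds 'a', so the
  // bookkeeping below records a gpr index of 1; r3..r10 (and f1..f8, unless
  // float saving is disabled) are then spilled to the register save area so
  // that va_arg can walk them.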

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
    if (DisablePPCFloatInVariadic)
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
                                                          NumGPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
                                                          NumFPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                             CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store.
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR
    // bit 6 is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store.
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}

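// For instance (illustrative): an i32 argument carrying the signext
// attribute arrives in the low half of an X register; extendArgForPPC64
// wraps it in AssertSext before truncating, so later combines know the high
// bits are sign bits.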
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote the
// value to MVT::i64 and then truncate to the correct register size.
SDValue
PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
                                     SelectionDAG &DAG, SDValue ArgVal,
                                     SDLoc dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}

SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;

  unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
                                                          isELFv2ABI);

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const MCPhysReg *FPR = GetFPR();

  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  static const MCPhysReg VSRH[] = {
    PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
    PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs = array_lengthof(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame. In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i)
    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;

  // Add DAG nodes to load the arguments or copy them out of registers. On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.
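  //
  // As an illustration (ELFv2, LinkageSize == 32): for
  //   void f(i32 %a, double %b, <4 x i32> %c)
  // the walk below assigns %a to r3 at offset 32, %b to f1 at offset 40, and
  // %c to v2 at offset 48 (16-byte aligned); loads are emitted only once a
  // register class runs out.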

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx, FPR_idx = 0, VR_idx = 0;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // Respect alignment of argument on the stack.
    unsigned Align =
      CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
    ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
    unsigned CurArgOffset = ArgOffset;

    // Compute the GPR index associated with the argument offset.
    GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
    GPR_idx = std::min(GPR_idx, Num_GPR_Regs);

    // FIXME: the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size; ArgSize is rounded up to a multiple of
      // registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers. Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc. However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument. If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save area anyway, we can refer
      // directly to the caller's stack frame. Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI->CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(FuncArg),
                                      ObjType, false, false, 0);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(FuncArg),
                                 false, false, 0);
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(FuncArg, j),
                                     false, false, 0);
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      // These can be scalar arguments or elements of an integer array type
      // passed directly. Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // the value to MVT::i64 and then truncate to the correct register
          // size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly. The latter are used to implement ELFv2 homogeneous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ?
                                            &PPC::VSFRCRegClass :
                                            &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs) {
        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array. Otherwise, each float takes 8 bytes.
      ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
      ArgOffset += ArgSize;
      if (Flags.isInConsecutiveRegsLast())
        ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
      // These can be scalar arguments or elements of a vector array type
      // passed directly. The latter are used to implement ELFv2 homogeneous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
                        MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
                        MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        needsLoad = true;
      }
      ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized functions' reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI->CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

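    // Illustrative: for 'int sum(int n, ...)', ArgOffset is LinkageSize + 8
    // by this point, so the loop below starts at GPR_idx == 1 and spills
    // r4..r10 to consecutive doublewords of the parameter save area.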
2935 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 2936 GPR_idx < Num_GPR_Regs; ++GPR_idx) { 2937 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2938 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2939 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2940 MachinePointerInfo(), false, false, 0); 2941 MemOps.push_back(Store); 2942 // Increment the address by four for the next argument to store 2943 SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT); 2944 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2945 } 2946 } 2947 2948 if (!MemOps.empty()) 2949 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps); 2950 2951 return Chain; 2952 } 2953 2954 SDValue 2955 PPCTargetLowering::LowerFormalArguments_Darwin( 2956 SDValue Chain, 2957 CallingConv::ID CallConv, bool isVarArg, 2958 const SmallVectorImpl<ISD::InputArg> 2959 &Ins, 2960 SDLoc dl, SelectionDAG &DAG, 2961 SmallVectorImpl<SDValue> &InVals) const { 2962 // TODO: add description of PPC stack frame format, or at least some docs. 2963 // 2964 MachineFunction &MF = DAG.getMachineFunction(); 2965 MachineFrameInfo *MFI = MF.getFrameInfo(); 2966 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2967 2968 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2969 bool isPPC64 = PtrVT == MVT::i64; 2970 // Potential tail calls could cause overwriting of argument stack slots. 2971 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2972 (CallConv == CallingConv::Fast)); 2973 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2974 2975 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true, 2976 false); 2977 unsigned ArgOffset = LinkageSize; 2978 // Area that is at least reserved in caller of this function. 2979 unsigned MinReservedArea = ArgOffset; 2980 2981 static const MCPhysReg GPR_32[] = { // 32-bit registers. 2982 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2983 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2984 }; 2985 static const MCPhysReg GPR_64[] = { // 64-bit registers. 2986 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2987 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2988 }; 2989 2990 static const MCPhysReg *FPR = GetFPR(); 2991 2992 static const MCPhysReg VR[] = { 2993 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2994 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2995 }; 2996 2997 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 2998 const unsigned Num_FPR_Regs = 13; 2999 const unsigned Num_VR_Regs = array_lengthof( VR); 3000 3001 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3002 3003 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 3004 3005 // In 32-bit non-varargs functions, the stack space for vectors is after the 3006 // stack space for non-vectors. We do not use this space unless we have 3007 // too many vectors to fit in registers, something that only occurs in 3008 // constructed examples:), but we have to walk the arglist to figure 3009 // that out...for the pathological case, compute VecArgOffset as the 3010 // start of the vector parameter area. Computing VecArgOffset is the 3011 // entire point of the following loop. 3012 unsigned VecArgOffset = ArgOffset; 3013 if (!isVarArg && !isPPC64) { 3014 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 3015 ++ArgNo) { 3016 EVT ObjectVT = Ins[ArgNo].VT; 3017 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3018 3019 if (Flags.isByVal()) { 3020 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 
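// For illustration: with 4-byte registers, a 6-byte byval object here has
// ObjSize == 6 and ArgSize == ((6 + 3) / 4) * 4 == 8.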
3021 unsigned ObjSize = Flags.getByValSize(); 3022 unsigned ArgSize = 3023 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3024 VecArgOffset += ArgSize; 3025 continue; 3026 } 3027 3028 switch(ObjectVT.getSimpleVT().SimpleTy) { 3029 default: llvm_unreachable("Unhandled argument type!"); 3030 case MVT::i1: 3031 case MVT::i32: 3032 case MVT::f32: 3033 VecArgOffset += 4; 3034 break; 3035 case MVT::i64: // PPC64 3036 case MVT::f64: 3037 // FIXME: We are guaranteed to be !isPPC64 at this point. 3038 // Does MVT::i64 apply? 3039 VecArgOffset += 8; 3040 break; 3041 case MVT::v4f32: 3042 case MVT::v4i32: 3043 case MVT::v8i16: 3044 case MVT::v16i8: 3045 // Nothing to do, we're only looking at Nonvector args here. 3046 break; 3047 } 3048 } 3049 } 3050 // We've found where the vector parameter area in memory is. Skip the 3051 // first 12 parameters; these don't use that memory. 3052 VecArgOffset = ((VecArgOffset+15)/16)*16; 3053 VecArgOffset += 12*16; 3054 3055 // Add DAG nodes to load the arguments or copy them out of registers. On 3056 // entry to a function on PPC, the arguments start after the linkage area, 3057 // although the first ones are often in registers. 3058 3059 SmallVector<SDValue, 8> MemOps; 3060 unsigned nAltivecParamsAtEnd = 0; 3061 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 3062 unsigned CurArgIdx = 0; 3063 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 3064 SDValue ArgVal; 3065 bool needsLoad = false; 3066 EVT ObjectVT = Ins[ArgNo].VT; 3067 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 3068 unsigned ArgSize = ObjSize; 3069 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 3070 if (Ins[ArgNo].isOrigArg()) { 3071 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx); 3072 CurArgIdx = Ins[ArgNo].getOrigArgIndex(); 3073 } 3074 unsigned CurArgOffset = ArgOffset; 3075 3076 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 3077 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 3078 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 3079 if (isVarArg || isPPC64) { 3080 MinReservedArea = ((MinReservedArea+15)/16)*16; 3081 MinReservedArea += CalculateStackSlotSize(ObjectVT, 3082 Flags, 3083 PtrByteSize); 3084 } else nAltivecParamsAtEnd++; 3085 } else 3086 // Calculate min reserved area. 3087 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 3088 Flags, 3089 PtrByteSize); 3090 3091 // FIXME the codegen can be much improved in some cases. 3092 // We do not have to keep everything in memory. 3093 if (Flags.isByVal()) { 3094 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit"); 3095 3096 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 3097 ObjSize = Flags.getByValSize(); 3098 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 3099 // Objects of size 1 and 2 are right justified, everything else is 3100 // left justified. This means the memory address is adjusted forwards. 3101 if (ObjSize==1 || ObjSize==2) { 3102 CurArgOffset = CurArgOffset + (4 - ObjSize); 3103 } 3104 // The value of the object is its address. 
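// For illustration: a 1-byte byval object was adjusted to CurArgOffset + 3
// above, so a byte load from the object's address sees the value that the
// caller right-justified in the slot.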
3105 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true); 3106 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3107 InVals.push_back(FIN); 3108 if (ObjSize==1 || ObjSize==2) { 3109 if (GPR_idx != Num_GPR_Regs) { 3110 unsigned VReg; 3111 if (isPPC64) 3112 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3113 else 3114 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3115 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3116 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 3117 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 3118 MachinePointerInfo(FuncArg), 3119 ObjType, false, false, 0); 3120 MemOps.push_back(Store); 3121 ++GPR_idx; 3122 } 3123 3124 ArgOffset += PtrByteSize; 3125 3126 continue; 3127 } 3128 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 3129 // Store whatever pieces of the object are in registers 3130 // to memory. ArgOffset will be the address of the beginning 3131 // of the object. 3132 if (GPR_idx != Num_GPR_Regs) { 3133 unsigned VReg; 3134 if (isPPC64) 3135 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3136 else 3137 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3138 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 3139 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 3140 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 3141 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 3142 MachinePointerInfo(FuncArg, j), 3143 false, false, 0); 3144 MemOps.push_back(Store); 3145 ++GPR_idx; 3146 ArgOffset += PtrByteSize; 3147 } else { 3148 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 3149 break; 3150 } 3151 } 3152 continue; 3153 } 3154 3155 switch (ObjectVT.getSimpleVT().SimpleTy) { 3156 default: llvm_unreachable("Unhandled argument type!"); 3157 case MVT::i1: 3158 case MVT::i32: 3159 if (!isPPC64) { 3160 if (GPR_idx != Num_GPR_Regs) { 3161 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 3162 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 3163 3164 if (ObjectVT == MVT::i1) 3165 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal); 3166 3167 ++GPR_idx; 3168 } else { 3169 needsLoad = true; 3170 ArgSize = PtrByteSize; 3171 } 3172 // All int arguments reserve stack space in the Darwin ABI. 3173 ArgOffset += PtrByteSize; 3174 break; 3175 } 3176 // FALLTHROUGH 3177 case MVT::i64: // PPC64 3178 if (GPR_idx != Num_GPR_Regs) { 3179 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 3180 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 3181 3182 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1) 3183 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 3184 // value to MVT::i64 and then truncate to the correct register size. 3185 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 3186 3187 ++GPR_idx; 3188 } else { 3189 needsLoad = true; 3190 ArgSize = PtrByteSize; 3191 } 3192 // All int arguments reserve stack space in the Darwin ABI. 3193 ArgOffset += 8; 3194 break; 3195 3196 case MVT::f32: 3197 case MVT::f64: 3198 // Every 4 bytes of argument space consumes one of the GPRs available for 3199 // argument passing. 
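// For illustration only (Darwin rules as implemented below): in a 32-bit
// function taking (float a, double b), a arrives in F1 and shadows R3,
// while b arrives in F2 and shadows both R4 and R5.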
3200 if (GPR_idx != Num_GPR_Regs) {
3201 ++GPR_idx;
3202 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
3203 ++GPR_idx;
3204 }
3205 if (FPR_idx != Num_FPR_Regs) {
3206 unsigned VReg;
3207
3208 if (ObjectVT == MVT::f32)
3209 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
3210 else
3211 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);
3212
3213 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3214 ++FPR_idx;
3215 } else {
3216 needsLoad = true;
3217 }
3218
3219 // All FP arguments reserve stack space in the Darwin ABI.
3220 ArgOffset += isPPC64 ? 8 : ObjSize;
3221 break;
3222 case MVT::v4f32:
3223 case MVT::v4i32:
3224 case MVT::v8i16:
3225 case MVT::v16i8:
3226 // Note that vector arguments in registers don't reserve stack space,
3227 // except in varargs functions.
3228 if (VR_idx != Num_VR_Regs) {
3229 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
3230 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
3231 if (isVarArg) {
3232 while ((ArgOffset % 16) != 0) {
3233 ArgOffset += PtrByteSize;
3234 if (GPR_idx != Num_GPR_Regs)
3235 GPR_idx++;
3236 }
3237 ArgOffset += 16;
3238 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
3239 }
3240 ++VR_idx;
3241 } else {
3242 if (!isVarArg && !isPPC64) {
3243 // Vectors go after all the nonvectors.
3244 CurArgOffset = VecArgOffset;
3245 VecArgOffset += 16;
3246 } else {
3247 // Vectors are aligned.
3248 ArgOffset = ((ArgOffset+15)/16)*16;
3249 CurArgOffset = ArgOffset;
3250 ArgOffset += 16;
3251 }
3252 needsLoad = true;
3253 }
3254 break;
3255 }
3256
3257 // We need to load the argument to a virtual register if we determined above
3258 // that we ran out of physical registers of the appropriate type.
3259 if (needsLoad) {
3260 int FI = MFI->CreateFixedObject(ObjSize,
3261 CurArgOffset + (ArgSize - ObjSize),
3262 isImmutable);
3263 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3264 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
3265 false, false, false, 0);
3266 }
3267
3268 InVals.push_back(ArgVal);
3269 }
3270
3271 // Allow for Altivec parameters at the end, if needed.
3272 if (nAltivecParamsAtEnd) {
3273 MinReservedArea = ((MinReservedArea+15)/16)*16;
3274 MinReservedArea += 16*nAltivecParamsAtEnd;
3275 }
3276
3277 // Area that is at least reserved in the caller of this function.
3278 MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);
3279
3280 // Set the size that is at least reserved in the caller of this function.
3281 // Tail call optimized functions' reserved stack space needs to be aligned
3282 // so that taking the difference between two stack areas will result in an
3283 // aligned stack.
3284 MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea);
3285 FuncInfo->setMinReservedArea(MinReservedArea);
3286
3287 // If the function takes a variable number of arguments, make a frame index
3288 // for the start of the first vararg value... for expansion of llvm.va_start.
3289 if (isVarArg) {
3290 int Depth = ArgOffset;
3291
3292 FuncInfo->setVarArgsFrameIndex(
3293 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
3294 Depth, true));
3295 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3296
3297 // If this function is vararg, store any remaining integer argument regs
3298 // to their spots on the stack so that they may be loaded by dereferencing
3299 // the result of va_next.
3300 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
3301 unsigned VReg;
3302
3303 if (isPPC64)
3304 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
3305 else
3306 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
3307
3308 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3309 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3310 MachinePointerInfo(), false, false, 0);
3311 MemOps.push_back(Store);
3312 // Increment the address by four for the next argument to store
3313 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
3314 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3315 }
3316 }
3317
3318 if (!MemOps.empty())
3319 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3320
3321 return Chain;
3322 }
3323
3324 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
3325 /// adjusted to accommodate the arguments for the tailcall.
3326 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
3327 unsigned ParamSize) {
3328
3329 if (!isTailCall) return 0;
3330
3331 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3332 unsigned CallerMinReservedArea = FI->getMinReservedArea();
3333 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
3334 // Remember only if the new adjustment is bigger.
3335 if (SPDiff < FI->getTailCallSPDelta())
3336 FI->setTailCallSPDelta(SPDiff);
3337
3338 return SPDiff;
3339 }
3340
3341 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3342 /// for tail call optimization. Targets which want to do tail call
3343 /// optimization should implement this function.
3344 bool
3345 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
3346 CallingConv::ID CalleeCC,
3347 bool isVarArg,
3348 const SmallVectorImpl<ISD::InputArg> &Ins,
3349 SelectionDAG& DAG) const {
3350 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
3351 return false;
3352
3353 // Variable argument functions are not supported.
3354 if (isVarArg)
3355 return false;
3356
3357 MachineFunction &MF = DAG.getMachineFunction();
3358 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv();
3359 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
3360 // Functions containing byval parameters are not supported.
3361 for (unsigned i = 0; i != Ins.size(); i++) {
3362 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3363 if (Flags.isByVal()) return false;
3364 }
3365
3366 // Non-PIC/GOT tail calls are supported.
3367 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
3368 return true;
3369
3370 // At the moment we can only do local tail calls (in same module, hidden
3371 // or protected) if we are generating PIC.
3372 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
3373 return G->getGlobal()->hasHiddenVisibility()
3374 || G->getGlobal()->hasProtectedVisibility();
3375 }
3376
3377 return false;
3378 }
3379
3380 /// isBLACompatibleAddress - Return the immediate to use if the specified
3381 /// 32-bit value is representable in the immediate field of a BxA instruction.
3382 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
3383 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
3384 if (!C) return nullptr;
3385
3386 int Addr = C->getZExtValue();
3387 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
3388 SignExtend32<26>(Addr) != Addr)
3389 return nullptr; // Top 6 bits have to be sext of immediate.
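// (For illustration: 0x00400000 passes both checks and is returned as
// 0x00100000, i.e. the address shifted right by two, while 0x02000000 is
// rejected because it does not survive sign-extension from bit 25.)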
3390
3391 return DAG.getConstant((int)C->getZExtValue() >> 2,
3392 DAG.getTargetLoweringInfo().getPointerTy()).getNode();
3393 }
3394
3395 namespace {
3396
3397 struct TailCallArgumentInfo {
3398 SDValue Arg;
3399 SDValue FrameIdxOp;
3400 int FrameIdx;
3401
3402 TailCallArgumentInfo() : FrameIdx(0) {}
3403 };
3404
3405 }
3406
3407 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
3408 static void
3409 StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
3410 SDValue Chain,
3411 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
3412 SmallVectorImpl<SDValue> &MemOpChains,
3413 SDLoc dl) {
3414 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
3415 SDValue Arg = TailCallArgs[i].Arg;
3416 SDValue FIN = TailCallArgs[i].FrameIdxOp;
3417 int FI = TailCallArgs[i].FrameIdx;
3418 // Store relative to framepointer.
3419 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
3420 MachinePointerInfo::getFixedStack(FI),
3421 false, false, 0));
3422 }
3423 }
3424
3425 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
3426 /// the appropriate stack slot for the tail call optimized function call.
3427 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
3428 MachineFunction &MF,
3429 SDValue Chain,
3430 SDValue OldRetAddr,
3431 SDValue OldFP,
3432 int SPDiff,
3433 bool isPPC64,
3434 bool isDarwinABI,
3435 SDLoc dl) {
3436 if (SPDiff) {
3437 // Calculate the new stack slot for the return address.
3438 int SlotSize = isPPC64 ? 8 : 4;
3439 int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
3440 isDarwinABI);
3441 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
3442 NewRetAddrLoc, true);
3443 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3444 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
3445 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
3446 MachinePointerInfo::getFixedStack(NewRetAddr),
3447 false, false, 0);
3448
3449 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
3450 // slot as the FP is never overwritten.
3451 if (isDarwinABI) {
3452 int NewFPLoc =
3453 SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
3454 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
3455 true);
3456 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
3457 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
3458 MachinePointerInfo::getFixedStack(NewFPIdx),
3459 false, false, 0);
3460 }
3461 }
3462 return Chain;
3463 }
3464
3465 /// CalculateTailCallArgDest - Remember the argument for later processing.
3466 /// Calculate the position of the argument.
3467 static void
3468 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
3469 SDValue Arg, int SPDiff, unsigned ArgOffset,
3470 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
3471 int Offset = ArgOffset + SPDiff;
3472 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
3473 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
3474 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
3475 SDValue FIN = DAG.getFrameIndex(FI, VT);
3476 TailCallArgumentInfo Info;
3477 Info.Arg = Arg;
3478 Info.FrameIdxOp = FIN;
3479 Info.FrameIdx = FI;
3480 TailCallArguments.push_back(Info);
3481 }
3482
3483 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return
3484 /// address stack slot. Returns the chain as result and the loaded frame
3485 /// pointers in LROpOut/FPOpOut. Used when tail calling.
3486 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 3487 int SPDiff, 3488 SDValue Chain, 3489 SDValue &LROpOut, 3490 SDValue &FPOpOut, 3491 bool isDarwinABI, 3492 SDLoc dl) const { 3493 if (SPDiff) { 3494 // Load the LR and FP stack slot for later adjusting. 3495 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; 3496 LROpOut = getReturnAddrFrameIndex(DAG); 3497 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 3498 false, false, false, 0); 3499 Chain = SDValue(LROpOut.getNode(), 1); 3500 3501 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 3502 // slot as the FP is never overwritten. 3503 if (isDarwinABI) { 3504 FPOpOut = getFramePointerFrameIndex(DAG); 3505 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 3506 false, false, false, 0); 3507 Chain = SDValue(FPOpOut.getNode(), 1); 3508 } 3509 } 3510 return Chain; 3511 } 3512 3513 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 3514 /// by "Src" to address "Dst" of size "Size". Alignment information is 3515 /// specified by the specific parameter attribute. The copy will be passed as 3516 /// a byval function parameter. 3517 /// Sometimes what we are copying is the end of a larger object, the part that 3518 /// does not fit in registers. 3519 static SDValue 3520 CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 3521 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 3522 SDLoc dl) { 3523 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 3524 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 3525 false, false, MachinePointerInfo(), 3526 MachinePointerInfo()); 3527 } 3528 3529 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 3530 /// tail calls. 3531 static void 3532 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 3533 SDValue Arg, SDValue PtrOff, int SPDiff, 3534 unsigned ArgOffset, bool isPPC64, bool isTailCall, 3535 bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 3536 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, 3537 SDLoc dl) { 3538 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3539 if (!isTailCall) { 3540 if (isVector) { 3541 SDValue StackPtr; 3542 if (isPPC64) 3543 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3544 else 3545 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3546 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 3547 DAG.getConstant(ArgOffset, PtrVT)); 3548 } 3549 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3550 MachinePointerInfo(), false, false, 0)); 3551 // Calculate and remember argument location. 3552 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 3553 TailCallArguments); 3554 } 3555 3556 static 3557 void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 3558 SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 3559 SDValue LROp, SDValue FPOp, bool isDarwinABI, 3560 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 3561 MachineFunction &MF = DAG.getMachineFunction(); 3562 3563 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 3564 // might overwrite each other in case of tail call optimization. 3565 SmallVector<SDValue, 8> MemOpChains2; 3566 // Do not flag preceding copytoreg stuff together with the following stuff. 
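// (Clearing InFlag below starts a fresh glue chain, so the argument stores
// emitted here are not glued to the CopyToReg nodes built by our caller.)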
3567 InFlag = SDValue(); 3568 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 3569 MemOpChains2, dl); 3570 if (!MemOpChains2.empty()) 3571 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); 3572 3573 // Store the return address to the appropriate stack slot. 3574 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 3575 isPPC64, isDarwinABI, dl); 3576 3577 // Emit callseq_end just before tailcall node. 3578 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3579 DAG.getIntPtrConstant(0, true), InFlag, dl); 3580 InFlag = Chain.getValue(1); 3581 } 3582 3583 // Is this global address that of a function that can be called by name? (as 3584 // opposed to something that must hold a descriptor for an indirect call). 3585 static bool isFunctionGlobalAddress(SDValue Callee) { 3586 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3587 if (Callee.getOpcode() == ISD::GlobalTLSAddress || 3588 Callee.getOpcode() == ISD::TargetGlobalTLSAddress) 3589 return false; 3590 3591 return G->getGlobal()->getType()->getElementType()->isFunctionTy(); 3592 } 3593 3594 return false; 3595 } 3596 3597 static 3598 unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 3599 SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, 3600 bool IsPatchPoint, 3601 SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, 3602 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 3603 const PPCSubtarget &Subtarget) { 3604 3605 bool isPPC64 = Subtarget.isPPC64(); 3606 bool isSVR4ABI = Subtarget.isSVR4ABI(); 3607 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3608 3609 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3610 NodeTys.push_back(MVT::Other); // Returns a chain 3611 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 3612 3613 unsigned CallOpc = PPCISD::CALL; 3614 3615 bool needIndirectCall = true; 3616 if (!isSVR4ABI || !isPPC64) 3617 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 3618 // If this is an absolute destination address, use the munged value. 3619 Callee = SDValue(Dest, 0); 3620 needIndirectCall = false; 3621 } 3622 3623 if (isFunctionGlobalAddress(Callee)) { 3624 GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee); 3625 // A call to a TLS address is actually an indirect call to a 3626 // thread-specific pointer. 3627 unsigned OpFlags = 0; 3628 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 3629 (Subtarget.getTargetTriple().isMacOSX() && 3630 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 3631 (G->getGlobal()->isDeclaration() || 3632 G->getGlobal()->isWeakForLinker())) || 3633 (Subtarget.isTargetELF() && !isPPC64 && 3634 !G->getGlobal()->hasLocalLinkage() && 3635 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3636 // PC-relative references to external symbols should go through $stub, 3637 // unless we're building with the leopard linker or later, which 3638 // automatically synthesizes these stubs. 3639 OpFlags = PPCII::MO_PLT_OR_STUB; 3640 } 3641 3642 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 3643 // every direct call is) turn it into a TargetGlobalAddress / 3644 // TargetExternalSymbol node so that legalize doesn't hack it. 
3645 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 3646 Callee.getValueType(), 0, OpFlags); 3647 needIndirectCall = false; 3648 } 3649 3650 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3651 unsigned char OpFlags = 0; 3652 3653 if ((DAG.getTarget().getRelocationModel() != Reloc::Static && 3654 (Subtarget.getTargetTriple().isMacOSX() && 3655 Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) || 3656 (Subtarget.isTargetELF() && !isPPC64 && 3657 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { 3658 // PC-relative references to external symbols should go through $stub, 3659 // unless we're building with the leopard linker or later, which 3660 // automatically synthesizes these stubs. 3661 OpFlags = PPCII::MO_PLT_OR_STUB; 3662 } 3663 3664 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 3665 OpFlags); 3666 needIndirectCall = false; 3667 } 3668 3669 if (IsPatchPoint) { 3670 // We'll form an invalid direct call when lowering a patchpoint; the full 3671 // sequence for an indirect call is complicated, and many of the 3672 // instructions introduced might have side effects (and, thus, can't be 3673 // removed later). The call itself will be removed as soon as the 3674 // argument/return lowering is complete, so the fact that it has the wrong 3675 // kind of operands should not really matter. 3676 needIndirectCall = false; 3677 } 3678 3679 if (needIndirectCall) { 3680 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 3681 // to do the call, we can't use PPCISD::CALL. 3682 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 3683 3684 if (isSVR4ABI && isPPC64 && !isELFv2ABI) { 3685 // Function pointers in the 64-bit SVR4 ABI do not point to the function 3686 // entry point, but to the function descriptor (the function entry point 3687 // address is part of the function descriptor though). 3688 // The function descriptor is a three doubleword structure with the 3689 // following fields: function entry point, TOC base address and 3690 // environment pointer. 3691 // Thus for a call through a function pointer, the following actions need 3692 // to be performed: 3693 // 1. Save the TOC of the caller in the TOC save area of its stack 3694 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 3695 // 2. Load the address of the function entry point from the function 3696 // descriptor. 3697 // 3. Load the TOC of the callee from the function descriptor into r2. 3698 // 4. Load the environment pointer from the function descriptor into 3699 // r11. 3700 // 5. Branch to the function entry point address. 3701 // 6. On return of the callee, the TOC of the caller needs to be 3702 // restored (this is done in FinishCall()). 3703 // 3704 // All those operations are flagged together to ensure that no other 3705 // operations can be scheduled in between. E.g. without flagging the 3706 // operations together, a TOC access in the caller could be scheduled 3707 // between the load of the callee TOC and the branch to the callee, which 3708 // results in the TOC access going through the TOC of the callee instead 3709 // of going through the TOC of the caller, which leads to incorrect code. 3710 3711 // Load the address of the function entry point from the function 3712 // descriptor. 3713 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); 3714 SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, 3715 makeArrayRef(MTCTROps, InFlag.getNode() ? 
3 : 2)); 3716 Chain = LoadFuncPtr.getValue(1); 3717 InFlag = LoadFuncPtr.getValue(2); 3718 3719 // Load environment pointer into r11. 3720 // Offset of the environment pointer within the function descriptor. 3721 SDValue PtrOff = DAG.getIntPtrConstant(16); 3722 3723 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 3724 SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, 3725 InFlag); 3726 Chain = LoadEnvPtr.getValue(1); 3727 InFlag = LoadEnvPtr.getValue(2); 3728 3729 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 3730 InFlag); 3731 Chain = EnvVal.getValue(0); 3732 InFlag = EnvVal.getValue(1); 3733 3734 // Load TOC of the callee into r2. We are using a target-specific load 3735 // with r2 hard coded, because the result of a target-independent load 3736 // would never go directly into r2, since r2 is a reserved register (which 3737 // prevents the register allocator from allocating it), resulting in an 3738 // additional register being allocated and an unnecessary move instruction 3739 // being generated. 3740 VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3741 SDValue TOCOff = DAG.getIntPtrConstant(8); 3742 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); 3743 SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, 3744 AddTOC, InFlag); 3745 Chain = LoadTOCPtr.getValue(0); 3746 InFlag = LoadTOCPtr.getValue(1); 3747 3748 MTCTROps[0] = Chain; 3749 MTCTROps[1] = LoadFuncPtr; 3750 MTCTROps[2] = InFlag; 3751 } 3752 3753 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, 3754 makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2)); 3755 InFlag = Chain.getValue(1); 3756 3757 NodeTys.clear(); 3758 NodeTys.push_back(MVT::Other); 3759 NodeTys.push_back(MVT::Glue); 3760 Ops.push_back(Chain); 3761 CallOpc = PPCISD::BCTRL; 3762 Callee.setNode(nullptr); 3763 // Add use of X11 (holding environment pointer) 3764 if (isSVR4ABI && isPPC64 && !isELFv2ABI) 3765 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 3766 // Add CTR register as callee so a bctr can be emitted later. 3767 if (isTailCall) 3768 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 3769 } 3770 3771 // If this is a direct call, pass the chain and the callee. 3772 if (Callee.getNode()) { 3773 Ops.push_back(Chain); 3774 Ops.push_back(Callee); 3775 3776 // If this is a call to __tls_get_addr, find the symbol whose address 3777 // is to be taken and add it to the list. This will be used to 3778 // generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld). 3779 // We find the symbol by walking the chain to the CopyFromReg, walking 3780 // back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and 3781 // pulling the symbol from that node. 3782 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 3783 if (!strcmp(S->getSymbol(), "__tls_get_addr")) { 3784 assert(!needIndirectCall && "Indirect call to __tls_get_addr???"); 3785 SDNode *AddI = Chain.getNode()->getOperand(2).getNode(); 3786 SDValue TGTAddr = AddI->getOperand(1); 3787 assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress && 3788 "Didn't find target global TLS address where we expected one"); 3789 Ops.push_back(TGTAddr); 3790 CallOpc = PPCISD::CALL_TLS; 3791 } 3792 } 3793 // If this is a tail call add stack pointer delta. 3794 if (isTailCall) 3795 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 3796 3797 // Add argument registers to the end of the list so that they are known live 3798 // into the call. 
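// For illustration only: after this loop a simple direct call carries
// operands roughly of the form
//   (Chain, TargetGlobalAddress:@foo, Reg:X3, Reg:F1, ...)
// with the call-preserved register mask and any glue appended later in
// FinishCall().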
3799 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 3800 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 3801 RegsToPass[i].second.getValueType())); 3802 3803 // Direct calls in the ELFv2 ABI need the TOC register live into the call. 3804 if (Callee.getNode() && isELFv2ABI && !IsPatchPoint) 3805 Ops.push_back(DAG.getRegister(PPC::X2, PtrVT)); 3806 3807 return CallOpc; 3808 } 3809 3810 static 3811 bool isLocalCall(const SDValue &Callee) 3812 { 3813 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3814 return !G->getGlobal()->isDeclaration() && 3815 !G->getGlobal()->isWeakForLinker(); 3816 return false; 3817 } 3818 3819 SDValue 3820 PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 3821 CallingConv::ID CallConv, bool isVarArg, 3822 const SmallVectorImpl<ISD::InputArg> &Ins, 3823 SDLoc dl, SelectionDAG &DAG, 3824 SmallVectorImpl<SDValue> &InVals) const { 3825 3826 SmallVector<CCValAssign, 16> RVLocs; 3827 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 3828 *DAG.getContext()); 3829 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 3830 3831 // Copy all of the result registers out of their specified physreg. 3832 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 3833 CCValAssign &VA = RVLocs[i]; 3834 assert(VA.isRegLoc() && "Can only return in registers!"); 3835 3836 SDValue Val = DAG.getCopyFromReg(Chain, dl, 3837 VA.getLocReg(), VA.getLocVT(), InFlag); 3838 Chain = Val.getValue(1); 3839 InFlag = Val.getValue(2); 3840 3841 switch (VA.getLocInfo()) { 3842 default: llvm_unreachable("Unknown loc info!"); 3843 case CCValAssign::Full: break; 3844 case CCValAssign::AExt: 3845 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3846 break; 3847 case CCValAssign::ZExt: 3848 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 3849 DAG.getValueType(VA.getValVT())); 3850 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3851 break; 3852 case CCValAssign::SExt: 3853 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 3854 DAG.getValueType(VA.getValVT())); 3855 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3856 break; 3857 } 3858 3859 InVals.push_back(Val); 3860 } 3861 3862 return Chain; 3863 } 3864 3865 SDValue 3866 PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, 3867 bool isTailCall, bool isVarArg, bool IsPatchPoint, 3868 SelectionDAG &DAG, 3869 SmallVector<std::pair<unsigned, SDValue>, 8> 3870 &RegsToPass, 3871 SDValue InFlag, SDValue Chain, 3872 SDValue &Callee, 3873 int SPDiff, unsigned NumBytes, 3874 const SmallVectorImpl<ISD::InputArg> &Ins, 3875 SmallVectorImpl<SDValue> &InVals) const { 3876 3877 bool isELFv2ABI = Subtarget.isELFv2ABI(); 3878 std::vector<EVT> NodeTys; 3879 SmallVector<SDValue, 8> Ops; 3880 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, 3881 isTailCall, IsPatchPoint, RegsToPass, Ops, 3882 NodeTys, Subtarget); 3883 3884 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 3885 if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) 3886 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 3887 3888 // When performing tail call optimization the callee pops its arguments off 3889 // the stack. Account for this here so these bytes can be pushed back on in 3890 // PPCFrameLowering::eliminateCallFramePseudoInstr. 3891 int BytesCalleePops = 3892 (CallConv == CallingConv::Fast && 3893 getTargetMachine().Options.GuaranteedTailCallOpt) ? 
NumBytes : 0;
3894
3895 // Add a register mask operand representing the call-preserved registers.
3896 const TargetRegisterInfo *TRI =
3897 getTargetMachine().getSubtargetImpl()->getRegisterInfo();
3898 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
3899 assert(Mask && "Missing call preserved mask for calling convention");
3900 Ops.push_back(DAG.getRegisterMask(Mask));
3901
3902 if (InFlag.getNode())
3903 Ops.push_back(InFlag);
3904
3905 // Emit tail call.
3906 if (isTailCall) {
3907 assert(((Callee.getOpcode() == ISD::Register &&
3908 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
3909 Callee.getOpcode() == ISD::TargetExternalSymbol ||
3910 Callee.getOpcode() == ISD::TargetGlobalAddress ||
3911 isa<ConstantSDNode>(Callee)) &&
3912 "Expecting a global address, external symbol, absolute value or register");
3913
3914 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
3915 }
3916
3917 // Add a NOP immediately after the branch instruction when using the 64-bit
3918 // SVR4 ABI. At link time, if caller and callee are in a different module and
3919 // thus have a different TOC, the call will be replaced with a call to a stub
3920 // function which saves the current TOC, loads the TOC of the callee and
3921 // branches to the callee. The NOP will be replaced with a load instruction
3922 // which restores the TOC of the caller from the TOC save slot of the current
3923 // stack frame. If caller and callee belong to the same module (and have the
3924 // same TOC), the NOP will remain unchanged.
3925
3926 if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
3927 !IsPatchPoint) {
3928 if (CallOpc == PPCISD::BCTRL) {
3929 // This is a call through a function pointer.
3930 // Restore the caller TOC from the save area into R2.
3931 // See PrepareCall() for more information about calls through function
3932 // pointers in the 64-bit SVR4 ABI.
3933 // We are using a target-specific load with r2 hard coded, because the
3934 // result of a target-independent load would never go directly into r2,
3935 // since r2 is a reserved register (which prevents the register allocator
3936 // from allocating it), resulting in an additional register being
3937 // allocated and an unnecessary move instruction being generated.
3938 CallOpc = PPCISD::BCTRL_LOAD_TOC;
3939
3940 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3941 SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
3942 unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
3943 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
3944 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
3945
3946 // The address needs to go after the chain input but before the flag (or
3947 // any other variadic arguments).
3948 Ops.insert(std::next(Ops.begin()), AddTOC);
3949 } else if ((CallOpc == PPCISD::CALL) &&
3950 (!isLocalCall(Callee) ||
3951 DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
3952 // Otherwise insert NOP for non-local calls.
3953 CallOpc = PPCISD::CALL_NOP;
3954 } else if (CallOpc == PPCISD::CALL_TLS)
3955 // For 64-bit SVR4, TLS calls are always non-local.
3956 CallOpc = PPCISD::CALL_NOP_TLS;
3957 }
3958
3959 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
3960 InFlag = Chain.getValue(1);
3961
3962 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
3963 DAG.getIntPtrConstant(BytesCalleePops, true),
3964 InFlag, dl);
3965 if (!Ins.empty())
3966 InFlag = Chain.getValue(1);
3967
3968 return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
3969 Ins, dl, DAG, InVals);
3970 }
3971
3972 SDValue
3973 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3974 SmallVectorImpl<SDValue> &InVals) const {
3975 SelectionDAG &DAG = CLI.DAG;
3976 SDLoc &dl = CLI.DL;
3977 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3978 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3979 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3980 SDValue Chain = CLI.Chain;
3981 SDValue Callee = CLI.Callee;
3982 bool &isTailCall = CLI.IsTailCall;
3983 CallingConv::ID CallConv = CLI.CallConv;
3984 bool isVarArg = CLI.IsVarArg;
3985 bool IsPatchPoint = CLI.IsPatchPoint;
3986
3987 if (isTailCall)
3988 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
3989 Ins, DAG);
3990
3991 if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
3992 report_fatal_error("failed to perform tail call elimination on a call "
3993 "site marked musttail");
3994
3995 if (Subtarget.isSVR4ABI()) {
3996 if (Subtarget.isPPC64())
3997 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
3998 isTailCall, IsPatchPoint, Outs, OutVals, Ins,
3999 dl, DAG, InVals);
4000 else
4001 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
4002 isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4003 dl, DAG, InVals);
4004 }
4005
4006 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
4007 isTailCall, IsPatchPoint, Outs, OutVals, Ins,
4008 dl, DAG, InVals);
4009 }
4010
4011 SDValue
4012 PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
4013 CallingConv::ID CallConv, bool isVarArg,
4014 bool isTailCall, bool IsPatchPoint,
4015 const SmallVectorImpl<ISD::OutputArg> &Outs,
4016 const SmallVectorImpl<SDValue> &OutVals,
4017 const SmallVectorImpl<ISD::InputArg> &Ins,
4018 SDLoc dl, SelectionDAG &DAG,
4019 SmallVectorImpl<SDValue> &InVals) const {
4020 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
4021 // of the 32-bit SVR4 ABI stack frame layout.
4022
4023 assert((CallConv == CallingConv::C ||
4024 CallConv == CallingConv::Fast) && "Unknown calling convention!");
4025
4026 unsigned PtrByteSize = 4;
4027
4028 MachineFunction &MF = DAG.getMachineFunction();
4029
4030 // Mark this function as potentially containing a function that contains a
4031 // tail call. As a consequence, the frame pointer will be used for dynamic
4032 // allocas and for restoring the caller's stack pointer in this function's
4033 // epilog. This is done because, by tail calling, the called function might
4034 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
4035 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4036 CallConv == CallingConv::Fast)
4037 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4038
4039 // Count how many bytes are to be pushed on the stack, including the linkage
4040 // area, parameter list area and the part of the local variable space which
4041 // contains copies of aggregates which are passed by value.
4042
4043 // Assign locations to all of the outgoing arguments.
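// For illustration only: CC_PPC32_SVR4 typically hands the first integer
// arguments to R3..R10 and the first vector arguments to V2..V13; anything
// that no longer fits is assigned a parameter-area byte offset instead,
// which the code below turns into an explicit store.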
4044 SmallVector<CCValAssign, 16> ArgLocs; 4045 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, 4046 *DAG.getContext()); 4047 4048 // Reserve space for the linkage area on the stack. 4049 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false, false), 4050 PtrByteSize); 4051 4052 if (isVarArg) { 4053 // Handle fixed and variable vector arguments differently. 4054 // Fixed vector arguments go into registers as long as registers are 4055 // available. Variable vector arguments always go into memory. 4056 unsigned NumArgs = Outs.size(); 4057 4058 for (unsigned i = 0; i != NumArgs; ++i) { 4059 MVT ArgVT = Outs[i].VT; 4060 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 4061 bool Result; 4062 4063 if (Outs[i].IsFixed) { 4064 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 4065 CCInfo); 4066 } else { 4067 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 4068 ArgFlags, CCInfo); 4069 } 4070 4071 if (Result) { 4072 #ifndef NDEBUG 4073 errs() << "Call operand #" << i << " has unhandled type " 4074 << EVT(ArgVT).getEVTString() << "\n"; 4075 #endif 4076 llvm_unreachable(nullptr); 4077 } 4078 } 4079 } else { 4080 // All arguments are treated the same. 4081 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 4082 } 4083 4084 // Assign locations to all of the outgoing aggregate by value arguments. 4085 SmallVector<CCValAssign, 16> ByValArgLocs; 4086 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4087 ByValArgLocs, *DAG.getContext()); 4088 4089 // Reserve stack space for the allocations in CCInfo. 4090 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 4091 4092 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 4093 4094 // Size of the linkage area, parameter list area and the part of the local 4095 // space variable where copies of aggregates which are passed by value are 4096 // stored. 4097 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 4098 4099 // Calculate by how many bytes the stack has to be adjusted in case of tail 4100 // call optimization. 4101 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4102 4103 // Adjust the stack pointer for the new arguments... 4104 // These operations are automatically eliminated by the prolog/epilog pass 4105 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4106 dl); 4107 SDValue CallSeqStart = Chain; 4108 4109 // Load the return address and frame pointer so it can be moved somewhere else 4110 // later. 4111 SDValue LROp, FPOp; 4112 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, 4113 dl); 4114 4115 // Set up a copy of the stack pointer for use loading and storing any 4116 // arguments that may not fit in the registers available for argument 4117 // passing. 4118 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4119 4120 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4121 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4122 SmallVector<SDValue, 8> MemOpChains; 4123 4124 bool seenFloatArg = false; 4125 // Walk the register/memloc assignments, inserting copies/loads. 
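// For illustration only: each register-assigned argument ends up as a
// (PhysReg, Value) pair in RegsToPass and is copied into place just before
// the call, while each memory-assigned argument becomes a store relative
// to R1 (or a deferred TailCallArgumentInfo entry for tail calls).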
4126 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 4127 i != e; 4128 ++i) { 4129 CCValAssign &VA = ArgLocs[i]; 4130 SDValue Arg = OutVals[i]; 4131 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4132 4133 if (Flags.isByVal()) { 4134 // Argument is an aggregate which is passed by value, thus we need to 4135 // create a copy of it in the local variable space of the current stack 4136 // frame (which is the stack frame of the caller) and pass the address of 4137 // this copy to the callee. 4138 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 4139 CCValAssign &ByValVA = ByValArgLocs[j++]; 4140 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 4141 4142 // Memory reserved in the local variable space of the callers stack frame. 4143 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 4144 4145 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 4146 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 4147 4148 // Create a copy of the argument in the local area of the current 4149 // stack frame. 4150 SDValue MemcpyCall = 4151 CreateCopyOfByValArgument(Arg, PtrOff, 4152 CallSeqStart.getNode()->getOperand(0), 4153 Flags, DAG, dl); 4154 4155 // This must go outside the CALLSEQ_START..END. 4156 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 4157 CallSeqStart.getNode()->getOperand(1), 4158 SDLoc(MemcpyCall)); 4159 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 4160 NewCallSeqStart.getNode()); 4161 Chain = CallSeqStart = NewCallSeqStart; 4162 4163 // Pass the address of the aggregate copy on the stack either in a 4164 // physical register or in the parameter list area of the current stack 4165 // frame to the callee. 4166 Arg = PtrOff; 4167 } 4168 4169 if (VA.isRegLoc()) { 4170 if (Arg.getValueType() == MVT::i1) 4171 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Arg); 4172 4173 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 4174 // Put argument in a physical register. 4175 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 4176 } else { 4177 // Put argument in the parameter list area of the current stack frame. 4178 assert(VA.isMemLoc()); 4179 unsigned LocMemOffset = VA.getLocMemOffset(); 4180 4181 if (!isTailCall) { 4182 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 4183 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 4184 4185 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 4186 MachinePointerInfo(), 4187 false, false, 0)); 4188 } else { 4189 // Calculate and remember argument location. 4190 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 4191 TailCallArguments); 4192 } 4193 } 4194 } 4195 4196 if (!MemOpChains.empty()) 4197 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 4198 4199 // Build a sequence of copy-to-reg nodes chained together with token chain 4200 // and flag operands which copy the outgoing args into the appropriate regs. 4201 SDValue InFlag; 4202 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4203 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4204 RegsToPass[i].second, InFlag); 4205 InFlag = Chain.getValue(1); 4206 } 4207 4208 // Set CR bit 6 to true if this is a vararg call with floating args passed in 4209 // registers. 4210 if (isVarArg) { 4211 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 4212 SDValue Ops[] = { Chain, InFlag }; 4213 4214 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 4215 dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 
2 : 1));
4216
4217 InFlag = Chain.getValue(1);
4218 }
4219
4220 if (isTailCall)
4221 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
4222 false, TailCallArguments);
4223
4224 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
4225 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4226 Ins, InVals);
4227 }
4228
4229 // Copy an argument into memory, being careful to do this outside the
4230 // call sequence for the call to which the argument belongs.
4231 SDValue
4232 PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
4233 SDValue CallSeqStart,
4234 ISD::ArgFlagsTy Flags,
4235 SelectionDAG &DAG,
4236 SDLoc dl) const {
4237 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
4238 CallSeqStart.getNode()->getOperand(0),
4239 Flags, DAG, dl);
4240 // The MEMCPY must go outside the CALLSEQ_START..END.
4241 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
4242 CallSeqStart.getNode()->getOperand(1),
4243 SDLoc(MemcpyCall));
4244 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
4245 NewCallSeqStart.getNode());
4246 return NewCallSeqStart;
4247 }
4248
4249 SDValue
4250 PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
4251 CallingConv::ID CallConv, bool isVarArg,
4252 bool isTailCall, bool IsPatchPoint,
4253 const SmallVectorImpl<ISD::OutputArg> &Outs,
4254 const SmallVectorImpl<SDValue> &OutVals,
4255 const SmallVectorImpl<ISD::InputArg> &Ins,
4256 SDLoc dl, SelectionDAG &DAG,
4257 SmallVectorImpl<SDValue> &InVals) const {
4258
4259 bool isELFv2ABI = Subtarget.isELFv2ABI();
4260 bool isLittleEndian = Subtarget.isLittleEndian();
4261 unsigned NumOps = Outs.size();
4262
4263 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4264 unsigned PtrByteSize = 8;
4265
4266 MachineFunction &MF = DAG.getMachineFunction();
4267
4268 // Mark this function as potentially containing a function that contains a
4269 // tail call. As a consequence, the frame pointer will be used for dynamic
4270 // allocas and for restoring the caller's stack pointer in this function's
4271 // epilog. This is done because, by tail calling, the called function might
4272 // overwrite the value in this function's (MF) stack pointer stack slot 0(SP).
4273 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4274 CallConv == CallingConv::Fast)
4275 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4276
4277 // Count how many bytes are to be pushed on the stack, including the linkage
4278 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
4279 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
4280 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
4281 unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false,
4282 isELFv2ABI);
4283 unsigned NumBytes = LinkageSize;
4284
4285 // Add up all the space actually used.
4286 for (unsigned i = 0; i != NumOps; ++i) {
4287 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4288 EVT ArgVT = Outs[i].VT;
4289 EVT OrigVT = Outs[i].ArgVT;
4290
4291 /* Respect alignment of argument on the stack. */
4292 unsigned Align =
4293 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4294 NumBytes = ((NumBytes + Align - 1) / Align) * Align;
4295
4296 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4297 if (Flags.isInConsecutiveRegsLast())
4298 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4299 }
4300
4301 unsigned NumBytesActuallyUsed = NumBytes;
4302
4303 // The prolog code of the callee may store up to 8 GPR argument registers to
4304 // the stack, allowing va_start to index over them in memory if it is varargs.
4305 // Because we cannot tell if this is needed on the caller side, we have to
4306 // conservatively assume that it is needed. As such, make sure we have at
4307 // least enough stack space for the caller to store the 8 GPRs.
4308 // FIXME: On ELFv2, it may be unnecessary to allocate the parameter area.
4309 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
4310
4311 // Tail call needs the stack to be aligned.
4312 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4313 CallConv == CallingConv::Fast)
4314 NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
4315
4316 // Calculate by how many bytes the stack has to be adjusted in case of tail
4317 // call optimization.
4318 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4319
4320 // To protect arguments on the stack from being clobbered in a tail call,
4321 // force all the loads to happen before doing any other lowering.
4322 if (isTailCall)
4323 Chain = DAG.getStackArgumentTokenFactor(Chain);
4324
4325 // Adjust the stack pointer for the new arguments...
4326 // These operations are automatically eliminated by the prolog/epilog pass.
4327 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4328 dl);
4329 SDValue CallSeqStart = Chain;
4330
4331 // Load the return address and frame pointer so they can be moved somewhere
4332 // else later.
4333 SDValue LROp, FPOp;
4334 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
4335 dl);
4336
4337 // Set up a copy of the stack pointer for use loading and storing any
4338 // arguments that may not fit in the registers available for argument
4339 // passing.
4340 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4341
4342 // Figure out which arguments are going to go in registers, and which in
4343 // memory. Also, if this is a vararg function, floating point operations
4344 // must be stored to our stack, and loaded into integer regs as well, if
4345 // any integer regs are available for argument passing.
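// For illustration only (64-bit SVR4 rules as implemented below): given
//   void f(int a, double b, float c);
// a travels in X3, b in F1 and c in F2, while X4 and X5 are only shadowed,
// because every doubleword of parameter area consumes one GPR.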
4346 unsigned ArgOffset = LinkageSize; 4347 unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; 4348 4349 static const MCPhysReg GPR[] = { 4350 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4351 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4352 }; 4353 static const MCPhysReg *FPR = GetFPR(); 4354 4355 static const MCPhysReg VR[] = { 4356 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4357 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4358 }; 4359 static const MCPhysReg VSRH[] = { 4360 PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8, 4361 PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13 4362 }; 4363 4364 const unsigned NumGPRs = array_lengthof(GPR); 4365 const unsigned NumFPRs = 13; 4366 const unsigned NumVRs = array_lengthof(VR); 4367 4368 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4369 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4370 4371 SmallVector<SDValue, 8> MemOpChains; 4372 for (unsigned i = 0; i != NumOps; ++i) { 4373 SDValue Arg = OutVals[i]; 4374 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4375 EVT ArgVT = Outs[i].VT; 4376 EVT OrigVT = Outs[i].ArgVT; 4377 4378 /* Respect alignment of argument on the stack. */ 4379 unsigned Align = 4380 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize); 4381 ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; 4382 4383 /* Compute GPR index associated with argument offset. */ 4384 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; 4385 GPR_idx = std::min(GPR_idx, NumGPRs); 4386 4387 // PtrOff will be used to store the current argument to the stack if a 4388 // register cannot be found for it. 4389 SDValue PtrOff; 4390 4391 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4392 4393 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4394 4395 // Promote integers to 64-bit values. 4396 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) { 4397 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4398 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4399 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4400 } 4401 4402 // FIXME memcpy is used way more than necessary. Correctness first. 4403 // Note: "by value" is code for passing a structure by value, not 4404 // basic types. 4405 if (Flags.isByVal()) { 4406 // Note: Size includes alignment padding, so 4407 // struct x { short a; char b; } 4408 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 4409 // These are the proper values we need for right-justifying the 4410 // aggregate in a parameter register. 4411 unsigned Size = Flags.getByValSize(); 4412 4413 // An empty aggregate parameter takes up no storage and no 4414 // registers. 4415 if (Size == 0) 4416 continue; 4417 4418 // All aggregates smaller than 8 bytes must be passed right-justified. 4419 if (Size==1 || Size==2 || Size==4) { 4420 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? 
MVT::i16 : MVT::i32); 4421 if (GPR_idx != NumGPRs) { 4422 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4423 MachinePointerInfo(), VT, 4424 false, false, false, 0); 4425 MemOpChains.push_back(Load.getValue(1)); 4426 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); 4427 4428 ArgOffset += PtrByteSize; 4429 continue; 4430 } 4431 } 4432 4433 if (GPR_idx == NumGPRs && Size < 8) { 4434 SDValue AddPtr = PtrOff; 4435 if (!isLittleEndian) { 4436 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4437 PtrOff.getValueType()); 4438 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4439 } 4440 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4441 CallSeqStart, 4442 Flags, DAG, dl); 4443 ArgOffset += PtrByteSize; 4444 continue; 4445 } 4446 // Copy entire object into memory. There are cases where gcc-generated 4447 // code assumes it is there, even if it could be put entirely into 4448 // registers. (This is not what the doc says.) 4449 4450 // FIXME: The above statement is likely due to a misunderstanding of the 4451 // documents. All arguments must be copied into the parameter area BY 4452 // THE CALLEE in the event that the callee takes the address of any 4453 // formal argument. That has not yet been implemented. However, it is 4454 // reasonable to use the stack area as a staging area for the register 4455 // load. 4456 4457 // Skip this for small aggregates, as we will use the same slot for a 4458 // right-justified copy, below. 4459 if (Size >= 8) 4460 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4461 CallSeqStart, 4462 Flags, DAG, dl); 4463 4464 // When a register is available, pass a small aggregate right-justified. 4465 if (Size < 8 && GPR_idx != NumGPRs) { 4466 // The easiest way to get this right-justified in a register 4467 // is to copy the structure into the rightmost portion of a 4468 // local variable slot, then load the whole slot into the 4469 // register. 4470 // FIXME: The memcpy seems to produce pretty awful code for 4471 // small aggregates, particularly for packed ones. 4472 // FIXME: It would be preferable to use the slot in the 4473 // parameter save area instead of a new local variable. 4474 SDValue AddPtr = PtrOff; 4475 if (!isLittleEndian) { 4476 SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); 4477 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4478 } 4479 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4480 CallSeqStart, 4481 Flags, DAG, dl); 4482 4483 // Load the slot into the register. 4484 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, 4485 MachinePointerInfo(), 4486 false, false, false, 0); 4487 MemOpChains.push_back(Load.getValue(1)); 4488 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); 4489 4490 // Done with this argument. 4491 ArgOffset += PtrByteSize; 4492 continue; 4493 } 4494 4495 // For aggregates larger than PtrByteSize, copy the pieces of the 4496 // object that fit into registers from the parameter save area. 
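      // For example, a 24-byte aggregate with only two GPRs left gets its
      // first 16 bytes loaded into those GPRs by the loop below; the final
      // 8 bytes stay in the parameter save area, and ArgOffset advances past
      // the whole (rounded-up) object either way.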
4497 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4498 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4499 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4500 if (GPR_idx != NumGPRs) { 4501 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4502 MachinePointerInfo(), 4503 false, false, false, 0); 4504 MemOpChains.push_back(Load.getValue(1)); 4505 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4506 ArgOffset += PtrByteSize; 4507 } else { 4508 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4509 break; 4510 } 4511 } 4512 continue; 4513 } 4514 4515 switch (Arg.getSimpleValueType().SimpleTy) { 4516 default: llvm_unreachable("Unexpected ValueType for argument!"); 4517 case MVT::i1: 4518 case MVT::i32: 4519 case MVT::i64: 4520 // These can be scalar arguments or elements of an integer array type 4521 // passed directly. Clang may use those instead of "byval" aggregate 4522 // types to avoid forcing arguments to memory unnecessarily. 4523 if (GPR_idx != NumGPRs) { 4524 RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg)); 4525 } else { 4526 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4527 true, isTailCall, false, MemOpChains, 4528 TailCallArguments, dl); 4529 } 4530 ArgOffset += PtrByteSize; 4531 break; 4532 case MVT::f32: 4533 case MVT::f64: { 4534 // These can be scalar arguments or elements of a float array type 4535 // passed directly. The latter are used to implement ELFv2 homogenous 4536 // float aggregates. 4537 4538 // Named arguments go into FPRs first, and once they overflow, the 4539 // remaining arguments go into GPRs and then the parameter save area. 4540 // Unnamed arguments for vararg functions always go to GPRs and 4541 // then the parameter save area. For now, put all arguments to vararg 4542 // routines always in both locations (FPR *and* GPR or stack slot). 4543 bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs; 4544 4545 // First load the argument into the next available FPR. 4546 if (FPR_idx != NumFPRs) 4547 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4548 4549 // Next, load the argument into GPR or stack slot if needed. 4550 if (!NeedGPROrStack) 4551 ; 4552 else if (GPR_idx != NumGPRs) { 4553 // In the non-vararg case, this can only ever happen in the 4554 // presence of f32 array types, since otherwise we never run 4555 // out of FPRs before running out of GPRs. 4556 SDValue ArgVal; 4557 4558 // Double values are always passed in a single GPR. 4559 if (Arg.getValueType() != MVT::f32) { 4560 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); 4561 4562 // Non-array float values are extended and passed in a GPR. 4563 } else if (!Flags.isInConsecutiveRegs()) { 4564 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4565 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal); 4566 4567 // If we have an array of floats, we collect every odd element 4568 // together with its predecessor into one GPR. 4569 } else if (ArgOffset % PtrByteSize != 0) { 4570 SDValue Lo, Hi; 4571 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]); 4572 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); 4573 if (!isLittleEndian) 4574 std::swap(Lo, Hi); 4575 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 4576 4577 // The final element, if even, goes into the first half of a GPR. 
4578         } else if (Flags.isInConsecutiveRegsLast()) {
4579           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
4580           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
4581           if (!isLittleEndian)
4582             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
4583                                  DAG.getConstant(32, MVT::i32));
4584
4585         // Non-final even elements are skipped; they will be handled
4586         // together with the subsequent argument on the next go-around.
4587         } else
4588           ArgVal = SDValue();
4589
4590         if (ArgVal.getNode())
4591           RegsToPass.push_back(std::make_pair(GPR[GPR_idx], ArgVal));
4592       } else {
4593         // Single-precision floating-point values are mapped to the
4594         // second (rightmost) word of the stack doubleword.
4595         if (Arg.getValueType() == MVT::f32 &&
4596             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
4597           SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
4598           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
4599         }
4600
4601         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4602                          true, isTailCall, false, MemOpChains,
4603                          TailCallArguments, dl);
4604       }
4605       // When passing an array of floats, the array occupies consecutive
4606       // space in the argument area; only round up to the next doubleword
4607       // at the end of the array.  Otherwise, each float takes 8 bytes.
4608       ArgOffset += (Arg.getValueType() == MVT::f32 &&
4609                     Flags.isInConsecutiveRegs()) ? 4 : 8;
4610       if (Flags.isInConsecutiveRegsLast())
4611         ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4612       break;
4613     }
4614     case MVT::v4f32:
4615     case MVT::v4i32:
4616     case MVT::v8i16:
4617     case MVT::v16i8:
4618     case MVT::v2f64:
4619     case MVT::v2i64:
4620       // These can be scalar arguments or elements of a vector array type
4621       // passed directly.  The latter are used to implement ELFv2 homogenous
4622       // vector aggregates.
4623
4624       // For a varargs call, named arguments go into VRs or on the stack as
4625       // usual; unnamed arguments always go to the stack or the corresponding
4626       // GPRs when within range.  For now, we always put the value in both
4627       // locations (or even all three).
4628       if (isVarArg) {
4629         // We could elide this store in the case where the object fits
4630         // entirely in R registers.  Maybe later.
4631         SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
4632                                      MachinePointerInfo(), false, false, 0);
4633         MemOpChains.push_back(Store);
4634         if (VR_idx != NumVRs) {
4635           SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
4636                                      MachinePointerInfo(),
4637                                      false, false, false, 0);
4638           MemOpChains.push_back(Load.getValue(1));
4639
4640           unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
4641                            Arg.getSimpleValueType() == MVT::v2i64) ?
4642                           VSRH[VR_idx] : VR[VR_idx];
4643           ++VR_idx;
4644
4645           RegsToPass.push_back(std::make_pair(VReg, Load));
4646         }
4647         ArgOffset += 16;
4648         for (unsigned i=0; i<16; i+=PtrByteSize) {
4649           if (GPR_idx == NumGPRs)
4650             break;
4651           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
4652                                    DAG.getConstant(i, PtrVT));
4653           SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
4654                                      false, false, false, 0);
4655           MemOpChains.push_back(Load.getValue(1));
4656           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
4657         }
4658         break;
4659       }
4660
4661       // Non-varargs Altivec params go into VRs or on the stack.
4662       if (VR_idx != NumVRs) {
4663         unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
4664                          Arg.getSimpleValueType() == MVT::v2i64) ?
VSRH[VR_idx] : VR[VR_idx];
4666         ++VR_idx;
4667
4668         RegsToPass.push_back(std::make_pair(VReg, Arg));
4669       } else {
4670         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4671                          true, isTailCall, true, MemOpChains,
4672                          TailCallArguments, dl);
4673       }
4674       ArgOffset += 16;
4675       break;
4676     }
4677   }
4678
4679   assert(NumBytesActuallyUsed == ArgOffset);
4680   (void)NumBytesActuallyUsed;
4681
4682   if (!MemOpChains.empty())
4683     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4684
4685   // Check if this is an indirect call (MTCTR/BCTRL).
4686   // See PrepareCall() for more information about calls through function
4687   // pointers in the 64-bit SVR4 ABI.
4688   if (!isTailCall && !IsPatchPoint &&
4689       !isFunctionGlobalAddress(Callee) &&
4690       !isa<ExternalSymbolSDNode>(Callee)) {
4691     // Load r2 into a virtual register and store it to the TOC save area.
4692     SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
4693     // TOC save area offset.
4694     unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
4695     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
4696     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
4697     Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
4698                          false, false, 0);
4699     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
4700     // This does not mean the MTCTR instruction must use R12; it's easier
4701     // to model this as an extra parameter, so do that.
4702     if (isELFv2ABI && !IsPatchPoint)
4703       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
4704   }
4705
4706   // Build a sequence of copy-to-reg nodes chained together with token chain
4707   // and flag operands which copy the outgoing args into the appropriate regs.
4708   SDValue InFlag;
4709   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4710     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4711                              RegsToPass[i].second, InFlag);
4712     InFlag = Chain.getValue(1);
4713   }
4714
4715   if (isTailCall)
4716     PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
4717                     FPOp, true, TailCallArguments);
4718
4719   return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
4720                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4721                     Ins, InVals);
4722 }
4723
4724 SDValue
4725 PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
4726                                     CallingConv::ID CallConv, bool isVarArg,
4727                                     bool isTailCall, bool IsPatchPoint,
4728                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
4729                                     const SmallVectorImpl<SDValue> &OutVals,
4730                                     const SmallVectorImpl<ISD::InputArg> &Ins,
4731                                     SDLoc dl, SelectionDAG &DAG,
4732                                     SmallVectorImpl<SDValue> &InVals) const {
4733
4734   unsigned NumOps = Outs.size();
4735
4736   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4737   bool isPPC64 = PtrVT == MVT::i64;
4738   unsigned PtrByteSize = isPPC64 ? 8 : 4;
4739
4740   MachineFunction &MF = DAG.getMachineFunction();
4741
4742   // Mark this function as potentially containing a tail call.  As a
4743   // consequence, the frame pointer will be used for dynamic allocation and
4744   // for restoring the caller's stack pointer in this function's epilog.  This
4745   // is done because, by tail calling, the called function might overwrite the
4746   // value in this function's (MF) stack pointer stack slot 0(SP).
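  // (This state only matters when tail calls are actually enabled, i.e. under
  // GuaranteedTailCallOpt with the fastcc convention, which is exactly what
  // the check below tests.)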
4747   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4748       CallConv == CallingConv::Fast)
4749     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
4750
4751   // Count how many bytes are to be pushed on the stack, including the linkage
4752   // area, and parameter passing area.  We start with 24/48 bytes, which is
4753   // prereserved space for [SP][CR][LR][3 x unused].
4754   unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true,
4755                                                           false);
4756   unsigned NumBytes = LinkageSize;
4757
4758   // Add up all the space actually used.
4759   // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
4760   // they all go in registers, but we must reserve stack space for them for
4761   // possible use by the caller.  In varargs or 64-bit calls, parameters are
4762   // assigned stack space in order, with padding so Altivec parameters are
4763   // 16-byte aligned.
4764   unsigned nAltivecParamsAtEnd = 0;
4765   for (unsigned i = 0; i != NumOps; ++i) {
4766     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4767     EVT ArgVT = Outs[i].VT;
4768     // Varargs Altivec parameters are padded to a 16 byte boundary.
4769     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4770         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4771         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
4772       if (!isVarArg && !isPPC64) {
4773         // Non-varargs Altivec parameters go after all the non-Altivec
4774         // parameters; handle those later so we know how much padding we need.
4775         nAltivecParamsAtEnd++;
4776         continue;
4777       }
4778       // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
4779       NumBytes = ((NumBytes+15)/16)*16;
4780     }
4781     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4782   }
4783
4784   // Allow for Altivec parameters at the end, if needed.
4785   if (nAltivecParamsAtEnd) {
4786     NumBytes = ((NumBytes+15)/16)*16;
4787     NumBytes += 16*nAltivecParamsAtEnd;
4788   }
4789
4790   // The prolog code of the callee may store up to 8 GPR argument registers to
4791   // the stack, allowing va_start to index over them in memory if it is varargs.
4792   // Because we cannot tell if this is needed on the caller side, we have to
4793   // conservatively assume that it is needed.  As such, make sure we have at
4794   // least enough stack space for the caller to store the 8 GPRs.
4795   NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
4796
4797   // Tail call needs the stack to be aligned.
4798   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
4799       CallConv == CallingConv::Fast)
4800     NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes);
4801
4802   // Calculate by how many bytes the stack has to be adjusted in case of tail
4803   // call optimization.
4804   int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);
4805
4806   // To protect arguments on the stack from being clobbered in a tail call,
4807   // force all the loads to happen before doing any other lowering.
4808   if (isTailCall)
4809     Chain = DAG.getStackArgumentTokenFactor(Chain);
4810
4811   // Adjust the stack pointer for the new arguments...
4812   // These operations are automatically eliminated by the prolog/epilog pass.
4813   Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
4814                                dl);
4815   SDValue CallSeqStart = Chain;
4816
4817   // Load the return address and frame pointer so they can be moved somewhere
4818   // else later.
4819 SDValue LROp, FPOp; 4820 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4821 dl); 4822 4823 // Set up a copy of the stack pointer for use loading and storing any 4824 // arguments that may not fit in the registers available for argument 4825 // passing. 4826 SDValue StackPtr; 4827 if (isPPC64) 4828 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4829 else 4830 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4831 4832 // Figure out which arguments are going to go in registers, and which in 4833 // memory. Also, if this is a vararg function, floating point operations 4834 // must be stored to our stack, and loaded into integer regs as well, if 4835 // any integer regs are available for argument passing. 4836 unsigned ArgOffset = LinkageSize; 4837 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4838 4839 static const MCPhysReg GPR_32[] = { // 32-bit registers. 4840 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 4841 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 4842 }; 4843 static const MCPhysReg GPR_64[] = { // 64-bit registers. 4844 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4845 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4846 }; 4847 static const MCPhysReg *FPR = GetFPR(); 4848 4849 static const MCPhysReg VR[] = { 4850 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4851 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4852 }; 4853 const unsigned NumGPRs = array_lengthof(GPR_32); 4854 const unsigned NumFPRs = 13; 4855 const unsigned NumVRs = array_lengthof(VR); 4856 4857 const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32; 4858 4859 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4860 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4861 4862 SmallVector<SDValue, 8> MemOpChains; 4863 for (unsigned i = 0; i != NumOps; ++i) { 4864 SDValue Arg = OutVals[i]; 4865 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4866 4867 // PtrOff will be used to store the current argument to the stack if a 4868 // register cannot be found for it. 4869 SDValue PtrOff; 4870 4871 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4872 4873 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4874 4875 // On PPC64, promote integers to 64-bit values. 4876 if (isPPC64 && Arg.getValueType() == MVT::i32) { 4877 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4878 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4879 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4880 } 4881 4882 // FIXME memcpy is used way more than necessary. Correctness first. 4883 // Note: "by value" is code for passing a structure by value, not 4884 // basic types. 4885 if (Flags.isByVal()) { 4886 unsigned Size = Flags.getByValSize(); 4887 // Very small objects are passed right-justified. Everything else is 4888 // passed left-justified. 4889 if (Size==1 || Size==2) { 4890 EVT VT = (Size==1) ? 
MVT::i8 : MVT::i16; 4891 if (GPR_idx != NumGPRs) { 4892 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4893 MachinePointerInfo(), VT, 4894 false, false, false, 0); 4895 MemOpChains.push_back(Load.getValue(1)); 4896 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4897 4898 ArgOffset += PtrByteSize; 4899 } else { 4900 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4901 PtrOff.getValueType()); 4902 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4903 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4904 CallSeqStart, 4905 Flags, DAG, dl); 4906 ArgOffset += PtrByteSize; 4907 } 4908 continue; 4909 } 4910 // Copy entire object into memory. There are cases where gcc-generated 4911 // code assumes it is there, even if it could be put entirely into 4912 // registers. (This is not what the doc says.) 4913 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4914 CallSeqStart, 4915 Flags, DAG, dl); 4916 4917 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 4918 // copy the pieces of the object that fit into registers from the 4919 // parameter save area. 4920 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4921 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4922 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4923 if (GPR_idx != NumGPRs) { 4924 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4925 MachinePointerInfo(), 4926 false, false, false, 0); 4927 MemOpChains.push_back(Load.getValue(1)); 4928 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4929 ArgOffset += PtrByteSize; 4930 } else { 4931 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4932 break; 4933 } 4934 } 4935 continue; 4936 } 4937 4938 switch (Arg.getSimpleValueType().SimpleTy) { 4939 default: llvm_unreachable("Unexpected ValueType for argument!"); 4940 case MVT::i1: 4941 case MVT::i32: 4942 case MVT::i64: 4943 if (GPR_idx != NumGPRs) { 4944 if (Arg.getValueType() == MVT::i1) 4945 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg); 4946 4947 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4948 } else { 4949 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4950 isPPC64, isTailCall, false, MemOpChains, 4951 TailCallArguments, dl); 4952 } 4953 ArgOffset += PtrByteSize; 4954 break; 4955 case MVT::f32: 4956 case MVT::f64: 4957 if (FPR_idx != NumFPRs) { 4958 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4959 4960 if (isVarArg) { 4961 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4962 MachinePointerInfo(), false, false, 0); 4963 MemOpChains.push_back(Store); 4964 4965 // Float varargs are always shadowed in available integer registers 4966 if (GPR_idx != NumGPRs) { 4967 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4968 MachinePointerInfo(), false, false, 4969 false, 0); 4970 MemOpChains.push_back(Load.getValue(1)); 4971 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4972 } 4973 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 4974 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4975 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4976 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4977 MachinePointerInfo(), 4978 false, false, false, 0); 4979 MemOpChains.push_back(Load.getValue(1)); 4980 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4981 } 4982 } else { 4983 // If we have any FPRs remaining, we may also have GPRs remaining. 
4984         // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
4985         // GPRs.
4986         if (GPR_idx != NumGPRs)
4987           ++GPR_idx;
4988         if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
4989             !isPPC64)  // PPC64 has 64-bit GPRs, obviously :)
4990           ++GPR_idx;
4991       }
4992     } else
4993       LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
4994                        isPPC64, isTailCall, false, MemOpChains,
4995                        TailCallArguments, dl);
4996     if (isPPC64)
4997       ArgOffset += 8;
4998     else
4999       ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
5000     break;
5001   case MVT::v4f32:
5002   case MVT::v4i32:
5003   case MVT::v8i16:
5004   case MVT::v16i8:
5005     if (isVarArg) {
5006       // These go aligned on the stack, or in the corresponding R registers
5007       // when within range.  The Darwin PPC ABI doc claims they also go in
5008       // V registers; in fact gcc does this only for arguments that are
5009       // prototyped, not for those that match the ellipsis (...).  We do it
5010       // for all arguments, which seems to work.
5011       while (ArgOffset % 16 != 0) {
5012         ArgOffset += PtrByteSize;
5013         if (GPR_idx != NumGPRs)
5014           GPR_idx++;
5015       }
5016       // We could elide this store in the case where the object fits
5017       // entirely in R registers.  Maybe later.
5018       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5019                            DAG.getConstant(ArgOffset, PtrVT));
5020       SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
5021                                    MachinePointerInfo(), false, false, 0);
5022       MemOpChains.push_back(Store);
5023       if (VR_idx != NumVRs) {
5024         SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
5025                                    MachinePointerInfo(),
5026                                    false, false, false, 0);
5027         MemOpChains.push_back(Load.getValue(1));
5028         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
5029       }
5030       ArgOffset += 16;
5031       for (unsigned i=0; i<16; i+=PtrByteSize) {
5032         if (GPR_idx == NumGPRs)
5033           break;
5034         SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
5035                                  DAG.getConstant(i, PtrVT));
5036         SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
5037                                    false, false, false, 0);
5038         MemOpChains.push_back(Load.getValue(1));
5039         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5040       }
5041       break;
5042     }
5043
5044     // Non-varargs Altivec params generally go in registers, but have
5045     // stack space allocated at the end.
5046     if (VR_idx != NumVRs) {
5047       // Doesn't have GPR space allocated.
5048       RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
5049     } else if (nAltivecParamsAtEnd==0) {
5050       // We are emitting Altivec params in order.
5051       LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
5052                        isPPC64, isTailCall, true, MemOpChains,
5053                        TailCallArguments, dl);
5054       ArgOffset += 16;
5055     }
5056     break;
5057   }
5058   }
5059   // If all Altivec parameters fit in registers, as they usually do,
5060   // they get stack space following the non-Altivec parameters.  We
5061   // don't track this here because nobody below needs it.
5062   // If there are more Altivec parameters than fit in registers, emit
5063   // the stores here.
5064   if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
5065     unsigned j = 0;
5066     // Offset is aligned; skip 1st 12 params which go in V registers.
5067     ArgOffset = ((ArgOffset+15)/16)*16;
5068     ArgOffset += 12*16;
5069     for (unsigned i = 0; i != NumOps; ++i) {
5070       SDValue Arg = OutVals[i];
5071       EVT ArgType = Outs[i].VT;
5072       if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
5073           ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
5074         if (++j > NumVRs) {
5075           SDValue PtrOff;
5076           // We are emitting Altivec params in order.
5077 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 5078 isPPC64, isTailCall, true, MemOpChains, 5079 TailCallArguments, dl); 5080 ArgOffset += 16; 5081 } 5082 } 5083 } 5084 } 5085 5086 if (!MemOpChains.empty()) 5087 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); 5088 5089 // On Darwin, R12 must contain the address of an indirect callee. This does 5090 // not mean the MTCTR instruction must use R12; it's easier to model this as 5091 // an extra parameter, so do that. 5092 if (!isTailCall && 5093 !isFunctionGlobalAddress(Callee) && 5094 !isa<ExternalSymbolSDNode>(Callee) && 5095 !isBLACompatibleAddress(Callee, DAG)) 5096 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 5097 PPC::R12), Callee)); 5098 5099 // Build a sequence of copy-to-reg nodes chained together with token chain 5100 // and flag operands which copy the outgoing args into the appropriate regs. 5101 SDValue InFlag; 5102 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 5103 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 5104 RegsToPass[i].second, InFlag); 5105 InFlag = Chain.getValue(1); 5106 } 5107 5108 if (isTailCall) 5109 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 5110 FPOp, true, TailCallArguments); 5111 5112 return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG, 5113 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 5114 Ins, InVals); 5115 } 5116 5117 bool 5118 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 5119 MachineFunction &MF, bool isVarArg, 5120 const SmallVectorImpl<ISD::OutputArg> &Outs, 5121 LLVMContext &Context) const { 5122 SmallVector<CCValAssign, 16> RVLocs; 5123 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); 5124 return CCInfo.CheckReturn(Outs, RetCC_PPC); 5125 } 5126 5127 SDValue 5128 PPCTargetLowering::LowerReturn(SDValue Chain, 5129 CallingConv::ID CallConv, bool isVarArg, 5130 const SmallVectorImpl<ISD::OutputArg> &Outs, 5131 const SmallVectorImpl<SDValue> &OutVals, 5132 SDLoc dl, SelectionDAG &DAG) const { 5133 5134 SmallVector<CCValAssign, 16> RVLocs; 5135 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, 5136 *DAG.getContext()); 5137 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 5138 5139 SDValue Flag; 5140 SmallVector<SDValue, 4> RetOps(1, Chain); 5141 5142 // Copy the result values into the output registers. 5143 for (unsigned i = 0; i != RVLocs.size(); ++i) { 5144 CCValAssign &VA = RVLocs[i]; 5145 assert(VA.isRegLoc() && "Can only return in registers!"); 5146 5147 SDValue Arg = OutVals[i]; 5148 5149 switch (VA.getLocInfo()) { 5150 default: llvm_unreachable("Unknown loc info!"); 5151 case CCValAssign::Full: break; 5152 case CCValAssign::AExt: 5153 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 5154 break; 5155 case CCValAssign::ZExt: 5156 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 5157 break; 5158 case CCValAssign::SExt: 5159 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 5160 break; 5161 } 5162 5163 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 5164 Flag = Chain.getValue(1); 5165 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 5166 } 5167 5168 RetOps[0] = Chain; // Update chain. 5169 5170 // Add the flag if we have it. 
5171   if (Flag.getNode())
5172     RetOps.push_back(Flag);
5173
5174   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
5175 }
5176
5177 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
5178                                              const PPCSubtarget &Subtarget) const {
5179   // When we pop the dynamic allocation we need to restore the SP link.
5180   SDLoc dl(Op);
5181
5182   // Get the correct type for pointers.
5183   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5184
5185   // Construct the stack pointer operand.
5186   bool isPPC64 = Subtarget.isPPC64();
5187   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
5188   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
5189
5190   // Get the operands for the STACKRESTORE.
5191   SDValue Chain = Op.getOperand(0);
5192   SDValue SaveSP = Op.getOperand(1);
5193
5194   // Load the old link SP.
5195   SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
5196                                    MachinePointerInfo(),
5197                                    false, false, false, 0);
5198
5199   // Restore the stack pointer.
5200   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
5201
5202   // Store the old link SP.
5203   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
5204                       false, false, 0);
5205 }
5206
5207
5208
5209 SDValue
5210 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
5211   MachineFunction &MF = DAG.getMachineFunction();
5212   bool isPPC64 = Subtarget.isPPC64();
5213   bool isDarwinABI = Subtarget.isDarwinABI();
5214   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5215
5216   // Get the current return address save index, creating it on first use
5217   // below if it has not been allocated yet.
5218   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
5219   int RASI = FI->getReturnAddrSaveIndex();
5220
5221   // If the return address save index hasn't been defined yet.
5222   if (!RASI) {
5223     // Find out the fixed offset of the return address save area.
5224     int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
5225     // Allocate the frame index for the return address save area.
5226     RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, LROffset, false);
5227     // Save the result.
5228     FI->setReturnAddrSaveIndex(RASI);
5229   }
5230   return DAG.getFrameIndex(RASI, PtrVT);
5231 }
5232
5233 SDValue
5234 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
5235   MachineFunction &MF = DAG.getMachineFunction();
5236   bool isPPC64 = Subtarget.isPPC64();
5237   bool isDarwinABI = Subtarget.isDarwinABI();
5238   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5239
5240   // Get the current frame pointer save index.  The users of this index will
5241   // be primarily DYNALLOC instructions.
5242   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
5243   int FPSI = FI->getFramePointerSaveIndex();
5244
5245   // If the frame pointer save index hasn't been defined yet.
5246   if (!FPSI) {
5247     // Find out the fixed offset of the frame pointer save area.
5248     int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
5249                                                                isDarwinABI);
5250
5251     // Allocate the frame index for the frame pointer save area.
5252     FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, FPOffset, true);
5253     // Save the result.
5254     FI->setFramePointerSaveIndex(FPSI);
5255   }
5256   return DAG.getFrameIndex(FPSI, PtrVT);
5257 }
5258
5259 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
5260                                                    SelectionDAG &DAG,
5261                                                    const PPCSubtarget &Subtarget) const {
5262   // Get the inputs.
5263   SDValue Chain = Op.getOperand(0);
5264   SDValue Size = Op.getOperand(1);
5265   SDLoc dl(Op);
5266
5267   // Get the correct type for pointers.
5268   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5269   // Negate the size.
5270   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
5271                                 DAG.getConstant(0, PtrVT), Size);
5272   // Construct a node for the frame pointer save index.
5273   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
5274   // Build a DYNALLOC node.
5275   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
5276   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
5277   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
5278 }
5279
5280 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
5281                                                SelectionDAG &DAG) const {
5282   SDLoc DL(Op);
5283   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
5284                      DAG.getVTList(MVT::i32, MVT::Other),
5285                      Op.getOperand(0), Op.getOperand(1));
5286 }
5287
5288 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
5289                                                 SelectionDAG &DAG) const {
5290   SDLoc DL(Op);
5291   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
5292                      Op.getOperand(0), Op.getOperand(1));
5293 }
5294
5295 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
5296   assert(Op.getValueType() == MVT::i1 &&
5297          "Custom lowering only for i1 loads");
5298
5299   // First, load 8 bits into 32 bits, then truncate to 1 bit.
5300
5301   SDLoc dl(Op);
5302   LoadSDNode *LD = cast<LoadSDNode>(Op);
5303
5304   SDValue Chain = LD->getChain();
5305   SDValue BasePtr = LD->getBasePtr();
5306   MachineMemOperand *MMO = LD->getMemOperand();
5307
5308   SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(), Chain,
5309                                  BasePtr, MVT::i8, MMO);
5310   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
5311
5312   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
5313   return DAG.getMergeValues(Ops, dl);
5314 }
5315
5316 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
5317   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
5318          "Custom lowering only for i1 stores");
5319
5320   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
5321
5322   SDLoc dl(Op);
5323   StoreSDNode *ST = cast<StoreSDNode>(Op);
5324
5325   SDValue Chain = ST->getChain();
5326   SDValue BasePtr = ST->getBasePtr();
5327   SDValue Value = ST->getValue();
5328   MachineMemOperand *MMO = ST->getMemOperand();
5329
5330   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(), Value);
5331   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
5332 }
5333
5334 // FIXME: Remove this once the ANDI glue bug is fixed:
5335 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
5336   assert(Op.getValueType() == MVT::i1 &&
5337          "Custom lowering only for i1 results");
5338
5339   SDLoc DL(Op);
5340   return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
5341                      Op.getOperand(0));
5342 }
5343
5344 /// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
5345 /// when possible.
5346 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5347   // Not FP? Not an fsel.
5348   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
5349       !Op.getOperand(2).getValueType().isFloatingPoint())
5350     return Op;
5351
5352   // We might be able to do better than this under some circumstances, but in
5353   // general, fsel-based lowering of select is a finite-math-only optimization.
5354   // For more information, see section F.3 of the 2.06 ISA specification.
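  // Roughly, fsel FRT,FRA,FRC,FRB computes FRT = (FRA >= 0.0) ? FRC : FRB,
  // where -0.0 counts as 0.0 and a NaN in FRA selects FRB; that asymmetric
  // NaN behavior is one reason the checks below restrict this lowering to
  // no-infs/no-NaNs math.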
5355   if (!DAG.getTarget().Options.NoInfsFPMath ||
5356       !DAG.getTarget().Options.NoNaNsFPMath)
5357     return Op;
5358
5359   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5360
5361   EVT ResVT = Op.getValueType();
5362   EVT CmpVT = Op.getOperand(0).getValueType();
5363   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
5364   SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
5365   SDLoc dl(Op);
5366
5367   // If the RHS of the comparison is a 0.0, we don't need to do the
5368   // subtraction at all.
5369   SDValue Sel1;
5370   if (isFloatingPointZero(RHS))
5371     switch (CC) {
5372     default: break;       // SETUO etc aren't handled by fsel.
5373     case ISD::SETNE:
5374       std::swap(TV, FV);
5375     case ISD::SETEQ:
5376       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
5377         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5378       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
5379       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
5380         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
5381       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5382                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
5383     case ISD::SETULT:
5384     case ISD::SETLT:
5385       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
5386     case ISD::SETOGE:
5387     case ISD::SETGE:
5388       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
5389         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5390       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
5391     case ISD::SETUGT:
5392     case ISD::SETGT:
5393       std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
5394     case ISD::SETOLE:
5395     case ISD::SETLE:
5396       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
5397         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
5398       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
5399                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
5400     }
5401
5402   SDValue Cmp;
5403   switch (CC) {
5404   default: break;       // SETUO etc aren't handled by fsel.
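  // Each case below materializes LHS - RHS (or RHS - LHS) with an FSUB and
  // feeds it to one or two FSEL nodes; e.g. (select_cc LHS, RHS, TV, FV, setge)
  // becomes (fsel (fsub LHS, RHS), TV, FV).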
5405 case ISD::SETNE: 5406 std::swap(TV, FV); 5407 case ISD::SETEQ: 5408 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5409 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5410 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5411 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5412 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 5413 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 5414 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 5415 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 5416 case ISD::SETULT: 5417 case ISD::SETLT: 5418 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5419 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5420 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5421 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 5422 case ISD::SETOGE: 5423 case ISD::SETGE: 5424 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 5425 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5426 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5427 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5428 case ISD::SETUGT: 5429 case ISD::SETGT: 5430 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 5431 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5432 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5433 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 5434 case ISD::SETOLE: 5435 case ISD::SETLE: 5436 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 5437 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 5438 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 5439 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 5440 } 5441 return Op; 5442 } 5443 5444 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, 5445 SelectionDAG &DAG, 5446 SDLoc dl) const { 5447 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 5448 SDValue Src = Op.getOperand(0); 5449 if (Src.getValueType() == MVT::f32) 5450 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 5451 5452 SDValue Tmp; 5453 switch (Op.getSimpleValueType().SimpleTy) { 5454 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 5455 case MVT::i32: 5456 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : 5457 (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : 5458 PPCISD::FCTIDZ), 5459 dl, MVT::f64, Src); 5460 break; 5461 case MVT::i64: 5462 assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && 5463 "i64 FP_TO_UINT is supported only with FPCVT"); 5464 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 5465 PPCISD::FCTIDUZ, 5466 dl, MVT::f64, Src); 5467 break; 5468 } 5469 5470 // Convert the FP value to an int value through memory. 5471 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && 5472 (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); 5473 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 5474 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 5475 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); 5476 5477 // Emit a store to the stack slot. 
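  // When STFIWX is available the 32-bit integer image is stored directly;
  // otherwise the value is stored as a full f64 and the relevant word is
  // picked out by the 4-byte bias applied further down.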
5478 SDValue Chain; 5479 if (i32Stack) { 5480 MachineFunction &MF = DAG.getMachineFunction(); 5481 MachineMemOperand *MMO = 5482 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 5483 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 5484 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 5485 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO); 5486 } else 5487 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 5488 MPI, false, false, 0); 5489 5490 // Result is a load from the stack slot. If loading 4 bytes, make sure to 5491 // add in a bias. 5492 if (Op.getValueType() == MVT::i32 && !i32Stack) { 5493 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 5494 DAG.getConstant(4, FIPtr.getValueType())); 5495 MPI = MPI.getWithOffset(4); 5496 } 5497 5498 RLI.Chain = Chain; 5499 RLI.Ptr = FIPtr; 5500 RLI.MPI = MPI; 5501 } 5502 5503 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 5504 SDLoc dl) const { 5505 ReuseLoadInfo RLI; 5506 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 5507 5508 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, 5509 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, 5510 RLI.Ranges); 5511 } 5512 5513 // We're trying to insert a regular store, S, and then a load, L. If the 5514 // incoming value, O, is a load, we might just be able to have our load use the 5515 // address used by O. However, we don't know if anything else will store to 5516 // that address before we can load from it. To prevent this situation, we need 5517 // to insert our load, L, into the chain as a peer of O. To do this, we give L 5518 // the same chain operand as O, we create a token factor from the chain results 5519 // of O and L, and we replace all uses of O's chain result with that token 5520 // factor (see spliceIntoChain below for this last part). 5521 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT, 5522 ReuseLoadInfo &RLI, 5523 SelectionDAG &DAG, 5524 ISD::LoadExtType ET) const { 5525 SDLoc dl(Op); 5526 if (ET == ISD::NON_EXTLOAD && 5527 (Op.getOpcode() == ISD::FP_TO_UINT || 5528 Op.getOpcode() == ISD::FP_TO_SINT) && 5529 isOperationLegalOrCustom(Op.getOpcode(), 5530 Op.getOperand(0).getValueType())) { 5531 5532 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl); 5533 return true; 5534 } 5535 5536 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op); 5537 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() || 5538 LD->isNonTemporal()) 5539 return false; 5540 if (LD->getMemoryVT() != MemVT) 5541 return false; 5542 5543 RLI.Ptr = LD->getBasePtr(); 5544 if (LD->isIndexed() && LD->getOffset().getOpcode() != ISD::UNDEF) { 5545 assert(LD->getAddressingMode() == ISD::PRE_INC && 5546 "Non-pre-inc AM on PPC?"); 5547 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr, 5548 LD->getOffset()); 5549 } 5550 5551 RLI.Chain = LD->getChain(); 5552 RLI.MPI = LD->getPointerInfo(); 5553 RLI.IsInvariant = LD->isInvariant(); 5554 RLI.Alignment = LD->getAlignment(); 5555 RLI.AAInfo = LD->getAAInfo(); 5556 RLI.Ranges = LD->getRanges(); 5557 5558 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1); 5559 return true; 5560 } 5561 5562 // Given the head of the old chain, ResChain, insert a token factor containing 5563 // it and NewResChain, and make users of ResChain now be users of that token 5564 // factor. 
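//
// Schematically, if O's chain result originally fed a set of users U:
//
//   before:  O.chain -------------------------------> U
//   after:   O.chain ---> TF(O.chain, L.chain) ---> U
//
// so every operation in U is now ordered after both O and the new load L.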
5565 void PPCTargetLowering::spliceIntoChain(SDValue ResChain, 5566 SDValue NewResChain, 5567 SelectionDAG &DAG) const { 5568 if (!ResChain) 5569 return; 5570 5571 SDLoc dl(NewResChain); 5572 5573 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 5574 NewResChain, DAG.getUNDEF(MVT::Other)); 5575 assert(TF.getNode() != NewResChain.getNode() && 5576 "A new TF really is required here"); 5577 5578 DAG.ReplaceAllUsesOfValueWith(ResChain, TF); 5579 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain); 5580 } 5581 5582 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 5583 SelectionDAG &DAG) const { 5584 SDLoc dl(Op); 5585 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 5586 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 5587 return SDValue(); 5588 5589 if (Op.getOperand(0).getValueType() == MVT::i1) 5590 return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0), 5591 DAG.getConstantFP(1.0, Op.getValueType()), 5592 DAG.getConstantFP(0.0, Op.getValueType())); 5593 5594 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && 5595 "UINT_TO_FP is supported only with FPCVT"); 5596 5597 // If we have FCFIDS, then use it when converting to single-precision. 5598 // Otherwise, convert to double-precision and then round. 5599 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 5600 (Op.getOpcode() == ISD::UINT_TO_FP ? 5601 PPCISD::FCFIDUS : PPCISD::FCFIDS) : 5602 (Op.getOpcode() == ISD::UINT_TO_FP ? 5603 PPCISD::FCFIDU : PPCISD::FCFID); 5604 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 5605 MVT::f32 : MVT::f64; 5606 5607 if (Op.getOperand(0).getValueType() == MVT::i64) { 5608 SDValue SINT = Op.getOperand(0); 5609 // When converting to single-precision, we actually need to convert 5610 // to double-precision first and then round to single-precision. 5611 // To avoid double-rounding effects during that operation, we have 5612 // to prepare the input operand. Bits that might be truncated when 5613 // converting to double-precision are replaced by a bit that won't 5614 // be lost at this stage, but is below the single-precision rounding 5615 // position. 5616 // 5617 // However, if -enable-unsafe-fp-math is in effect, accept double 5618 // rounding to avoid the extra overhead. 5619 if (Op.getValueType() == MVT::f32 && 5620 !Subtarget.hasFPCVT() && 5621 !DAG.getTarget().Options.UnsafeFPMath) { 5622 5623 // Twiddle input to make sure the low 11 bits are zero. (If this 5624 // is the case, we are guaranteed the value will fit into the 53 bit 5625 // mantissa of an IEEE double-precision value without rounding.) 5626 // If any of those low 11 bits were not zero originally, make sure 5627 // bit 12 (value 2048) is set instead, so that the final rounding 5628 // to single-precision gets the correct result. 5629 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 5630 SINT, DAG.getConstant(2047, MVT::i64)); 5631 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 5632 Round, DAG.getConstant(2047, MVT::i64)); 5633 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 5634 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 5635 Round, DAG.getConstant(-2048, MVT::i64)); 5636 5637 // However, we cannot use that value unconditionally: if the magnitude 5638 // of the input value is small, the bit-twiddling we did above might 5639 // end up visibly changing the output. 
Fortunately, in that case, we 5640 // don't need to twiddle bits since the original input will convert 5641 // exactly to double-precision floating-point already. Therefore, 5642 // construct a conditional to use the original value if the top 11 5643 // bits are all sign-bit copies, and use the rounded value computed 5644 // above otherwise. 5645 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 5646 SINT, DAG.getConstant(53, MVT::i32)); 5647 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 5648 Cond, DAG.getConstant(1, MVT::i64)); 5649 Cond = DAG.getSetCC(dl, MVT::i32, 5650 Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); 5651 5652 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 5653 } 5654 5655 ReuseLoadInfo RLI; 5656 SDValue Bits; 5657 5658 MachineFunction &MF = DAG.getMachineFunction(); 5659 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) { 5660 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, false, 5661 false, RLI.IsInvariant, RLI.Alignment, RLI.AAInfo, 5662 RLI.Ranges); 5663 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 5664 } else if (Subtarget.hasLFIWAX() && 5665 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) { 5666 MachineMemOperand *MMO = 5667 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 5668 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 5669 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 5670 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl, 5671 DAG.getVTList(MVT::f64, MVT::Other), 5672 Ops, MVT::i32, MMO); 5673 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 5674 } else if (Subtarget.hasFPCVT() && 5675 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) { 5676 MachineMemOperand *MMO = 5677 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 5678 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 5679 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 5680 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl, 5681 DAG.getVTList(MVT::f64, MVT::Other), 5682 Ops, MVT::i32, MMO); 5683 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG); 5684 } else if (((Subtarget.hasLFIWAX() && 5685 SINT.getOpcode() == ISD::SIGN_EXTEND) || 5686 (Subtarget.hasFPCVT() && 5687 SINT.getOpcode() == ISD::ZERO_EXTEND)) && 5688 SINT.getOperand(0).getValueType() == MVT::i32) { 5689 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 5690 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5691 5692 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 5693 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5694 5695 SDValue Store = 5696 DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx, 5697 MachinePointerInfo::getFixedStack(FrameIdx), 5698 false, false, 0); 5699 5700 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 5701 "Expected an i32 store"); 5702 5703 RLI.Ptr = FIdx; 5704 RLI.Chain = Store; 5705 RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); 5706 RLI.Alignment = 4; 5707 5708 MachineMemOperand *MMO = 5709 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 5710 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 5711 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 5712 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ? 
5713 PPCISD::LFIWZX : PPCISD::LFIWAX, 5714 dl, DAG.getVTList(MVT::f64, MVT::Other), 5715 Ops, MVT::i32, MMO); 5716 } else 5717 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 5718 5719 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 5720 5721 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) 5722 FP = DAG.getNode(ISD::FP_ROUND, dl, 5723 MVT::f32, FP, DAG.getIntPtrConstant(0)); 5724 return FP; 5725 } 5726 5727 assert(Op.getOperand(0).getValueType() == MVT::i32 && 5728 "Unhandled INT_TO_FP type in custom expander!"); 5729 // Since we only generate this in 64-bit mode, we can take advantage of 5730 // 64-bit registers. In particular, sign extend the input value into the 5731 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 5732 // then lfd it and fcfid it. 5733 MachineFunction &MF = DAG.getMachineFunction(); 5734 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 5735 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5736 5737 SDValue Ld; 5738 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { 5739 ReuseLoadInfo RLI; 5740 bool ReusingLoad; 5741 if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI, 5742 DAG))) { 5743 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 5744 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5745 5746 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 5747 MachinePointerInfo::getFixedStack(FrameIdx), 5748 false, false, 0); 5749 5750 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 5751 "Expected an i32 store"); 5752 5753 RLI.Ptr = FIdx; 5754 RLI.Chain = Store; 5755 RLI.MPI = MachinePointerInfo::getFixedStack(FrameIdx); 5756 RLI.Alignment = 4; 5757 } 5758 5759 MachineMemOperand *MMO = 5760 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4, 5761 RLI.Alignment, RLI.AAInfo, RLI.Ranges); 5762 SDValue Ops[] = { RLI.Chain, RLI.Ptr }; 5763 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 5764 PPCISD::LFIWZX : PPCISD::LFIWAX, 5765 dl, DAG.getVTList(MVT::f64, MVT::Other), 5766 Ops, MVT::i32, MMO); 5767 if (ReusingLoad) 5768 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG); 5769 } else { 5770 assert(Subtarget.isPPC64() && 5771 "i32->FP without LFIWAX supported only on PPC64"); 5772 5773 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 5774 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5775 5776 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 5777 Op.getOperand(0)); 5778 5779 // STD the extended value into the stack slot. 5780 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, 5781 MachinePointerInfo::getFixedStack(FrameIdx), 5782 false, false, 0); 5783 5784 // Load the value as a double. 5785 Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, 5786 MachinePointerInfo::getFixedStack(FrameIdx), 5787 false, false, false, 0); 5788 } 5789 5790 // FCFID it and return it. 
5791   SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
5792   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
5793     FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
5794   return FP;
5795 }
5796
5797 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
5798                                             SelectionDAG &DAG) const {
5799   SDLoc dl(Op);
5800   /*
5801    The rounding mode is in bits 30:31 of FPSCR, and has the following
5802    settings:
5803      00 Round to nearest
5804      01 Round to 0
5805      10 Round to +inf
5806      11 Round to -inf
5807
5808   FLT_ROUNDS, on the other hand, expects the following:
5809     -1 Undefined
5810      0 Round to 0
5811      1 Round to nearest
5812      2 Round to +inf
5813      3 Round to -inf
5814
5815   To perform the conversion, we do:
5816     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
5817   */
5818
5819   MachineFunction &MF = DAG.getMachineFunction();
5820   EVT VT = Op.getValueType();
5821   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
5822
5823   // Save FP Control Word to register
5824   EVT NodeTys[] = {
5825     MVT::f64,    // return register
5826     MVT::Glue    // unused in this context
5827   };
5828   SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
5829
5830   // Save FP register to stack slot
5831   int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
5832   SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
5833   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
5834                                StackSlot, MachinePointerInfo(), false, false, 0);
5835
5836   // Load FP Control Word from low 32 bits of stack slot.
5837   SDValue Four = DAG.getConstant(4, PtrVT);
5838   SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
5839   SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(),
5840                             false, false, false, 0);
5841
5842   // Transform as necessary
5843   SDValue CWD1 =
5844     DAG.getNode(ISD::AND, dl, MVT::i32,
5845                 CWD, DAG.getConstant(3, MVT::i32));
5846   SDValue CWD2 =
5847     DAG.getNode(ISD::SRL, dl, MVT::i32,
5848                 DAG.getNode(ISD::AND, dl, MVT::i32,
5849                             DAG.getNode(ISD::XOR, dl, MVT::i32,
5850                                         CWD, DAG.getConstant(3, MVT::i32)),
5851                             DAG.getConstant(3, MVT::i32)),
5852                 DAG.getConstant(1, MVT::i32));
5853
5854   SDValue RetVal =
5855     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
5856
5857   return DAG.getNode((VT.getSizeInBits() < 16 ?
5858                       ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
5859 }
5860
5861 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
5862   EVT VT = Op.getValueType();
5863   unsigned BitWidth = VT.getSizeInBits();
5864   SDLoc dl(Op);
5865   assert(Op.getNumOperands() == 3 &&
5866          VT == Op.getOperand(1).getValueType() &&
5867          "Unexpected SHL!");
5868
5869   // Expand into a bunch of logical ops.  Note that these ops
5870   // depend on the PPC behavior for oversized shift amounts.
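  // For Amt in [0, BitWidth):  OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt))
  //                            OutLo = Lo << Amt
  // For Amt in [BitWidth, 2*BitWidth) the PPC shifts return zero for the
  // out-of-range pieces, so OutHi degenerates to Lo << (Amt - BitWidth) and
  // OutLo to zero, matching a true double-width shift.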
5871 SDValue Lo = Op.getOperand(0); 5872 SDValue Hi = Op.getOperand(1); 5873 SDValue Amt = Op.getOperand(2); 5874 EVT AmtVT = Amt.getValueType(); 5875 5876 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5877 DAG.getConstant(BitWidth, AmtVT), Amt); 5878 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 5879 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 5880 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 5881 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5882 DAG.getConstant(-BitWidth, AmtVT)); 5883 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 5884 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5885 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 5886 SDValue OutOps[] = { OutLo, OutHi }; 5887 return DAG.getMergeValues(OutOps, dl); 5888 } 5889 5890 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 5891 EVT VT = Op.getValueType(); 5892 SDLoc dl(Op); 5893 unsigned BitWidth = VT.getSizeInBits(); 5894 assert(Op.getNumOperands() == 3 && 5895 VT == Op.getOperand(1).getValueType() && 5896 "Unexpected SRL!"); 5897 5898 // Expand into a bunch of logical ops. Note that these ops 5899 // depend on the PPC behavior for oversized shift amounts. 5900 SDValue Lo = Op.getOperand(0); 5901 SDValue Hi = Op.getOperand(1); 5902 SDValue Amt = Op.getOperand(2); 5903 EVT AmtVT = Amt.getValueType(); 5904 5905 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5906 DAG.getConstant(BitWidth, AmtVT), Amt); 5907 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5908 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5909 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5910 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5911 DAG.getConstant(-BitWidth, AmtVT)); 5912 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 5913 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5914 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 5915 SDValue OutOps[] = { OutLo, OutHi }; 5916 return DAG.getMergeValues(OutOps, dl); 5917 } 5918 5919 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 5920 SDLoc dl(Op); 5921 EVT VT = Op.getValueType(); 5922 unsigned BitWidth = VT.getSizeInBits(); 5923 assert(Op.getNumOperands() == 3 && 5924 VT == Op.getOperand(1).getValueType() && 5925 "Unexpected SRA!"); 5926 5927 // Expand into a bunch of logical ops, followed by a select_cc. 5928 SDValue Lo = Op.getOperand(0); 5929 SDValue Hi = Op.getOperand(1); 5930 SDValue Amt = Op.getOperand(2); 5931 EVT AmtVT = Amt.getValueType(); 5932 5933 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5934 DAG.getConstant(BitWidth, AmtVT), Amt); 5935 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5936 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5937 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5938 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5939 DAG.getConstant(-BitWidth, AmtVT)); 5940 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 5941 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 5942 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), 5943 Tmp4, Tmp6, ISD::SETLE); 5944 SDValue OutOps[] = { OutLo, OutHi }; 5945 return DAG.getMergeValues(OutOps, dl); 5946 } 5947 5948 //===----------------------------------------------------------------------===// 5949 // Vector related lowering. 5950 // 5951 5952 /// BuildSplatI - Build a canonical splati of Val with an element size of 5953 /// SplatSize. 
Cast the result to VT. 5954 static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 5955 SelectionDAG &DAG, SDLoc dl) { 5956 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 5957 5958 static const EVT VTys[] = { // canonical VT to use for each size. 5959 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 5960 }; 5961 5962 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 5963 5964 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 5965 if (Val == -1) 5966 SplatSize = 1; 5967 5968 EVT CanonicalVT = VTys[SplatSize-1]; 5969 5970 // Build a canonical splat for this value. 5971 SDValue Elt = DAG.getConstant(Val, MVT::i32); 5972 SmallVector<SDValue, 8> Ops; 5973 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 5974 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops); 5975 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); 5976 } 5977 5978 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the 5979 /// specified intrinsic ID. 5980 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, 5981 SelectionDAG &DAG, SDLoc dl, 5982 EVT DestVT = MVT::Other) { 5983 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 5984 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5985 DAG.getConstant(IID, MVT::i32), Op); 5986 } 5987 5988 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the 5989 /// specified intrinsic ID. 5990 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 5991 SelectionDAG &DAG, SDLoc dl, 5992 EVT DestVT = MVT::Other) { 5993 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 5994 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5995 DAG.getConstant(IID, MVT::i32), LHS, RHS); 5996 } 5997 5998 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 5999 /// specified intrinsic ID. 6000 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 6001 SDValue Op2, SelectionDAG &DAG, 6002 SDLoc dl, EVT DestVT = MVT::Other) { 6003 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 6004 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 6005 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 6006 } 6007 6008 6009 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 6010 /// amount. The result has the specified value type. 6011 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 6012 EVT VT, SelectionDAG &DAG, SDLoc dl) { 6013 // Force LHS/RHS to be the right type. 6014 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 6015 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 6016 6017 int Ops[16]; 6018 for (unsigned i = 0; i != 16; ++i) 6019 Ops[i] = i + Amt; 6020 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 6021 return DAG.getNode(ISD::BITCAST, dl, VT, T); 6022 } 6023 6024 // If this is a case we can't handle, return null and let the default 6025 // expansion code take care of it. If we CAN select this case, and if it 6026 // selects to a single instruction, return Op. Otherwise, if we can codegen 6027 // this case more efficiently than a constant pool load, lower it to the 6028 // sequence of ops that should be used. 6029 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 6030 SelectionDAG &DAG) const { 6031 SDLoc dl(Op); 6032 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 6033 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 6034 6035 // Check if this is a splat of a constant value. 
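// (E.g. a v8i16 vector of all 3s is a constant splat with
// SplatBitSize == 16, while {1,2,1,2,1,2,1,2} only splats when viewed
// as 32-bit elements and so is reported with SplatBitSize == 32.)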
6036 APInt APSplatBits, APSplatUndef; 6037 unsigned SplatBitSize; 6038 bool HasAnyUndefs; 6039 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 6040 HasAnyUndefs, 0, true) || SplatBitSize > 32) 6041 return SDValue(); 6042 6043 unsigned SplatBits = APSplatBits.getZExtValue(); 6044 unsigned SplatUndef = APSplatUndef.getZExtValue(); 6045 unsigned SplatSize = SplatBitSize / 8; 6046 6047 // First, handle single instruction cases. 6048 6049 // All zeros? 6050 if (SplatBits == 0) { 6051 // Canonicalize all zero vectors to be v4i32. 6052 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 6053 SDValue Z = DAG.getConstant(0, MVT::i32); 6054 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 6055 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 6056 } 6057 return Op; 6058 } 6059 6060 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 6061 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 6062 (32-SplatBitSize)); 6063 if (SextVal >= -16 && SextVal <= 15) 6064 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 6065 6066 6067 // Two instruction sequences. 6068 6069 // If this value is in the range [-32,30] and is even, use: 6070 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 6071 // If this value is in the range [17,31] and is odd, use: 6072 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 6073 // If this value is in the range [-31,-17] and is odd, use: 6074 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 6075 // Note the last two are three-instruction sequences. 6076 if (SextVal >= -32 && SextVal <= 31) { 6077 // To avoid having these optimizations undone by constant folding, 6078 // we convert to a pseudo that will be expanded later into one of 6079 // the above forms. 6080 SDValue Elt = DAG.getConstant(SextVal, MVT::i32); 6081 EVT VT = (SplatSize == 1 ? MVT::v16i8 : 6082 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32)); 6083 SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32); 6084 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 6085 if (VT == Op.getValueType()) 6086 return RetVal; 6087 else 6088 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal); 6089 } 6090 6091 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 6092 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 6093 // for fneg/fabs. 6094 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 6095 // Make -1 and vspltisw -1: 6096 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 6097 6098 // Make the VSLW intrinsic, computing 0x8000_0000. 6099 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 6100 OnesV, DAG, dl); 6101 6102 // xor by OnesV to invert it. 6103 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 6104 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6105 } 6106 6107 // The remaining cases assume either big endian element order or 6108 // a splat-size that equates to the element size of the vector 6109 // to be built. An example that doesn't work for little endian is 6110 // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits 6111 // and a vector element size of 16 bits. The code below will 6112 // produce the vector in big endian element order, which for little 6113 // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. 6114 6115 // For now, just avoid these optimizations in that case. 6116 // FIXME: Develop correct optimizations for LE with mismatched 6117 // splat and element sizes. 
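// (As an illustration of the searches below: a splat of 0x18 in every
// byte has no single-vspltisb form, but vspltisb 3 followed by
// vslb v,v,v produces it, because each byte is then shifted left by its
// own low three bits: 3 << 3 == 0x18.)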
6118 6119 if (Subtarget.isLittleEndian() && 6120 SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) 6121 return SDValue(); 6122 6123 // Check to see if this is a wide variety of vsplti*, binop self cases. 6124 static const signed char SplatCsts[] = { 6125 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 6126 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 6127 }; 6128 6129 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 6130 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 6131 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 6132 int i = SplatCsts[idx]; 6133 6134 // Figure out what shift amount will be used by altivec if shifted by i in 6135 // this splat size. 6136 unsigned TypeShiftAmt = i & (SplatBitSize-1); 6137 6138 // vsplti + shl self. 6139 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 6140 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6141 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6142 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 6143 Intrinsic::ppc_altivec_vslw 6144 }; 6145 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6146 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6147 } 6148 6149 // vsplti + srl self. 6150 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 6151 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6152 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6153 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 6154 Intrinsic::ppc_altivec_vsrw 6155 }; 6156 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6157 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6158 } 6159 6160 // vsplti + sra self. 6161 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 6162 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6163 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6164 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 6165 Intrinsic::ppc_altivec_vsraw 6166 }; 6167 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6168 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6169 } 6170 6171 // vsplti + rol self. 6172 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 6173 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 6174 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 6175 static const unsigned IIDs[] = { // Intrinsic to use for each size. 6176 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 6177 Intrinsic::ppc_altivec_vrlw 6178 }; 6179 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 6180 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 6181 } 6182 6183 // t = vsplti c, result = vsldoi t, t, 1 6184 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 6185 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 6186 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 6187 } 6188 // t = vsplti c, result = vsldoi t, t, 2 6189 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 6190 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 6191 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 6192 } 6193 // t = vsplti c, result = vsldoi t, t, 3 6194 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 
0xFFFFFF : 0))) { 6195 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 6196 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 6197 } 6198 } 6199 6200 return SDValue(); 6201 } 6202 6203 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 6204 /// the specified operations to build the shuffle. 6205 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 6206 SDValue RHS, SelectionDAG &DAG, 6207 SDLoc dl) { 6208 unsigned OpNum = (PFEntry >> 26) & 0x0F; 6209 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 6210 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 6211 6212 enum { 6213 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 6214 OP_VMRGHW, 6215 OP_VMRGLW, 6216 OP_VSPLTISW0, 6217 OP_VSPLTISW1, 6218 OP_VSPLTISW2, 6219 OP_VSPLTISW3, 6220 OP_VSLDOI4, 6221 OP_VSLDOI8, 6222 OP_VSLDOI12 6223 }; 6224 6225 if (OpNum == OP_COPY) { 6226 if (LHSID == (1*9+2)*9+3) return LHS; 6227 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 6228 return RHS; 6229 } 6230 6231 SDValue OpLHS, OpRHS; 6232 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 6233 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 6234 6235 int ShufIdxs[16]; 6236 switch (OpNum) { 6237 default: llvm_unreachable("Unknown i32 permute!"); 6238 case OP_VMRGHW: 6239 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 6240 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 6241 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 6242 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 6243 break; 6244 case OP_VMRGLW: 6245 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 6246 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 6247 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 6248 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 6249 break; 6250 case OP_VSPLTISW0: 6251 for (unsigned i = 0; i != 16; ++i) 6252 ShufIdxs[i] = (i&3)+0; 6253 break; 6254 case OP_VSPLTISW1: 6255 for (unsigned i = 0; i != 16; ++i) 6256 ShufIdxs[i] = (i&3)+4; 6257 break; 6258 case OP_VSPLTISW2: 6259 for (unsigned i = 0; i != 16; ++i) 6260 ShufIdxs[i] = (i&3)+8; 6261 break; 6262 case OP_VSPLTISW3: 6263 for (unsigned i = 0; i != 16; ++i) 6264 ShufIdxs[i] = (i&3)+12; 6265 break; 6266 case OP_VSLDOI4: 6267 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 6268 case OP_VSLDOI8: 6269 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 6270 case OP_VSLDOI12: 6271 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 6272 } 6273 EVT VT = OpLHS.getValueType(); 6274 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 6275 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 6276 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 6277 return DAG.getNode(ISD::BITCAST, dl, VT, T); 6278 } 6279 6280 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 6281 /// is a shuffle we can handle in a single instruction, return it. Otherwise, 6282 /// return the code it can be lowered into. Worst case, it can always be 6283 /// lowered into a vperm. 
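/// For example, an arbitrary byte rearrangement of two v16i8 inputs is
/// always expressible as vperm V1, V2, Mask, at the price of
/// materializing the 16-byte Mask (typically via a constant-pool load).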
6284 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 6285 SelectionDAG &DAG) const { 6286 SDLoc dl(Op); 6287 SDValue V1 = Op.getOperand(0); 6288 SDValue V2 = Op.getOperand(1); 6289 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 6290 EVT VT = Op.getValueType(); 6291 bool isLittleEndian = Subtarget.isLittleEndian(); 6292 6293 // Cases that are handled by instructions that take permute immediates 6294 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 6295 // selected by the instruction selector. 6296 if (V2.getOpcode() == ISD::UNDEF) { 6297 if (PPC::isSplatShuffleMask(SVOp, 1) || 6298 PPC::isSplatShuffleMask(SVOp, 2) || 6299 PPC::isSplatShuffleMask(SVOp, 4) || 6300 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) || 6301 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) || 6302 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 || 6303 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) || 6304 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) || 6305 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) || 6306 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) || 6307 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) || 6308 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG)) { 6309 return Op; 6310 } 6311 } 6312 6313 // Altivec has a variety of "shuffle immediates" that take two vector inputs 6314 // and produce a fixed permutation. If any of these match, do not lower to 6315 // VPERM. 6316 unsigned int ShuffleKind = isLittleEndian ? 2 : 0; 6317 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) || 6318 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) || 6319 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 || 6320 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) || 6321 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) || 6322 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) || 6323 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) || 6324 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) || 6325 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG)) 6326 return Op; 6327 6328 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 6329 // perfect shuffle table to emit an optimal matching sequence. 6330 ArrayRef<int> PermMask = SVOp->getMask(); 6331 6332 unsigned PFIndexes[4]; 6333 bool isFourElementShuffle = true; 6334 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 6335 unsigned EltNo = 8; // Start out undef. 6336 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 6337 if (PermMask[i*4+j] < 0) 6338 continue; // Undef, ignore it. 6339 6340 unsigned ByteSource = PermMask[i*4+j]; 6341 if ((ByteSource & 3) != j) { 6342 isFourElementShuffle = false; 6343 break; 6344 } 6345 6346 if (EltNo == 8) { 6347 EltNo = ByteSource/4; 6348 } else if (EltNo != ByteSource/4) { 6349 isFourElementShuffle = false; 6350 break; 6351 } 6352 } 6353 PFIndexes[i] = EltNo; 6354 } 6355 6356 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 6357 // perfect shuffle vector to determine if it is cost effective to do this as 6358 // discrete instructions, or whether we should use a vperm. 6359 // For now, we skip this for little endian until such time as we have a 6360 // little-endian perfect shuffle table. 6361 if (isFourElementShuffle && !isLittleEndian) { 6362 // Compute the index in the perfect shuffle table. 
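// Each PFIndexes[i] is in [0,8] (8 == undef), so the four indices pack
// into a single base-9 number; e.g. the identity shuffle <0,1,2,3> gives
// ((0*9+1)*9+2)*9+3 == 102, the LHSID recognized by OP_COPY above.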
6363 unsigned PFTableIndex = 6364 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 6365 6366 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 6367 unsigned Cost = (PFEntry >> 30); 6368 6369 // Determining when to avoid vperm is tricky. Many things affect the cost 6370 // of vperm, particularly how many times the perm mask needs to be computed. 6371 // For example, if the perm mask can be hoisted out of a loop or is already 6372 // used (perhaps because there are multiple permutes with the same shuffle 6373 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 6374 // the loop requires an extra register. 6375 // 6376 // As a compromise, we only emit discrete instructions if the shuffle can be 6377 // generated in 3 or fewer operations. When we have loop information 6378 // available, if this block is within a loop, we should avoid using vperm 6379 // for 3-operation perms and use a constant pool load instead. 6380 if (Cost < 3) 6381 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 6382 } 6383 6384 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 6385 // vector that will get spilled to the constant pool. 6386 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 6387 6388 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 6389 // that it is in input element units, not in bytes. Convert now. 6390 6391 // For little endian, the order of the input vectors is reversed, and 6392 // the permutation mask is complemented with respect to 31. This is 6393 // necessary to produce proper semantics with the big-endian-biased vperm 6394 // instruction. 6395 EVT EltVT = V1.getValueType().getVectorElementType(); 6396 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 6397 6398 SmallVector<SDValue, 16> ResultMask; 6399 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 6400 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 6401 6402 for (unsigned j = 0; j != BytesPerElement; ++j) 6403 if (isLittleEndian) 6404 ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j), 6405 MVT::i32)); 6406 else 6407 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 6408 MVT::i32)); 6409 } 6410 6411 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 6412 ResultMask); 6413 if (isLittleEndian) 6414 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 6415 V2, V1, VPermMask); 6416 else 6417 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), 6418 V1, V2, VPermMask); 6419 } 6420 6421 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 6422 /// altivec comparison. If it is, return true and fill in Opc/isDot with 6423 /// information about the intrinsic. 6424 static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 6425 bool &isDot) { 6426 unsigned IntrinsicID = 6427 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 6428 CompareOpc = -1; 6429 isDot = false; 6430 switch (IntrinsicID) { 6431 default: return false; 6432 // Comparison predicates. 
6433 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 6434 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 6435 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 6436 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 6437 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 6438 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 6439 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 6440 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 6441 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 6442 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 6443 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 6444 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 6445 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 6446 6447 // Normal Comparisons. 6448 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 6449 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 6450 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 6451 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 6452 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 6453 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 6454 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 6455 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 6456 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 6457 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 6458 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 6459 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 6460 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 6461 } 6462 return true; 6463 } 6464 6465 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 6466 /// lower, do it, otherwise return null. 6467 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 6468 SelectionDAG &DAG) const { 6469 // If this is a lowered altivec predicate compare, CompareOpc is set to the 6470 // opcode number of the comparison. 6471 SDLoc dl(Op); 6472 int CompareOpc; 6473 bool isDot; 6474 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 6475 return SDValue(); // Don't custom lower most intrinsics. 6476 6477 // If this is a non-dot comparison, make the VCMP node and we are done. 6478 if (!isDot) { 6479 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 6480 Op.getOperand(1), Op.getOperand(2), 6481 DAG.getConstant(CompareOpc, MVT::i32)); 6482 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 6483 } 6484 6485 // Create the PPCISD altivec 'dot' comparison node. 6486 SDValue Ops[] = { 6487 Op.getOperand(2), // LHS 6488 Op.getOperand(3), // RHS 6489 DAG.getConstant(CompareOpc, MVT::i32) 6490 }; 6491 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 6492 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops); 6493 6494 // Now that we have the comparison, emit a copy from the CR to a GPR. 6495 // This is flagged to the above dot comparison. 
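// After the MFOCRF below, the four CR6 bits (LT, GT, EQ, SO) occupy bits
// 7..4 of the GPR, which is why the extraction shifts right by
// 8-(3-BitNo): EQ (BitNo == 0) sits at bit 5 and LT (BitNo == 2) at
// bit 7.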
6496 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
6497 DAG.getRegister(PPC::CR6, MVT::i32),
6498 CompNode.getValue(1));
6499 
6500 // Unpack the result based on how the target uses it.
6501 unsigned BitNo; // Bit # of CR6.
6502 bool InvertBit; // Invert result?
6503 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
6504 default: // Can't happen, don't crash on invalid number though.
6505 case 0: // Return the value of the EQ bit of CR6.
6506 BitNo = 0; InvertBit = false;
6507 break;
6508 case 1: // Return the inverted value of the EQ bit of CR6.
6509 BitNo = 0; InvertBit = true;
6510 break;
6511 case 2: // Return the value of the LT bit of CR6.
6512 BitNo = 2; InvertBit = false;
6513 break;
6514 case 3: // Return the inverted value of the LT bit of CR6.
6515 BitNo = 2; InvertBit = true;
6516 break;
6517 }
6518 
6519 // Shift the bit into the low position.
6520 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
6521 DAG.getConstant(8-(3-BitNo), MVT::i32));
6522 // Isolate the bit.
6523 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
6524 DAG.getConstant(1, MVT::i32));
6525 
6526 // If we are supposed to, toggle the bit.
6527 if (InvertBit)
6528 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
6529 DAG.getConstant(1, MVT::i32));
6530 return Flags;
6531 }
6532 
6533 SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
6534 SelectionDAG &DAG) const {
6535 SDLoc dl(Op);
6536 // For v2i64 (VSX), we can pattern match the v2i32 case (using fp <-> int
6537 // instructions), but for smaller types, we need to first extend up to v2i32
6538 // before going farther.
6539 if (Op.getValueType() == MVT::v2i64) {
6540 EVT ExtVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
6541 if (ExtVT != MVT::v2i32) {
6542 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0));
6543 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, Op,
6544 DAG.getValueType(EVT::getVectorVT(*DAG.getContext(),
6545 ExtVT.getVectorElementType(), 4)));
6546 Op = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Op);
6547 Op = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v2i64, Op,
6548 DAG.getValueType(MVT::v2i32));
6549 }
6550 
6551 return Op;
6552 }
6553 
6554 return SDValue();
6555 }
6556 
6557 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
6558 SelectionDAG &DAG) const {
6559 SDLoc dl(Op);
6560 // Create a stack slot that is 16-byte aligned.
6561 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
6562 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
6563 EVT PtrVT = getPointerTy();
6564 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
6565 
6566 // Store the input value into Value#0 of the stack slot.
6567 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
6568 Op.getOperand(0), FIdx, MachinePointerInfo(),
6569 false, false, 0);
6570 // Load it out.
6571 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(),
6572 false, false, false, 0);
6573 }
6574 
6575 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
6576 SDLoc dl(Op);
6577 if (Op.getValueType() == MVT::v4i32) {
6578 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
6579 
6580 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl);
6581 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl); // +16 as shift amt.
6582 
6583 SDValue RHSSwap = // = vrlw RHS, 16
6584 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
6585 
6586 // Shrinkify inputs to v8i16.
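// (Why halfwords suffice: per 32-bit lane, L*R mod 2^32 equals
//  lo(L)*lo(R) + ((lo(L)*hi(R) + hi(L)*lo(R)) << 16); the hi(L)*hi(R)
//  term only affects bits >= 32 and is dropped.)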
6587 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 6588 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 6589 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 6590 6591 // Low parts multiplied together, generating 32-bit results (we ignore the 6592 // top parts). 6593 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 6594 LHS, RHS, DAG, dl, MVT::v4i32); 6595 6596 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 6597 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 6598 // Shift the high parts up 16 bits. 6599 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 6600 Neg16, DAG, dl); 6601 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 6602 } else if (Op.getValueType() == MVT::v8i16) { 6603 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6604 6605 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 6606 6607 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 6608 LHS, RHS, Zero, DAG, dl); 6609 } else if (Op.getValueType() == MVT::v16i8) { 6610 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 6611 bool isLittleEndian = Subtarget.isLittleEndian(); 6612 6613 // Multiply the even 8-bit parts, producing 16-bit sums. 6614 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 6615 LHS, RHS, DAG, dl, MVT::v8i16); 6616 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 6617 6618 // Multiply the odd 8-bit parts, producing 16-bit sums. 6619 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 6620 LHS, RHS, DAG, dl, MVT::v8i16); 6621 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 6622 6623 // Merge the results together. Because vmuleub and vmuloub are 6624 // instructions with a big-endian bias, we must reverse the 6625 // element numbering and reverse the meaning of "odd" and "even" 6626 // when generating little endian code. 6627 int Ops[16]; 6628 for (unsigned i = 0; i != 8; ++i) { 6629 if (isLittleEndian) { 6630 Ops[i*2 ] = 2*i; 6631 Ops[i*2+1] = 2*i+16; 6632 } else { 6633 Ops[i*2 ] = 2*i+1; 6634 Ops[i*2+1] = 2*i+1+16; 6635 } 6636 } 6637 if (isLittleEndian) 6638 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); 6639 else 6640 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 6641 } else { 6642 llvm_unreachable("Unknown mul to lower!"); 6643 } 6644 } 6645 6646 /// LowerOperation - Provide custom lowering hooks for some operations. 
6647 /// 6648 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 6649 switch (Op.getOpcode()) { 6650 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 6651 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 6652 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 6653 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 6654 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 6655 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 6656 case ISD::SETCC: return LowerSETCC(Op, DAG); 6657 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 6658 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 6659 case ISD::VASTART: 6660 return LowerVASTART(Op, DAG, Subtarget); 6661 6662 case ISD::VAARG: 6663 return LowerVAARG(Op, DAG, Subtarget); 6664 6665 case ISD::VACOPY: 6666 return LowerVACOPY(Op, DAG, Subtarget); 6667 6668 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); 6669 case ISD::DYNAMIC_STACKALLOC: 6670 return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); 6671 6672 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 6673 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 6674 6675 case ISD::LOAD: return LowerLOAD(Op, DAG); 6676 case ISD::STORE: return LowerSTORE(Op, DAG); 6677 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); 6678 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 6679 case ISD::FP_TO_UINT: 6680 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 6681 SDLoc(Op)); 6682 case ISD::UINT_TO_FP: 6683 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 6684 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 6685 6686 // Lower 64-bit shifts. 6687 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 6688 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 6689 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 6690 6691 // Vector-related lowering. 6692 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 6693 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 6694 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 6695 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 6696 case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 6697 case ISD::MUL: return LowerMUL(Op, DAG); 6698 6699 // For counter-based loop handling. 6700 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 6701 6702 // Frame & Return address. 
6703 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 6704 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 6705 } 6706 } 6707 6708 void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 6709 SmallVectorImpl<SDValue>&Results, 6710 SelectionDAG &DAG) const { 6711 const TargetMachine &TM = getTargetMachine(); 6712 SDLoc dl(N); 6713 switch (N->getOpcode()) { 6714 default: 6715 llvm_unreachable("Do not know how to custom type legalize this operation!"); 6716 case ISD::READCYCLECOUNTER: { 6717 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 6718 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0)); 6719 6720 Results.push_back(RTB); 6721 Results.push_back(RTB.getValue(1)); 6722 Results.push_back(RTB.getValue(2)); 6723 break; 6724 } 6725 case ISD::INTRINSIC_W_CHAIN: { 6726 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 6727 Intrinsic::ppc_is_decremented_ctr_nonzero) 6728 break; 6729 6730 assert(N->getValueType(0) == MVT::i1 && 6731 "Unexpected result type for CTR decrement intrinsic"); 6732 EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); 6733 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 6734 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 6735 N->getOperand(1)); 6736 6737 Results.push_back(NewInt); 6738 Results.push_back(NewInt.getValue(1)); 6739 break; 6740 } 6741 case ISD::VAARG: { 6742 if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() 6743 || TM.getSubtarget<PPCSubtarget>().isPPC64()) 6744 return; 6745 6746 EVT VT = N->getValueType(0); 6747 6748 if (VT == MVT::i64) { 6749 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget); 6750 6751 Results.push_back(NewNode); 6752 Results.push_back(NewNode.getValue(1)); 6753 } 6754 return; 6755 } 6756 case ISD::FP_ROUND_INREG: { 6757 assert(N->getValueType(0) == MVT::ppcf128); 6758 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 6759 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 6760 MVT::f64, N->getOperand(0), 6761 DAG.getIntPtrConstant(0)); 6762 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 6763 MVT::f64, N->getOperand(0), 6764 DAG.getIntPtrConstant(1)); 6765 6766 // Add the two halves of the long double in round-to-zero mode. 6767 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 6768 6769 // We know the low half is about to be thrown away, so just use something 6770 // convenient. 6771 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 6772 FPreg, FPreg)); 6773 return; 6774 } 6775 case ISD::FP_TO_SINT: 6776 // LowerFP_TO_INT() can only handle f32 and f64. 
6777 if (N->getOperand(0).getValueType() == MVT::ppcf128)
6778 return;
6779 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
6780 return;
6781 }
6782 }
6783 
6784 
6785 //===----------------------------------------------------------------------===//
6786 // Other Lowering Code
6787 //===----------------------------------------------------------------------===//
6788 
6789 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
6790 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
6791 Function *Func = Intrinsic::getDeclaration(M, Id);
6792 return Builder.CreateCall(Func);
6793 }
6794 
6795 // The mappings for emitLeading/TrailingFence are taken from
6796 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
6797 Instruction* PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
6798 AtomicOrdering Ord, bool IsStore,
6799 bool IsLoad) const {
6800 if (Ord == SequentiallyConsistent)
6801 return callIntrinsic(Builder, Intrinsic::ppc_sync);
6802 else if (isAtLeastRelease(Ord))
6803 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
6804 else
6805 return nullptr;
6806 }
6807 
6808 Instruction* PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
6809 AtomicOrdering Ord, bool IsStore,
6810 bool IsLoad) const {
6811 if (IsLoad && isAtLeastAcquire(Ord))
6812 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
6813 // FIXME: this is too conservative, a dependent branch + isync is enough.
6814 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
6815 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
6816 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
6817 else
6818 return nullptr;
6819 }
6820 
6821 MachineBasicBlock *
6822 PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
6823 bool is64bit, unsigned BinOpcode) const {
6824 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
6825 const TargetInstrInfo *TII =
6826 getTargetMachine().getSubtargetImpl()->getInstrInfo();
6827 
6828 const BasicBlock *LLVM_BB = BB->getBasicBlock();
6829 MachineFunction *F = BB->getParent();
6830 MachineFunction::iterator It = BB;
6831 ++It;
6832 
6833 unsigned dest = MI->getOperand(0).getReg();
6834 unsigned ptrA = MI->getOperand(1).getReg();
6835 unsigned ptrB = MI->getOperand(2).getReg();
6836 unsigned incr = MI->getOperand(3).getReg();
6837 DebugLoc dl = MI->getDebugLoc();
6838 
6839 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
6840 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
6841 F->insert(It, loopMBB);
6842 F->insert(It, exitMBB);
6843 exitMBB->splice(exitMBB->begin(), BB,
6844 std::next(MachineBasicBlock::iterator(MI)), BB->end());
6845 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
6846 
6847 MachineRegisterInfo &RegInfo = F->getRegInfo();
6848 unsigned TmpReg = (!BinOpcode) ? incr :
6849 RegInfo.createVirtualRegister( is64bit ? &PPC::G8RCRegClass
6850 : &PPC::GPRCRegClass);
6851 
6852 // thisMBB:
6853 // ...
6854 // fallthrough --> loopMBB
6855 BB->addSuccessor(loopMBB);
6856 
6857 // loopMBB:
6858 // l[wd]arx dest, ptr
6859 // add r0, dest, incr
6860 // st[wd]cx. r0, ptr
6861 // bne- loopMBB
6862 // fallthrough --> exitMBB
6863 BB = loopMBB;
6864 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
6865 .addReg(ptrA).addReg(ptrB);
6866 if (BinOpcode)
6867 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
6868 BuildMI(BB, dl, TII->get(is64bit ?
PPC::STDCX : PPC::STWCX)) 6869 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 6870 BuildMI(BB, dl, TII->get(PPC::BCC)) 6871 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 6872 BB->addSuccessor(loopMBB); 6873 BB->addSuccessor(exitMBB); 6874 6875 // exitMBB: 6876 // ... 6877 BB = exitMBB; 6878 return BB; 6879 } 6880 6881 MachineBasicBlock * 6882 PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 6883 MachineBasicBlock *BB, 6884 bool is8bit, // operation 6885 unsigned BinOpcode) const { 6886 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 6887 const TargetInstrInfo *TII = 6888 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 6889 // In 64 bit mode we have to use 64 bits for addresses, even though the 6890 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 6891 // registers without caring whether they're 32 or 64, but here we're 6892 // doing actual arithmetic on the addresses. 6893 bool is64bit = Subtarget.isPPC64(); 6894 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 6895 6896 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6897 MachineFunction *F = BB->getParent(); 6898 MachineFunction::iterator It = BB; 6899 ++It; 6900 6901 unsigned dest = MI->getOperand(0).getReg(); 6902 unsigned ptrA = MI->getOperand(1).getReg(); 6903 unsigned ptrB = MI->getOperand(2).getReg(); 6904 unsigned incr = MI->getOperand(3).getReg(); 6905 DebugLoc dl = MI->getDebugLoc(); 6906 6907 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 6908 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6909 F->insert(It, loopMBB); 6910 F->insert(It, exitMBB); 6911 exitMBB->splice(exitMBB->begin(), BB, 6912 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 6913 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6914 6915 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6916 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 6917 : &PPC::GPRCRegClass; 6918 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 6919 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 6920 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 6921 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 6922 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 6923 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 6924 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 6925 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 6926 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 6927 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 6928 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 6929 unsigned Ptr1Reg; 6930 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 6931 6932 // thisMBB: 6933 // ... 6934 // fallthrough --> loopMBB 6935 BB->addSuccessor(loopMBB); 6936 6937 // The 4-byte load must be aligned, while a char or short may be 6938 // anywhere in the word. Hence all this nasty bookkeeping code. 6939 // add ptr1, ptrA, ptrB [copy if ptrA==0] 6940 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 6941 // xori shift, shift1, 24 [16] 6942 // rlwinm ptr, ptr1, 0, 0, 29 6943 // slw incr2, incr, shift 6944 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 6945 // slw mask, mask2, shift 6946 // loopMBB: 6947 // lwarx tmpDest, ptr 6948 // add tmp, tmpDest, incr2 6949 // andc tmp2, tmpDest, mask 6950 // and tmp3, tmp, mask 6951 // or tmp4, tmp3, tmp2 6952 // stwcx. 
tmp4, ptr 6953 // bne- loopMBB 6954 // fallthrough --> exitMBB 6955 // srw dest, tmpDest, shift 6956 if (ptrA != ZeroReg) { 6957 Ptr1Reg = RegInfo.createVirtualRegister(RC); 6958 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 6959 .addReg(ptrA).addReg(ptrB); 6960 } else { 6961 Ptr1Reg = ptrB; 6962 } 6963 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 6964 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 6965 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 6966 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 6967 if (is64bit) 6968 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 6969 .addReg(Ptr1Reg).addImm(0).addImm(61); 6970 else 6971 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 6972 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 6973 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 6974 .addReg(incr).addReg(ShiftReg); 6975 if (is8bit) 6976 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 6977 else { 6978 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 6979 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 6980 } 6981 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 6982 .addReg(Mask2Reg).addReg(ShiftReg); 6983 6984 BB = loopMBB; 6985 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 6986 .addReg(ZeroReg).addReg(PtrReg); 6987 if (BinOpcode) 6988 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 6989 .addReg(Incr2Reg).addReg(TmpDestReg); 6990 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 6991 .addReg(TmpDestReg).addReg(MaskReg); 6992 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 6993 .addReg(TmpReg).addReg(MaskReg); 6994 BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 6995 .addReg(Tmp3Reg).addReg(Tmp2Reg); 6996 BuildMI(BB, dl, TII->get(PPC::STWCX)) 6997 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 6998 BuildMI(BB, dl, TII->get(PPC::BCC)) 6999 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 7000 BB->addSuccessor(loopMBB); 7001 BB->addSuccessor(exitMBB); 7002 7003 // exitMBB: 7004 // ... 
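// The loop above operated on the whole containing word; shift the
// pre-update word right by the in-word bit offset so the old byte or
// halfword value ends up in the low bits of dest.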
7005 BB = exitMBB; 7006 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 7007 .addReg(ShiftReg); 7008 return BB; 7009 } 7010 7011 llvm::MachineBasicBlock* 7012 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, 7013 MachineBasicBlock *MBB) const { 7014 DebugLoc DL = MI->getDebugLoc(); 7015 const TargetInstrInfo *TII = 7016 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 7017 7018 MachineFunction *MF = MBB->getParent(); 7019 MachineRegisterInfo &MRI = MF->getRegInfo(); 7020 7021 const BasicBlock *BB = MBB->getBasicBlock(); 7022 MachineFunction::iterator I = MBB; 7023 ++I; 7024 7025 // Memory Reference 7026 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 7027 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 7028 7029 unsigned DstReg = MI->getOperand(0).getReg(); 7030 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 7031 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 7032 unsigned mainDstReg = MRI.createVirtualRegister(RC); 7033 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 7034 7035 MVT PVT = getPointerTy(); 7036 assert((PVT == MVT::i64 || PVT == MVT::i32) && 7037 "Invalid Pointer Size!"); 7038 // For v = setjmp(buf), we generate 7039 // 7040 // thisMBB: 7041 // SjLjSetup mainMBB 7042 // bl mainMBB 7043 // v_restore = 1 7044 // b sinkMBB 7045 // 7046 // mainMBB: 7047 // buf[LabelOffset] = LR 7048 // v_main = 0 7049 // 7050 // sinkMBB: 7051 // v = phi(main, restore) 7052 // 7053 7054 MachineBasicBlock *thisMBB = MBB; 7055 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 7056 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 7057 MF->insert(I, mainMBB); 7058 MF->insert(I, sinkMBB); 7059 7060 MachineInstrBuilder MIB; 7061 7062 // Transfer the remainder of BB and its successor edges to sinkMBB. 7063 sinkMBB->splice(sinkMBB->begin(), MBB, 7064 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 7065 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 7066 7067 // Note that the structure of the jmp_buf used here is not compatible 7068 // with that used by libc, and is not designed to be. Specifically, it 7069 // stores only those 'reserved' registers that LLVM does not otherwise 7070 // understand how to spill. Also, by convention, by the time this 7071 // intrinsic is called, Clang has already stored the frame address in the 7072 // first slot of the buffer and stack address in the third. Following the 7073 // X86 target code, we'll store the jump address in the second slot. We also 7074 // need to save the TOC pointer (R2) to handle jumps between shared 7075 // libraries, and that will be stored in the fourth slot. The thread 7076 // identifier (R13) is not affected. 7077 7078 // thisMBB: 7079 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 7080 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 7081 const int64_t BPOffset = 4 * PVT.getStoreSize(); 7082 7083 // Prepare IP either in reg. 7084 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 7085 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 7086 unsigned BufReg = MI->getOperand(1).getReg(); 7087 7088 if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { 7089 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 7090 .addReg(PPC::X2) 7091 .addImm(TOCOffset) 7092 .addReg(BufReg); 7093 MIB.setMemRefs(MMOBegin, MMOEnd); 7094 } 7095 7096 // Naked functions never have a base pointer, and so we use r1. For all 7097 // other functions, this decision must be delayed until during PEI. 
7098 unsigned BaseReg; 7099 if (MF->getFunction()->getAttributes().hasAttribute( 7100 AttributeSet::FunctionIndex, Attribute::Naked)) 7101 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; 7102 else 7103 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; 7104 7105 MIB = BuildMI(*thisMBB, MI, DL, 7106 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) 7107 .addReg(BaseReg) 7108 .addImm(BPOffset) 7109 .addReg(BufReg); 7110 MIB.setMemRefs(MMOBegin, MMOEnd); 7111 7112 // Setup 7113 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 7114 const PPCRegisterInfo *TRI = 7115 getTargetMachine().getSubtarget<PPCSubtarget>().getRegisterInfo(); 7116 MIB.addRegMask(TRI->getNoPreservedMask()); 7117 7118 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 7119 7120 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 7121 .addMBB(mainMBB); 7122 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 7123 7124 thisMBB->addSuccessor(mainMBB, /* weight */ 0); 7125 thisMBB->addSuccessor(sinkMBB, /* weight */ 1); 7126 7127 // mainMBB: 7128 // mainDstReg = 0 7129 MIB = BuildMI(mainMBB, DL, 7130 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); 7131 7132 // Store IP 7133 if (Subtarget.isPPC64()) { 7134 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 7135 .addReg(LabelReg) 7136 .addImm(LabelOffset) 7137 .addReg(BufReg); 7138 } else { 7139 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 7140 .addReg(LabelReg) 7141 .addImm(LabelOffset) 7142 .addReg(BufReg); 7143 } 7144 7145 MIB.setMemRefs(MMOBegin, MMOEnd); 7146 7147 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 7148 mainMBB->addSuccessor(sinkMBB); 7149 7150 // sinkMBB: 7151 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 7152 TII->get(PPC::PHI), DstReg) 7153 .addReg(mainDstReg).addMBB(mainMBB) 7154 .addReg(restoreDstReg).addMBB(thisMBB); 7155 7156 MI->eraseFromParent(); 7157 return sinkMBB; 7158 } 7159 7160 MachineBasicBlock * 7161 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, 7162 MachineBasicBlock *MBB) const { 7163 DebugLoc DL = MI->getDebugLoc(); 7164 const TargetInstrInfo *TII = 7165 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 7166 7167 MachineFunction *MF = MBB->getParent(); 7168 MachineRegisterInfo &MRI = MF->getRegInfo(); 7169 7170 // Memory Reference 7171 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 7172 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 7173 7174 MVT PVT = getPointerTy(); 7175 assert((PVT == MVT::i64 || PVT == MVT::i32) && 7176 "Invalid Pointer Size!"); 7177 7178 const TargetRegisterClass *RC = 7179 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 7180 unsigned Tmp = MRI.createVirtualRegister(RC); 7181 // Since FP is only updated here but NOT referenced, it's treated as GPR. 7182 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 7183 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 7184 unsigned BP = (PVT == MVT::i64) ? PPC::X30 : 7185 (Subtarget.isSVR4ABI() && 7186 MF->getTarget().getRelocationModel() == Reloc::PIC_ ? 
7187 PPC::R29 : PPC::R30); 7188 7189 MachineInstrBuilder MIB; 7190 7191 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 7192 const int64_t SPOffset = 2 * PVT.getStoreSize(); 7193 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 7194 const int64_t BPOffset = 4 * PVT.getStoreSize(); 7195 7196 unsigned BufReg = MI->getOperand(0).getReg(); 7197 7198 // Reload FP (the jumped-to function may not have had a 7199 // frame pointer, and if so, then its r31 will be restored 7200 // as necessary). 7201 if (PVT == MVT::i64) { 7202 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 7203 .addImm(0) 7204 .addReg(BufReg); 7205 } else { 7206 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 7207 .addImm(0) 7208 .addReg(BufReg); 7209 } 7210 MIB.setMemRefs(MMOBegin, MMOEnd); 7211 7212 // Reload IP 7213 if (PVT == MVT::i64) { 7214 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 7215 .addImm(LabelOffset) 7216 .addReg(BufReg); 7217 } else { 7218 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 7219 .addImm(LabelOffset) 7220 .addReg(BufReg); 7221 } 7222 MIB.setMemRefs(MMOBegin, MMOEnd); 7223 7224 // Reload SP 7225 if (PVT == MVT::i64) { 7226 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 7227 .addImm(SPOffset) 7228 .addReg(BufReg); 7229 } else { 7230 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 7231 .addImm(SPOffset) 7232 .addReg(BufReg); 7233 } 7234 MIB.setMemRefs(MMOBegin, MMOEnd); 7235 7236 // Reload BP 7237 if (PVT == MVT::i64) { 7238 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 7239 .addImm(BPOffset) 7240 .addReg(BufReg); 7241 } else { 7242 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 7243 .addImm(BPOffset) 7244 .addReg(BufReg); 7245 } 7246 MIB.setMemRefs(MMOBegin, MMOEnd); 7247 7248 // Reload TOC 7249 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { 7250 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 7251 .addImm(TOCOffset) 7252 .addReg(BufReg); 7253 7254 MIB.setMemRefs(MMOBegin, MMOEnd); 7255 } 7256 7257 // Jump 7258 BuildMI(*MBB, MI, DL, 7259 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 7260 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 7261 7262 MI->eraseFromParent(); 7263 return MBB; 7264 } 7265 7266 MachineBasicBlock * 7267 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 7268 MachineBasicBlock *BB) const { 7269 if (MI->getOpcode() == TargetOpcode::STACKMAP || 7270 MI->getOpcode() == TargetOpcode::PATCHPOINT) 7271 return emitPatchPoint(MI, BB); 7272 7273 if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || 7274 MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { 7275 return emitEHSjLjSetJmp(MI, BB); 7276 } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || 7277 MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { 7278 return emitEHSjLjLongJmp(MI, BB); 7279 } 7280 7281 const TargetInstrInfo *TII = 7282 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 7283 7284 // To "insert" these instructions we actually have to insert their 7285 // control-flow patterns. 
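// For example, the SELECT_CC pseudos below become a conditional branch
// over two new basic blocks joined by a PHI, and the ATOMIC_* pseudos
// become l[wd]arx/st[wd]cx. retry loops (see EmitAtomicBinary above).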
7286 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7287 MachineFunction::iterator It = BB; 7288 ++It; 7289 7290 MachineFunction *F = BB->getParent(); 7291 7292 if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || 7293 MI->getOpcode() == PPC::SELECT_CC_I8 || 7294 MI->getOpcode() == PPC::SELECT_I4 || 7295 MI->getOpcode() == PPC::SELECT_I8)) { 7296 SmallVector<MachineOperand, 2> Cond; 7297 if (MI->getOpcode() == PPC::SELECT_CC_I4 || 7298 MI->getOpcode() == PPC::SELECT_CC_I8) 7299 Cond.push_back(MI->getOperand(4)); 7300 else 7301 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET)); 7302 Cond.push_back(MI->getOperand(1)); 7303 7304 DebugLoc dl = MI->getDebugLoc(); 7305 const TargetInstrInfo *TII = 7306 getTargetMachine().getSubtargetImpl()->getInstrInfo(); 7307 TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), 7308 Cond, MI->getOperand(2).getReg(), 7309 MI->getOperand(3).getReg()); 7310 } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || 7311 MI->getOpcode() == PPC::SELECT_CC_I8 || 7312 MI->getOpcode() == PPC::SELECT_CC_F4 || 7313 MI->getOpcode() == PPC::SELECT_CC_F8 || 7314 MI->getOpcode() == PPC::SELECT_CC_VRRC || 7315 MI->getOpcode() == PPC::SELECT_CC_VSFRC || 7316 MI->getOpcode() == PPC::SELECT_CC_VSRC || 7317 MI->getOpcode() == PPC::SELECT_I4 || 7318 MI->getOpcode() == PPC::SELECT_I8 || 7319 MI->getOpcode() == PPC::SELECT_F4 || 7320 MI->getOpcode() == PPC::SELECT_F8 || 7321 MI->getOpcode() == PPC::SELECT_VRRC || 7322 MI->getOpcode() == PPC::SELECT_VSFRC || 7323 MI->getOpcode() == PPC::SELECT_VSRC) { 7324 // The incoming instruction knows the destination vreg to set, the 7325 // condition code register to branch on, the true/false values to 7326 // select between, and a branch opcode to use. 7327 7328 // thisMBB: 7329 // ... 7330 // TrueVal = ... 7331 // cmpTY ccX, r1, r2 7332 // bCC copy1MBB 7333 // fallthrough --> copy0MBB 7334 MachineBasicBlock *thisMBB = BB; 7335 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 7336 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7337 DebugLoc dl = MI->getDebugLoc(); 7338 F->insert(It, copy0MBB); 7339 F->insert(It, sinkMBB); 7340 7341 // Transfer the remainder of BB and its successor edges to sinkMBB. 7342 sinkMBB->splice(sinkMBB->begin(), BB, 7343 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7344 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7345 7346 // Next, add the true and fallthrough blocks as its successors. 7347 BB->addSuccessor(copy0MBB); 7348 BB->addSuccessor(sinkMBB); 7349 7350 if (MI->getOpcode() == PPC::SELECT_I4 || 7351 MI->getOpcode() == PPC::SELECT_I8 || 7352 MI->getOpcode() == PPC::SELECT_F4 || 7353 MI->getOpcode() == PPC::SELECT_F8 || 7354 MI->getOpcode() == PPC::SELECT_VRRC || 7355 MI->getOpcode() == PPC::SELECT_VSFRC || 7356 MI->getOpcode() == PPC::SELECT_VSRC) { 7357 BuildMI(BB, dl, TII->get(PPC::BC)) 7358 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 7359 } else { 7360 unsigned SelectPred = MI->getOperand(4).getImm(); 7361 BuildMI(BB, dl, TII->get(PPC::BCC)) 7362 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 7363 } 7364 7365 // copy0MBB: 7366 // %FalseValue = ... 7367 // # fallthrough to sinkMBB 7368 BB = copy0MBB; 7369 7370 // Update machine-CFG edges 7371 BB->addSuccessor(sinkMBB); 7372 7373 // sinkMBB: 7374 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 7375 // ... 
7376 BB = sinkMBB;
7377 BuildMI(*BB, BB->begin(), dl,
7378 TII->get(PPC::PHI), MI->getOperand(0).getReg())
7379 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
7380 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
7381 } else if (MI->getOpcode() == PPC::ReadTB) {
7382 // To read the 64-bit time-base register on a 32-bit target, we read the
7383 // two halves. Should the counter have wrapped while it was being read, we
7384 // need to try again.
7385 // ...
7386 // readLoop:
7387 // mfspr Rx,TBU # load from TBU
7388 // mfspr Ry,TB # load from TB
7389 // mfspr Rz,TBU # load from TBU
7390 // cmpw crX,Rx,Rz # check if 'old' == 'new'
7391 // bne readLoop # branch if they're not equal
7392 // ...
7393
7394 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
7395 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
7396 DebugLoc dl = MI->getDebugLoc();
7397 F->insert(It, readMBB);
7398 F->insert(It, sinkMBB);
7399
7400 // Transfer the remainder of BB and its successor edges to sinkMBB.
7401 sinkMBB->splice(sinkMBB->begin(), BB,
7402 std::next(MachineBasicBlock::iterator(MI)), BB->end());
7403 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
7404
7405 BB->addSuccessor(readMBB);
7406 BB = readMBB;
7407
7408 MachineRegisterInfo &RegInfo = F->getRegInfo();
7409 unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
7410 unsigned LoReg = MI->getOperand(0).getReg();
7411 unsigned HiReg = MI->getOperand(1).getReg();
7412
7413 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
7414 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
7415 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
7416
7417 unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
7418
7419 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
7420 .addReg(HiReg).addReg(ReadAgainReg);
7421 BuildMI(BB, dl, TII->get(PPC::BCC))
7422 .addImm(PPC::PRED_NE).addReg(CmpReg).addMBB(readMBB);
7423
7424 BB->addSuccessor(readMBB);
7425 BB->addSuccessor(sinkMBB);
7426 }
7427 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
7428 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
7429 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
7430 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
7431 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
7432 BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
7433 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
7434 BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);
7435
7436 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
7437 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
7438 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
7439 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
7440 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
7441 BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
7442 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
7443 BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);
7444
7445 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
7446 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
7447 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
7448 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
7449 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
7450 BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
7451 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
7452 BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);
7453
7454 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
7455 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
7456 else
if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 7457 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 7458 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 7459 BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); 7460 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 7461 BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); 7462 7463 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 7464 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 7465 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 7466 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 7467 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 7468 BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); 7469 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 7470 BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); 7471 7472 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 7473 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 7474 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 7475 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 7476 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 7477 BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); 7478 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 7479 BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); 7480 7481 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 7482 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 7483 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 7484 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 7485 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 7486 BB = EmitAtomicBinary(MI, BB, false, 0); 7487 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 7488 BB = EmitAtomicBinary(MI, BB, true, 0); 7489 7490 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 7491 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { 7492 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 7493 7494 unsigned dest = MI->getOperand(0).getReg(); 7495 unsigned ptrA = MI->getOperand(1).getReg(); 7496 unsigned ptrB = MI->getOperand(2).getReg(); 7497 unsigned oldval = MI->getOperand(3).getReg(); 7498 unsigned newval = MI->getOperand(4).getReg(); 7499 DebugLoc dl = MI->getDebugLoc(); 7500 7501 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 7502 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 7503 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 7504 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7505 F->insert(It, loop1MBB); 7506 F->insert(It, loop2MBB); 7507 F->insert(It, midMBB); 7508 F->insert(It, exitMBB); 7509 exitMBB->splice(exitMBB->begin(), BB, 7510 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7511 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7512 7513 // thisMBB: 7514 // ... 7515 // fallthrough --> loopMBB 7516 BB->addSuccessor(loop1MBB); 7517 7518 // loop1MBB: 7519 // l[wd]arx dest, ptr 7520 // cmp[wd] dest, oldval 7521 // bne- midMBB 7522 // loop2MBB: 7523 // st[wd]cx. newval, ptr 7524 // bne- loopMBB 7525 // b exitBB 7526 // midMBB: 7527 // st[wd]cx. dest, ptr 7528 // exitBB: 7529 BB = loop1MBB; 7530 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 7531 .addReg(ptrA).addReg(ptrB); 7532 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) 7533 .addReg(oldval).addReg(dest); 7534 BuildMI(BB, dl, TII->get(PPC::BCC)) 7535 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 7536 BB->addSuccessor(loop2MBB); 7537 BB->addSuccessor(midMBB); 7538 7539 BB = loop2MBB; 7540 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::STDCX : PPC::STWCX)) 7541 .addReg(newval).addReg(ptrA).addReg(ptrB); 7542 BuildMI(BB, dl, TII->get(PPC::BCC)) 7543 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 7544 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 7545 BB->addSuccessor(loop1MBB); 7546 BB->addSuccessor(exitMBB); 7547 7548 BB = midMBB; 7549 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 7550 .addReg(dest).addReg(ptrA).addReg(ptrB); 7551 BB->addSuccessor(exitMBB); 7552 7553 // exitMBB: 7554 // ... 7555 BB = exitMBB; 7556 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 7557 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 7558 // We must use 64-bit registers for addresses when targeting 64-bit, 7559 // since we're actually doing arithmetic on them. Other registers 7560 // can be 32-bit. 7561 bool is64bit = Subtarget.isPPC64(); 7562 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 7563 7564 unsigned dest = MI->getOperand(0).getReg(); 7565 unsigned ptrA = MI->getOperand(1).getReg(); 7566 unsigned ptrB = MI->getOperand(2).getReg(); 7567 unsigned oldval = MI->getOperand(3).getReg(); 7568 unsigned newval = MI->getOperand(4).getReg(); 7569 DebugLoc dl = MI->getDebugLoc(); 7570 7571 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 7572 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 7573 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 7574 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 7575 F->insert(It, loop1MBB); 7576 F->insert(It, loop2MBB); 7577 F->insert(It, midMBB); 7578 F->insert(It, exitMBB); 7579 exitMBB->splice(exitMBB->begin(), BB, 7580 std::next(MachineBasicBlock::iterator(MI)), BB->end()); 7581 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 7582 7583 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7584 const TargetRegisterClass *RC = is64bit ? &PPC::G8RCRegClass 7585 : &PPC::GPRCRegClass; 7586 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 7587 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 7588 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 7589 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 7590 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 7591 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 7592 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 7593 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 7594 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 7595 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 7596 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 7597 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 7598 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 7599 unsigned Ptr1Reg; 7600 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 7601 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 7602 // thisMBB: 7603 // ... 7604 // fallthrough --> loopMBB 7605 BB->addSuccessor(loop1MBB); 7606 7607 // The 4-byte load must be aligned, while a char or short may be 7608 // anywhere in the word. Hence all this nasty bookkeeping code. 
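// As a concrete illustration of the pseudo-code below (assuming a
// big-endian layout and an i8 operand whose effective address works out
// to 0x1002):
//   rlwinm shift1, ptr1, 3, 27, 28  ->  shift1 = (0x1002 & 3) << 3 = 16
//   xori   shift,  shift1, 24       ->  shift  = 8 (byte 2 of the aligned
//                                       word occupies bits 8-15, counting
//                                       from the LSB)
//   rlwinm ptr,    ptr1, 0, 0, 29   ->  ptr    = 0x1000 (the aligned word)
//   li mask2, 255; slw mask, mask2, shift  ->  mask = 0x0000FF00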
7609 // add ptr1, ptrA, ptrB [copy if ptrA==0] 7610 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 7611 // xori shift, shift1, 24 [16] 7612 // rlwinm ptr, ptr1, 0, 0, 29 7613 // slw newval2, newval, shift 7614 // slw oldval2, oldval,shift 7615 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 7616 // slw mask, mask2, shift 7617 // and newval3, newval2, mask 7618 // and oldval3, oldval2, mask 7619 // loop1MBB: 7620 // lwarx tmpDest, ptr 7621 // and tmp, tmpDest, mask 7622 // cmpw tmp, oldval3 7623 // bne- midMBB 7624 // loop2MBB: 7625 // andc tmp2, tmpDest, mask 7626 // or tmp4, tmp2, newval3 7627 // stwcx. tmp4, ptr 7628 // bne- loop1MBB 7629 // b exitBB 7630 // midMBB: 7631 // stwcx. tmpDest, ptr 7632 // exitBB: 7633 // srw dest, tmpDest, shift 7634 if (ptrA != ZeroReg) { 7635 Ptr1Reg = RegInfo.createVirtualRegister(RC); 7636 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 7637 .addReg(ptrA).addReg(ptrB); 7638 } else { 7639 Ptr1Reg = ptrB; 7640 } 7641 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 7642 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 7643 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 7644 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 7645 if (is64bit) 7646 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 7647 .addReg(Ptr1Reg).addImm(0).addImm(61); 7648 else 7649 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 7650 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 7651 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 7652 .addReg(newval).addReg(ShiftReg); 7653 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 7654 .addReg(oldval).addReg(ShiftReg); 7655 if (is8bit) 7656 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 7657 else { 7658 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 7659 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 7660 .addReg(Mask3Reg).addImm(65535); 7661 } 7662 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 7663 .addReg(Mask2Reg).addReg(ShiftReg); 7664 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 7665 .addReg(NewVal2Reg).addReg(MaskReg); 7666 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 7667 .addReg(OldVal2Reg).addReg(MaskReg); 7668 7669 BB = loop1MBB; 7670 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 7671 .addReg(ZeroReg).addReg(PtrReg); 7672 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 7673 .addReg(TmpDestReg).addReg(MaskReg); 7674 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 7675 .addReg(TmpReg).addReg(OldVal3Reg); 7676 BuildMI(BB, dl, TII->get(PPC::BCC)) 7677 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 7678 BB->addSuccessor(loop2MBB); 7679 BB->addSuccessor(midMBB); 7680 7681 BB = loop2MBB; 7682 BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) 7683 .addReg(TmpDestReg).addReg(MaskReg); 7684 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 7685 .addReg(Tmp2Reg).addReg(NewVal3Reg); 7686 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 7687 .addReg(ZeroReg).addReg(PtrReg); 7688 BuildMI(BB, dl, TII->get(PPC::BCC)) 7689 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 7690 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 7691 BB->addSuccessor(loop1MBB); 7692 BB->addSuccessor(exitMBB); 7693 7694 BB = midMBB; 7695 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 7696 .addReg(ZeroReg).addReg(PtrReg); 7697 BB->addSuccessor(exitMBB); 7698 7699 // exitMBB: 7700 // ... 
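// Continuing the illustration above: after the loop, TmpReg holds the
// loaded word ANDed with mask (lane 0x0000FF00 in the example), so the
// "srw dest, tmp, shift" emitted below with shift = 8 leaves the original
// byte value in the low 8 bits of dest.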
7701 BB = exitMBB; 7702 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 7703 .addReg(ShiftReg); 7704 } else if (MI->getOpcode() == PPC::FADDrtz) { 7705 // This pseudo performs an FADD with rounding mode temporarily forced 7706 // to round-to-zero. We emit this via custom inserter since the FPSCR 7707 // is not modeled at the SelectionDAG level. 7708 unsigned Dest = MI->getOperand(0).getReg(); 7709 unsigned Src1 = MI->getOperand(1).getReg(); 7710 unsigned Src2 = MI->getOperand(2).getReg(); 7711 DebugLoc dl = MI->getDebugLoc(); 7712 7713 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7714 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 7715 7716 // Save FPSCR value. 7717 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 7718 7719 // Set rounding mode to round-to-zero. 7720 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 7721 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 7722 7723 // Perform addition. 7724 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 7725 7726 // Restore FPSCR value. 7727 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); 7728 } else if (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 7729 MI->getOpcode() == PPC::ANDIo_1_GT_BIT || 7730 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 7731 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) { 7732 unsigned Opcode = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8 || 7733 MI->getOpcode() == PPC::ANDIo_1_GT_BIT8) ? 7734 PPC::ANDIo8 : PPC::ANDIo; 7735 bool isEQ = (MI->getOpcode() == PPC::ANDIo_1_EQ_BIT || 7736 MI->getOpcode() == PPC::ANDIo_1_EQ_BIT8); 7737 7738 MachineRegisterInfo &RegInfo = F->getRegInfo(); 7739 unsigned Dest = RegInfo.createVirtualRegister(Opcode == PPC::ANDIo ? 7740 &PPC::GPRCRegClass : 7741 &PPC::G8RCRegClass); 7742 7743 DebugLoc dl = MI->getDebugLoc(); 7744 BuildMI(*BB, MI, dl, TII->get(Opcode), Dest) 7745 .addReg(MI->getOperand(1).getReg()).addImm(1); 7746 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), 7747 MI->getOperand(0).getReg()) 7748 .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT); 7749 } else { 7750 llvm_unreachable("Unexpected instr type to insert"); 7751 } 7752 7753 MI->eraseFromParent(); // The pseudo instruction is gone now. 7754 return BB; 7755 } 7756 7757 //===----------------------------------------------------------------------===// 7758 // Target Optimization Hooks 7759 //===----------------------------------------------------------------------===// 7760 7761 SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, 7762 DAGCombinerInfo &DCI, 7763 unsigned &RefinementSteps, 7764 bool &UseOneConstNR) const { 7765 EVT VT = Operand.getValueType(); 7766 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || 7767 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || 7768 (VT == MVT::v4f32 && Subtarget.hasAltivec()) || 7769 (VT == MVT::v2f64 && Subtarget.hasVSX())) { 7770 // Convergence is quadratic, so we essentially double the number of digits 7771 // correct after every iteration. For both FRE and FRSQRTE, the minimum 7772 // architected relative accuracy is 2^-5. When hasRecipPrec(), this is 7773 // 2^-14. IEEE float has 23 digits and double has 52 digits. 7774 RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3;
7775 if (VT.getScalarType() == MVT::f64)
7776 ++RefinementSteps;
7777 UseOneConstNR = true;
7778 return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
7779 }
7780 return SDValue();
7781 }
7782
7783 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
7784 DAGCombinerInfo &DCI,
7785 unsigned &RefinementSteps) const {
7786 EVT VT = Operand.getValueType();
7787 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
7788 (VT == MVT::f64 && Subtarget.hasFRE()) ||
7789 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
7790 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
7791 // Convergence is quadratic, so we essentially double the number of digits
7792 // correct after every iteration. For both FRE and FRSQRTE, the minimum
7793 // architected relative accuracy is 2^-5. When hasRecipPrec(), this is
7794 // 2^-14. IEEE float has 23 digits and double has 52 digits.
7795 RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
7796 if (VT.getScalarType() == MVT::f64)
7797 ++RefinementSteps;
7798 return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
7799 }
7800 return SDValue();
7801 }
7802
7803 bool PPCTargetLowering::combineRepeatedFPDivisors(unsigned NumUsers) const {
7804 // Note: This functionality is used only when unsafe-fp-math is enabled, and
7805 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
7806 // enabled for division), this functionality is redundant with the default
7807 // combiner logic (once the division -> reciprocal/multiply transformation
7808 // has taken place). As a result, this matters more for older cores than for
7809 // newer ones.
7810
7811 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
7812 // reciprocal if there are two or more FDIVs (for embedded cores with only
7813 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
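// For example, given "a = x/d; b = y/d", the combiner can rewrite this as
// "r = 1.0/d; a = x*r; b = y*r", trading the second division for a
// reciprocal and two multiplies.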
7814 switch (Subtarget.getDarwinDirective()) { 7815 default: 7816 return NumUsers > 2; 7817 case PPC::DIR_440: 7818 case PPC::DIR_A2: 7819 case PPC::DIR_E500mc: 7820 case PPC::DIR_E5500: 7821 return NumUsers > 1; 7822 } 7823 } 7824 7825 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, 7826 unsigned Bytes, int Dist, 7827 SelectionDAG &DAG) { 7828 if (VT.getSizeInBits() / 8 != Bytes) 7829 return false; 7830 7831 SDValue BaseLoc = Base->getBasePtr(); 7832 if (Loc.getOpcode() == ISD::FrameIndex) { 7833 if (BaseLoc.getOpcode() != ISD::FrameIndex) 7834 return false; 7835 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 7836 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 7837 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 7838 int FS = MFI->getObjectSize(FI); 7839 int BFS = MFI->getObjectSize(BFI); 7840 if (FS != BFS || FS != (int)Bytes) return false; 7841 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 7842 } 7843 7844 // Handle X+C 7845 if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && 7846 cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) 7847 return true; 7848 7849 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7850 const GlobalValue *GV1 = nullptr; 7851 const GlobalValue *GV2 = nullptr; 7852 int64_t Offset1 = 0; 7853 int64_t Offset2 = 0; 7854 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 7855 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 7856 if (isGA1 && isGA2 && GV1 == GV2) 7857 return Offset1 == (Offset2 + Dist*Bytes); 7858 return false; 7859 } 7860 7861 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 7862 // not enforce equality of the chain operands. 
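// For example, with Bytes == 16 and Dist == 1, a location of the form
// (BaseLoc + 16), or two frame objects whose offsets differ by exactly 16,
// is considered consecutive with Base.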
7863 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
7864 unsigned Bytes, int Dist,
7865 SelectionDAG &DAG) {
7866 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
7867 EVT VT = LS->getMemoryVT();
7868 SDValue Loc = LS->getBasePtr();
7869 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
7870 }
7871
7872 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
7873 EVT VT;
7874 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
7875 default: return false;
7876 case Intrinsic::ppc_altivec_lvx:
7877 case Intrinsic::ppc_altivec_lvxl:
7878 case Intrinsic::ppc_vsx_lxvw4x:
7879 VT = MVT::v4i32;
7880 break;
7881 case Intrinsic::ppc_vsx_lxvd2x:
7882 VT = MVT::v2f64;
7883 break;
7884 case Intrinsic::ppc_altivec_lvebx:
7885 VT = MVT::i8;
7886 break;
7887 case Intrinsic::ppc_altivec_lvehx:
7888 VT = MVT::i16;
7889 break;
7890 case Intrinsic::ppc_altivec_lvewx:
7891 VT = MVT::i32;
7892 break;
7893 }
7894
7895 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
7896 }
7897
7898 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
7899 EVT VT;
7900 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
7901 default: return false;
7902 case Intrinsic::ppc_altivec_stvx:
7903 case Intrinsic::ppc_altivec_stvxl:
7904 case Intrinsic::ppc_vsx_stxvw4x:
7905 VT = MVT::v4i32;
7906 break;
7907 case Intrinsic::ppc_vsx_stxvd2x:
7908 VT = MVT::v2f64;
7909 break;
7910 case Intrinsic::ppc_altivec_stvebx:
7911 VT = MVT::i8;
7912 break;
7913 case Intrinsic::ppc_altivec_stvehx:
7914 VT = MVT::i16;
7915 break;
7916 case Intrinsic::ppc_altivec_stvewx:
7917 VT = MVT::i32;
7918 break;
7919 }
7920
7921 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
7922 }
7923
7924 return false;
7925 }
7926
7927 // Return true if there is a nearby consecutive load to the one provided
7928 // (regardless of alignment). We search up and down the chain, looking through
7929 // token factors and other loads (but nothing else). As a result, a true result
7930 // indicates that it is safe to create a new consecutive load adjacent to the
7931 // load provided.
7932 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
7933 SDValue Chain = LD->getChain();
7934 EVT VT = LD->getMemoryVT();
7935
7936 SmallSet<SDNode *, 16> LoadRoots;
7937 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
7938 SmallSet<SDNode *, 16> Visited;
7939
7940 // First, search up the chain, branching to follow all token-factor operands.
7941 // If we find a consecutive load, then we're done; otherwise, record all
7942 // nodes just above the top-level loads and token factors.
7943 while (!Queue.empty()) {
7944 SDNode *ChainNext = Queue.pop_back_val();
7945 if (!Visited.insert(ChainNext).second)
7946 continue;
7947
7948 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
7949 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
7950 return true;
7951
7952 if (!Visited.count(ChainLD->getChain().getNode()))
7953 Queue.push_back(ChainLD->getChain().getNode());
7954 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
7955 for (const SDUse &O : ChainNext->ops())
7956 if (!Visited.count(O.getNode()))
7957 Queue.push_back(O.getNode());
7958 } else
7959 LoadRoots.insert(ChainNext);
7960 }
7961
7962 // Second, search down the chain, starting from the top-level nodes recorded
7963 // in the first phase. These top-level nodes are the nodes just above all
7964 // loads and token factors.
Starting with their uses, recursively look through
7965 // all loads (just the chain uses) and token factors to find a consecutive
7966 // load.
7967 Visited.clear();
7968 Queue.clear();
7969
7970 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
7971 IE = LoadRoots.end(); I != IE; ++I) {
7972 Queue.push_back(*I);
7973
7974 while (!Queue.empty()) {
7975 SDNode *LoadRoot = Queue.pop_back_val();
7976 if (!Visited.insert(LoadRoot).second)
7977 continue;
7978
7979 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
7980 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
7981 return true;
7982
7983 for (SDNode::use_iterator UI = LoadRoot->use_begin(),
7984 UE = LoadRoot->use_end(); UI != UE; ++UI)
7985 if (((isa<MemSDNode>(*UI) &&
7986 cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
7987 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
7988 Queue.push_back(*UI);
7989 }
7990 }
7991
7992 return false;
7993 }
7994
7995 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
7996 DAGCombinerInfo &DCI) const {
7997 SelectionDAG &DAG = DCI.DAG;
7998 SDLoc dl(N);
7999
8000 assert(Subtarget.useCRBits() &&
8001 "Expecting to be tracking CR bits");
8002 // If we're tracking CR bits, we need to be careful that we don't have:
8003 // trunc(binary-ops(zext(x), zext(y)))
8004 // or
8005 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
8006 // such that we're unnecessarily moving things into GPRs when it would be
8007 // better to keep them in CR bits.
8008
8009 // Note that trunc here can be an actual i1 trunc, or can be the effective
8010 // truncation that comes from a setcc or select_cc.
8011 if (N->getOpcode() == ISD::TRUNCATE &&
8012 N->getValueType(0) != MVT::i1)
8013 return SDValue();
8014
8015 if (N->getOperand(0).getValueType() != MVT::i32 &&
8016 N->getOperand(0).getValueType() != MVT::i64)
8017 return SDValue();
8018
8019 if (N->getOpcode() == ISD::SETCC ||
8020 N->getOpcode() == ISD::SELECT_CC) {
8021 // If we're looking at a comparison, then we need to make sure that the
8022 // high bits (all except for the first) don't affect the result.
8023 ISD::CondCode CC =
8024 cast<CondCodeSDNode>(N->getOperand(
8025 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
8026 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
8027
8028 if (ISD::isSignedIntSetCC(CC)) {
8029 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
8030 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
8031 return SDValue();
8032 } else if (ISD::isUnsignedIntSetCC(CC)) {
8033 if (!DAG.MaskedValueIsZero(N->getOperand(0),
8034 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
8035 !DAG.MaskedValueIsZero(N->getOperand(1),
8036 APInt::getHighBitsSet(OpBits, OpBits-1)))
8037 return SDValue();
8038 } else {
8039 // This is neither a signed nor an unsigned comparison; just make sure
8040 // that the high bits are equal.
8041 APInt Op1Zero, Op1One;
8042 APInt Op2Zero, Op2One;
8043 DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
8044 DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
8045
8046 // We don't really care about what is known about the first bit (if
8047 // anything), so clear it in all masks prior to comparing them.
8048 Op1Zero.clearBit(0); Op1One.clearBit(0); 8049 Op2Zero.clearBit(0); Op2One.clearBit(0); 8050 8051 if (Op1Zero != Op2Zero || Op1One != Op2One) 8052 return SDValue(); 8053 } 8054 } 8055 8056 // We now know that the higher-order bits are irrelevant, we just need to 8057 // make sure that all of the intermediate operations are bit operations, and 8058 // all inputs are extensions. 8059 if (N->getOperand(0).getOpcode() != ISD::AND && 8060 N->getOperand(0).getOpcode() != ISD::OR && 8061 N->getOperand(0).getOpcode() != ISD::XOR && 8062 N->getOperand(0).getOpcode() != ISD::SELECT && 8063 N->getOperand(0).getOpcode() != ISD::SELECT_CC && 8064 N->getOperand(0).getOpcode() != ISD::TRUNCATE && 8065 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND && 8066 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND && 8067 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND) 8068 return SDValue(); 8069 8070 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) && 8071 N->getOperand(1).getOpcode() != ISD::AND && 8072 N->getOperand(1).getOpcode() != ISD::OR && 8073 N->getOperand(1).getOpcode() != ISD::XOR && 8074 N->getOperand(1).getOpcode() != ISD::SELECT && 8075 N->getOperand(1).getOpcode() != ISD::SELECT_CC && 8076 N->getOperand(1).getOpcode() != ISD::TRUNCATE && 8077 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND && 8078 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND && 8079 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND) 8080 return SDValue(); 8081 8082 SmallVector<SDValue, 4> Inputs; 8083 SmallVector<SDValue, 8> BinOps, PromOps; 8084 SmallPtrSet<SDNode *, 16> Visited; 8085 8086 for (unsigned i = 0; i < 2; ++i) { 8087 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 8088 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 8089 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 8090 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) || 8091 isa<ConstantSDNode>(N->getOperand(i))) 8092 Inputs.push_back(N->getOperand(i)); 8093 else 8094 BinOps.push_back(N->getOperand(i)); 8095 8096 if (N->getOpcode() == ISD::TRUNCATE) 8097 break; 8098 } 8099 8100 // Visit all inputs, collect all binary operations (and, or, xor and 8101 // select) that are all fed by extensions. 8102 while (!BinOps.empty()) { 8103 SDValue BinOp = BinOps.back(); 8104 BinOps.pop_back(); 8105 8106 if (!Visited.insert(BinOp.getNode()).second) 8107 continue; 8108 8109 PromOps.push_back(BinOp); 8110 8111 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 8112 // The condition of the select is not promoted. 
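// (ISD::SELECT's operands are (cond, tval, fval) and ISD::SELECT_CC's are
// (lhs, rhs, tval, fval, cc), so only the value operands are walked.)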
8113 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 8114 continue; 8115 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 8116 continue; 8117 8118 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 8119 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 8120 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) && 8121 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) || 8122 isa<ConstantSDNode>(BinOp.getOperand(i))) { 8123 Inputs.push_back(BinOp.getOperand(i)); 8124 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 8125 BinOp.getOperand(i).getOpcode() == ISD::OR || 8126 BinOp.getOperand(i).getOpcode() == ISD::XOR || 8127 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 8128 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC || 8129 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 8130 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND || 8131 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND || 8132 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) { 8133 BinOps.push_back(BinOp.getOperand(i)); 8134 } else { 8135 // We have an input that is not an extension or another binary 8136 // operation; we'll abort this transformation. 8137 return SDValue(); 8138 } 8139 } 8140 } 8141 8142 // Make sure that this is a self-contained cluster of operations (which 8143 // is not quite the same thing as saying that everything has only one 8144 // use). 8145 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8146 if (isa<ConstantSDNode>(Inputs[i])) 8147 continue; 8148 8149 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 8150 UE = Inputs[i].getNode()->use_end(); 8151 UI != UE; ++UI) { 8152 SDNode *User = *UI; 8153 if (User != N && !Visited.count(User)) 8154 return SDValue(); 8155 8156 // Make sure that we're not going to promote the non-output-value 8157 // operand(s) or SELECT or SELECT_CC. 8158 // FIXME: Although we could sometimes handle this, and it does occur in 8159 // practice that one of the condition inputs to the select is also one of 8160 // the outputs, we currently can't deal with this. 8161 if (User->getOpcode() == ISD::SELECT) { 8162 if (User->getOperand(0) == Inputs[i]) 8163 return SDValue(); 8164 } else if (User->getOpcode() == ISD::SELECT_CC) { 8165 if (User->getOperand(0) == Inputs[i] || 8166 User->getOperand(1) == Inputs[i]) 8167 return SDValue(); 8168 } 8169 } 8170 } 8171 8172 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 8173 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 8174 UE = PromOps[i].getNode()->use_end(); 8175 UI != UE; ++UI) { 8176 SDNode *User = *UI; 8177 if (User != N && !Visited.count(User)) 8178 return SDValue(); 8179 8180 // Make sure that we're not going to promote the non-output-value 8181 // operand(s) or SELECT or SELECT_CC. 8182 // FIXME: Although we could sometimes handle this, and it does occur in 8183 // practice that one of the condition inputs to the select is also one of 8184 // the outputs, we currently can't deal with this. 8185 if (User->getOpcode() == ISD::SELECT) { 8186 if (User->getOperand(0) == PromOps[i]) 8187 return SDValue(); 8188 } else if (User->getOpcode() == ISD::SELECT_CC) { 8189 if (User->getOperand(0) == PromOps[i] || 8190 User->getOperand(1) == PromOps[i]) 8191 return SDValue(); 8192 } 8193 } 8194 } 8195 8196 // Replace all inputs with the extension operand. 
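// That is, each zext/sext/anyext-from-i1 input collected above is replaced
// by its i1 source operand, so the whole cluster operates on i1 directly.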
8197 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8198 // Constants may have users outside the cluster of to-be-promoted nodes, 8199 // and so we need to replace those as we do the promotions. 8200 if (isa<ConstantSDNode>(Inputs[i])) 8201 continue; 8202 else 8203 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0)); 8204 } 8205 8206 // Replace all operations (these are all the same, but have a different 8207 // (i1) return type). DAG.getNode will validate that the types of 8208 // a binary operator match, so go through the list in reverse so that 8209 // we've likely promoted both operands first. Any intermediate truncations or 8210 // extensions disappear. 8211 while (!PromOps.empty()) { 8212 SDValue PromOp = PromOps.back(); 8213 PromOps.pop_back(); 8214 8215 if (PromOp.getOpcode() == ISD::TRUNCATE || 8216 PromOp.getOpcode() == ISD::SIGN_EXTEND || 8217 PromOp.getOpcode() == ISD::ZERO_EXTEND || 8218 PromOp.getOpcode() == ISD::ANY_EXTEND) { 8219 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) && 8220 PromOp.getOperand(0).getValueType() != MVT::i1) { 8221 // The operand is not yet ready (see comment below). 8222 PromOps.insert(PromOps.begin(), PromOp); 8223 continue; 8224 } 8225 8226 SDValue RepValue = PromOp.getOperand(0); 8227 if (isa<ConstantSDNode>(RepValue)) 8228 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue); 8229 8230 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue); 8231 continue; 8232 } 8233 8234 unsigned C; 8235 switch (PromOp.getOpcode()) { 8236 default: C = 0; break; 8237 case ISD::SELECT: C = 1; break; 8238 case ISD::SELECT_CC: C = 2; break; 8239 } 8240 8241 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 8242 PromOp.getOperand(C).getValueType() != MVT::i1) || 8243 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 8244 PromOp.getOperand(C+1).getValueType() != MVT::i1)) { 8245 // The to-be-promoted operands of this node have not yet been 8246 // promoted (this should be rare because we're going through the 8247 // list backward, but if one of the operands has several users in 8248 // this cluster of to-be-promoted nodes, it is possible). 8249 PromOps.insert(PromOps.begin(), PromOp); 8250 continue; 8251 } 8252 8253 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 8254 PromOp.getNode()->op_end()); 8255 8256 // If there are any constant inputs, make sure they're replaced now. 8257 for (unsigned i = 0; i < 2; ++i) 8258 if (isa<ConstantSDNode>(Ops[C+i])) 8259 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]); 8260 8261 DAG.ReplaceAllUsesOfValueWith(PromOp, 8262 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops)); 8263 } 8264 8265 // Now we're left with the initial truncation itself. 8266 if (N->getOpcode() == ISD::TRUNCATE) 8267 return N->getOperand(0); 8268 8269 // Otherwise, this is a comparison. The operands to be compared have just 8270 // changed type (to i1), but everything else is the same. 8271 return SDValue(N, 0); 8272 } 8273 8274 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, 8275 DAGCombinerInfo &DCI) const { 8276 SelectionDAG &DAG = DCI.DAG; 8277 SDLoc dl(N); 8278 8279 // If we're tracking CR bits, we need to be careful that we don't have: 8280 // zext(binary-ops(trunc(x), trunc(y))) 8281 // or 8282 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...) 8283 // such that we're unnecessarily moving things into CR bits that can more 8284 // efficiently stay in GPRs. 
Note that if we're not certain that the high 8285 // bits are set as required by the final extension, we still may need to do 8286 // some masking to get the proper behavior. 8287 8288 // This same functionality is important on PPC64 when dealing with 8289 // 32-to-64-bit extensions; these occur often when 32-bit values are used as 8290 // the return values of functions. Because it is so similar, it is handled 8291 // here as well. 8292 8293 if (N->getValueType(0) != MVT::i32 && 8294 N->getValueType(0) != MVT::i64) 8295 return SDValue(); 8296 8297 if (!((N->getOperand(0).getValueType() == MVT::i1 && 8298 Subtarget.useCRBits()) || 8299 (N->getOperand(0).getValueType() == MVT::i32 && 8300 Subtarget.isPPC64()))) 8301 return SDValue(); 8302 8303 if (N->getOperand(0).getOpcode() != ISD::AND && 8304 N->getOperand(0).getOpcode() != ISD::OR && 8305 N->getOperand(0).getOpcode() != ISD::XOR && 8306 N->getOperand(0).getOpcode() != ISD::SELECT && 8307 N->getOperand(0).getOpcode() != ISD::SELECT_CC) 8308 return SDValue(); 8309 8310 SmallVector<SDValue, 4> Inputs; 8311 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps; 8312 SmallPtrSet<SDNode *, 16> Visited; 8313 8314 // Visit all inputs, collect all binary operations (and, or, xor and 8315 // select) that are all fed by truncations. 8316 while (!BinOps.empty()) { 8317 SDValue BinOp = BinOps.back(); 8318 BinOps.pop_back(); 8319 8320 if (!Visited.insert(BinOp.getNode()).second) 8321 continue; 8322 8323 PromOps.push_back(BinOp); 8324 8325 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) { 8326 // The condition of the select is not promoted. 8327 if (BinOp.getOpcode() == ISD::SELECT && i == 0) 8328 continue; 8329 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3) 8330 continue; 8331 8332 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE || 8333 isa<ConstantSDNode>(BinOp.getOperand(i))) { 8334 Inputs.push_back(BinOp.getOperand(i)); 8335 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND || 8336 BinOp.getOperand(i).getOpcode() == ISD::OR || 8337 BinOp.getOperand(i).getOpcode() == ISD::XOR || 8338 BinOp.getOperand(i).getOpcode() == ISD::SELECT || 8339 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) { 8340 BinOps.push_back(BinOp.getOperand(i)); 8341 } else { 8342 // We have an input that is not a truncation or another binary 8343 // operation; we'll abort this transformation. 8344 return SDValue(); 8345 } 8346 } 8347 } 8348 8349 // The operands of a select that must be truncated when the select is 8350 // promoted because the operand is actually part of the to-be-promoted set. 8351 DenseMap<SDNode *, EVT> SelectTruncOp[2]; 8352 8353 // Make sure that this is a self-contained cluster of operations (which 8354 // is not quite the same thing as saying that everything has only one 8355 // use). 8356 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8357 if (isa<ConstantSDNode>(Inputs[i])) 8358 continue; 8359 8360 for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(), 8361 UE = Inputs[i].getNode()->use_end(); 8362 UI != UE; ++UI) { 8363 SDNode *User = *UI; 8364 if (User != N && !Visited.count(User)) 8365 return SDValue(); 8366 8367 // If we're going to promote the non-output-value operand(s) or SELECT or 8368 // SELECT_CC, record them for truncation. 
8369 if (User->getOpcode() == ISD::SELECT) { 8370 if (User->getOperand(0) == Inputs[i]) 8371 SelectTruncOp[0].insert(std::make_pair(User, 8372 User->getOperand(0).getValueType())); 8373 } else if (User->getOpcode() == ISD::SELECT_CC) { 8374 if (User->getOperand(0) == Inputs[i]) 8375 SelectTruncOp[0].insert(std::make_pair(User, 8376 User->getOperand(0).getValueType())); 8377 if (User->getOperand(1) == Inputs[i]) 8378 SelectTruncOp[1].insert(std::make_pair(User, 8379 User->getOperand(1).getValueType())); 8380 } 8381 } 8382 } 8383 8384 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) { 8385 for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(), 8386 UE = PromOps[i].getNode()->use_end(); 8387 UI != UE; ++UI) { 8388 SDNode *User = *UI; 8389 if (User != N && !Visited.count(User)) 8390 return SDValue(); 8391 8392 // If we're going to promote the non-output-value operand(s) or SELECT or 8393 // SELECT_CC, record them for truncation. 8394 if (User->getOpcode() == ISD::SELECT) { 8395 if (User->getOperand(0) == PromOps[i]) 8396 SelectTruncOp[0].insert(std::make_pair(User, 8397 User->getOperand(0).getValueType())); 8398 } else if (User->getOpcode() == ISD::SELECT_CC) { 8399 if (User->getOperand(0) == PromOps[i]) 8400 SelectTruncOp[0].insert(std::make_pair(User, 8401 User->getOperand(0).getValueType())); 8402 if (User->getOperand(1) == PromOps[i]) 8403 SelectTruncOp[1].insert(std::make_pair(User, 8404 User->getOperand(1).getValueType())); 8405 } 8406 } 8407 } 8408 8409 unsigned PromBits = N->getOperand(0).getValueSizeInBits(); 8410 bool ReallyNeedsExt = false; 8411 if (N->getOpcode() != ISD::ANY_EXTEND) { 8412 // If all of the inputs are not already sign/zero extended, then 8413 // we'll still need to do that at the end. 8414 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8415 if (isa<ConstantSDNode>(Inputs[i])) 8416 continue; 8417 8418 unsigned OpBits = 8419 Inputs[i].getOperand(0).getValueSizeInBits(); 8420 assert(PromBits < OpBits && "Truncation not to a smaller bit count?"); 8421 8422 if ((N->getOpcode() == ISD::ZERO_EXTEND && 8423 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0), 8424 APInt::getHighBitsSet(OpBits, 8425 OpBits-PromBits))) || 8426 (N->getOpcode() == ISD::SIGN_EXTEND && 8427 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) < 8428 (OpBits-(PromBits-1)))) { 8429 ReallyNeedsExt = true; 8430 break; 8431 } 8432 } 8433 } 8434 8435 // Replace all inputs, either with the truncation operand, or a 8436 // truncation or extension to the final output type. 8437 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) { 8438 // Constant inputs need to be replaced with the to-be-promoted nodes that 8439 // use them because they might have users outside of the cluster of 8440 // promoted nodes. 8441 if (isa<ConstantSDNode>(Inputs[i])) 8442 continue; 8443 8444 SDValue InSrc = Inputs[i].getOperand(0); 8445 if (Inputs[i].getValueType() == N->getValueType(0)) 8446 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc); 8447 else if (N->getOpcode() == ISD::SIGN_EXTEND) 8448 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8449 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0))); 8450 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8451 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8452 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0))); 8453 else 8454 DAG.ReplaceAllUsesOfValueWith(Inputs[i], 8455 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0))); 8456 } 8457 8458 // Replace all operations (these are all the same, but have a different 8459 // (promoted) return type). 
DAG.getNode will validate that the types of 8460 // a binary operator match, so go through the list in reverse so that 8461 // we've likely promoted both operands first. 8462 while (!PromOps.empty()) { 8463 SDValue PromOp = PromOps.back(); 8464 PromOps.pop_back(); 8465 8466 unsigned C; 8467 switch (PromOp.getOpcode()) { 8468 default: C = 0; break; 8469 case ISD::SELECT: C = 1; break; 8470 case ISD::SELECT_CC: C = 2; break; 8471 } 8472 8473 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) && 8474 PromOp.getOperand(C).getValueType() != N->getValueType(0)) || 8475 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) && 8476 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) { 8477 // The to-be-promoted operands of this node have not yet been 8478 // promoted (this should be rare because we're going through the 8479 // list backward, but if one of the operands has several users in 8480 // this cluster of to-be-promoted nodes, it is possible). 8481 PromOps.insert(PromOps.begin(), PromOp); 8482 continue; 8483 } 8484 8485 // For SELECT and SELECT_CC nodes, we do a similar check for any 8486 // to-be-promoted comparison inputs. 8487 if (PromOp.getOpcode() == ISD::SELECT || 8488 PromOp.getOpcode() == ISD::SELECT_CC) { 8489 if ((SelectTruncOp[0].count(PromOp.getNode()) && 8490 PromOp.getOperand(0).getValueType() != N->getValueType(0)) || 8491 (SelectTruncOp[1].count(PromOp.getNode()) && 8492 PromOp.getOperand(1).getValueType() != N->getValueType(0))) { 8493 PromOps.insert(PromOps.begin(), PromOp); 8494 continue; 8495 } 8496 } 8497 8498 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(), 8499 PromOp.getNode()->op_end()); 8500 8501 // If this node has constant inputs, then they'll need to be promoted here. 8502 for (unsigned i = 0; i < 2; ++i) { 8503 if (!isa<ConstantSDNode>(Ops[C+i])) 8504 continue; 8505 if (Ops[C+i].getValueType() == N->getValueType(0)) 8506 continue; 8507 8508 if (N->getOpcode() == ISD::SIGN_EXTEND) 8509 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8510 else if (N->getOpcode() == ISD::ZERO_EXTEND) 8511 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8512 else 8513 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0)); 8514 } 8515 8516 // If we've promoted the comparison inputs of a SELECT or SELECT_CC, 8517 // truncate them again to the original value type. 8518 if (PromOp.getOpcode() == ISD::SELECT || 8519 PromOp.getOpcode() == ISD::SELECT_CC) { 8520 auto SI0 = SelectTruncOp[0].find(PromOp.getNode()); 8521 if (SI0 != SelectTruncOp[0].end()) 8522 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]); 8523 auto SI1 = SelectTruncOp[1].find(PromOp.getNode()); 8524 if (SI1 != SelectTruncOp[1].end()) 8525 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]); 8526 } 8527 8528 DAG.ReplaceAllUsesOfValueWith(PromOp, 8529 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops)); 8530 } 8531 8532 // Now we're left with the initial extension itself. 8533 if (!ReallyNeedsExt) 8534 return N->getOperand(0); 8535 8536 // To zero extend, just mask off everything except for the first bit (in the 8537 // i1 case). 
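// For the i1 -> i32 case the mask below is just 1, giving "x & 1"; the
// sign-extend path instead builds (x << 31) >>s 31 via the shl/sra pair,
// with ShiftCst = ValueBits - PromBits = 31.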
8538 if (N->getOpcode() == ISD::ZERO_EXTEND)
8539 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
8540 DAG.getConstant(APInt::getLowBitsSet(
8541 N->getValueSizeInBits(0), PromBits),
8542 N->getValueType(0)));
8543
8544 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
8545 "Invalid extension type");
8546 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0));
8547 SDValue ShiftCst =
8548 DAG.getConstant(N->getValueSizeInBits(0)-PromBits, ShiftAmountTy);
8549 return DAG.getNode(ISD::SRA, dl, N->getValueType(0),
8550 DAG.getNode(ISD::SHL, dl, N->getValueType(0),
8551 N->getOperand(0), ShiftCst), ShiftCst);
8552 }
8553
8554 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
8555 DAGCombinerInfo &DCI) const {
8556 assert((N->getOpcode() == ISD::SINT_TO_FP ||
8557 N->getOpcode() == ISD::UINT_TO_FP) &&
8558 "Need an int -> FP conversion node here");
8559
8560 if (!Subtarget.has64BitSupport())
8561 return SDValue();
8562
8563 SelectionDAG &DAG = DCI.DAG;
8564 SDLoc dl(N);
8565 SDValue Op(N, 0);
8566
8567 // Don't handle ppc_fp128 here or i1 conversions.
8568 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8569 return SDValue();
8570 if (Op.getOperand(0).getValueType() == MVT::i1)
8571 return SDValue();
8572
8573 // For i32 intermediate values, unfortunately, the conversion functions
8574 // leave the upper 32 bits of the value undefined. Within the set of
8575 // scalar instructions, we have no method for zero- or sign-extending the
8576 // value. Thus, we cannot handle i32 intermediate values here.
8577 if (Op.getOperand(0).getValueType() == MVT::i32)
8578 return SDValue();
8579
8580 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
8581 "UINT_TO_FP is supported only with FPCVT");
8582
8583 // If we have FCFIDS, then use it when converting to single-precision.
8584 // Otherwise, convert to double-precision and then round.
8585 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
8586 (Op.getOpcode() == ISD::UINT_TO_FP ?
8587 PPCISD::FCFIDUS : PPCISD::FCFIDS) :
8588 (Op.getOpcode() == ISD::UINT_TO_FP ?
8589 PPCISD::FCFIDU : PPCISD::FCFID);
8590 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ?
8591 MVT::f32 : MVT::f64;
8592
8593 // If we're converting from a float to an int and back to a float again,
8594 // then we don't need the store/load pair at all.
8595 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
8596 Subtarget.hasFPCVT()) ||
8597 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
8598 SDValue Src = Op.getOperand(0).getOperand(0);
8599 if (Src.getValueType() == MVT::f32) {
8600 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8601 DCI.AddToWorklist(Src.getNode());
8602 }
8603
8604 unsigned FCTOp =
8605 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
8606 PPCISD::FCTIDUZ;
8607
8608 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
8609 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
8610
8611 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8612 FP = DAG.getNode(ISD::FP_ROUND, dl,
8613 MVT::f32, FP, DAG.getIntPtrConstant(0));
8614 DCI.AddToWorklist(FP.getNode());
8615 }
8616
8617 return FP;
8618 }
8619
8620 return SDValue();
8621 }
8622
8623 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
8624 // builtins) into loads with swaps.
8625 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N, 8626 DAGCombinerInfo &DCI) const { 8627 SelectionDAG &DAG = DCI.DAG; 8628 SDLoc dl(N); 8629 SDValue Chain; 8630 SDValue Base; 8631 MachineMemOperand *MMO; 8632 8633 switch (N->getOpcode()) { 8634 default: 8635 llvm_unreachable("Unexpected opcode for little endian VSX load"); 8636 case ISD::LOAD: { 8637 LoadSDNode *LD = cast<LoadSDNode>(N); 8638 Chain = LD->getChain(); 8639 Base = LD->getBasePtr(); 8640 MMO = LD->getMemOperand(); 8641 // If the MMO suggests this isn't a load of a full vector, leave 8642 // things alone. For a built-in, we have to make the change for 8643 // correctness, so if there is a size problem that will be a bug. 8644 if (MMO->getSize() < 16) 8645 return SDValue(); 8646 break; 8647 } 8648 case ISD::INTRINSIC_W_CHAIN: { 8649 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 8650 Chain = Intrin->getChain(); 8651 Base = Intrin->getBasePtr(); 8652 MMO = Intrin->getMemOperand(); 8653 break; 8654 } 8655 } 8656 8657 MVT VecTy = N->getValueType(0).getSimpleVT(); 8658 SDValue LoadOps[] = { Chain, Base }; 8659 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl, 8660 DAG.getVTList(VecTy, MVT::Other), 8661 LoadOps, VecTy, MMO); 8662 DCI.AddToWorklist(Load.getNode()); 8663 Chain = Load.getValue(1); 8664 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 8665 DAG.getVTList(VecTy, MVT::Other), Chain, Load); 8666 DCI.AddToWorklist(Swap.getNode()); 8667 return Swap; 8668 } 8669 8670 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for 8671 // builtins) into stores with swaps. 8672 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N, 8673 DAGCombinerInfo &DCI) const { 8674 SelectionDAG &DAG = DCI.DAG; 8675 SDLoc dl(N); 8676 SDValue Chain; 8677 SDValue Base; 8678 unsigned SrcOpnd; 8679 MachineMemOperand *MMO; 8680 8681 switch (N->getOpcode()) { 8682 default: 8683 llvm_unreachable("Unexpected opcode for little endian VSX store"); 8684 case ISD::STORE: { 8685 StoreSDNode *ST = cast<StoreSDNode>(N); 8686 Chain = ST->getChain(); 8687 Base = ST->getBasePtr(); 8688 MMO = ST->getMemOperand(); 8689 SrcOpnd = 1; 8690 // If the MMO suggests this isn't a store of a full vector, leave 8691 // things alone. For a built-in, we have to make the change for 8692 // correctness, so if there is a size problem that will be a bug. 8693 if (MMO->getSize() < 16) 8694 return SDValue(); 8695 break; 8696 } 8697 case ISD::INTRINSIC_VOID: { 8698 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N); 8699 Chain = Intrin->getChain(); 8700 // Intrin->getBasePtr() oddly does not get what we want. 
8701 Base = Intrin->getOperand(3); 8702 MMO = Intrin->getMemOperand(); 8703 SrcOpnd = 2; 8704 break; 8705 } 8706 } 8707 8708 SDValue Src = N->getOperand(SrcOpnd); 8709 MVT VecTy = Src.getValueType().getSimpleVT(); 8710 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl, 8711 DAG.getVTList(VecTy, MVT::Other), Chain, Src); 8712 DCI.AddToWorklist(Swap.getNode()); 8713 Chain = Swap.getValue(1); 8714 SDValue StoreOps[] = { Chain, Swap, Base }; 8715 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl, 8716 DAG.getVTList(MVT::Other), 8717 StoreOps, VecTy, MMO); 8718 DCI.AddToWorklist(Store.getNode()); 8719 return Store; 8720 } 8721 8722 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 8723 DAGCombinerInfo &DCI) const { 8724 const TargetMachine &TM = getTargetMachine(); 8725 SelectionDAG &DAG = DCI.DAG; 8726 SDLoc dl(N); 8727 switch (N->getOpcode()) { 8728 default: break; 8729 case PPCISD::SHL: 8730 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 8731 if (C->isNullValue()) // 0 << V -> 0. 8732 return N->getOperand(0); 8733 } 8734 break; 8735 case PPCISD::SRL: 8736 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 8737 if (C->isNullValue()) // 0 >>u V -> 0. 8738 return N->getOperand(0); 8739 } 8740 break; 8741 case PPCISD::SRA: 8742 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 8743 if (C->isNullValue() || // 0 >>s V -> 0. 8744 C->isAllOnesValue()) // -1 >>s V -> -1. 8745 return N->getOperand(0); 8746 } 8747 break; 8748 case ISD::SIGN_EXTEND: 8749 case ISD::ZERO_EXTEND: 8750 case ISD::ANY_EXTEND: 8751 return DAGCombineExtBoolTrunc(N, DCI); 8752 case ISD::TRUNCATE: 8753 case ISD::SETCC: 8754 case ISD::SELECT_CC: 8755 return DAGCombineTruncBoolExt(N, DCI); 8756 case ISD::SINT_TO_FP: 8757 case ISD::UINT_TO_FP: 8758 return combineFPToIntToFP(N, DCI); 8759 case ISD::STORE: { 8760 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 8761 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 8762 !cast<StoreSDNode>(N)->isTruncatingStore() && 8763 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 8764 N->getOperand(1).getValueType() == MVT::i32 && 8765 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 8766 SDValue Val = N->getOperand(1).getOperand(0); 8767 if (Val.getValueType() == MVT::f32) { 8768 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 8769 DCI.AddToWorklist(Val.getNode()); 8770 } 8771 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 8772 DCI.AddToWorklist(Val.getNode()); 8773 8774 SDValue Ops[] = { 8775 N->getOperand(0), Val, N->getOperand(2), 8776 DAG.getValueType(N->getOperand(1).getValueType()) 8777 }; 8778 8779 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 8780 DAG.getVTList(MVT::Other), Ops, 8781 cast<StoreSDNode>(N)->getMemoryVT(), 8782 cast<StoreSDNode>(N)->getMemOperand()); 8783 DCI.AddToWorklist(Val.getNode()); 8784 return Val; 8785 } 8786 8787 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 8788 if (cast<StoreSDNode>(N)->isUnindexed() && 8789 N->getOperand(1).getOpcode() == ISD::BSWAP && 8790 N->getOperand(1).getNode()->hasOneUse() && 8791 (N->getOperand(1).getValueType() == MVT::i32 || 8792 N->getOperand(1).getValueType() == MVT::i16 || 8793 (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && 8794 TM.getSubtarget<PPCSubtarget>().isPPC64() && 8795 N->getOperand(1).getValueType() == MVT::i64))) { 8796 SDValue BSwapOp = N->getOperand(1).getOperand(0); 8797 // Do an any-extend to 32-bits if this is a half-word input. 
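// (e.g. a bswapped i16 store becomes an sthbrx of the any-extended value;
// the original value type is recorded below so STBRX stores only 2 bytes.)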
if (BSwapOp.getValueType() == MVT::i16)
8799 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
8800
8801 SDValue Ops[] = {
8802 N->getOperand(0), BSwapOp, N->getOperand(2),
8803 DAG.getValueType(N->getOperand(1).getValueType())
8804 };
8805 return
8806 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
8807 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
8808 cast<StoreSDNode>(N)->getMemOperand());
8809 }
8810
8811 // For little endian, VSX stores require generating xxswapd/stxvd2x.
8812 EVT VT = N->getOperand(1).getValueType();
8813 if (VT.isSimple()) {
8814 MVT StoreVT = VT.getSimpleVT();
8815 if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
8816 TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
8817 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
8818 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
8819 return expandVSXStoreForLE(N, DCI);
8820 }
8821 break;
8822 }
8823 case ISD::LOAD: {
8824 LoadSDNode *LD = cast<LoadSDNode>(N);
8825 EVT VT = LD->getValueType(0);
8826
8827 // For little endian, VSX loads require generating lxvd2x/xxswapd.
8828 if (VT.isSimple()) {
8829 MVT LoadVT = VT.getSimpleVT();
8830 if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
8831 TM.getSubtarget<PPCSubtarget>().isLittleEndian() &&
8832 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
8833 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
8834 return expandVSXLoadForLE(N, DCI);
8835 }
8836
8837 Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
8838 unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
8839 if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
8840 TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
8841 // P8 and later hardware should just use LOAD.
8842 !TM.getSubtarget<PPCSubtarget>().hasP8Vector() &&
8843 (VT == MVT::v16i8 || VT == MVT::v8i16 ||
8844 VT == MVT::v4i32 || VT == MVT::v4f32) &&
8845 LD->getAlignment() < ABIAlignment) {
8846 // This is a type-legal unaligned Altivec load.
8847 SDValue Chain = LD->getChain();
8848 SDValue Ptr = LD->getBasePtr();
8849 bool isLittleEndian = Subtarget.isLittleEndian();
8850
8851 // This implements the loading of unaligned vectors as described in
8852 // the venerable Apple Velocity Engine overview. Specifically:
8853 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
8854 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
8855 //
8856 // The general idea is to expand a sequence of one or more unaligned
8857 // loads into an alignment-based permutation-control instruction (lvsl
8858 // or lvsr), a series of regular vector loads (which always truncate
8859 // their input address to an aligned address), and a series of
8860 // permutations. The results of these permutations are the requested
8861 // loaded values. The trick is that the last "extra" load is not taken
8862 // from the address you might suspect (sizeof(vector) bytes after the
8863 // last requested load), but rather sizeof(vector) - 1 bytes after the
8864 // last requested vector. The point of this is to avoid a page fault if
8865 // the base address happened to be aligned. This works because if the
8866 // base address is aligned, then adding less than a full vector length
8867 // will cause the last vector in the sequence to be (re)loaded.
8868 // Otherwise, the next vector will be fetched as you might suspect was
8869 // necessary.
8870
8871 // We might be able to reuse the permutation generation from
8872 // a different base address offset from this one by an aligned amount.
      Intrinsic::ID Intr = (isLittleEndian ?
                            Intrinsic::ppc_altivec_lvsr :
                            Intrinsic::ppc_altivec_lvsl);
      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                -LD->getMemoryVT().getStoreSize()+1,
                                2*LD->getMemoryVT().getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID = DAG.getTargetConstant(Intrinsic::ppc_altivec_lvx,
                                               getPointerTy());
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(MVT::v4i32, MVT::Other),
                                BaseLoadOps, MVT::v4i32, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*LD->getMemoryVT().getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(MVT::v4i32, MVT::Other),
                                ExtraLoadOps, MVT::v4i32, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                               BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code. We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != MVT::v4i32)
        Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);

      // The output of the permutation is our loaded result, the TokenFactor is
      // our new chain.
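      // CombineTo rewires every use of the original load's value result to
      // Perm and every use of its chain result to TF; the original
      // unaligned LOAD node is then dead and will be deleted.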
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    }
    break;
  case ISD::INTRINSIC_WO_CHAIN: {
    bool isLittleEndian = Subtarget.isLittleEndian();
    Intrinsic::ID Intr = (isLittleEndian ?
                          Intrinsic::ppc_altivec_lvsr :
                          Intrinsic::ppc_altivec_lvsl);
    if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      if (DAG.MaskedValueIsZero(Add->getOperand(1),
            APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
              Add.getValueType().getScalarType().getSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
                Intr) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.

            return SDValue(*UI, 0);
          }
        }
      }
    }
  }
  break;
  case ISD::INTRINSIC_W_CHAIN: {
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
        TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_lxvw4x:
      case Intrinsic::ppc_vsx_lxvd2x:
        return expandVSXLoadForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    if (TM.getSubtarget<PPCSubtarget>().hasVSX() &&
        TM.getSubtarget<PPCSubtarget>().isLittleEndian()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
  }
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away. This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away; we give it a bogus result value but a
      // real chain result. The result value is dead because the bswap is
      // dead.
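      // (A sketch of the overall effect, for illustration: "%v = load i32,
      // i32* %p; %s = bswap %v" is replaced by one byte-reversed load,
      // which selects to lwbrx -- or lhbrx for i16 and ldbrx for i64.)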
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value. If it has a
      // chain, this transformation is more complex. Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
  }
  break;
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed with some
    // non-zero value. If so, pass through the AND to get to the intrinsic.
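    // (Typically the AND mask is 1, left over from promoting the
    // intrinsic's i1 result to a wider integer type; e.g. LHS may be
    // (and (int_ppc_is_decremented_ctr_nonzero ...), 1).)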
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
          isZero())
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 std::vector<SDNode *> *Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  if (Created)
    Created->push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), Op);
    if (Created)
      Created->push_back(Op.getNode());
  }

  return Op;
}
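// A sketch of what the SRA_ADDZE node expands to (illustrative only):
// for X / 8, srawi computes X >> 3 (arithmetic) and sets CA iff X is
// negative and a one bit was shifted out, and addze adds CA back in so
// that the division rounds toward zero rather than toward negative
// infinity. E.g. for X = -9:
//   srawi rD, rX, 3   ; rD = -2, CA = 1
//   addze rD, rD      ; rD = -1  (== -9 / 8, truncated)
// For a negative power-of-two divisor the result is additionally negated.
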
//===----------------------------------------------------------------------===//
//  Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      APInt &KnownZero,
                                                      APInt &KnownOne,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8: {
    if (!ML)
      break;

    const PPCInstrInfo *TII =
      static_cast<const PPCInstrInfo *>(getTargetMachine().getSubtargetImpl()->
                                          getInstrInfo());

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line
    // (the value returned here is a log2 alignment, so 5 requests a
    // 2^5 = 32-byte boundary).
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J)
        LoopSize += TII->GetInstSizeInBytes(J);

    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    case 'f':
      if (VT == MVT::f32 || VT == MVT::i32)
        return std::make_pair(0U, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return std::make_pair(0U, &PPC::F8RCRegClass);
      break;
    case 'v':
      return std::make_pair(0U, &PPC::VRRCRegClass);
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc") { // an individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf") {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if (Constraint == "ws") {
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass*> R =
    TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first)) {
    const TargetRegisterInfo *TRI =
      getTargetMachine().getSubtargetImpl()->getRegisterInfo();
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);
  }

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}

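// For illustration, a hypothetical user-level example (not from the
// original source) exercising the mappings above:
//   double d;
//   asm("fadd %0, %1, %2" : "=f"(d) : "f"(a), "f"(b));  // F8RC for f64
//   asm("add %0, %1, %2"  : "=r"(x) : "r"(y), "b"(z));  // GPRC/G8RC
// With a 64-bit operand on PPC64, an "r" operand that initially resolves
// to a 32-bit GPR such as r3 is upgraded to its 64-bit super-register X3.
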
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              Type *Ty) const {
  // FIXME: PPC does not allow r+i addressing modes for vectors!

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Beyond base+offset, PPC only supports r+r addressing.
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

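// For illustration (hypothetical operands): accepted forms include
// 12(r3) (Scale 0: base register plus 16-bit offset), r3+r4 (Scale 1),
// and 2*r3 (Scale 2, materialized as r3+r3); rejected forms include
// r3+r4+8 (r+r+i), 2*r3+r4, and any global-based address.
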
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
      DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64,
                                                            isDarwinABI),
                      isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                   FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction()->getAttributes().hasAttribute(
        AttributeSet::FunctionIndex, Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
                                              EVT VT) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", isDarwinABI ? 0 : (is64Bit ?
                                                  PPC::X2 : PPC::R2))
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);
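  // A result of 0 here (an unknown name, or a register that is not
  // available on this ABI) falls through to the fatal error below.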
  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {

  switch (Intrinsic) {
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.vol = false;
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}

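// For illustration: these lvx/stvx-style intrinsics truncate the address
// to 16-byte alignment, so for a 16-byte memVT the conservative range
// recorded above is offset -15 with size 31, i.e. every byte in
// [ptr-15, ptr+15] that a truncated access could actually touch.
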
/// getOptimalMemOpType - Returns the target-specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination's alignment can satisfy
/// any constraint. Similarly, if SrcAlign is zero, there is no need to
/// check it against an alignment requirement, probably because the source
/// does not need to be loaded. If 'IsMemset' is true, this is expanding a
/// memset. If 'ZeroMemset' is true, it is a memset of zero. 'MemcpyStrSrc'
/// indicates that the memcpy source is constant so it does not need to be
/// loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  if (Subtarget.isPPC64())
    return MVT::i64;
  return MVT::i32;
}

/// \brief Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  if (BitSize == 0 || BitSize > 64)
    return false;
  return true;
}

bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

bool PPCTargetLowering::isFPExtFree(EVT VT) const {
  assert(VT.isFloatingPoint());
  return true;
}

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.
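  // (VSX, handled below, is the exception for vectors: its loads and
  // stores are specified to accept unaligned addresses, so the four
  // 128-bit VSX vector types are also allowed when VSX is available.)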

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X11, PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
  EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return false;

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}