//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//
1406f32e7eSjoerg 
1506f32e7eSjoerg #include "WebAssemblyTargetTransformInfo.h"
1606f32e7eSjoerg #include "llvm/CodeGen/CostTable.h"
1706f32e7eSjoerg #include "llvm/Support/Debug.h"
1806f32e7eSjoerg using namespace llvm;
1906f32e7eSjoerg 
2006f32e7eSjoerg #define DEBUG_TYPE "wasmtti"
2106f32e7eSjoerg 
2206f32e7eSjoerg TargetTransformInfo::PopcntSupportKind
getPopcntSupport(unsigned TyWidth) const2306f32e7eSjoerg WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
2406f32e7eSjoerg   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
2506f32e7eSjoerg   return TargetTransformInfo::PSK_FastHardware;
2606f32e7eSjoerg }
2706f32e7eSjoerg 
getNumberOfRegisters(unsigned ClassID) const2806f32e7eSjoerg unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
2906f32e7eSjoerg   unsigned Result = BaseT::getNumberOfRegisters(ClassID);
3006f32e7eSjoerg 
3106f32e7eSjoerg   // For SIMD, use at least 16 registers, as a rough guess.
3206f32e7eSjoerg   bool Vector = (ClassID == 1);
3306f32e7eSjoerg   if (Vector)
3406f32e7eSjoerg     Result = std::max(Result, 16u);
3506f32e7eSjoerg 
3606f32e7eSjoerg   return Result;
3706f32e7eSjoerg }
3806f32e7eSjoerg 
getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const39*da58b97aSjoerg TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
40*da58b97aSjoerg     TargetTransformInfo::RegisterKind K) const {
41*da58b97aSjoerg   switch (K) {
42*da58b97aSjoerg   case TargetTransformInfo::RGK_Scalar:
43*da58b97aSjoerg     return TypeSize::getFixed(64);
44*da58b97aSjoerg   case TargetTransformInfo::RGK_FixedWidthVector:
45*da58b97aSjoerg     return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
46*da58b97aSjoerg   case TargetTransformInfo::RGK_ScalableVector:
47*da58b97aSjoerg     return TypeSize::getScalable(0);
4806f32e7eSjoerg   }
4906f32e7eSjoerg 
50*da58b97aSjoerg   llvm_unreachable("Unsupported register kind");
51*da58b97aSjoerg }
5206f32e7eSjoerg 
getArithmeticInstrCost(unsigned Opcode,Type * Ty,TTI::TargetCostKind CostKind,TTI::OperandValueKind Opd1Info,TTI::OperandValueKind Opd2Info,TTI::OperandValueProperties Opd1PropInfo,TTI::OperandValueProperties Opd2PropInfo,ArrayRef<const Value * > Args,const Instruction * CxtI)53*da58b97aSjoerg InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
54*da58b97aSjoerg     unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
55*da58b97aSjoerg     TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info,
56*da58b97aSjoerg     TTI::OperandValueProperties Opd1PropInfo,
57*da58b97aSjoerg     TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
58*da58b97aSjoerg     const Instruction *CxtI) {
59*da58b97aSjoerg 
60*da58b97aSjoerg   InstructionCost Cost =
61*da58b97aSjoerg       BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
62*da58b97aSjoerg           Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo);
6306f32e7eSjoerg 
6406f32e7eSjoerg   if (auto *VTy = dyn_cast<VectorType>(Ty)) {
6506f32e7eSjoerg     switch (Opcode) {
6606f32e7eSjoerg     case Instruction::LShr:
6706f32e7eSjoerg     case Instruction::AShr:
6806f32e7eSjoerg     case Instruction::Shl:
6906f32e7eSjoerg       // SIMD128's shifts currently only accept a scalar shift count. For each
7006f32e7eSjoerg       // element, we'll need to extract, op, insert. The following is a rough
7106f32e7eSjoerg       // approxmation.
7206f32e7eSjoerg       if (Opd2Info != TTI::OK_UniformValue &&
7306f32e7eSjoerg           Opd2Info != TTI::OK_UniformConstantValue)
74*da58b97aSjoerg         Cost =
75*da58b97aSjoerg             cast<FixedVectorType>(VTy)->getNumElements() *
7606f32e7eSjoerg             (TargetTransformInfo::TCC_Basic +
77*da58b97aSjoerg              getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
7806f32e7eSjoerg              TargetTransformInfo::TCC_Basic);
7906f32e7eSjoerg       break;
8006f32e7eSjoerg     }
8106f32e7eSjoerg   }
8206f32e7eSjoerg   return Cost;
8306f32e7eSjoerg }
8406f32e7eSjoerg 
getVectorInstrCost(unsigned Opcode,Type * Val,unsigned Index)85*da58b97aSjoerg InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode,
86*da58b97aSjoerg                                                        Type *Val,
8706f32e7eSjoerg                                                        unsigned Index) {
88*da58b97aSjoerg   InstructionCost Cost =
89*da58b97aSjoerg       BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index);
9006f32e7eSjoerg 
9106f32e7eSjoerg   // SIMD128's insert/extract currently only take constant indices.
9206f32e7eSjoerg   if (Index == -1u)
9306f32e7eSjoerg     return Cost + 25 * TargetTransformInfo::TCC_Expensive;
9406f32e7eSjoerg 
9506f32e7eSjoerg   return Cost;
9606f32e7eSjoerg }
97*da58b97aSjoerg 
areInlineCompatible(const Function * Caller,const Function * Callee) const98*da58b97aSjoerg bool WebAssemblyTTIImpl::areInlineCompatible(const Function *Caller,
99*da58b97aSjoerg                                              const Function *Callee) const {
100*da58b97aSjoerg   // Allow inlining only when the Callee has a subset of the Caller's
101*da58b97aSjoerg   // features. In principle, we should be able to inline regardless of any
102*da58b97aSjoerg   // features because WebAssembly supports features at module granularity, not
103*da58b97aSjoerg   // function granularity, but without this restriction it would be possible for
104*da58b97aSjoerg   // a module to "forget" about features if all the functions that used them
105*da58b97aSjoerg   // were inlined.
106*da58b97aSjoerg   const TargetMachine &TM = getTLI()->getTargetMachine();
107*da58b97aSjoerg 
108*da58b97aSjoerg   const FeatureBitset &CallerBits =
109*da58b97aSjoerg       TM.getSubtargetImpl(*Caller)->getFeatureBits();
110*da58b97aSjoerg   const FeatureBitset &CalleeBits =
111*da58b97aSjoerg       TM.getSubtargetImpl(*Callee)->getFeatureBits();
112*da58b97aSjoerg 
113*da58b97aSjoerg   return (CallerBits & CalleeBits) == CalleeBits;
114*da58b97aSjoerg }
115*da58b97aSjoerg 
getUnrollingPreferences(Loop * L,ScalarEvolution & SE,TTI::UnrollingPreferences & UP) const116*da58b97aSjoerg void WebAssemblyTTIImpl::getUnrollingPreferences(
117*da58b97aSjoerg   Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP) const {
118*da58b97aSjoerg   // Scan the loop: don't unroll loops with calls. This is a standard approach
119*da58b97aSjoerg   // for most (all?) targets.
120*da58b97aSjoerg   for (BasicBlock *BB : L->blocks())
121*da58b97aSjoerg     for (Instruction &I : *BB)
122*da58b97aSjoerg       if (isa<CallInst>(I) || isa<InvokeInst>(I))
123*da58b97aSjoerg         if (const Function *F = cast<CallBase>(I).getCalledFunction())
124*da58b97aSjoerg           if (isLoweredToCall(F))
125*da58b97aSjoerg             return;
126*da58b97aSjoerg 
127*da58b97aSjoerg   // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
128*da58b97aSjoerg   // the various microarchitectures that use the BasicTTI implementation and
129*da58b97aSjoerg   // has been selected through heuristics across multiple cores and runtimes.
130*da58b97aSjoerg   UP.Partial = UP.Runtime = UP.UpperBound = true;
131*da58b97aSjoerg   UP.PartialThreshold = 30;
132*da58b97aSjoerg 
133*da58b97aSjoerg   // Avoid unrolling when optimizing for size.
134*da58b97aSjoerg   UP.OptSizeThreshold = 0;
135*da58b97aSjoerg   UP.PartialOptSizeThreshold = 0;
136*da58b97aSjoerg 
137*da58b97aSjoerg   // Set number of instructions optimized when "back edge"
138*da58b97aSjoerg   // becomes "fall through" to default value of 2.
139*da58b97aSjoerg   UP.BEInsns = 2;
140*da58b97aSjoerg }
141