xref: /openbsd/gnu/llvm/llvm/lib/MCA/Support.cpp (revision d415bd75)
//===--------------------- Support.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements a few helper functions used by various pipeline
/// components.
///
//===----------------------------------------------------------------------===//
1409467b48Spatrick 
1509467b48Spatrick #include "llvm/MCA/Support.h"
1609467b48Spatrick #include "llvm/MC/MCSchedule.h"
17*d415bd75Srobert #include <numeric>
1809467b48Spatrick 
1909467b48Spatrick namespace llvm {
2009467b48Spatrick namespace mca {
2109467b48Spatrick 
2209467b48Spatrick #define DEBUG_TYPE "llvm-mca"
2309467b48Spatrick 
/// Adds the fraction \p RHS to this fraction in place.
///
/// When both operands already share a denominator, only the numerators are
/// summed. Otherwise both operands are rescaled to the least common multiple
/// of the two denominators before the numerators are added. The result is not
/// reduced to lowest terms.
///
/// \returns a reference to this object.
ResourceCycles &ResourceCycles::operator+=(const ResourceCycles &RHS) {
  if (Denominator == RHS.Denominator) {
    Numerator += RHS.Numerator;
    return *this;
  }

  // Create a common denominator for LHS and RHS by deriving the least common
  // multiple from the GCD. Divide by the GCD *before* multiplying: the plain
  // product Denominator * RHS.Denominator can overflow even when the LCM
  // itself is representable.
  const unsigned GCD = std::gcd(Denominator, RHS.Denominator);
  const unsigned LCM = (Denominator / GCD) * RHS.Denominator;
  const unsigned LHSNumerator = Numerator * (LCM / Denominator);
  const unsigned RHSNumerator = RHS.Numerator * (LCM / RHS.Denominator);
  Numerator = LHSNumerator + RHSNumerator;
  Denominator = LCM;
  return *this;
}
3909467b48Spatrick 
computeProcResourceMasks(const MCSchedModel & SM,MutableArrayRef<uint64_t> Masks)4009467b48Spatrick void computeProcResourceMasks(const MCSchedModel &SM,
4109467b48Spatrick                               MutableArrayRef<uint64_t> Masks) {
4209467b48Spatrick   unsigned ProcResourceID = 0;
4309467b48Spatrick 
4409467b48Spatrick   assert(Masks.size() == SM.getNumProcResourceKinds() &&
4509467b48Spatrick          "Invalid number of elements");
4609467b48Spatrick   // Resource at index 0 is the 'InvalidUnit'. Set an invalid mask for it.
4709467b48Spatrick   Masks[0] = 0;
4809467b48Spatrick 
4909467b48Spatrick   // Create a unique bitmask for every processor resource unit.
5009467b48Spatrick   for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
5109467b48Spatrick     const MCProcResourceDesc &Desc = *SM.getProcResource(I);
5209467b48Spatrick     if (Desc.SubUnitsIdxBegin)
5309467b48Spatrick       continue;
5409467b48Spatrick     Masks[I] = 1ULL << ProcResourceID;
5509467b48Spatrick     ProcResourceID++;
5609467b48Spatrick   }
5709467b48Spatrick 
5809467b48Spatrick   // Create a unique bitmask for every processor resource group.
5909467b48Spatrick   for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
6009467b48Spatrick     const MCProcResourceDesc &Desc = *SM.getProcResource(I);
6109467b48Spatrick     if (!Desc.SubUnitsIdxBegin)
6209467b48Spatrick       continue;
6309467b48Spatrick     Masks[I] = 1ULL << ProcResourceID;
6409467b48Spatrick     for (unsigned U = 0; U < Desc.NumUnits; ++U) {
6509467b48Spatrick       uint64_t OtherMask = Masks[Desc.SubUnitsIdxBegin[U]];
6609467b48Spatrick       Masks[I] |= OtherMask;
6709467b48Spatrick     }
6809467b48Spatrick     ProcResourceID++;
6909467b48Spatrick   }
7009467b48Spatrick 
7109467b48Spatrick #ifndef NDEBUG
7209467b48Spatrick   LLVM_DEBUG(dbgs() << "\nProcessor resource masks:"
7309467b48Spatrick                     << "\n");
7409467b48Spatrick   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
7509467b48Spatrick     const MCProcResourceDesc &Desc = *SM.getProcResource(I);
7609467b48Spatrick     LLVM_DEBUG(dbgs() << '[' << format_decimal(I,2) << "] " << " - "
7709467b48Spatrick                       << format_hex(Masks[I],16) << " - "
7809467b48Spatrick                       << Desc.Name << '\n');
7909467b48Spatrick   }
8009467b48Spatrick #endif
8109467b48Spatrick }
8209467b48Spatrick 
computeBlockRThroughput(const MCSchedModel & SM,unsigned DispatchWidth,unsigned NumMicroOps,ArrayRef<unsigned> ProcResourceUsage)8309467b48Spatrick double computeBlockRThroughput(const MCSchedModel &SM, unsigned DispatchWidth,
8409467b48Spatrick                                unsigned NumMicroOps,
8509467b48Spatrick                                ArrayRef<unsigned> ProcResourceUsage) {
8609467b48Spatrick   // The block throughput is bounded from above by the hardware dispatch
8709467b48Spatrick   // throughput. That is because the DispatchWidth is an upper bound on the
8809467b48Spatrick   // number of opcodes that can be part of a single dispatch group.
8909467b48Spatrick   double Max = static_cast<double>(NumMicroOps) / DispatchWidth;
9009467b48Spatrick 
9109467b48Spatrick   // The block throughput is also limited by the amount of hardware parallelism.
9209467b48Spatrick   // The number of available resource units affects the resource pressure
9309467b48Spatrick   // distribution, as well as how many blocks can be executed every cycle.
9409467b48Spatrick   for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
9509467b48Spatrick     unsigned ResourceCycles = ProcResourceUsage[I];
9609467b48Spatrick     if (!ResourceCycles)
9709467b48Spatrick       continue;
9809467b48Spatrick 
9909467b48Spatrick     const MCProcResourceDesc &MCDesc = *SM.getProcResource(I);
10009467b48Spatrick     double Throughput = static_cast<double>(ResourceCycles) / MCDesc.NumUnits;
10109467b48Spatrick     Max = std::max(Max, Throughput);
10209467b48Spatrick   }
10309467b48Spatrick 
10409467b48Spatrick   // The block reciprocal throughput is computed as the MAX of:
10509467b48Spatrick   //  - (NumMicroOps / DispatchWidth)
10609467b48Spatrick   //  - (NumUnits / ResourceCycles)   for every consumed processor resource.
10709467b48Spatrick   return Max;
10809467b48Spatrick }
10909467b48Spatrick 
11009467b48Spatrick } // namespace mca
11109467b48Spatrick } // namespace llvm
112