10b57cec5SDimitry Andric //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// Memory legalizer - implements memory model. More information can be
110b57cec5SDimitry Andric /// found here:
120b57cec5SDimitry Andric ///   http://llvm.org/docs/AMDGPUUsage.html#memory-model
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "AMDGPU.h"
170b57cec5SDimitry Andric #include "AMDGPUMachineModuleInfo.h"
18e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
190b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
200b57cec5SDimitry Andric #include "llvm/ADT/BitmaskEnum.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
2281ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
230b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
240b57cec5SDimitry Andric #include "llvm/Support/AtomicOrdering.h"
2506c3fb27SDimitry Andric #include "llvm/TargetParser/TargetParser.h"
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric using namespace llvm;
280b57cec5SDimitry Andric using namespace llvm::AMDGPU;
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric #define DEBUG_TYPE "si-memory-legalizer"
310b57cec5SDimitry Andric #define PASS_NAME "SI Memory Legalizer"
320b57cec5SDimitry Andric 
33e8d8bef9SDimitry Andric static cl::opt<bool> AmdgcnSkipCacheInvalidations(
34e8d8bef9SDimitry Andric     "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
35e8d8bef9SDimitry Andric     cl::desc("Use this to skip inserting cache invalidating instructions."));
36e8d8bef9SDimitry Andric 
370b57cec5SDimitry Andric namespace {
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
400b57cec5SDimitry Andric 
410b57cec5SDimitry Andric /// Memory operation flags. Can be ORed together.
420b57cec5SDimitry Andric enum class SIMemOp {
430b57cec5SDimitry Andric   NONE = 0u,
440b57cec5SDimitry Andric   LOAD = 1u << 0,
450b57cec5SDimitry Andric   STORE = 1u << 1,
460b57cec5SDimitry Andric   LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
470b57cec5SDimitry Andric };
480b57cec5SDimitry Andric 
/// Where to insert a newly created instruction relative to an existing one.
enum class Position {
  BEFORE,
  AFTER
};
550b57cec5SDimitry Andric 
/// Atomic synchronization scopes supported by the AMDGPU target, listed from
/// narrowest to widest so that std::min/std::max can clamp a scope.
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  AGENT,
  SYSTEM
};
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric /// The distinct address spaces supported by the AMDGPU target for
6781ad6265SDimitry Andric /// atomic memory operation. Can be ORed together.
680b57cec5SDimitry Andric enum class SIAtomicAddrSpace {
690b57cec5SDimitry Andric   NONE = 0u,
700b57cec5SDimitry Andric   GLOBAL = 1u << 0,
710b57cec5SDimitry Andric   LDS = 1u << 1,
720b57cec5SDimitry Andric   SCRATCH = 1u << 2,
730b57cec5SDimitry Andric   GDS = 1u << 3,
740b57cec5SDimitry Andric   OTHER = 1u << 4,
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric   /// The address spaces that can be accessed by a FLAT instruction.
770b57cec5SDimitry Andric   FLAT = GLOBAL | LDS | SCRATCH,
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric   /// The address spaces that support atomic instructions.
800b57cec5SDimitry Andric   ATOMIC = GLOBAL | LDS | SCRATCH | GDS,
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric   /// All address spaces.
830b57cec5SDimitry Andric   ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric   LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
860b57cec5SDimitry Andric };
870b57cec5SDimitry Andric 
880b57cec5SDimitry Andric class SIMemOpInfo final {
890b57cec5SDimitry Andric private:
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric   friend class SIMemOpAccess;
920b57cec5SDimitry Andric 
930b57cec5SDimitry Andric   AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
940b57cec5SDimitry Andric   AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
950b57cec5SDimitry Andric   SIAtomicScope Scope = SIAtomicScope::SYSTEM;
960b57cec5SDimitry Andric   SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
970b57cec5SDimitry Andric   SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
980b57cec5SDimitry Andric   bool IsCrossAddressSpaceOrdering = false;
99e8d8bef9SDimitry Andric   bool IsVolatile = false;
1000b57cec5SDimitry Andric   bool IsNonTemporal = false;
1010b57cec5SDimitry Andric 
SIMemOpInfo(AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent,SIAtomicScope Scope=SIAtomicScope::SYSTEM,SIAtomicAddrSpace OrderingAddrSpace=SIAtomicAddrSpace::ATOMIC,SIAtomicAddrSpace InstrAddrSpace=SIAtomicAddrSpace::ALL,bool IsCrossAddressSpaceOrdering=true,AtomicOrdering FailureOrdering=AtomicOrdering::SequentiallyConsistent,bool IsVolatile=false,bool IsNonTemporal=false)1020b57cec5SDimitry Andric   SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
1030b57cec5SDimitry Andric               SIAtomicScope Scope = SIAtomicScope::SYSTEM,
1040b57cec5SDimitry Andric               SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
1050b57cec5SDimitry Andric               SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
1060b57cec5SDimitry Andric               bool IsCrossAddressSpaceOrdering = true,
1070b57cec5SDimitry Andric               AtomicOrdering FailureOrdering =
1080b57cec5SDimitry Andric                 AtomicOrdering::SequentiallyConsistent,
109e8d8bef9SDimitry Andric               bool IsVolatile = false,
1100b57cec5SDimitry Andric               bool IsNonTemporal = false)
1110b57cec5SDimitry Andric     : Ordering(Ordering), FailureOrdering(FailureOrdering),
1120b57cec5SDimitry Andric       Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
1130b57cec5SDimitry Andric       InstrAddrSpace(InstrAddrSpace),
1140b57cec5SDimitry Andric       IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
115e8d8bef9SDimitry Andric       IsVolatile(IsVolatile),
1160b57cec5SDimitry Andric       IsNonTemporal(IsNonTemporal) {
117fe6060f1SDimitry Andric 
118fe6060f1SDimitry Andric     if (Ordering == AtomicOrdering::NotAtomic) {
119fe6060f1SDimitry Andric       assert(Scope == SIAtomicScope::NONE &&
120fe6060f1SDimitry Andric              OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
121fe6060f1SDimitry Andric              !IsCrossAddressSpaceOrdering &&
122fe6060f1SDimitry Andric              FailureOrdering == AtomicOrdering::NotAtomic);
123fe6060f1SDimitry Andric       return;
124fe6060f1SDimitry Andric     }
125fe6060f1SDimitry Andric 
126fe6060f1SDimitry Andric     assert(Scope != SIAtomicScope::NONE &&
127fe6060f1SDimitry Andric            (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
128fe6060f1SDimitry Andric                SIAtomicAddrSpace::NONE &&
129fe6060f1SDimitry Andric            (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
130349cc55cSDimitry Andric                SIAtomicAddrSpace::NONE);
131fe6060f1SDimitry Andric 
1320b57cec5SDimitry Andric     // There is also no cross address space ordering if the ordering
1330b57cec5SDimitry Andric     // address space is the same as the instruction address space and
1340b57cec5SDimitry Andric     // only contains a single address space.
1350b57cec5SDimitry Andric     if ((OrderingAddrSpace == InstrAddrSpace) &&
1360b57cec5SDimitry Andric         isPowerOf2_32(uint32_t(InstrAddrSpace)))
1370b57cec5SDimitry Andric       this->IsCrossAddressSpaceOrdering = false;
138fe6060f1SDimitry Andric 
139fe6060f1SDimitry Andric     // Limit the scope to the maximum supported by the instruction's address
140fe6060f1SDimitry Andric     // spaces.
141fe6060f1SDimitry Andric     if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
142fe6060f1SDimitry Andric         SIAtomicAddrSpace::NONE) {
143fe6060f1SDimitry Andric       this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
144fe6060f1SDimitry Andric     } else if ((InstrAddrSpace &
145fe6060f1SDimitry Andric                 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
146fe6060f1SDimitry Andric                SIAtomicAddrSpace::NONE) {
147fe6060f1SDimitry Andric       this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
148fe6060f1SDimitry Andric     } else if ((InstrAddrSpace &
149fe6060f1SDimitry Andric                 ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
150fe6060f1SDimitry Andric                   SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
151fe6060f1SDimitry Andric       this->Scope = std::min(Scope, SIAtomicScope::AGENT);
152fe6060f1SDimitry Andric     }
1530b57cec5SDimitry Andric   }
1540b57cec5SDimitry Andric 
1550b57cec5SDimitry Andric public:
1560b57cec5SDimitry Andric   /// \returns Atomic synchronization scope of the machine instruction used to
1570b57cec5SDimitry Andric   /// create this SIMemOpInfo.
getScope() const1580b57cec5SDimitry Andric   SIAtomicScope getScope() const {
1590b57cec5SDimitry Andric     return Scope;
1600b57cec5SDimitry Andric   }
1610b57cec5SDimitry Andric 
1620b57cec5SDimitry Andric   /// \returns Ordering constraint of the machine instruction used to
1630b57cec5SDimitry Andric   /// create this SIMemOpInfo.
getOrdering() const1640b57cec5SDimitry Andric   AtomicOrdering getOrdering() const {
1650b57cec5SDimitry Andric     return Ordering;
1660b57cec5SDimitry Andric   }
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric   /// \returns Failure ordering constraint of the machine instruction used to
1690b57cec5SDimitry Andric   /// create this SIMemOpInfo.
getFailureOrdering() const1700b57cec5SDimitry Andric   AtomicOrdering getFailureOrdering() const {
1710b57cec5SDimitry Andric     return FailureOrdering;
1720b57cec5SDimitry Andric   }
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric   /// \returns The address spaces be accessed by the machine
175bdd1243dSDimitry Andric   /// instruction used to create this SIMemOpInfo.
getInstrAddrSpace() const1760b57cec5SDimitry Andric   SIAtomicAddrSpace getInstrAddrSpace() const {
1770b57cec5SDimitry Andric     return InstrAddrSpace;
1780b57cec5SDimitry Andric   }
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric   /// \returns The address spaces that must be ordered by the machine
181bdd1243dSDimitry Andric   /// instruction used to create this SIMemOpInfo.
getOrderingAddrSpace() const1820b57cec5SDimitry Andric   SIAtomicAddrSpace getOrderingAddrSpace() const {
1830b57cec5SDimitry Andric     return OrderingAddrSpace;
1840b57cec5SDimitry Andric   }
1850b57cec5SDimitry Andric 
1860b57cec5SDimitry Andric   /// \returns Return true iff memory ordering of operations on
1870b57cec5SDimitry Andric   /// different address spaces is required.
getIsCrossAddressSpaceOrdering() const1880b57cec5SDimitry Andric   bool getIsCrossAddressSpaceOrdering() const {
1890b57cec5SDimitry Andric     return IsCrossAddressSpaceOrdering;
1900b57cec5SDimitry Andric   }
1910b57cec5SDimitry Andric 
1920b57cec5SDimitry Andric   /// \returns True if memory access of the machine instruction used to
193e8d8bef9SDimitry Andric   /// create this SIMemOpInfo is volatile, false otherwise.
isVolatile() const194e8d8bef9SDimitry Andric   bool isVolatile() const {
195e8d8bef9SDimitry Andric     return IsVolatile;
196e8d8bef9SDimitry Andric   }
197e8d8bef9SDimitry Andric 
198e8d8bef9SDimitry Andric   /// \returns True if memory access of the machine instruction used to
199e8d8bef9SDimitry Andric   /// create this SIMemOpInfo is nontemporal, false otherwise.
isNonTemporal() const2000b57cec5SDimitry Andric   bool isNonTemporal() const {
2010b57cec5SDimitry Andric     return IsNonTemporal;
2020b57cec5SDimitry Andric   }
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric   /// \returns True if ordering constraint of the machine instruction used to
2050b57cec5SDimitry Andric   /// create this SIMemOpInfo is unordered or higher, false otherwise.
isAtomic() const2060b57cec5SDimitry Andric   bool isAtomic() const {
2070b57cec5SDimitry Andric     return Ordering != AtomicOrdering::NotAtomic;
2080b57cec5SDimitry Andric   }
2090b57cec5SDimitry Andric 
2100b57cec5SDimitry Andric };
2110b57cec5SDimitry Andric 
2120b57cec5SDimitry Andric class SIMemOpAccess final {
2130b57cec5SDimitry Andric private:
2140b57cec5SDimitry Andric   AMDGPUMachineModuleInfo *MMI = nullptr;
2150b57cec5SDimitry Andric 
2160b57cec5SDimitry Andric   /// Reports unsupported message \p Msg for \p MI to LLVM context.
2170b57cec5SDimitry Andric   void reportUnsupported(const MachineBasicBlock::iterator &MI,
2180b57cec5SDimitry Andric                          const char *Msg) const;
2190b57cec5SDimitry Andric 
220fe6060f1SDimitry Andric   /// Inspects the target synchronization scope \p SSID and determines
2210b57cec5SDimitry Andric   /// the SI atomic scope it corresponds to, the address spaces it
2220b57cec5SDimitry Andric   /// covers, and whether the memory ordering applies between address
2230b57cec5SDimitry Andric   /// spaces.
224bdd1243dSDimitry Andric   std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
225fe6060f1SDimitry Andric   toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric   /// \return Return a bit set of the address spaces accessed by \p AS.
2280b57cec5SDimitry Andric   SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   /// \returns Info constructed from \p MI, which has at least machine memory
2310b57cec5SDimitry Andric   /// operand.
232bdd1243dSDimitry Andric   std::optional<SIMemOpInfo>
233bdd1243dSDimitry Andric   constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;
2340b57cec5SDimitry Andric 
2350b57cec5SDimitry Andric public:
2360b57cec5SDimitry Andric   /// Construct class to support accessing the machine memory operands
2370b57cec5SDimitry Andric   /// of instructions in the machine function \p MF.
2380b57cec5SDimitry Andric   SIMemOpAccess(MachineFunction &MF);
2390b57cec5SDimitry Andric 
240bdd1243dSDimitry Andric   /// \returns Load info if \p MI is a load operation, "std::nullopt" otherwise.
241bdd1243dSDimitry Andric   std::optional<SIMemOpInfo>
242bdd1243dSDimitry Andric   getLoadInfo(const MachineBasicBlock::iterator &MI) const;
2430b57cec5SDimitry Andric 
244bdd1243dSDimitry Andric   /// \returns Store info if \p MI is a store operation, "std::nullopt"
245bdd1243dSDimitry Andric   /// otherwise.
246bdd1243dSDimitry Andric   std::optional<SIMemOpInfo>
247bdd1243dSDimitry Andric   getStoreInfo(const MachineBasicBlock::iterator &MI) const;
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric   /// \returns Atomic fence info if \p MI is an atomic fence operation,
250bdd1243dSDimitry Andric   /// "std::nullopt" otherwise.
251bdd1243dSDimitry Andric   std::optional<SIMemOpInfo>
252bdd1243dSDimitry Andric   getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric   /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
255bdd1243dSDimitry Andric   /// rmw operation, "std::nullopt" otherwise.
256bdd1243dSDimitry Andric   std::optional<SIMemOpInfo>
257bdd1243dSDimitry Andric   getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;
2580b57cec5SDimitry Andric };
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric class SICacheControl {
2610b57cec5SDimitry Andric protected:
2620b57cec5SDimitry Andric 
263e8d8bef9SDimitry Andric   /// AMDGPU subtarget info.
264e8d8bef9SDimitry Andric   const GCNSubtarget &ST;
265e8d8bef9SDimitry Andric 
2660b57cec5SDimitry Andric   /// Instruction info.
2670b57cec5SDimitry Andric   const SIInstrInfo *TII = nullptr;
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric   IsaVersion IV;
2700b57cec5SDimitry Andric 
271e8d8bef9SDimitry Andric   /// Whether to insert cache invalidating instructions.
2725ffd83dbSDimitry Andric   bool InsertCacheInv;
2735ffd83dbSDimitry Andric 
2740b57cec5SDimitry Andric   SICacheControl(const GCNSubtarget &ST);
2750b57cec5SDimitry Andric 
276fe6060f1SDimitry Andric   /// Sets named bit \p BitName to "true" if present in instruction \p MI.
277fe6060f1SDimitry Andric   /// \returns Returns true if \p MI is modified, false otherwise.
278fe6060f1SDimitry Andric   bool enableNamedBit(const MachineBasicBlock::iterator MI,
279fe6060f1SDimitry Andric                       AMDGPU::CPol::CPol Bit) const;
280fe6060f1SDimitry Andric 
2810b57cec5SDimitry Andric public:
2820b57cec5SDimitry Andric 
2830b57cec5SDimitry Andric   /// Create a cache control for the subtarget \p ST.
2840b57cec5SDimitry Andric   static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);
2850b57cec5SDimitry Andric 
2860b57cec5SDimitry Andric   /// Update \p MI memory load instruction to bypass any caches up to
2870b57cec5SDimitry Andric   /// the \p Scope memory scope for address spaces \p
2880b57cec5SDimitry Andric   /// AddrSpace. Return true iff the instruction was modified.
2890b57cec5SDimitry Andric   virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
2900b57cec5SDimitry Andric                                      SIAtomicScope Scope,
2910b57cec5SDimitry Andric                                      SIAtomicAddrSpace AddrSpace) const = 0;
2920b57cec5SDimitry Andric 
293fe6060f1SDimitry Andric   /// Update \p MI memory store instruction to bypass any caches up to
294fe6060f1SDimitry Andric   /// the \p Scope memory scope for address spaces \p
295fe6060f1SDimitry Andric   /// AddrSpace. Return true iff the instruction was modified.
296fe6060f1SDimitry Andric   virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
297fe6060f1SDimitry Andric                                       SIAtomicScope Scope,
298fe6060f1SDimitry Andric                                       SIAtomicAddrSpace AddrSpace) const = 0;
299fe6060f1SDimitry Andric 
300fe6060f1SDimitry Andric   /// Update \p MI memory read-modify-write instruction to bypass any caches up
301fe6060f1SDimitry Andric   /// to the \p Scope memory scope for address spaces \p AddrSpace. Return true
302fe6060f1SDimitry Andric   /// iff the instruction was modified.
303fe6060f1SDimitry Andric   virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
304fe6060f1SDimitry Andric                                     SIAtomicScope Scope,
305fe6060f1SDimitry Andric                                     SIAtomicAddrSpace AddrSpace) const = 0;
306fe6060f1SDimitry Andric 
307e8d8bef9SDimitry Andric   /// Update \p MI memory instruction of kind \p Op associated with address
308e8d8bef9SDimitry Andric   /// spaces \p AddrSpace to indicate it is volatile and/or nontemporal. Return
309e8d8bef9SDimitry Andric   /// true iff the instruction was modified.
310e8d8bef9SDimitry Andric   virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
3110b57cec5SDimitry Andric                                               SIAtomicAddrSpace AddrSpace,
312e8d8bef9SDimitry Andric                                               SIMemOp Op, bool IsVolatile,
313e8d8bef9SDimitry Andric                                               bool IsNonTemporal) const = 0;
3140b57cec5SDimitry Andric 
3150b57cec5SDimitry Andric   /// Inserts any necessary instructions at position \p Pos relative
316e8d8bef9SDimitry Andric   /// to instruction \p MI to ensure memory instructions before \p Pos of kind
317e8d8bef9SDimitry Andric   /// \p Op associated with address spaces \p AddrSpace have completed. Used
318e8d8bef9SDimitry Andric   /// between memory instructions to enforce the order they become visible as
319e8d8bef9SDimitry Andric   /// observed by other memory instructions executing in memory scope \p Scope.
320e8d8bef9SDimitry Andric   /// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between
321e8d8bef9SDimitry Andric   /// address spaces. Returns true iff any instructions inserted.
3220b57cec5SDimitry Andric   virtual bool insertWait(MachineBasicBlock::iterator &MI,
3230b57cec5SDimitry Andric                           SIAtomicScope Scope,
3240b57cec5SDimitry Andric                           SIAtomicAddrSpace AddrSpace,
3250b57cec5SDimitry Andric                           SIMemOp Op,
3260b57cec5SDimitry Andric                           bool IsCrossAddrSpaceOrdering,
3270b57cec5SDimitry Andric                           Position Pos) const = 0;
3280b57cec5SDimitry Andric 
329e8d8bef9SDimitry Andric   /// Inserts any necessary instructions at position \p Pos relative to
330e8d8bef9SDimitry Andric   /// instruction \p MI to ensure any subsequent memory instructions of this
331e8d8bef9SDimitry Andric   /// thread with address spaces \p AddrSpace will observe the previous memory
332e8d8bef9SDimitry Andric   /// operations by any thread for memory scopes up to memory scope \p Scope .
333e8d8bef9SDimitry Andric   /// Returns true iff any instructions inserted.
334e8d8bef9SDimitry Andric   virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
335e8d8bef9SDimitry Andric                              SIAtomicScope Scope,
336e8d8bef9SDimitry Andric                              SIAtomicAddrSpace AddrSpace,
337e8d8bef9SDimitry Andric                              Position Pos) const = 0;
338e8d8bef9SDimitry Andric 
339e8d8bef9SDimitry Andric   /// Inserts any necessary instructions at position \p Pos relative to
340e8d8bef9SDimitry Andric   /// instruction \p MI to ensure previous memory instructions by this thread
341e8d8bef9SDimitry Andric   /// with address spaces \p AddrSpace have completed and can be observed by
342e8d8bef9SDimitry Andric   /// subsequent memory instructions by any thread executing in memory scope \p
343e8d8bef9SDimitry Andric   /// Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering is
344e8d8bef9SDimitry Andric   /// between address spaces. Returns true iff any instructions inserted.
345e8d8bef9SDimitry Andric   virtual bool insertRelease(MachineBasicBlock::iterator &MI,
346e8d8bef9SDimitry Andric                              SIAtomicScope Scope,
347e8d8bef9SDimitry Andric                              SIAtomicAddrSpace AddrSpace,
348e8d8bef9SDimitry Andric                              bool IsCrossAddrSpaceOrdering,
349e8d8bef9SDimitry Andric                              Position Pos) const = 0;
350e8d8bef9SDimitry Andric 
3510b57cec5SDimitry Andric   /// Virtual destructor to allow derivations to be deleted.
3520b57cec5SDimitry Andric   virtual ~SICacheControl() = default;
3530b57cec5SDimitry Andric 
tryForceStoreSC0SC1(const SIMemOpInfo & MOI,MachineBasicBlock::iterator & MI) const35406c3fb27SDimitry Andric   virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
35506c3fb27SDimitry Andric                                    MachineBasicBlock::iterator &MI) const {
35606c3fb27SDimitry Andric     return false;
35706c3fb27SDimitry Andric   }
3580b57cec5SDimitry Andric };
3590b57cec5SDimitry Andric 
3600b57cec5SDimitry Andric class SIGfx6CacheControl : public SICacheControl {
3610b57cec5SDimitry Andric protected:
3620b57cec5SDimitry Andric 
3630b57cec5SDimitry Andric   /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
3640b57cec5SDimitry Andric   /// is modified, false otherwise.
enableGLCBit(const MachineBasicBlock::iterator & MI) const3650b57cec5SDimitry Andric   bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
366fe6060f1SDimitry Andric     return enableNamedBit(MI, AMDGPU::CPol::GLC);
3670b57cec5SDimitry Andric   }
3680b57cec5SDimitry Andric 
3690b57cec5SDimitry Andric   /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
3700b57cec5SDimitry Andric   /// is modified, false otherwise.
enableSLCBit(const MachineBasicBlock::iterator & MI) const3710b57cec5SDimitry Andric   bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
372fe6060f1SDimitry Andric     return enableNamedBit(MI, AMDGPU::CPol::SLC);
3730b57cec5SDimitry Andric   }
3740b57cec5SDimitry Andric 
3750b57cec5SDimitry Andric public:
3760b57cec5SDimitry Andric 
SIGfx6CacheControl(const GCNSubtarget & ST)377349cc55cSDimitry Andric   SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}
3780b57cec5SDimitry Andric 
3790b57cec5SDimitry Andric   bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
3800b57cec5SDimitry Andric                              SIAtomicScope Scope,
3810b57cec5SDimitry Andric                              SIAtomicAddrSpace AddrSpace) const override;
3820b57cec5SDimitry Andric 
383fe6060f1SDimitry Andric   bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
384fe6060f1SDimitry Andric                               SIAtomicScope Scope,
385fe6060f1SDimitry Andric                               SIAtomicAddrSpace AddrSpace) const override;
386fe6060f1SDimitry Andric 
387fe6060f1SDimitry Andric   bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
388fe6060f1SDimitry Andric                             SIAtomicScope Scope,
389fe6060f1SDimitry Andric                             SIAtomicAddrSpace AddrSpace) const override;
390fe6060f1SDimitry Andric 
391e8d8bef9SDimitry Andric   bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
392e8d8bef9SDimitry Andric                                       SIAtomicAddrSpace AddrSpace, SIMemOp Op,
393e8d8bef9SDimitry Andric                                       bool IsVolatile,
394e8d8bef9SDimitry Andric                                       bool IsNonTemporal) const override;
3950b57cec5SDimitry Andric 
3960b57cec5SDimitry Andric   bool insertWait(MachineBasicBlock::iterator &MI,
3970b57cec5SDimitry Andric                   SIAtomicScope Scope,
3980b57cec5SDimitry Andric                   SIAtomicAddrSpace AddrSpace,
3990b57cec5SDimitry Andric                   SIMemOp Op,
4000b57cec5SDimitry Andric                   bool IsCrossAddrSpaceOrdering,
4010b57cec5SDimitry Andric                   Position Pos) const override;
402e8d8bef9SDimitry Andric 
403e8d8bef9SDimitry Andric   bool insertAcquire(MachineBasicBlock::iterator &MI,
404e8d8bef9SDimitry Andric                      SIAtomicScope Scope,
405e8d8bef9SDimitry Andric                      SIAtomicAddrSpace AddrSpace,
406e8d8bef9SDimitry Andric                      Position Pos) const override;
407e8d8bef9SDimitry Andric 
408e8d8bef9SDimitry Andric   bool insertRelease(MachineBasicBlock::iterator &MI,
409e8d8bef9SDimitry Andric                      SIAtomicScope Scope,
410e8d8bef9SDimitry Andric                      SIAtomicAddrSpace AddrSpace,
411e8d8bef9SDimitry Andric                      bool IsCrossAddrSpaceOrdering,
412e8d8bef9SDimitry Andric                      Position Pos) const override;
4130b57cec5SDimitry Andric };
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric class SIGfx7CacheControl : public SIGfx6CacheControl {
4160b57cec5SDimitry Andric public:
4170b57cec5SDimitry Andric 
SIGfx7CacheControl(const GCNSubtarget & ST)418349cc55cSDimitry Andric   SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}
4190b57cec5SDimitry Andric 
420e8d8bef9SDimitry Andric   bool insertAcquire(MachineBasicBlock::iterator &MI,
4210b57cec5SDimitry Andric                      SIAtomicScope Scope,
4220b57cec5SDimitry Andric                      SIAtomicAddrSpace AddrSpace,
4230b57cec5SDimitry Andric                      Position Pos) const override;
4240b57cec5SDimitry Andric 
4250b57cec5SDimitry Andric };
4260b57cec5SDimitry Andric 
427fe6060f1SDimitry Andric class SIGfx90ACacheControl : public SIGfx7CacheControl {
428fe6060f1SDimitry Andric public:
429fe6060f1SDimitry Andric 
SIGfx90ACacheControl(const GCNSubtarget & ST)430349cc55cSDimitry Andric   SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}
431fe6060f1SDimitry Andric 
432fe6060f1SDimitry Andric   bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
433fe6060f1SDimitry Andric                              SIAtomicScope Scope,
434fe6060f1SDimitry Andric                              SIAtomicAddrSpace AddrSpace) const override;
435fe6060f1SDimitry Andric 
436fe6060f1SDimitry Andric   bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
437fe6060f1SDimitry Andric                               SIAtomicScope Scope,
438fe6060f1SDimitry Andric                               SIAtomicAddrSpace AddrSpace) const override;
439fe6060f1SDimitry Andric 
440fe6060f1SDimitry Andric   bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
441fe6060f1SDimitry Andric                             SIAtomicScope Scope,
442fe6060f1SDimitry Andric                             SIAtomicAddrSpace AddrSpace) const override;
443fe6060f1SDimitry Andric 
444fe6060f1SDimitry Andric   bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
445fe6060f1SDimitry Andric                                       SIAtomicAddrSpace AddrSpace, SIMemOp Op,
446fe6060f1SDimitry Andric                                       bool IsVolatile,
447fe6060f1SDimitry Andric                                       bool IsNonTemporal) const override;
448fe6060f1SDimitry Andric 
449fe6060f1SDimitry Andric   bool insertWait(MachineBasicBlock::iterator &MI,
450fe6060f1SDimitry Andric                   SIAtomicScope Scope,
451fe6060f1SDimitry Andric                   SIAtomicAddrSpace AddrSpace,
452fe6060f1SDimitry Andric                   SIMemOp Op,
453fe6060f1SDimitry Andric                   bool IsCrossAddrSpaceOrdering,
454fe6060f1SDimitry Andric                   Position Pos) const override;
455fe6060f1SDimitry Andric 
456fe6060f1SDimitry Andric   bool insertAcquire(MachineBasicBlock::iterator &MI,
457fe6060f1SDimitry Andric                      SIAtomicScope Scope,
458fe6060f1SDimitry Andric                      SIAtomicAddrSpace AddrSpace,
459fe6060f1SDimitry Andric                      Position Pos) const override;
460fe6060f1SDimitry Andric 
461fe6060f1SDimitry Andric   bool insertRelease(MachineBasicBlock::iterator &MI,
462fe6060f1SDimitry Andric                      SIAtomicScope Scope,
463fe6060f1SDimitry Andric                      SIAtomicAddrSpace AddrSpace,
464fe6060f1SDimitry Andric                      bool IsCrossAddrSpaceOrdering,
465fe6060f1SDimitry Andric                      Position Pos) const override;
466fe6060f1SDimitry Andric };
467fe6060f1SDimitry Andric 
46881ad6265SDimitry Andric class SIGfx940CacheControl : public SIGfx90ACacheControl {
46981ad6265SDimitry Andric protected:
47081ad6265SDimitry Andric 
47181ad6265SDimitry Andric   /// Sets SC0 bit to "true" if present in \p MI. Returns true if \p MI
47281ad6265SDimitry Andric   /// is modified, false otherwise.
enableSC0Bit(const MachineBasicBlock::iterator & MI) const47381ad6265SDimitry Andric   bool enableSC0Bit(const MachineBasicBlock::iterator &MI) const {
47481ad6265SDimitry Andric     return enableNamedBit(MI, AMDGPU::CPol::SC0);
47581ad6265SDimitry Andric   }
47681ad6265SDimitry Andric 
47781ad6265SDimitry Andric   /// Sets SC1 bit to "true" if present in \p MI. Returns true if \p MI
47881ad6265SDimitry Andric   /// is modified, false otherwise.
enableSC1Bit(const MachineBasicBlock::iterator & MI) const47981ad6265SDimitry Andric   bool enableSC1Bit(const MachineBasicBlock::iterator &MI) const {
48081ad6265SDimitry Andric     return enableNamedBit(MI, AMDGPU::CPol::SC1);
48181ad6265SDimitry Andric   }
48281ad6265SDimitry Andric 
48381ad6265SDimitry Andric   /// Sets NT bit to "true" if present in \p MI. Returns true if \p MI
48481ad6265SDimitry Andric   /// is modified, false otherwise.
enableNTBit(const MachineBasicBlock::iterator & MI) const48581ad6265SDimitry Andric   bool enableNTBit(const MachineBasicBlock::iterator &MI) const {
48681ad6265SDimitry Andric     return enableNamedBit(MI, AMDGPU::CPol::NT);
48781ad6265SDimitry Andric   }
48881ad6265SDimitry Andric 
48981ad6265SDimitry Andric public:
49081ad6265SDimitry Andric 
SIGfx940CacheControl(const GCNSubtarget & ST)49181ad6265SDimitry Andric   SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {};
49281ad6265SDimitry Andric 
49381ad6265SDimitry Andric   bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
49481ad6265SDimitry Andric                              SIAtomicScope Scope,
49581ad6265SDimitry Andric                              SIAtomicAddrSpace AddrSpace) const override;
49681ad6265SDimitry Andric 
49781ad6265SDimitry Andric   bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
49881ad6265SDimitry Andric                               SIAtomicScope Scope,
49981ad6265SDimitry Andric                               SIAtomicAddrSpace AddrSpace) const override;
50081ad6265SDimitry Andric 
50181ad6265SDimitry Andric   bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
50281ad6265SDimitry Andric                             SIAtomicScope Scope,
50381ad6265SDimitry Andric                             SIAtomicAddrSpace AddrSpace) const override;
50481ad6265SDimitry Andric 
50581ad6265SDimitry Andric   bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
50681ad6265SDimitry Andric                                       SIAtomicAddrSpace AddrSpace, SIMemOp Op,
50781ad6265SDimitry Andric                                       bool IsVolatile,
50881ad6265SDimitry Andric                                       bool IsNonTemporal) const override;
50981ad6265SDimitry Andric 
51081ad6265SDimitry Andric   bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
51181ad6265SDimitry Andric                      SIAtomicAddrSpace AddrSpace, Position Pos) const override;
51281ad6265SDimitry Andric 
51381ad6265SDimitry Andric   bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
51481ad6265SDimitry Andric                      SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
51581ad6265SDimitry Andric                      Position Pos) const override;
51606c3fb27SDimitry Andric 
tryForceStoreSC0SC1(const SIMemOpInfo & MOI,MachineBasicBlock::iterator & MI) const51706c3fb27SDimitry Andric   bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
51806c3fb27SDimitry Andric                            MachineBasicBlock::iterator &MI) const override {
51906c3fb27SDimitry Andric     bool Changed = false;
52006c3fb27SDimitry Andric     if (ST.hasForceStoreSC0SC1() &&
52106c3fb27SDimitry Andric         (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
52206c3fb27SDimitry Andric                                     SIAtomicAddrSpace::GLOBAL |
52306c3fb27SDimitry Andric                                     SIAtomicAddrSpace::OTHER)) !=
52406c3fb27SDimitry Andric          SIAtomicAddrSpace::NONE) {
52506c3fb27SDimitry Andric       Changed |= enableSC0Bit(MI);
52606c3fb27SDimitry Andric       Changed |= enableSC1Bit(MI);
52706c3fb27SDimitry Andric     }
52806c3fb27SDimitry Andric     return Changed;
52906c3fb27SDimitry Andric   }
53081ad6265SDimitry Andric };
53181ad6265SDimitry Andric 
/// Cache control for GFX10 targets. Builds on the GFX7 handling and adds the
/// DLC cache-policy bit, plus GFX10-specific wait/acquire insertion.
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:

  /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::DLC);
  }

public:

  SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile,
                                      bool IsNonTemporal) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;
};
5660b57cec5SDimitry Andric 
/// Cache control for GFX11 targets. Reuses the GFX10 implementation except
/// for load cache bypass and volatile/nontemporal handling, which it
/// overrides below.
class SIGfx11CacheControl : public SIGfx10CacheControl {
public:
  SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile,
                                      bool IsNonTemporal) const override;
};
58081ad6265SDimitry Andric 
/// Cache control for GFX12 targets. GFX12 expresses cache behavior through
/// TH (temporal hint) and Scope fields of the CPol operand rather than
/// individual bypass bits; the setters below write those fields.
class SIGfx12CacheControl : public SIGfx11CacheControl {
protected:
  // Sets TH policy to \p Value if CPol operand is present in instruction \p MI.
  // \returns Returns true if \p MI is modified, false otherwise.
  bool setTH(const MachineBasicBlock::iterator MI,
             AMDGPU::CPol::CPol Value) const;
  // Sets Scope policy to \p Value if CPol operand is present in instruction \p
  // MI. \returns Returns true if \p MI is modified, false otherwise.
  bool setScope(const MachineBasicBlock::iterator MI,
                AMDGPU::CPol::CPol Value) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {}

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile,
                                      bool IsNonTemporal) const override;
};
6071db9f3b2SDimitry Andric 
/// Machine function pass that rewrites memory operations so they respect the
/// AMDGPU memory model: it classifies each instruction via SIMemOpAccess and
/// applies the subtarget's SICacheControl policy (cache bits, waits,
/// invalidates/writebacks).
class SIMemoryLegalizer final : public MachineFunctionPass {
private:

  /// Cache Control. Created per-function for the current subtarget in
  /// runOnMachineFunction.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Return true iff instruction \p MI is an atomic instruction that
  /// returns a result.
  bool isAtomicRet(const MachineInstr &MI) const {
    return SIInstrInfo::isAtomicRet(MI);
  }

  /// Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  // The pass only inserts/annotates instructions; it never alters the CFG.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};
6600b57cec5SDimitry Andric 
6610b57cec5SDimitry Andric } // end namespace anonymous
6620b57cec5SDimitry Andric 
reportUnsupported(const MachineBasicBlock::iterator & MI,const char * Msg) const6630b57cec5SDimitry Andric void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
6640b57cec5SDimitry Andric                                       const char *Msg) const {
6650b57cec5SDimitry Andric   const Function &Func = MI->getParent()->getParent()->getFunction();
6660b57cec5SDimitry Andric   DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
6670b57cec5SDimitry Andric   Func.getContext().diagnose(Diag);
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric 
670bdd1243dSDimitry Andric std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
toSIAtomicScope(SyncScope::ID SSID,SIAtomicAddrSpace InstrAddrSpace) const6710b57cec5SDimitry Andric SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
672fe6060f1SDimitry Andric                                SIAtomicAddrSpace InstrAddrSpace) const {
6730b57cec5SDimitry Andric   if (SSID == SyncScope::System)
674bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
6750b57cec5SDimitry Andric   if (SSID == MMI->getAgentSSID())
676bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
6770b57cec5SDimitry Andric   if (SSID == MMI->getWorkgroupSSID())
678bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
6790b57cec5SDimitry Andric                       true);
6800b57cec5SDimitry Andric   if (SSID == MMI->getWavefrontSSID())
681bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
6820b57cec5SDimitry Andric                       true);
6830b57cec5SDimitry Andric   if (SSID == SyncScope::SingleThread)
684bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
6850b57cec5SDimitry Andric                       true);
6860b57cec5SDimitry Andric   if (SSID == MMI->getSystemOneAddressSpaceSSID())
687bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::SYSTEM,
688bdd1243dSDimitry Andric                       SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
6890b57cec5SDimitry Andric   if (SSID == MMI->getAgentOneAddressSpaceSSID())
690bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::AGENT,
691bdd1243dSDimitry Andric                       SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
6920b57cec5SDimitry Andric   if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
693bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::WORKGROUP,
694bdd1243dSDimitry Andric                       SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
6950b57cec5SDimitry Andric   if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
696bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::WAVEFRONT,
697bdd1243dSDimitry Andric                       SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
6980b57cec5SDimitry Andric   if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
699bdd1243dSDimitry Andric     return std::tuple(SIAtomicScope::SINGLETHREAD,
700bdd1243dSDimitry Andric                       SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
701bdd1243dSDimitry Andric   return std::nullopt;
7020b57cec5SDimitry Andric }
7030b57cec5SDimitry Andric 
toSIAtomicAddrSpace(unsigned AS) const7040b57cec5SDimitry Andric SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
7050b57cec5SDimitry Andric   if (AS == AMDGPUAS::FLAT_ADDRESS)
7060b57cec5SDimitry Andric     return SIAtomicAddrSpace::FLAT;
7070b57cec5SDimitry Andric   if (AS == AMDGPUAS::GLOBAL_ADDRESS)
7080b57cec5SDimitry Andric     return SIAtomicAddrSpace::GLOBAL;
7090b57cec5SDimitry Andric   if (AS == AMDGPUAS::LOCAL_ADDRESS)
7100b57cec5SDimitry Andric     return SIAtomicAddrSpace::LDS;
7110b57cec5SDimitry Andric   if (AS == AMDGPUAS::PRIVATE_ADDRESS)
7120b57cec5SDimitry Andric     return SIAtomicAddrSpace::SCRATCH;
7130b57cec5SDimitry Andric   if (AS == AMDGPUAS::REGION_ADDRESS)
7140b57cec5SDimitry Andric     return SIAtomicAddrSpace::GDS;
7150b57cec5SDimitry Andric 
7160b57cec5SDimitry Andric   return SIAtomicAddrSpace::OTHER;
7170b57cec5SDimitry Andric }
7180b57cec5SDimitry Andric 
SIMemOpAccess(MachineFunction & MF)7190b57cec5SDimitry Andric SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
7200b57cec5SDimitry Andric   MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
7210b57cec5SDimitry Andric }
7220b57cec5SDimitry Andric 
/// Build a SIMemOpInfo by merging the properties of all memory operands of
/// \p MI: address spaces are unioned, orderings are merged to the strongest,
/// the scope is widened to the most inclusive, volatility is OR-ed and
/// nontemporal is AND-ed (every operand must be nontemporal). Returns
/// std::nullopt (after emitting a diagnostic) for unsupported scope or
/// address-space combinations.
std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  // Starts true and is AND-ed below: nontemporal only if every MMO is.
  bool IsNonTemporal = true;
  bool IsVolatile = false;

  // Validator should check whether or not MMOs cover the entire set of
  // locations accessed by the memory instruction.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    InstrAddrSpace |=
      toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      // Scopes must be comparable: one must include the other, otherwise
      // there is no single scope that orders both operands.
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(MI,
          "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      // Keep the wider (more inclusive) of the two scopes.
      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    // The ordering address spaces must be non-empty, a subset of ATOMIC,
    // and the instruction must touch at least one atomic address space.
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal);
}
7820b57cec5SDimitry Andric 
783bdd1243dSDimitry Andric std::optional<SIMemOpInfo>
getLoadInfo(const MachineBasicBlock::iterator & MI) const784bdd1243dSDimitry Andric SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
7850b57cec5SDimitry Andric   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
7860b57cec5SDimitry Andric 
7870b57cec5SDimitry Andric   if (!(MI->mayLoad() && !MI->mayStore()))
788bdd1243dSDimitry Andric     return std::nullopt;
7890b57cec5SDimitry Andric 
7900b57cec5SDimitry Andric   // Be conservative if there are no memory operands.
7910b57cec5SDimitry Andric   if (MI->getNumMemOperands() == 0)
7920b57cec5SDimitry Andric     return SIMemOpInfo();
7930b57cec5SDimitry Andric 
7940b57cec5SDimitry Andric   return constructFromMIWithMMO(MI);
7950b57cec5SDimitry Andric }
7960b57cec5SDimitry Andric 
797bdd1243dSDimitry Andric std::optional<SIMemOpInfo>
getStoreInfo(const MachineBasicBlock::iterator & MI) const798bdd1243dSDimitry Andric SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
7990b57cec5SDimitry Andric   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
8000b57cec5SDimitry Andric 
8010b57cec5SDimitry Andric   if (!(!MI->mayLoad() && MI->mayStore()))
802bdd1243dSDimitry Andric     return std::nullopt;
8030b57cec5SDimitry Andric 
8040b57cec5SDimitry Andric   // Be conservative if there are no memory operands.
8050b57cec5SDimitry Andric   if (MI->getNumMemOperands() == 0)
8060b57cec5SDimitry Andric     return SIMemOpInfo();
8070b57cec5SDimitry Andric 
8080b57cec5SDimitry Andric   return constructFromMIWithMMO(MI);
8090b57cec5SDimitry Andric }
8100b57cec5SDimitry Andric 
811bdd1243dSDimitry Andric std::optional<SIMemOpInfo>
getAtomicFenceInfo(const MachineBasicBlock::iterator & MI) const812bdd1243dSDimitry Andric SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
8130b57cec5SDimitry Andric   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
8140b57cec5SDimitry Andric 
8150b57cec5SDimitry Andric   if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
816bdd1243dSDimitry Andric     return std::nullopt;
8170b57cec5SDimitry Andric 
8180b57cec5SDimitry Andric   AtomicOrdering Ordering =
8190b57cec5SDimitry Andric     static_cast<AtomicOrdering>(MI->getOperand(0).getImm());
8200b57cec5SDimitry Andric 
8210b57cec5SDimitry Andric   SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
8220b57cec5SDimitry Andric   auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
8230b57cec5SDimitry Andric   if (!ScopeOrNone) {
8240b57cec5SDimitry Andric     reportUnsupported(MI, "Unsupported atomic synchronization scope");
825bdd1243dSDimitry Andric     return std::nullopt;
8260b57cec5SDimitry Andric   }
8270b57cec5SDimitry Andric 
8280b57cec5SDimitry Andric   SIAtomicScope Scope = SIAtomicScope::NONE;
8290b57cec5SDimitry Andric   SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
8300b57cec5SDimitry Andric   bool IsCrossAddressSpaceOrdering = false;
8310b57cec5SDimitry Andric   std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
83281ad6265SDimitry Andric       *ScopeOrNone;
8330b57cec5SDimitry Andric 
8340b57cec5SDimitry Andric   if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
8350b57cec5SDimitry Andric       ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
8360b57cec5SDimitry Andric     reportUnsupported(MI, "Unsupported atomic address space");
837bdd1243dSDimitry Andric     return std::nullopt;
8380b57cec5SDimitry Andric   }
8390b57cec5SDimitry Andric 
8400b57cec5SDimitry Andric   return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
841fe6060f1SDimitry Andric                      IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
8420b57cec5SDimitry Andric }
8430b57cec5SDimitry Andric 
getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator & MI) const844bdd1243dSDimitry Andric std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
8450b57cec5SDimitry Andric     const MachineBasicBlock::iterator &MI) const {
8460b57cec5SDimitry Andric   assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
8470b57cec5SDimitry Andric 
8480b57cec5SDimitry Andric   if (!(MI->mayLoad() && MI->mayStore()))
849bdd1243dSDimitry Andric     return std::nullopt;
8500b57cec5SDimitry Andric 
8510b57cec5SDimitry Andric   // Be conservative if there are no memory operands.
8520b57cec5SDimitry Andric   if (MI->getNumMemOperands() == 0)
8530b57cec5SDimitry Andric     return SIMemOpInfo();
8540b57cec5SDimitry Andric 
8550b57cec5SDimitry Andric   return constructFromMIWithMMO(MI);
8560b57cec5SDimitry Andric }
8570b57cec5SDimitry Andric 
/// Construct for subtarget \p ST, caching the instruction info and ISA
/// version used when emitting cache-control instructions.
SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
  // Cache invalidation insertion may be disabled via the
  // amdgcn-skip-cache-invalidations command-line flag.
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}
8630b57cec5SDimitry Andric 
enableNamedBit(const MachineBasicBlock::iterator MI,AMDGPU::CPol::CPol Bit) const864fe6060f1SDimitry Andric bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
865fe6060f1SDimitry Andric                                     AMDGPU::CPol::CPol Bit) const {
866fe6060f1SDimitry Andric   MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
867fe6060f1SDimitry Andric   if (!CPol)
868fe6060f1SDimitry Andric     return false;
869fe6060f1SDimitry Andric 
870fe6060f1SDimitry Andric   CPol->setImm(CPol->getImm() | Bit);
871fe6060f1SDimitry Andric   return true;
872fe6060f1SDimitry Andric }
873fe6060f1SDimitry Andric 
/* static */
/// Factory: pick the cache-control implementation for subtarget \p ST.
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  // GFX940/GFX90A are identified by instruction-feature checks, so they
  // must be tested before the generation thresholds below.
  if (ST.hasGFX940Insts())
    return std::make_unique<SIGfx940CacheControl>(ST);
  if (ST.hasGFX90AInsts())
    return std::make_unique<SIGfx90ACacheControl>(ST);
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx7CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX11)
    return std::make_unique<SIGfx10CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX12)
    return std::make_unique<SIGfx11CacheControl>(ST);
  return std::make_unique<SIGfx12CacheControl>(ST);
}
8910b57cec5SDimitry Andric 
/// Make the load \p MI bypass caches as required for atomic operations at
/// \p Scope over \p AddrSpace. Returns true if \p MI was modified.
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set L1 cache policy to MISS_EVICT.
      // Note: there is no L2 cache bypass policy at the ISA level.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory caches
  /// to be bypassed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  return Changed;
}
9260b57cec5SDimitry Andric 
enableStoreCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const927fe6060f1SDimitry Andric bool SIGfx6CacheControl::enableStoreCacheBypass(
928fe6060f1SDimitry Andric     const MachineBasicBlock::iterator &MI,
929fe6060f1SDimitry Andric     SIAtomicScope Scope,
930fe6060f1SDimitry Andric     SIAtomicAddrSpace AddrSpace) const {
931fe6060f1SDimitry Andric   assert(!MI->mayLoad() && MI->mayStore());
932fe6060f1SDimitry Andric   bool Changed = false;
933fe6060f1SDimitry Andric 
934fe6060f1SDimitry Andric   /// The L1 cache is write through so does not need to be bypassed. There is no
935fe6060f1SDimitry Andric   /// bypass control for the L2 cache at the isa level.
936fe6060f1SDimitry Andric 
937fe6060f1SDimitry Andric   return Changed;
938fe6060f1SDimitry Andric }
939fe6060f1SDimitry Andric 
enableRMWCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const940fe6060f1SDimitry Andric bool SIGfx6CacheControl::enableRMWCacheBypass(
941fe6060f1SDimitry Andric     const MachineBasicBlock::iterator &MI,
942fe6060f1SDimitry Andric     SIAtomicScope Scope,
943fe6060f1SDimitry Andric     SIAtomicAddrSpace AddrSpace) const {
944fe6060f1SDimitry Andric   assert(MI->mayLoad() && MI->mayStore());
945fe6060f1SDimitry Andric   bool Changed = false;
946fe6060f1SDimitry Andric 
9474824e7fdSDimitry Andric   /// Do not set GLC for RMW atomic operations as L0/L1 cache is automatically
9484824e7fdSDimitry Andric   /// bypassed, and the GLC bit is instead used to indicate if they are
9494824e7fdSDimitry Andric   /// return or no-return.
9504824e7fdSDimitry Andric   /// Note: there is no L2 cache coherent bypass control at the ISA level.
951fe6060f1SDimitry Andric 
952fe6060f1SDimitry Andric   return Changed;
953fe6060f1SDimitry Andric }
954fe6060f1SDimitry Andric 
enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator & MI,SIAtomicAddrSpace AddrSpace,SIMemOp Op,bool IsVolatile,bool IsNonTemporal) const955e8d8bef9SDimitry Andric bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
956e8d8bef9SDimitry Andric     MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
957e8d8bef9SDimitry Andric     bool IsVolatile, bool IsNonTemporal) const {
958e8d8bef9SDimitry Andric   // Only handle load and store, not atomic read-modify-write insructions. The
959e8d8bef9SDimitry Andric   // latter use glc to indicate if the atomic returns a result and so must not
960e8d8bef9SDimitry Andric   // be used for cache control.
9610b57cec5SDimitry Andric   assert(MI->mayLoad() ^ MI->mayStore());
962e8d8bef9SDimitry Andric 
963e8d8bef9SDimitry Andric   // Only update load and store, not LLVM IR atomic read-modify-write
964e8d8bef9SDimitry Andric   // instructions. The latter are always marked as volatile so cannot sensibly
965e8d8bef9SDimitry Andric   // handle it as do not want to pessimize all atomics. Also they do not support
966e8d8bef9SDimitry Andric   // the nontemporal attribute.
967e8d8bef9SDimitry Andric   assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
968e8d8bef9SDimitry Andric 
9690b57cec5SDimitry Andric   bool Changed = false;
9700b57cec5SDimitry Andric 
971e8d8bef9SDimitry Andric   if (IsVolatile) {
9724824e7fdSDimitry Andric     // Set L1 cache policy to be MISS_EVICT for load instructions
9734824e7fdSDimitry Andric     // and MISS_LRU for store instructions.
9744824e7fdSDimitry Andric     // Note: there is no L2 cache bypass policy at the ISA level.
975e8d8bef9SDimitry Andric     if (Op == SIMemOp::LOAD)
9760b57cec5SDimitry Andric       Changed |= enableGLCBit(MI);
977e8d8bef9SDimitry Andric 
978e8d8bef9SDimitry Andric     // Ensure operation has completed at system scope to cause all volatile
979e8d8bef9SDimitry Andric     // operations to be visible outside the program in a global order. Do not
980e8d8bef9SDimitry Andric     // request cross address space as only the global address space can be
981e8d8bef9SDimitry Andric     // observable outside the program, so no need to cause a waitcnt for LDS
982e8d8bef9SDimitry Andric     // address space operations.
983e8d8bef9SDimitry Andric     Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
984e8d8bef9SDimitry Andric                           Position::AFTER);
9850b57cec5SDimitry Andric 
9860b57cec5SDimitry Andric     return Changed;
9870b57cec5SDimitry Andric   }
9880b57cec5SDimitry Andric 
989e8d8bef9SDimitry Andric   if (IsNonTemporal) {
9904824e7fdSDimitry Andric     // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
9914824e7fdSDimitry Andric     // for both loads and stores, and the L2 cache policy to STREAM.
992e8d8bef9SDimitry Andric     Changed |= enableGLCBit(MI);
993e8d8bef9SDimitry Andric     Changed |= enableSLCBit(MI);
994e8d8bef9SDimitry Andric     return Changed;
995e8d8bef9SDimitry Andric   }
996e8d8bef9SDimitry Andric 
997e8d8bef9SDimitry Andric   return Changed;
998e8d8bef9SDimitry Andric }
999e8d8bef9SDimitry Andric 
/// Insert an S_WAITCNT strong enough to order memory operations on
/// \p AddrSpace at synchronization scope \p Scope, placed before or after
/// \p MI according to \p Pos. Returns true if an instruction was inserted.
/// Note: \p Op is not consulted on this target; the vmcnt wait does not
/// distinguish loads from stores.
bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace,
                                    SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering,
                                    Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // To insert after MI, temporarily advance the iterator so BuildMI places
  // the wait following MI; the iterator is restored before returning.
  if (Pos == Position::AFTER)
    ++MI;

  // Which hardware counters must be waited to zero.
  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 cache keeps all memory operations in order for
      // wavefronts in the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then an GDS "S_WAITCNT lgkmcnt(0)"
      // is not needed as GDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/LDS memory as GDS operations could be
      // reordered with respect to later global/LDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for
      // the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    // Counters not being waited on are encoded at their maximum (no-wait)
    // value; expcnt is never waited on here.
    unsigned WaitCntImmediate =
      AMDGPU::encodeWaitcnt(IV,
                            VMCnt ? 0 : getVmcntBitMask(IV),
                            getExpcntBitMask(IV),
                            LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    // NOTE(review): the "_soft" waitcnt variant appears intended to let a
    // later pass relax or drop redundant waits — confirm against
    // SIInsertWaitcnts before relying on this.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
1097e8d8bef9SDimitry Andric 
insertAcquire(MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace,Position Pos) const1098e8d8bef9SDimitry Andric bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
10990b57cec5SDimitry Andric                                        SIAtomicScope Scope,
11000b57cec5SDimitry Andric                                        SIAtomicAddrSpace AddrSpace,
11010b57cec5SDimitry Andric                                        Position Pos) const {
11025ffd83dbSDimitry Andric   if (!InsertCacheInv)
11035ffd83dbSDimitry Andric     return false;
11045ffd83dbSDimitry Andric 
11050b57cec5SDimitry Andric   bool Changed = false;
11060b57cec5SDimitry Andric 
11070b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI->getParent();
11080b57cec5SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
11090b57cec5SDimitry Andric 
11100b57cec5SDimitry Andric   if (Pos == Position::AFTER)
11110b57cec5SDimitry Andric     ++MI;
11120b57cec5SDimitry Andric 
11130b57cec5SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
11140b57cec5SDimitry Andric     switch (Scope) {
11150b57cec5SDimitry Andric     case SIAtomicScope::SYSTEM:
11160b57cec5SDimitry Andric     case SIAtomicScope::AGENT:
11170b57cec5SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
11180b57cec5SDimitry Andric       Changed = true;
11190b57cec5SDimitry Andric       break;
11200b57cec5SDimitry Andric     case SIAtomicScope::WORKGROUP:
11210b57cec5SDimitry Andric     case SIAtomicScope::WAVEFRONT:
11220b57cec5SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
11230b57cec5SDimitry Andric       // No cache to invalidate.
11240b57cec5SDimitry Andric       break;
11250b57cec5SDimitry Andric     default:
11260b57cec5SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
11270b57cec5SDimitry Andric     }
11280b57cec5SDimitry Andric   }
11290b57cec5SDimitry Andric 
11300b57cec5SDimitry Andric   /// The scratch address space does not need the global memory cache
11310b57cec5SDimitry Andric   /// to be flushed as all memory operations by the same thread are
11320b57cec5SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
11330b57cec5SDimitry Andric   /// memory.
11340b57cec5SDimitry Andric 
1135e8d8bef9SDimitry Andric   /// Other address spaces do not have a cache.
11360b57cec5SDimitry Andric 
11370b57cec5SDimitry Andric   if (Pos == Position::AFTER)
11380b57cec5SDimitry Andric     --MI;
11390b57cec5SDimitry Andric 
11400b57cec5SDimitry Andric   return Changed;
11410b57cec5SDimitry Andric }
11420b57cec5SDimitry Andric 
insertRelease(MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace,bool IsCrossAddrSpaceOrdering,Position Pos) const1143e8d8bef9SDimitry Andric bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
11440b57cec5SDimitry Andric                                        SIAtomicScope Scope,
11450b57cec5SDimitry Andric                                        SIAtomicAddrSpace AddrSpace,
11460b57cec5SDimitry Andric                                        bool IsCrossAddrSpaceOrdering,
11470b57cec5SDimitry Andric                                        Position Pos) const {
1148e8d8bef9SDimitry Andric   return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1149e8d8bef9SDimitry Andric                     IsCrossAddrSpaceOrdering, Pos);
11500b57cec5SDimitry Andric }
11510b57cec5SDimitry Andric 
insertAcquire(MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace,Position Pos) const1152e8d8bef9SDimitry Andric bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
11530b57cec5SDimitry Andric                                        SIAtomicScope Scope,
11540b57cec5SDimitry Andric                                        SIAtomicAddrSpace AddrSpace,
11550b57cec5SDimitry Andric                                        Position Pos) const {
11565ffd83dbSDimitry Andric   if (!InsertCacheInv)
11575ffd83dbSDimitry Andric     return false;
11585ffd83dbSDimitry Andric 
11590b57cec5SDimitry Andric   bool Changed = false;
11600b57cec5SDimitry Andric 
11610b57cec5SDimitry Andric   MachineBasicBlock &MBB = *MI->getParent();
11620b57cec5SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
11630b57cec5SDimitry Andric 
11640b57cec5SDimitry Andric   const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();
11650b57cec5SDimitry Andric 
1166e8d8bef9SDimitry Andric   const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
11670b57cec5SDimitry Andric                                     ? AMDGPU::BUFFER_WBINVL1
11680b57cec5SDimitry Andric                                     : AMDGPU::BUFFER_WBINVL1_VOL;
11690b57cec5SDimitry Andric 
11700b57cec5SDimitry Andric   if (Pos == Position::AFTER)
11710b57cec5SDimitry Andric     ++MI;
11720b57cec5SDimitry Andric 
11730b57cec5SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
11740b57cec5SDimitry Andric     switch (Scope) {
11750b57cec5SDimitry Andric     case SIAtomicScope::SYSTEM:
11760b57cec5SDimitry Andric     case SIAtomicScope::AGENT:
1177e8d8bef9SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
11780b57cec5SDimitry Andric       Changed = true;
11790b57cec5SDimitry Andric       break;
11800b57cec5SDimitry Andric     case SIAtomicScope::WORKGROUP:
11810b57cec5SDimitry Andric     case SIAtomicScope::WAVEFRONT:
11820b57cec5SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
11830b57cec5SDimitry Andric       // No cache to invalidate.
11840b57cec5SDimitry Andric       break;
11850b57cec5SDimitry Andric     default:
11860b57cec5SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
11870b57cec5SDimitry Andric     }
11880b57cec5SDimitry Andric   }
11890b57cec5SDimitry Andric 
11900b57cec5SDimitry Andric   /// The scratch address space does not need the global memory cache
11910b57cec5SDimitry Andric   /// to be flushed as all memory operations by the same thread are
11920b57cec5SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
11930b57cec5SDimitry Andric   /// memory.
11940b57cec5SDimitry Andric 
1195e8d8bef9SDimitry Andric   /// Other address spaces do not have a cache.
11960b57cec5SDimitry Andric 
11970b57cec5SDimitry Andric   if (Pos == Position::AFTER)
11980b57cec5SDimitry Andric     --MI;
11990b57cec5SDimitry Andric 
12000b57cec5SDimitry Andric   return Changed;
12010b57cec5SDimitry Andric }
12020b57cec5SDimitry Andric 
enableLoadCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const1203fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableLoadCacheBypass(
1204fe6060f1SDimitry Andric     const MachineBasicBlock::iterator &MI,
1205fe6060f1SDimitry Andric     SIAtomicScope Scope,
1206fe6060f1SDimitry Andric     SIAtomicAddrSpace AddrSpace) const {
1207fe6060f1SDimitry Andric   assert(MI->mayLoad() && !MI->mayStore());
1208fe6060f1SDimitry Andric   bool Changed = false;
1209fe6060f1SDimitry Andric 
1210fe6060f1SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1211fe6060f1SDimitry Andric     switch (Scope) {
1212fe6060f1SDimitry Andric     case SIAtomicScope::SYSTEM:
1213fe6060f1SDimitry Andric     case SIAtomicScope::AGENT:
12144824e7fdSDimitry Andric       // Set the L1 cache policy to MISS_LRU.
12154824e7fdSDimitry Andric       // Note: there is no L2 cache bypass policy at the ISA level.
1216fe6060f1SDimitry Andric       Changed |= enableGLCBit(MI);
1217fe6060f1SDimitry Andric       break;
1218fe6060f1SDimitry Andric     case SIAtomicScope::WORKGROUP:
1219fe6060f1SDimitry Andric       // In threadgroup split mode the waves of a work-group can be executing on
1220fe6060f1SDimitry Andric       // different CUs. Therefore need to bypass the L1 which is per CU.
1221fe6060f1SDimitry Andric       // Otherwise in non-threadgroup split mode all waves of a work-group are
1222fe6060f1SDimitry Andric       // on the same CU, and so the L1 does not need to be bypassed.
1223349cc55cSDimitry Andric       if (ST.isTgSplitEnabled())
1224349cc55cSDimitry Andric         Changed |= enableGLCBit(MI);
1225fe6060f1SDimitry Andric       break;
1226fe6060f1SDimitry Andric     case SIAtomicScope::WAVEFRONT:
1227fe6060f1SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
1228fe6060f1SDimitry Andric       // No cache to bypass.
1229fe6060f1SDimitry Andric       break;
1230fe6060f1SDimitry Andric     default:
1231fe6060f1SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
1232fe6060f1SDimitry Andric     }
1233fe6060f1SDimitry Andric   }
1234fe6060f1SDimitry Andric 
1235fe6060f1SDimitry Andric   /// The scratch address space does not need the global memory caches
1236fe6060f1SDimitry Andric   /// to be bypassed as all memory operations by the same thread are
1237fe6060f1SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
1238fe6060f1SDimitry Andric   /// memory.
1239fe6060f1SDimitry Andric 
1240fe6060f1SDimitry Andric   /// Other address spaces do not have a cache.
1241fe6060f1SDimitry Andric 
1242fe6060f1SDimitry Andric   return Changed;
1243fe6060f1SDimitry Andric }
1244fe6060f1SDimitry Andric 
enableStoreCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const1245fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableStoreCacheBypass(
1246fe6060f1SDimitry Andric     const MachineBasicBlock::iterator &MI,
1247fe6060f1SDimitry Andric     SIAtomicScope Scope,
1248fe6060f1SDimitry Andric     SIAtomicAddrSpace AddrSpace) const {
1249fe6060f1SDimitry Andric   assert(!MI->mayLoad() && MI->mayStore());
1250fe6060f1SDimitry Andric   bool Changed = false;
1251fe6060f1SDimitry Andric 
1252fe6060f1SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1253fe6060f1SDimitry Andric     switch (Scope) {
1254fe6060f1SDimitry Andric     case SIAtomicScope::SYSTEM:
1255fe6060f1SDimitry Andric     case SIAtomicScope::AGENT:
1256fe6060f1SDimitry Andric       /// Do not set glc for store atomic operations as they implicitly write
1257fe6060f1SDimitry Andric       /// through the L1 cache.
1258fe6060f1SDimitry Andric       break;
1259fe6060f1SDimitry Andric     case SIAtomicScope::WORKGROUP:
1260fe6060f1SDimitry Andric     case SIAtomicScope::WAVEFRONT:
1261fe6060f1SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
1262fe6060f1SDimitry Andric       // No cache to bypass. Store atomics implicitly write through the L1
1263fe6060f1SDimitry Andric       // cache.
1264fe6060f1SDimitry Andric       break;
1265fe6060f1SDimitry Andric     default:
1266fe6060f1SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
1267fe6060f1SDimitry Andric     }
1268fe6060f1SDimitry Andric   }
1269fe6060f1SDimitry Andric 
1270fe6060f1SDimitry Andric   /// The scratch address space does not need the global memory caches
1271fe6060f1SDimitry Andric   /// to be bypassed as all memory operations by the same thread are
1272fe6060f1SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
1273fe6060f1SDimitry Andric   /// memory.
1274fe6060f1SDimitry Andric 
1275fe6060f1SDimitry Andric   /// Other address spaces do not have a cache.
1276fe6060f1SDimitry Andric 
1277fe6060f1SDimitry Andric   return Changed;
1278fe6060f1SDimitry Andric }
1279fe6060f1SDimitry Andric 
enableRMWCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const1280fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableRMWCacheBypass(
1281fe6060f1SDimitry Andric     const MachineBasicBlock::iterator &MI,
1282fe6060f1SDimitry Andric     SIAtomicScope Scope,
1283fe6060f1SDimitry Andric     SIAtomicAddrSpace AddrSpace) const {
1284fe6060f1SDimitry Andric   assert(MI->mayLoad() && MI->mayStore());
1285fe6060f1SDimitry Andric   bool Changed = false;
1286fe6060f1SDimitry Andric 
1287fe6060f1SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1288fe6060f1SDimitry Andric     switch (Scope) {
1289fe6060f1SDimitry Andric     case SIAtomicScope::SYSTEM:
1290fe6060f1SDimitry Andric     case SIAtomicScope::AGENT:
1291fe6060f1SDimitry Andric       /// Do not set glc for RMW atomic operations as they implicitly bypass
1292fe6060f1SDimitry Andric       /// the L1 cache, and the glc bit is instead used to indicate if they are
1293fe6060f1SDimitry Andric       /// return or no-return.
1294fe6060f1SDimitry Andric       break;
1295fe6060f1SDimitry Andric     case SIAtomicScope::WORKGROUP:
1296fe6060f1SDimitry Andric     case SIAtomicScope::WAVEFRONT:
1297fe6060f1SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
1298fe6060f1SDimitry Andric       // No cache to bypass. RMW atomics implicitly bypass the L1 cache.
1299fe6060f1SDimitry Andric       break;
1300fe6060f1SDimitry Andric     default:
1301fe6060f1SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
1302fe6060f1SDimitry Andric     }
1303fe6060f1SDimitry Andric   }
1304fe6060f1SDimitry Andric 
1305fe6060f1SDimitry Andric   return Changed;
1306fe6060f1SDimitry Andric }
1307fe6060f1SDimitry Andric 
enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator & MI,SIAtomicAddrSpace AddrSpace,SIMemOp Op,bool IsVolatile,bool IsNonTemporal) const1308fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
1309fe6060f1SDimitry Andric     MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1310fe6060f1SDimitry Andric     bool IsVolatile, bool IsNonTemporal) const {
1311fe6060f1SDimitry Andric   // Only handle load and store, not atomic read-modify-write insructions. The
1312fe6060f1SDimitry Andric   // latter use glc to indicate if the atomic returns a result and so must not
1313fe6060f1SDimitry Andric   // be used for cache control.
1314fe6060f1SDimitry Andric   assert(MI->mayLoad() ^ MI->mayStore());
1315fe6060f1SDimitry Andric 
1316fe6060f1SDimitry Andric   // Only update load and store, not LLVM IR atomic read-modify-write
1317fe6060f1SDimitry Andric   // instructions. The latter are always marked as volatile so cannot sensibly
1318fe6060f1SDimitry Andric   // handle it as do not want to pessimize all atomics. Also they do not support
1319fe6060f1SDimitry Andric   // the nontemporal attribute.
1320fe6060f1SDimitry Andric   assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
1321fe6060f1SDimitry Andric 
1322fe6060f1SDimitry Andric   bool Changed = false;
1323fe6060f1SDimitry Andric 
1324fe6060f1SDimitry Andric   if (IsVolatile) {
13254824e7fdSDimitry Andric     // Set L1 cache policy to be MISS_EVICT for load instructions
13264824e7fdSDimitry Andric     // and MISS_LRU for store instructions.
13274824e7fdSDimitry Andric     // Note: there is no L2 cache bypass policy at the ISA level.
1328349cc55cSDimitry Andric     if (Op == SIMemOp::LOAD)
1329fe6060f1SDimitry Andric       Changed |= enableGLCBit(MI);
1330fe6060f1SDimitry Andric 
1331fe6060f1SDimitry Andric     // Ensure operation has completed at system scope to cause all volatile
1332fe6060f1SDimitry Andric     // operations to be visible outside the program in a global order. Do not
1333fe6060f1SDimitry Andric     // request cross address space as only the global address space can be
1334fe6060f1SDimitry Andric     // observable outside the program, so no need to cause a waitcnt for LDS
1335fe6060f1SDimitry Andric     // address space operations.
1336fe6060f1SDimitry Andric     Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
1337fe6060f1SDimitry Andric                           Position::AFTER);
1338fe6060f1SDimitry Andric 
1339fe6060f1SDimitry Andric     return Changed;
1340fe6060f1SDimitry Andric   }
1341fe6060f1SDimitry Andric 
1342fe6060f1SDimitry Andric   if (IsNonTemporal) {
13434824e7fdSDimitry Andric     // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
13444824e7fdSDimitry Andric     // for both loads and stores, and the L2 cache policy to STREAM.
1345fe6060f1SDimitry Andric     Changed |= enableGLCBit(MI);
1346fe6060f1SDimitry Andric     Changed |= enableSLCBit(MI);
1347fe6060f1SDimitry Andric     return Changed;
1348fe6060f1SDimitry Andric   }
1349fe6060f1SDimitry Andric 
1350fe6060f1SDimitry Andric   return Changed;
1351fe6060f1SDimitry Andric }
1352fe6060f1SDimitry Andric 
insertWait(MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace,SIMemOp Op,bool IsCrossAddrSpaceOrdering,Position Pos) const1353fe6060f1SDimitry Andric bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
1354fe6060f1SDimitry Andric                                       SIAtomicScope Scope,
1355fe6060f1SDimitry Andric                                       SIAtomicAddrSpace AddrSpace,
1356fe6060f1SDimitry Andric                                       SIMemOp Op,
1357fe6060f1SDimitry Andric                                       bool IsCrossAddrSpaceOrdering,
1358fe6060f1SDimitry Andric                                       Position Pos) const {
1359fe6060f1SDimitry Andric   if (ST.isTgSplitEnabled()) {
1360fe6060f1SDimitry Andric     // In threadgroup split mode the waves of a work-group can be executing on
1361fe6060f1SDimitry Andric     // different CUs. Therefore need to wait for global or GDS memory operations
1362fe6060f1SDimitry Andric     // to complete to ensure they are visible to waves in the other CUs.
1363fe6060f1SDimitry Andric     // Otherwise in non-threadgroup split mode all waves of a work-group are on
1364fe6060f1SDimitry Andric     // the same CU, so no need to wait for global memory as all waves in the
1365fe6060f1SDimitry Andric     // work-group access the same the L1, nor wait for GDS as access are ordered
1366fe6060f1SDimitry Andric     // on a CU.
1367fe6060f1SDimitry Andric     if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
1368fe6060f1SDimitry Andric                        SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
1369fe6060f1SDimitry Andric         (Scope == SIAtomicScope::WORKGROUP)) {
1370fe6060f1SDimitry Andric       // Same as GFX7 using agent scope.
1371fe6060f1SDimitry Andric       Scope = SIAtomicScope::AGENT;
1372fe6060f1SDimitry Andric     }
1373fe6060f1SDimitry Andric     // In threadgroup split mode LDS cannot be allocated so no need to wait for
1374fe6060f1SDimitry Andric     // LDS memory operations.
1375fe6060f1SDimitry Andric     AddrSpace &= ~SIAtomicAddrSpace::LDS;
1376fe6060f1SDimitry Andric   }
1377fe6060f1SDimitry Andric   return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
1378fe6060f1SDimitry Andric                                         IsCrossAddrSpaceOrdering, Pos);
1379fe6060f1SDimitry Andric }
1380fe6060f1SDimitry Andric 
/// Insert the cache invalidations implementing the acquire side of an atomic
/// for GFX90A, before or after \p MI according to \p Pos. The L1 invalidate
/// is delegated to the GFX7 implementation at the end, possibly with
/// \p Scope promoted to agent scope (see the WORKGROUP case). Returns true
/// if any instruction was inserted.
bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  // Respect the option that suppresses cache invalidations entirely.
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // To insert after MI, temporarily advance the iterator; it is restored
  // below before the delegated GFX7 call (which re-applies the adjustment
  // itself).
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Ensures that following loads will not see stale remote VMEM data or
      // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW and
      // CC will never be stale due to the local memory probes.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2));
      // Inserting a "S_WAITCNT vmcnt(0)" after is not required because the
      // hardware does not reorder memory operations by the same wave with
      // respect to a preceding "BUFFER_INVL2". The invalidate is guaranteed to
      // remove any cache lines of earlier writes by the same wave and ensures
      // later reads by the same wave will refetch the cache lines.
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // Same as GFX7.
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing on
      // different CUs. Therefore need to invalidate the L1 which is per CU.
      // Otherwise in non-threadgroup split mode all waves of a work-group are
      // on the same CU, and so the L1 does not need to be invalidated.
      if (ST.isTgSplitEnabled()) {
        // Same as GFX7 using agent scope.
        // Note: the promoted Scope is consumed by the delegated GFX7 call
        // at the end of this function.
        Scope = SIAtomicScope::AGENT;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Same as GFX7.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  // Delegate the per-CU L1 invalidate (BUFFER_WBINVL1*) to GFX7, using the
  // possibly-promoted Scope.
  Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);

  return Changed;
}
1446fe6060f1SDimitry Andric 
/// GFX90A release: for system scope, initiate an L2 writeback with
/// "BUFFER_WBL2" before delegating to the GFX7 implementation, which emits the
/// "S_WAITCNT vmcnt(0)" that waits for the writeback to complete. All other
/// scopes are handled entirely by the GFX7 implementation.
bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  // Step past MI so the writeback is inserted after it; the iterator is
  // restored below before delegating to the GFX7 implementation.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
      // hardware does not reorder memory operations by the same wave with
      // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is guaranteed
      // to initiate writeback of any dirty cache lines of earlier writes by the
      // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
      // writeback has completed.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
        // Set SC bits to indicate system scope.
        .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      // Followed by same as GFX7, which will ensure the necessary "S_WAITCNT
      // vmcnt(0)" needed by the "BUFFER_WBL2".
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Same as GFX7.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // Restore the iterator to its original position before delegating.
  if (Pos == Position::AFTER)
    --MI;

  Changed |=
      SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace,
                                        IsCrossAddrSpaceOrdering, Pos);

  return Changed;
}
1496fe6060f1SDimitry Andric 
enableLoadCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const149781ad6265SDimitry Andric bool SIGfx940CacheControl::enableLoadCacheBypass(
149881ad6265SDimitry Andric     const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
149981ad6265SDimitry Andric     SIAtomicAddrSpace AddrSpace) const {
150081ad6265SDimitry Andric   assert(MI->mayLoad() && !MI->mayStore());
150181ad6265SDimitry Andric   bool Changed = false;
150281ad6265SDimitry Andric 
150381ad6265SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
150481ad6265SDimitry Andric     switch (Scope) {
150581ad6265SDimitry Andric     case SIAtomicScope::SYSTEM:
150681ad6265SDimitry Andric       // Set SC bits to indicate system scope.
150781ad6265SDimitry Andric       Changed |= enableSC0Bit(MI);
150881ad6265SDimitry Andric       Changed |= enableSC1Bit(MI);
150981ad6265SDimitry Andric       break;
151081ad6265SDimitry Andric     case SIAtomicScope::AGENT:
151181ad6265SDimitry Andric       // Set SC bits to indicate agent scope.
151281ad6265SDimitry Andric       Changed |= enableSC1Bit(MI);
151381ad6265SDimitry Andric       break;
151481ad6265SDimitry Andric     case SIAtomicScope::WORKGROUP:
151581ad6265SDimitry Andric       // In threadgroup split mode the waves of a work-group can be executing on
151681ad6265SDimitry Andric       // different CUs. Therefore need to bypass the L1 which is per CU.
151781ad6265SDimitry Andric       // Otherwise in non-threadgroup split mode all waves of a work-group are
151881ad6265SDimitry Andric       // on the same CU, and so the L1 does not need to be bypassed. Setting SC
151981ad6265SDimitry Andric       // bits to indicate work-group scope will do this automatically.
152081ad6265SDimitry Andric       Changed |= enableSC0Bit(MI);
152181ad6265SDimitry Andric       break;
152281ad6265SDimitry Andric     case SIAtomicScope::WAVEFRONT:
152381ad6265SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
152481ad6265SDimitry Andric       // Leave SC bits unset to indicate wavefront scope.
152581ad6265SDimitry Andric       break;
152681ad6265SDimitry Andric     default:
152781ad6265SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
152881ad6265SDimitry Andric     }
152981ad6265SDimitry Andric   }
153081ad6265SDimitry Andric 
153181ad6265SDimitry Andric   /// The scratch address space does not need the global memory caches
153281ad6265SDimitry Andric   /// to be bypassed as all memory operations by the same thread are
153381ad6265SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
153481ad6265SDimitry Andric   /// memory.
153581ad6265SDimitry Andric 
153681ad6265SDimitry Andric   /// Other address spaces do not have a cache.
153781ad6265SDimitry Andric 
153881ad6265SDimitry Andric   return Changed;
153981ad6265SDimitry Andric }
154081ad6265SDimitry Andric 
enableStoreCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const154181ad6265SDimitry Andric bool SIGfx940CacheControl::enableStoreCacheBypass(
154281ad6265SDimitry Andric     const MachineBasicBlock::iterator &MI,
154381ad6265SDimitry Andric     SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const {
154481ad6265SDimitry Andric   assert(!MI->mayLoad() && MI->mayStore());
154581ad6265SDimitry Andric   bool Changed = false;
154681ad6265SDimitry Andric 
154781ad6265SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
154881ad6265SDimitry Andric     switch (Scope) {
154981ad6265SDimitry Andric     case SIAtomicScope::SYSTEM:
155081ad6265SDimitry Andric       // Set SC bits to indicate system scope.
155181ad6265SDimitry Andric       Changed |= enableSC0Bit(MI);
155281ad6265SDimitry Andric       Changed |= enableSC1Bit(MI);
155381ad6265SDimitry Andric       break;
155481ad6265SDimitry Andric     case SIAtomicScope::AGENT:
155581ad6265SDimitry Andric       // Set SC bits to indicate agent scope.
155681ad6265SDimitry Andric       Changed |= enableSC1Bit(MI);
155781ad6265SDimitry Andric       break;
155881ad6265SDimitry Andric     case SIAtomicScope::WORKGROUP:
155981ad6265SDimitry Andric       // Set SC bits to indicate workgroup scope.
156081ad6265SDimitry Andric       Changed |= enableSC0Bit(MI);
156181ad6265SDimitry Andric       break;
156281ad6265SDimitry Andric     case SIAtomicScope::WAVEFRONT:
156381ad6265SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
156481ad6265SDimitry Andric       // Leave SC bits unset to indicate wavefront scope.
156581ad6265SDimitry Andric       break;
156681ad6265SDimitry Andric     default:
156781ad6265SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
156881ad6265SDimitry Andric     }
156981ad6265SDimitry Andric   }
157081ad6265SDimitry Andric 
157181ad6265SDimitry Andric   /// The scratch address space does not need the global memory caches
157281ad6265SDimitry Andric   /// to be bypassed as all memory operations by the same thread are
157381ad6265SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
157481ad6265SDimitry Andric   /// memory.
157581ad6265SDimitry Andric 
157681ad6265SDimitry Andric   /// Other address spaces do not have a cache.
157781ad6265SDimitry Andric 
157881ad6265SDimitry Andric   return Changed;
157981ad6265SDimitry Andric }
158081ad6265SDimitry Andric 
enableRMWCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const158181ad6265SDimitry Andric bool SIGfx940CacheControl::enableRMWCacheBypass(
158281ad6265SDimitry Andric     const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
158381ad6265SDimitry Andric     SIAtomicAddrSpace AddrSpace) const {
158481ad6265SDimitry Andric   assert(MI->mayLoad() && MI->mayStore());
158581ad6265SDimitry Andric   bool Changed = false;
158681ad6265SDimitry Andric 
158781ad6265SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
158881ad6265SDimitry Andric     switch (Scope) {
158981ad6265SDimitry Andric     case SIAtomicScope::SYSTEM:
159081ad6265SDimitry Andric       // Set SC1 bit to indicate system scope.
159181ad6265SDimitry Andric       Changed |= enableSC1Bit(MI);
159281ad6265SDimitry Andric       break;
159381ad6265SDimitry Andric     case SIAtomicScope::AGENT:
159481ad6265SDimitry Andric     case SIAtomicScope::WORKGROUP:
159581ad6265SDimitry Andric     case SIAtomicScope::WAVEFRONT:
159681ad6265SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
159781ad6265SDimitry Andric       // RMW atomic operations implicitly bypass the L1 cache and only use SC1
159881ad6265SDimitry Andric       // to indicate system or agent scope. The SC0 bit is used to indicate if
159981ad6265SDimitry Andric       // they are return or no-return. Leave SC1 bit unset to indicate agent
160081ad6265SDimitry Andric       // scope.
160181ad6265SDimitry Andric       break;
160281ad6265SDimitry Andric     default:
160381ad6265SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
160481ad6265SDimitry Andric     }
160581ad6265SDimitry Andric   }
160681ad6265SDimitry Andric 
160781ad6265SDimitry Andric   return Changed;
160881ad6265SDimitry Andric }
160981ad6265SDimitry Andric 
enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator & MI,SIAtomicAddrSpace AddrSpace,SIMemOp Op,bool IsVolatile,bool IsNonTemporal) const161081ad6265SDimitry Andric bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
161181ad6265SDimitry Andric     MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
161281ad6265SDimitry Andric     bool IsVolatile, bool IsNonTemporal) const {
161381ad6265SDimitry Andric   // Only handle load and store, not atomic read-modify-write insructions. The
161481ad6265SDimitry Andric   // latter use glc to indicate if the atomic returns a result and so must not
161581ad6265SDimitry Andric   // be used for cache control.
161681ad6265SDimitry Andric   assert(MI->mayLoad() ^ MI->mayStore());
161781ad6265SDimitry Andric 
161881ad6265SDimitry Andric   // Only update load and store, not LLVM IR atomic read-modify-write
161981ad6265SDimitry Andric   // instructions. The latter are always marked as volatile so cannot sensibly
162081ad6265SDimitry Andric   // handle it as do not want to pessimize all atomics. Also they do not support
162181ad6265SDimitry Andric   // the nontemporal attribute.
162281ad6265SDimitry Andric   assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
162381ad6265SDimitry Andric 
162481ad6265SDimitry Andric   bool Changed = false;
162581ad6265SDimitry Andric 
162681ad6265SDimitry Andric   if (IsVolatile) {
162781ad6265SDimitry Andric     // Set SC bits to indicate system scope.
162881ad6265SDimitry Andric     Changed |= enableSC0Bit(MI);
162981ad6265SDimitry Andric     Changed |= enableSC1Bit(MI);
163081ad6265SDimitry Andric 
163181ad6265SDimitry Andric     // Ensure operation has completed at system scope to cause all volatile
163281ad6265SDimitry Andric     // operations to be visible outside the program in a global order. Do not
163381ad6265SDimitry Andric     // request cross address space as only the global address space can be
163481ad6265SDimitry Andric     // observable outside the program, so no need to cause a waitcnt for LDS
163581ad6265SDimitry Andric     // address space operations.
163681ad6265SDimitry Andric     Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
163781ad6265SDimitry Andric                           Position::AFTER);
163881ad6265SDimitry Andric 
163981ad6265SDimitry Andric     return Changed;
164081ad6265SDimitry Andric   }
164181ad6265SDimitry Andric 
164281ad6265SDimitry Andric   if (IsNonTemporal) {
164381ad6265SDimitry Andric     Changed |= enableNTBit(MI);
164481ad6265SDimitry Andric     return Changed;
164581ad6265SDimitry Andric   }
164681ad6265SDimitry Andric 
164781ad6265SDimitry Andric   return Changed;
164881ad6265SDimitry Andric }
164981ad6265SDimitry Andric 
insertAcquire(MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace,Position Pos) const165081ad6265SDimitry Andric bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
165181ad6265SDimitry Andric                                          SIAtomicScope Scope,
165281ad6265SDimitry Andric                                          SIAtomicAddrSpace AddrSpace,
165381ad6265SDimitry Andric                                          Position Pos) const {
165481ad6265SDimitry Andric   if (!InsertCacheInv)
165581ad6265SDimitry Andric     return false;
165681ad6265SDimitry Andric 
165781ad6265SDimitry Andric   bool Changed = false;
165881ad6265SDimitry Andric 
165981ad6265SDimitry Andric   MachineBasicBlock &MBB = *MI->getParent();
166081ad6265SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
166181ad6265SDimitry Andric 
166281ad6265SDimitry Andric   if (Pos == Position::AFTER)
166381ad6265SDimitry Andric     ++MI;
166481ad6265SDimitry Andric 
166581ad6265SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
166681ad6265SDimitry Andric     switch (Scope) {
166781ad6265SDimitry Andric     case SIAtomicScope::SYSTEM:
166881ad6265SDimitry Andric       // Ensures that following loads will not see stale remote VMEM data or
166981ad6265SDimitry Andric       // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW and
167081ad6265SDimitry Andric       // CC will never be stale due to the local memory probes.
167181ad6265SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
167281ad6265SDimitry Andric           // Set SC bits to indicate system scope.
167381ad6265SDimitry Andric           .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
167481ad6265SDimitry Andric       // Inserting a "S_WAITCNT vmcnt(0)" after is not required because the
167581ad6265SDimitry Andric       // hardware does not reorder memory operations by the same wave with
167681ad6265SDimitry Andric       // respect to a preceding "BUFFER_INV". The invalidate is guaranteed to
167781ad6265SDimitry Andric       // remove any cache lines of earlier writes by the same wave and ensures
167881ad6265SDimitry Andric       // later reads by the same wave will refetch the cache lines.
167981ad6265SDimitry Andric       Changed = true;
168081ad6265SDimitry Andric       break;
168181ad6265SDimitry Andric     case SIAtomicScope::AGENT:
168281ad6265SDimitry Andric       // Ensures that following loads will not see stale remote date or local
168381ad6265SDimitry Andric       // MTYPE NC global data. Local MTYPE RW and CC memory will never be stale
168481ad6265SDimitry Andric       // due to the memory probes.
168581ad6265SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
168681ad6265SDimitry Andric           // Set SC bits to indicate agent scope.
168781ad6265SDimitry Andric           .addImm(AMDGPU::CPol::SC1);
168881ad6265SDimitry Andric       // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
168981ad6265SDimitry Andric       // does not reorder memory operations with respect to preceeding buffer
169081ad6265SDimitry Andric       // invalidate. The invalidate is guaranteed to remove any cache lines of
169181ad6265SDimitry Andric       // earlier writes and ensures later writes will refetch the cache lines.
169281ad6265SDimitry Andric       Changed = true;
169381ad6265SDimitry Andric       break;
169481ad6265SDimitry Andric     case SIAtomicScope::WORKGROUP:
169581ad6265SDimitry Andric       // In threadgroup split mode the waves of a work-group can be executing on
169681ad6265SDimitry Andric       // different CUs. Therefore need to invalidate the L1 which is per CU.
169781ad6265SDimitry Andric       // Otherwise in non-threadgroup split mode all waves of a work-group are
169881ad6265SDimitry Andric       // on the same CU, and so the L1 does not need to be invalidated.
169981ad6265SDimitry Andric       if (ST.isTgSplitEnabled()) {
170081ad6265SDimitry Andric         // Ensures L1 is invalidated if in threadgroup split mode. In
170181ad6265SDimitry Andric         // non-threadgroup split mode it is a NOP, but no point generating it in
170281ad6265SDimitry Andric         // that case if know not in that mode.
170381ad6265SDimitry Andric         BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
170481ad6265SDimitry Andric             // Set SC bits to indicate work-group scope.
170581ad6265SDimitry Andric             .addImm(AMDGPU::CPol::SC0);
170681ad6265SDimitry Andric         // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
170781ad6265SDimitry Andric         // does not reorder memory operations with respect to preceeding buffer
170881ad6265SDimitry Andric         // invalidate. The invalidate is guaranteed to remove any cache lines of
170981ad6265SDimitry Andric         // earlier writes and ensures later writes will refetch the cache lines.
171081ad6265SDimitry Andric         Changed = true;
171181ad6265SDimitry Andric       }
171281ad6265SDimitry Andric       break;
171381ad6265SDimitry Andric     case SIAtomicScope::WAVEFRONT:
171481ad6265SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
171581ad6265SDimitry Andric       // Could generate "BUFFER_INV" but it would do nothing as there are no
171681ad6265SDimitry Andric       // caches to invalidate.
171781ad6265SDimitry Andric       break;
171881ad6265SDimitry Andric     default:
171981ad6265SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
172081ad6265SDimitry Andric     }
172181ad6265SDimitry Andric   }
172281ad6265SDimitry Andric 
172381ad6265SDimitry Andric   /// The scratch address space does not need the global memory cache
172481ad6265SDimitry Andric   /// to be flushed as all memory operations by the same thread are
172581ad6265SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
172681ad6265SDimitry Andric   /// memory.
172781ad6265SDimitry Andric 
172881ad6265SDimitry Andric   /// Other address spaces do not have a cache.
172981ad6265SDimitry Andric 
173081ad6265SDimitry Andric   if (Pos == Position::AFTER)
173181ad6265SDimitry Andric     --MI;
173281ad6265SDimitry Andric 
173381ad6265SDimitry Andric   return Changed;
173481ad6265SDimitry Andric }
173581ad6265SDimitry Andric 
insertRelease(MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace,bool IsCrossAddrSpaceOrdering,Position Pos) const173681ad6265SDimitry Andric bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
173781ad6265SDimitry Andric                                          SIAtomicScope Scope,
173881ad6265SDimitry Andric                                          SIAtomicAddrSpace AddrSpace,
173981ad6265SDimitry Andric                                          bool IsCrossAddrSpaceOrdering,
174081ad6265SDimitry Andric                                          Position Pos) const {
174181ad6265SDimitry Andric   bool Changed = false;
174281ad6265SDimitry Andric 
174381ad6265SDimitry Andric   MachineBasicBlock &MBB = *MI->getParent();
174481ad6265SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
174581ad6265SDimitry Andric 
174681ad6265SDimitry Andric   if (Pos == Position::AFTER)
174781ad6265SDimitry Andric     ++MI;
174881ad6265SDimitry Andric 
174981ad6265SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
175081ad6265SDimitry Andric     switch (Scope) {
175181ad6265SDimitry Andric     case SIAtomicScope::SYSTEM:
175281ad6265SDimitry Andric       // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
175381ad6265SDimitry Andric       // hardware does not reorder memory operations by the same wave with
175481ad6265SDimitry Andric       // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is guaranteed
175581ad6265SDimitry Andric       // to initiate writeback of any dirty cache lines of earlier writes by the
175681ad6265SDimitry Andric       // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
175781ad6265SDimitry Andric       // writeback has completed.
175881ad6265SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
175981ad6265SDimitry Andric           // Set SC bits to indicate system scope.
176081ad6265SDimitry Andric           .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
176181ad6265SDimitry Andric       // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is
176281ad6265SDimitry Andric       // SIAtomicScope::SYSTEM, the following insertWait will generate the
176381ad6265SDimitry Andric       // required "S_WAITCNT vmcnt(0)" needed by the "BUFFER_WBL2".
176481ad6265SDimitry Andric       Changed = true;
176581ad6265SDimitry Andric       break;
176681ad6265SDimitry Andric     case SIAtomicScope::AGENT:
176781ad6265SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
176881ad6265SDimitry Andric           // Set SC bits to indicate agent scope.
176981ad6265SDimitry Andric           .addImm(AMDGPU::CPol::SC1);
177081ad6265SDimitry Andric 
177181ad6265SDimitry Andric       // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is
177281ad6265SDimitry Andric       // SIAtomicScope::AGENT, the following insertWait will generate the
177381ad6265SDimitry Andric       // required "S_WAITCNT vmcnt(0)".
177481ad6265SDimitry Andric       Changed = true;
177581ad6265SDimitry Andric       break;
177681ad6265SDimitry Andric     case SIAtomicScope::WORKGROUP:
177781ad6265SDimitry Andric     case SIAtomicScope::WAVEFRONT:
177881ad6265SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
177981ad6265SDimitry Andric       // Do not generate "BUFFER_WBL2" as there are no caches it would
178081ad6265SDimitry Andric       // writeback, and would require an otherwise unnecessary
178181ad6265SDimitry Andric       // "S_WAITCNT vmcnt(0)".
178281ad6265SDimitry Andric       break;
178381ad6265SDimitry Andric     default:
178481ad6265SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
178581ad6265SDimitry Andric     }
178681ad6265SDimitry Andric   }
178781ad6265SDimitry Andric 
178881ad6265SDimitry Andric   if (Pos == Position::AFTER)
178981ad6265SDimitry Andric     --MI;
179081ad6265SDimitry Andric 
179181ad6265SDimitry Andric   // Ensure the necessary S_WAITCNT needed by any "BUFFER_WBL2" as well as other
179281ad6265SDimitry Andric   // S_WAITCNT needed.
179381ad6265SDimitry Andric   Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
179481ad6265SDimitry Andric                         IsCrossAddrSpaceOrdering, Pos);
179581ad6265SDimitry Andric 
179681ad6265SDimitry Andric   return Changed;
179781ad6265SDimitry Andric }
179881ad6265SDimitry Andric 
enableLoadCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const17990b57cec5SDimitry Andric bool SIGfx10CacheControl::enableLoadCacheBypass(
18000b57cec5SDimitry Andric     const MachineBasicBlock::iterator &MI,
18010b57cec5SDimitry Andric     SIAtomicScope Scope,
18020b57cec5SDimitry Andric     SIAtomicAddrSpace AddrSpace) const {
18030b57cec5SDimitry Andric   assert(MI->mayLoad() && !MI->mayStore());
18040b57cec5SDimitry Andric   bool Changed = false;
18050b57cec5SDimitry Andric 
18060b57cec5SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
18070b57cec5SDimitry Andric     switch (Scope) {
18080b57cec5SDimitry Andric     case SIAtomicScope::SYSTEM:
18090b57cec5SDimitry Andric     case SIAtomicScope::AGENT:
18104824e7fdSDimitry Andric       // Set the L0 and L1 cache policies to MISS_EVICT.
18114824e7fdSDimitry Andric       // Note: there is no L2 cache coherent bypass control at the ISA level.
18120b57cec5SDimitry Andric       Changed |= enableGLCBit(MI);
18130b57cec5SDimitry Andric       Changed |= enableDLCBit(MI);
18140b57cec5SDimitry Andric       break;
18150b57cec5SDimitry Andric     case SIAtomicScope::WORKGROUP:
18160b57cec5SDimitry Andric       // In WGP mode the waves of a work-group can be executing on either CU of
18170b57cec5SDimitry Andric       // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
1818e8d8bef9SDimitry Andric       // CU mode all waves of a work-group are on the same CU, and so the L0
1819e8d8bef9SDimitry Andric       // does not need to be bypassed.
1820349cc55cSDimitry Andric       if (!ST.isCuModeEnabled())
1821349cc55cSDimitry Andric         Changed |= enableGLCBit(MI);
18220b57cec5SDimitry Andric       break;
18230b57cec5SDimitry Andric     case SIAtomicScope::WAVEFRONT:
18240b57cec5SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
18250b57cec5SDimitry Andric       // No cache to bypass.
18260b57cec5SDimitry Andric       break;
18270b57cec5SDimitry Andric     default:
18280b57cec5SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
18290b57cec5SDimitry Andric     }
18300b57cec5SDimitry Andric   }
18310b57cec5SDimitry Andric 
18320b57cec5SDimitry Andric   /// The scratch address space does not need the global memory caches
18330b57cec5SDimitry Andric   /// to be bypassed as all memory operations by the same thread are
18340b57cec5SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
18350b57cec5SDimitry Andric   /// memory.
18360b57cec5SDimitry Andric 
1837e8d8bef9SDimitry Andric   /// Other address spaces do not have a cache.
18380b57cec5SDimitry Andric 
18390b57cec5SDimitry Andric   return Changed;
18400b57cec5SDimitry Andric }
18410b57cec5SDimitry Andric 
enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator & MI,SIAtomicAddrSpace AddrSpace,SIMemOp Op,bool IsVolatile,bool IsNonTemporal) const1842e8d8bef9SDimitry Andric bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1843e8d8bef9SDimitry Andric     MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1844e8d8bef9SDimitry Andric     bool IsVolatile, bool IsNonTemporal) const {
1845e8d8bef9SDimitry Andric 
1846e8d8bef9SDimitry Andric   // Only handle load and store, not atomic read-modify-write insructions. The
1847e8d8bef9SDimitry Andric   // latter use glc to indicate if the atomic returns a result and so must not
1848e8d8bef9SDimitry Andric   // be used for cache control.
18490b57cec5SDimitry Andric   assert(MI->mayLoad() ^ MI->mayStore());
1850e8d8bef9SDimitry Andric 
1851e8d8bef9SDimitry Andric   // Only update load and store, not LLVM IR atomic read-modify-write
1852e8d8bef9SDimitry Andric   // instructions. The latter are always marked as volatile so cannot sensibly
1853e8d8bef9SDimitry Andric   // handle it as do not want to pessimize all atomics. Also they do not support
1854e8d8bef9SDimitry Andric   // the nontemporal attribute.
1855e8d8bef9SDimitry Andric   assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
1856e8d8bef9SDimitry Andric 
18570b57cec5SDimitry Andric   bool Changed = false;
18580b57cec5SDimitry Andric 
1859e8d8bef9SDimitry Andric   if (IsVolatile) {
18604824e7fdSDimitry Andric     // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
18614824e7fdSDimitry Andric     // and MISS_LRU for store instructions.
18624824e7fdSDimitry Andric     // Note: there is no L2 cache coherent bypass control at the ISA level.
1863e8d8bef9SDimitry Andric     if (Op == SIMemOp::LOAD) {
1864e8d8bef9SDimitry Andric       Changed |= enableGLCBit(MI);
1865e8d8bef9SDimitry Andric       Changed |= enableDLCBit(MI);
1866e8d8bef9SDimitry Andric     }
1867e8d8bef9SDimitry Andric 
1868e8d8bef9SDimitry Andric     // Ensure operation has completed at system scope to cause all volatile
1869e8d8bef9SDimitry Andric     // operations to be visible outside the program in a global order. Do not
1870e8d8bef9SDimitry Andric     // request cross address space as only the global address space can be
1871e8d8bef9SDimitry Andric     // observable outside the program, so no need to cause a waitcnt for LDS
1872e8d8bef9SDimitry Andric     // address space operations.
1873e8d8bef9SDimitry Andric     Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
1874e8d8bef9SDimitry Andric                           Position::AFTER);
18750b57cec5SDimitry Andric     return Changed;
18760b57cec5SDimitry Andric   }
18770b57cec5SDimitry Andric 
1878e8d8bef9SDimitry Andric   if (IsNonTemporal) {
18794824e7fdSDimitry Andric     // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
18804824e7fdSDimitry Andric     // and L2 cache policy to STREAM.
18814824e7fdSDimitry Andric     // For stores setting both GLC and SLC configures L0 and L1 cache policy
18824824e7fdSDimitry Andric     // to MISS_EVICT and the L2 cache policy to STREAM.
18834824e7fdSDimitry Andric     if (Op == SIMemOp::STORE)
18844824e7fdSDimitry Andric       Changed |= enableGLCBit(MI);
1885e8d8bef9SDimitry Andric     Changed |= enableSLCBit(MI);
18864824e7fdSDimitry Andric 
1887e8d8bef9SDimitry Andric     return Changed;
18880b57cec5SDimitry Andric   }
18890b57cec5SDimitry Andric 
18900b57cec5SDimitry Andric   return Changed;
18910b57cec5SDimitry Andric }
18920b57cec5SDimitry Andric 
// Insert the wait instructions needed to order memory operations of kind
// \p Op on \p AddrSpace at \p Scope, placed before or after \p MI according
// to \p Pos. On GFX10 returning loads are counted by vmcnt while stores are
// counted by the separate vscnt counter, and LDS/GDS operations are counted
// by lgkmcnt, so each is waited on independently below. Returns true if any
// instruction was inserted.
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace,
                                     SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Temporarily step past MI so the waits are inserted after it; the
  // iterator is restored before returning.
  if (Pos == Position::AFTER)
    ++MI;

  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to wait for operations to complete to ensure
      // they are visible to waves in the other CU as the L0 is per CU.
      // Otherwise in CU mode and all waves of a work-group are on the same CU
      // which shares the same L0.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          VMCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          VSCnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations in order for
      // work-items in the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then an GDS "S_WAITCNT lgkmcnt(0)"
      // is not needed as GDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/LDS memory as GDS operations could be
      // reordered with respect to later global/LDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for
      // the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    // Counters that are not being waited on are encoded with their maximum
    // value so S_WAITCNT does not wait on them; expcnt is never waited on
    // here.
    unsigned WaitCntImmediate =
      AMDGPU::encodeWaitcnt(IV,
                            VMCnt ? 0 : getVmcntBitMask(IV),
                            getExpcntBitMask(IV),
                            LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    // NOTE(review): the "_soft" form appears to mark waits inserted for the
    // memory model so a later pass may merge or relax them — confirm against
    // SIInsertWaitcnts.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  if (VSCnt) {
    // Stores have their own counter instruction; waiting for vscnt to reach
    // zero ensures all outstanding stores have completed.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
20130b57cec5SDimitry Andric 
insertAcquire(MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace,Position Pos) const2014e8d8bef9SDimitry Andric bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
2015e8d8bef9SDimitry Andric                                         SIAtomicScope Scope,
2016e8d8bef9SDimitry Andric                                         SIAtomicAddrSpace AddrSpace,
2017e8d8bef9SDimitry Andric                                         Position Pos) const {
2018e8d8bef9SDimitry Andric   if (!InsertCacheInv)
2019e8d8bef9SDimitry Andric     return false;
2020e8d8bef9SDimitry Andric 
2021e8d8bef9SDimitry Andric   bool Changed = false;
2022e8d8bef9SDimitry Andric 
2023e8d8bef9SDimitry Andric   MachineBasicBlock &MBB = *MI->getParent();
2024e8d8bef9SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
2025e8d8bef9SDimitry Andric 
2026e8d8bef9SDimitry Andric   if (Pos == Position::AFTER)
2027e8d8bef9SDimitry Andric     ++MI;
2028e8d8bef9SDimitry Andric 
2029e8d8bef9SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
2030e8d8bef9SDimitry Andric     switch (Scope) {
2031e8d8bef9SDimitry Andric     case SIAtomicScope::SYSTEM:
2032e8d8bef9SDimitry Andric     case SIAtomicScope::AGENT:
2033e8d8bef9SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
2034e8d8bef9SDimitry Andric       BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
2035e8d8bef9SDimitry Andric       Changed = true;
2036e8d8bef9SDimitry Andric       break;
2037e8d8bef9SDimitry Andric     case SIAtomicScope::WORKGROUP:
2038e8d8bef9SDimitry Andric       // In WGP mode the waves of a work-group can be executing on either CU of
2039e8d8bef9SDimitry Andric       // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise
2040e8d8bef9SDimitry Andric       // in CU mode and all waves of a work-group are on the same CU, and so the
2041e8d8bef9SDimitry Andric       // L0 does not need to be invalidated.
2042e8d8bef9SDimitry Andric       if (!ST.isCuModeEnabled()) {
2043e8d8bef9SDimitry Andric         BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
2044e8d8bef9SDimitry Andric         Changed = true;
2045e8d8bef9SDimitry Andric       }
2046e8d8bef9SDimitry Andric       break;
2047e8d8bef9SDimitry Andric     case SIAtomicScope::WAVEFRONT:
2048e8d8bef9SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
2049e8d8bef9SDimitry Andric       // No cache to invalidate.
2050e8d8bef9SDimitry Andric       break;
2051e8d8bef9SDimitry Andric     default:
2052e8d8bef9SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
2053e8d8bef9SDimitry Andric     }
2054e8d8bef9SDimitry Andric   }
2055e8d8bef9SDimitry Andric 
2056e8d8bef9SDimitry Andric   /// The scratch address space does not need the global memory cache
2057e8d8bef9SDimitry Andric   /// to be flushed as all memory operations by the same thread are
2058e8d8bef9SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
2059e8d8bef9SDimitry Andric   /// memory.
2060e8d8bef9SDimitry Andric 
2061e8d8bef9SDimitry Andric   /// Other address spaces do not have a cache.
2062e8d8bef9SDimitry Andric 
2063e8d8bef9SDimitry Andric   if (Pos == Position::AFTER)
2064e8d8bef9SDimitry Andric     --MI;
2065e8d8bef9SDimitry Andric 
2066e8d8bef9SDimitry Andric   return Changed;
2067e8d8bef9SDimitry Andric }
2068e8d8bef9SDimitry Andric 
enableLoadCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const206981ad6265SDimitry Andric bool SIGfx11CacheControl::enableLoadCacheBypass(
207081ad6265SDimitry Andric     const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
207181ad6265SDimitry Andric     SIAtomicAddrSpace AddrSpace) const {
207281ad6265SDimitry Andric   assert(MI->mayLoad() && !MI->mayStore());
207381ad6265SDimitry Andric   bool Changed = false;
207481ad6265SDimitry Andric 
207581ad6265SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
207681ad6265SDimitry Andric     switch (Scope) {
207781ad6265SDimitry Andric     case SIAtomicScope::SYSTEM:
207881ad6265SDimitry Andric     case SIAtomicScope::AGENT:
207981ad6265SDimitry Andric       // Set the L0 and L1 cache policies to MISS_EVICT.
208081ad6265SDimitry Andric       // Note: there is no L2 cache coherent bypass control at the ISA level.
208181ad6265SDimitry Andric       Changed |= enableGLCBit(MI);
208281ad6265SDimitry Andric       break;
208381ad6265SDimitry Andric     case SIAtomicScope::WORKGROUP:
208481ad6265SDimitry Andric       // In WGP mode the waves of a work-group can be executing on either CU of
208581ad6265SDimitry Andric       // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
208681ad6265SDimitry Andric       // CU mode all waves of a work-group are on the same CU, and so the L0
208781ad6265SDimitry Andric       // does not need to be bypassed.
208881ad6265SDimitry Andric       if (!ST.isCuModeEnabled())
208981ad6265SDimitry Andric         Changed |= enableGLCBit(MI);
209081ad6265SDimitry Andric       break;
209181ad6265SDimitry Andric     case SIAtomicScope::WAVEFRONT:
209281ad6265SDimitry Andric     case SIAtomicScope::SINGLETHREAD:
209381ad6265SDimitry Andric       // No cache to bypass.
209481ad6265SDimitry Andric       break;
209581ad6265SDimitry Andric     default:
209681ad6265SDimitry Andric       llvm_unreachable("Unsupported synchronization scope");
209781ad6265SDimitry Andric     }
209881ad6265SDimitry Andric   }
209981ad6265SDimitry Andric 
210081ad6265SDimitry Andric   /// The scratch address space does not need the global memory caches
210181ad6265SDimitry Andric   /// to be bypassed as all memory operations by the same thread are
210281ad6265SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
210381ad6265SDimitry Andric   /// memory.
210481ad6265SDimitry Andric 
210581ad6265SDimitry Andric   /// Other address spaces do not have a cache.
210681ad6265SDimitry Andric 
210781ad6265SDimitry Andric   return Changed;
210881ad6265SDimitry Andric }
210981ad6265SDimitry Andric 
enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator & MI,SIAtomicAddrSpace AddrSpace,SIMemOp Op,bool IsVolatile,bool IsNonTemporal) const211081ad6265SDimitry Andric bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
211181ad6265SDimitry Andric     MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
211281ad6265SDimitry Andric     bool IsVolatile, bool IsNonTemporal) const {
211381ad6265SDimitry Andric 
211481ad6265SDimitry Andric   // Only handle load and store, not atomic read-modify-write insructions. The
211581ad6265SDimitry Andric   // latter use glc to indicate if the atomic returns a result and so must not
211681ad6265SDimitry Andric   // be used for cache control.
211781ad6265SDimitry Andric   assert(MI->mayLoad() ^ MI->mayStore());
211881ad6265SDimitry Andric 
211981ad6265SDimitry Andric   // Only update load and store, not LLVM IR atomic read-modify-write
212081ad6265SDimitry Andric   // instructions. The latter are always marked as volatile so cannot sensibly
212181ad6265SDimitry Andric   // handle it as do not want to pessimize all atomics. Also they do not support
212281ad6265SDimitry Andric   // the nontemporal attribute.
212381ad6265SDimitry Andric   assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
212481ad6265SDimitry Andric 
212581ad6265SDimitry Andric   bool Changed = false;
212681ad6265SDimitry Andric 
212781ad6265SDimitry Andric   if (IsVolatile) {
212881ad6265SDimitry Andric     // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
212981ad6265SDimitry Andric     // and MISS_LRU for store instructions.
213081ad6265SDimitry Andric     // Note: there is no L2 cache coherent bypass control at the ISA level.
213181ad6265SDimitry Andric     if (Op == SIMemOp::LOAD)
213281ad6265SDimitry Andric       Changed |= enableGLCBit(MI);
213381ad6265SDimitry Andric 
213481ad6265SDimitry Andric     // Set MALL NOALLOC for load and store instructions.
213581ad6265SDimitry Andric     Changed |= enableDLCBit(MI);
213681ad6265SDimitry Andric 
213781ad6265SDimitry Andric     // Ensure operation has completed at system scope to cause all volatile
213881ad6265SDimitry Andric     // operations to be visible outside the program in a global order. Do not
213981ad6265SDimitry Andric     // request cross address space as only the global address space can be
214081ad6265SDimitry Andric     // observable outside the program, so no need to cause a waitcnt for LDS
214181ad6265SDimitry Andric     // address space operations.
214281ad6265SDimitry Andric     Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
214381ad6265SDimitry Andric                           Position::AFTER);
214481ad6265SDimitry Andric     return Changed;
214581ad6265SDimitry Andric   }
214681ad6265SDimitry Andric 
214781ad6265SDimitry Andric   if (IsNonTemporal) {
214881ad6265SDimitry Andric     // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
214981ad6265SDimitry Andric     // and L2 cache policy to STREAM.
215081ad6265SDimitry Andric     // For stores setting both GLC and SLC configures L0 and L1 cache policy
215181ad6265SDimitry Andric     // to MISS_EVICT and the L2 cache policy to STREAM.
215281ad6265SDimitry Andric     if (Op == SIMemOp::STORE)
215381ad6265SDimitry Andric       Changed |= enableGLCBit(MI);
215481ad6265SDimitry Andric     Changed |= enableSLCBit(MI);
215581ad6265SDimitry Andric 
215681ad6265SDimitry Andric     // Set MALL NOALLOC for load and store instructions.
215781ad6265SDimitry Andric     Changed |= enableDLCBit(MI);
215881ad6265SDimitry Andric     return Changed;
215981ad6265SDimitry Andric   }
216081ad6265SDimitry Andric 
216181ad6265SDimitry Andric   return Changed;
216281ad6265SDimitry Andric }
216381ad6265SDimitry Andric 
setTH(const MachineBasicBlock::iterator MI,AMDGPU::CPol::CPol Value) const21647a6dacacSDimitry Andric bool SIGfx12CacheControl::setTH(const MachineBasicBlock::iterator MI,
21657a6dacacSDimitry Andric                                 AMDGPU::CPol::CPol Value) const {
21667a6dacacSDimitry Andric   MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
21677a6dacacSDimitry Andric   if (!CPol)
21687a6dacacSDimitry Andric     return false;
21697a6dacacSDimitry Andric 
21707a6dacacSDimitry Andric   uint64_t NewTH = Value & AMDGPU::CPol::TH;
21717a6dacacSDimitry Andric   if ((CPol->getImm() & AMDGPU::CPol::TH) != NewTH) {
21727a6dacacSDimitry Andric     CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::TH) | NewTH);
21737a6dacacSDimitry Andric     return true;
21747a6dacacSDimitry Andric   }
21757a6dacacSDimitry Andric 
21767a6dacacSDimitry Andric   return false;
21777a6dacacSDimitry Andric }
21787a6dacacSDimitry Andric 
setScope(const MachineBasicBlock::iterator MI,AMDGPU::CPol::CPol Value) const21797a6dacacSDimitry Andric bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
21807a6dacacSDimitry Andric                                    AMDGPU::CPol::CPol Value) const {
21817a6dacacSDimitry Andric   MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
21827a6dacacSDimitry Andric   if (!CPol)
21837a6dacacSDimitry Andric     return false;
21847a6dacacSDimitry Andric 
21857a6dacacSDimitry Andric   uint64_t NewScope = Value & AMDGPU::CPol::SCOPE;
21867a6dacacSDimitry Andric   if ((CPol->getImm() & AMDGPU::CPol::SCOPE) != NewScope) {
21877a6dacacSDimitry Andric     CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::SCOPE) | NewScope);
21887a6dacacSDimitry Andric     return true;
21897a6dacacSDimitry Andric   }
21907a6dacacSDimitry Andric 
21917a6dacacSDimitry Andric   return false;
21927a6dacacSDimitry Andric }
21937a6dacacSDimitry Andric 
// Insert the GFX12 wait instructions needed to order memory operations of
// kind \p Op on \p AddrSpace at \p Scope, placed before or after \p MI
// according to \p Pos. GFX12 splits the legacy counters: loads are drained
// via bvhcnt/samplecnt/loadcnt, stores via storecnt, and LDS operations via
// dscnt. Returns true if any instruction was inserted.
bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  bool LOADCnt = false;
  bool DSCnt = false;
  bool STORECnt = false;

  // Temporarily step past MI so the waits are inserted after it; the
  // iterator is restored before returning.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        LOADCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        STORECnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to wait for operations to complete to ensure
      // they are visible to waves in the other CU as the L0 is per CU.
      // Otherwise in CU mode and all waves of a work-group are on the same CU
      // which shares the same L0.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          LOADCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          STORECnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations in order for
      // work-items in the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      DSCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (LOADCnt) {
    // A returning load is counted in one of bvhcnt, samplecnt or loadcnt
    // depending on the kind of load, so all three counters are drained.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
    Changed = true;
  }

  if (STORECnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);
    Changed = true;
  }

  if (DSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_DSCNT_soft)).addImm(0);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
22897a6dacacSDimitry Andric 
insertAcquire(MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace,Position Pos) const22901db9f3b2SDimitry Andric bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
22911db9f3b2SDimitry Andric                                         SIAtomicScope Scope,
22921db9f3b2SDimitry Andric                                         SIAtomicAddrSpace AddrSpace,
22931db9f3b2SDimitry Andric                                         Position Pos) const {
22941db9f3b2SDimitry Andric   if (!InsertCacheInv)
22951db9f3b2SDimitry Andric     return false;
22961db9f3b2SDimitry Andric 
22971db9f3b2SDimitry Andric   MachineBasicBlock &MBB = *MI->getParent();
22981db9f3b2SDimitry Andric   DebugLoc DL = MI->getDebugLoc();
22991db9f3b2SDimitry Andric 
23001db9f3b2SDimitry Andric   /// The scratch address space does not need the global memory cache
23011db9f3b2SDimitry Andric   /// to be flushed as all memory operations by the same thread are
23021db9f3b2SDimitry Andric   /// sequentially consistent, and no other thread can access scratch
23031db9f3b2SDimitry Andric   /// memory.
23041db9f3b2SDimitry Andric 
23051db9f3b2SDimitry Andric   /// Other address spaces do not have a cache.
23061db9f3b2SDimitry Andric   if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
23071db9f3b2SDimitry Andric     return false;
23081db9f3b2SDimitry Andric 
23091db9f3b2SDimitry Andric   AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
23101db9f3b2SDimitry Andric   switch (Scope) {
23111db9f3b2SDimitry Andric   case SIAtomicScope::SYSTEM:
23121db9f3b2SDimitry Andric     ScopeImm = AMDGPU::CPol::SCOPE_SYS;
23131db9f3b2SDimitry Andric     break;
23141db9f3b2SDimitry Andric   case SIAtomicScope::AGENT:
23151db9f3b2SDimitry Andric     ScopeImm = AMDGPU::CPol::SCOPE_DEV;
23161db9f3b2SDimitry Andric     break;
23171db9f3b2SDimitry Andric   case SIAtomicScope::WORKGROUP:
23181db9f3b2SDimitry Andric     // In WGP mode the waves of a work-group can be executing on either CU of
23191db9f3b2SDimitry Andric     // the WGP. Therefore we need to invalidate the L0 which is per CU.
23201db9f3b2SDimitry Andric     // Otherwise in CU mode all waves of a work-group are on the same CU, and so
23211db9f3b2SDimitry Andric     // the L0 does not need to be invalidated.
23221db9f3b2SDimitry Andric     if (ST.isCuModeEnabled())
23231db9f3b2SDimitry Andric       return false;
23241db9f3b2SDimitry Andric 
23251db9f3b2SDimitry Andric     ScopeImm = AMDGPU::CPol::SCOPE_SE;
23261db9f3b2SDimitry Andric     break;
23271db9f3b2SDimitry Andric   case SIAtomicScope::WAVEFRONT:
23281db9f3b2SDimitry Andric   case SIAtomicScope::SINGLETHREAD:
23291db9f3b2SDimitry Andric     // No cache to invalidate.
23301db9f3b2SDimitry Andric     return false;
23311db9f3b2SDimitry Andric   default:
23321db9f3b2SDimitry Andric     llvm_unreachable("Unsupported synchronization scope");
23331db9f3b2SDimitry Andric   }
23341db9f3b2SDimitry Andric 
23351db9f3b2SDimitry Andric   if (Pos == Position::AFTER)
23361db9f3b2SDimitry Andric     ++MI;
23371db9f3b2SDimitry Andric 
23381db9f3b2SDimitry Andric   BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm);
23391db9f3b2SDimitry Andric 
23401db9f3b2SDimitry Andric   if (Pos == Position::AFTER)
23411db9f3b2SDimitry Andric     --MI;
23421db9f3b2SDimitry Andric 
23431db9f3b2SDimitry Andric   return true;
23441db9f3b2SDimitry Andric }
23451db9f3b2SDimitry Andric 
enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator & MI,SIAtomicAddrSpace AddrSpace,SIMemOp Op,bool IsVolatile,bool IsNonTemporal) const23467a6dacacSDimitry Andric bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
23477a6dacacSDimitry Andric     MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
23487a6dacacSDimitry Andric     bool IsVolatile, bool IsNonTemporal) const {
23497a6dacacSDimitry Andric 
23507a6dacacSDimitry Andric   // Only handle load and store, not atomic read-modify-write instructions.
23517a6dacacSDimitry Andric   assert(MI->mayLoad() ^ MI->mayStore());
23527a6dacacSDimitry Andric 
23537a6dacacSDimitry Andric   // Only update load and store, not LLVM IR atomic read-modify-write
23547a6dacacSDimitry Andric   // instructions. The latter are always marked as volatile so cannot sensibly
23557a6dacacSDimitry Andric   // handle it as do not want to pessimize all atomics. Also they do not support
23567a6dacacSDimitry Andric   // the nontemporal attribute.
23577a6dacacSDimitry Andric   assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
23587a6dacacSDimitry Andric 
23597a6dacacSDimitry Andric   bool Changed = false;
23607a6dacacSDimitry Andric 
2361*5678d1d9SDimitry Andric   if (IsNonTemporal) {
2362*5678d1d9SDimitry Andric     // Set non-temporal hint for all cache levels.
2363*5678d1d9SDimitry Andric     Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
2364*5678d1d9SDimitry Andric   }
2365*5678d1d9SDimitry Andric 
23667a6dacacSDimitry Andric   if (IsVolatile) {
23677a6dacacSDimitry Andric     Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);
23687a6dacacSDimitry Andric 
23697a6dacacSDimitry Andric     // Ensure operation has completed at system scope to cause all volatile
23707a6dacacSDimitry Andric     // operations to be visible outside the program in a global order. Do not
23717a6dacacSDimitry Andric     // request cross address space as only the global address space can be
23727a6dacacSDimitry Andric     // observable outside the program, so no need to cause a waitcnt for LDS
23737a6dacacSDimitry Andric     // address space operations.
23747a6dacacSDimitry Andric     Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
23757a6dacacSDimitry Andric                           Position::AFTER);
23767a6dacacSDimitry Andric   }
23777a6dacacSDimitry Andric 
23787a6dacacSDimitry Andric   return Changed;
23797a6dacacSDimitry Andric }
23807a6dacacSDimitry Andric 
removeAtomicPseudoMIs()23810b57cec5SDimitry Andric bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
23820b57cec5SDimitry Andric   if (AtomicPseudoMIs.empty())
23830b57cec5SDimitry Andric     return false;
23840b57cec5SDimitry Andric 
23850b57cec5SDimitry Andric   for (auto &MI : AtomicPseudoMIs)
23860b57cec5SDimitry Andric     MI->eraseFromParent();
23870b57cec5SDimitry Andric 
23880b57cec5SDimitry Andric   AtomicPseudoMIs.clear();
23890b57cec5SDimitry Andric   return true;
23900b57cec5SDimitry Andric }
23910b57cec5SDimitry Andric 
expandLoad(const SIMemOpInfo & MOI,MachineBasicBlock::iterator & MI)23920b57cec5SDimitry Andric bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
23930b57cec5SDimitry Andric                                    MachineBasicBlock::iterator &MI) {
23940b57cec5SDimitry Andric   assert(MI->mayLoad() && !MI->mayStore());
23950b57cec5SDimitry Andric 
23960b57cec5SDimitry Andric   bool Changed = false;
23970b57cec5SDimitry Andric 
23980b57cec5SDimitry Andric   if (MOI.isAtomic()) {
23990b57cec5SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
24000b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::Acquire ||
24010b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
24020b57cec5SDimitry Andric       Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
24030b57cec5SDimitry Andric                                            MOI.getOrderingAddrSpace());
24040b57cec5SDimitry Andric     }
24050b57cec5SDimitry Andric 
24060b57cec5SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
24070b57cec5SDimitry Andric       Changed |= CC->insertWait(MI, MOI.getScope(),
24080b57cec5SDimitry Andric                                 MOI.getOrderingAddrSpace(),
24090b57cec5SDimitry Andric                                 SIMemOp::LOAD | SIMemOp::STORE,
24100b57cec5SDimitry Andric                                 MOI.getIsCrossAddressSpaceOrdering(),
24110b57cec5SDimitry Andric                                 Position::BEFORE);
24120b57cec5SDimitry Andric 
24130b57cec5SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Acquire ||
24140b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
24150b57cec5SDimitry Andric       Changed |= CC->insertWait(MI, MOI.getScope(),
24160b57cec5SDimitry Andric                                 MOI.getInstrAddrSpace(),
24170b57cec5SDimitry Andric                                 SIMemOp::LOAD,
24180b57cec5SDimitry Andric                                 MOI.getIsCrossAddressSpaceOrdering(),
24190b57cec5SDimitry Andric                                 Position::AFTER);
2420e8d8bef9SDimitry Andric       Changed |= CC->insertAcquire(MI, MOI.getScope(),
24210b57cec5SDimitry Andric                                    MOI.getOrderingAddrSpace(),
24220b57cec5SDimitry Andric                                    Position::AFTER);
24230b57cec5SDimitry Andric     }
24240b57cec5SDimitry Andric 
24250b57cec5SDimitry Andric     return Changed;
24260b57cec5SDimitry Andric   }
24270b57cec5SDimitry Andric 
2428e8d8bef9SDimitry Andric   // Atomic instructions already bypass caches to the scope specified by the
2429e8d8bef9SDimitry Andric   // SyncScope operand. Only non-atomic volatile and nontemporal instructions
2430e8d8bef9SDimitry Andric   // need additional treatment.
2431e8d8bef9SDimitry Andric   Changed |= CC->enableVolatileAndOrNonTemporal(MI, MOI.getInstrAddrSpace(),
2432e8d8bef9SDimitry Andric                                                 SIMemOp::LOAD, MOI.isVolatile(),
2433e8d8bef9SDimitry Andric                                                 MOI.isNonTemporal());
24340b57cec5SDimitry Andric   return Changed;
24350b57cec5SDimitry Andric }
24360b57cec5SDimitry Andric 
expandStore(const SIMemOpInfo & MOI,MachineBasicBlock::iterator & MI)24370b57cec5SDimitry Andric bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
24380b57cec5SDimitry Andric                                     MachineBasicBlock::iterator &MI) {
24390b57cec5SDimitry Andric   assert(!MI->mayLoad() && MI->mayStore());
24400b57cec5SDimitry Andric 
24410b57cec5SDimitry Andric   bool Changed = false;
24420b57cec5SDimitry Andric 
24430b57cec5SDimitry Andric   if (MOI.isAtomic()) {
2444fe6060f1SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2445fe6060f1SDimitry Andric         MOI.getOrdering() == AtomicOrdering::Release ||
2446fe6060f1SDimitry Andric         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2447fe6060f1SDimitry Andric       Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
2448fe6060f1SDimitry Andric                                             MOI.getOrderingAddrSpace());
2449fe6060f1SDimitry Andric     }
2450fe6060f1SDimitry Andric 
24510b57cec5SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Release ||
24520b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2453e8d8bef9SDimitry Andric       Changed |= CC->insertRelease(MI, MOI.getScope(),
24540b57cec5SDimitry Andric                                    MOI.getOrderingAddrSpace(),
24550b57cec5SDimitry Andric                                    MOI.getIsCrossAddressSpaceOrdering(),
24560b57cec5SDimitry Andric                                    Position::BEFORE);
24570b57cec5SDimitry Andric 
24580b57cec5SDimitry Andric     return Changed;
24590b57cec5SDimitry Andric   }
24600b57cec5SDimitry Andric 
2461e8d8bef9SDimitry Andric   // Atomic instructions already bypass caches to the scope specified by the
2462e8d8bef9SDimitry Andric   // SyncScope operand. Only non-atomic volatile and nontemporal instructions
2463e8d8bef9SDimitry Andric   // need additional treatment.
2464e8d8bef9SDimitry Andric   Changed |= CC->enableVolatileAndOrNonTemporal(
2465e8d8bef9SDimitry Andric       MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
2466e8d8bef9SDimitry Andric       MOI.isNonTemporal());
24670b57cec5SDimitry Andric   return Changed;
24680b57cec5SDimitry Andric }
24690b57cec5SDimitry Andric 
expandAtomicFence(const SIMemOpInfo & MOI,MachineBasicBlock::iterator & MI)24700b57cec5SDimitry Andric bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
24710b57cec5SDimitry Andric                                           MachineBasicBlock::iterator &MI) {
24720b57cec5SDimitry Andric   assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);
24730b57cec5SDimitry Andric 
24740b57cec5SDimitry Andric   AtomicPseudoMIs.push_back(MI);
24750b57cec5SDimitry Andric   bool Changed = false;
24760b57cec5SDimitry Andric 
24770b57cec5SDimitry Andric   if (MOI.isAtomic()) {
247806c3fb27SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Acquire)
247906c3fb27SDimitry Andric       Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
248006c3fb27SDimitry Andric                                 SIMemOp::LOAD | SIMemOp::STORE,
248106c3fb27SDimitry Andric                                 MOI.getIsCrossAddressSpaceOrdering(),
248206c3fb27SDimitry Andric                                 Position::BEFORE);
248306c3fb27SDimitry Andric 
248406c3fb27SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Release ||
24850b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
24860b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
24870b57cec5SDimitry Andric       /// TODO: This relies on a barrier always generating a waitcnt
24880b57cec5SDimitry Andric       /// for LDS to ensure it is not reordered with the completion of
24890b57cec5SDimitry Andric       /// the proceeding LDS operations. If barrier had a memory
24900b57cec5SDimitry Andric       /// ordering and memory scope, then library does not need to
24910b57cec5SDimitry Andric       /// generate a fence. Could add support in this file for
24920b57cec5SDimitry Andric       /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
2493e8d8bef9SDimitry Andric       /// adding S_WAITCNT before a S_BARRIER.
2494e8d8bef9SDimitry Andric       Changed |= CC->insertRelease(MI, MOI.getScope(),
24950b57cec5SDimitry Andric                                    MOI.getOrderingAddrSpace(),
24960b57cec5SDimitry Andric                                    MOI.getIsCrossAddressSpaceOrdering(),
24970b57cec5SDimitry Andric                                    Position::BEFORE);
24980b57cec5SDimitry Andric 
2499e8d8bef9SDimitry Andric     // TODO: If both release and invalidate are happening they could be combined
2500fe6060f1SDimitry Andric     // to use the single "BUFFER_WBINV*" instruction. This could be done by
2501e8d8bef9SDimitry Andric     // reorganizing this code or as part of optimizing SIInsertWaitcnt pass to
2502e8d8bef9SDimitry Andric     // track cache invalidate and write back instructions.
2503e8d8bef9SDimitry Andric 
25040b57cec5SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Acquire ||
25050b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
25060b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
2507e8d8bef9SDimitry Andric       Changed |= CC->insertAcquire(MI, MOI.getScope(),
25080b57cec5SDimitry Andric                                    MOI.getOrderingAddrSpace(),
25090b57cec5SDimitry Andric                                    Position::BEFORE);
25100b57cec5SDimitry Andric 
25110b57cec5SDimitry Andric     return Changed;
25120b57cec5SDimitry Andric   }
25130b57cec5SDimitry Andric 
25140b57cec5SDimitry Andric   return Changed;
25150b57cec5SDimitry Andric }
25160b57cec5SDimitry Andric 
expandAtomicCmpxchgOrRmw(const SIMemOpInfo & MOI,MachineBasicBlock::iterator & MI)25170b57cec5SDimitry Andric bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
25180b57cec5SDimitry Andric   MachineBasicBlock::iterator &MI) {
25190b57cec5SDimitry Andric   assert(MI->mayLoad() && MI->mayStore());
25200b57cec5SDimitry Andric 
25210b57cec5SDimitry Andric   bool Changed = false;
25220b57cec5SDimitry Andric 
25230b57cec5SDimitry Andric   if (MOI.isAtomic()) {
2524fe6060f1SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
2525fe6060f1SDimitry Andric         MOI.getOrdering() == AtomicOrdering::Acquire ||
2526fe6060f1SDimitry Andric         MOI.getOrdering() == AtomicOrdering::Release ||
2527fe6060f1SDimitry Andric         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
2528fe6060f1SDimitry Andric         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
2529fe6060f1SDimitry Andric       Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
2530fe6060f1SDimitry Andric                                           MOI.getInstrAddrSpace());
2531fe6060f1SDimitry Andric     }
2532fe6060f1SDimitry Andric 
25330b57cec5SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Release ||
25340b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
25350b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
25360b57cec5SDimitry Andric         MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
2537e8d8bef9SDimitry Andric       Changed |= CC->insertRelease(MI, MOI.getScope(),
25380b57cec5SDimitry Andric                                    MOI.getOrderingAddrSpace(),
25390b57cec5SDimitry Andric                                    MOI.getIsCrossAddressSpaceOrdering(),
25400b57cec5SDimitry Andric                                    Position::BEFORE);
25410b57cec5SDimitry Andric 
25420b57cec5SDimitry Andric     if (MOI.getOrdering() == AtomicOrdering::Acquire ||
25430b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
25440b57cec5SDimitry Andric         MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
25450b57cec5SDimitry Andric         MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
25460b57cec5SDimitry Andric         MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
25470b57cec5SDimitry Andric       Changed |= CC->insertWait(MI, MOI.getScope(),
2548fe6060f1SDimitry Andric                                 MOI.getInstrAddrSpace(),
25490b57cec5SDimitry Andric                                 isAtomicRet(*MI) ? SIMemOp::LOAD :
25500b57cec5SDimitry Andric                                                    SIMemOp::STORE,
25510b57cec5SDimitry Andric                                 MOI.getIsCrossAddressSpaceOrdering(),
25520b57cec5SDimitry Andric                                 Position::AFTER);
2553e8d8bef9SDimitry Andric       Changed |= CC->insertAcquire(MI, MOI.getScope(),
25540b57cec5SDimitry Andric                                    MOI.getOrderingAddrSpace(),
25550b57cec5SDimitry Andric                                    Position::AFTER);
25560b57cec5SDimitry Andric     }
25570b57cec5SDimitry Andric 
25580b57cec5SDimitry Andric     return Changed;
25590b57cec5SDimitry Andric   }
25600b57cec5SDimitry Andric 
25610b57cec5SDimitry Andric   return Changed;
25620b57cec5SDimitry Andric }
25630b57cec5SDimitry Andric 
// Pass entry point: walk every instruction, unbundle post-RA memory bundles,
// and expand each memory operation that may carry atomic/volatile/nontemporal
// semantics into the synchronization code the target's memory model requires.
// Returns true if the function was modified.
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  SIMemOpAccess MOA(MF);
  // Select the subtarget-specific cache-control strategy for this function.
  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {

      // Unbundle instructions after the post-RA scheduler.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        // Detach every instruction bundled with the BUNDLE header and clear
        // internal-read marks that only make sense inside a bundle.
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        // Delete the now-empty BUNDLE header and resume iteration at the
        // first unbundled instruction (II was advanced past the header).
        MI->eraseFromParent();
        MI = II->getIterator();
      }

      // Skip instructions that can never carry memory-model semantics.
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      // Classify the instruction and dispatch to the matching expansion; the
      // getters return no value when the instruction is not of that kind.
      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(*MOI, MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI)) {
        Changed |= expandStore(*MOI, MI);
        Changed |= CC->tryForceStoreSC0SC1(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(*MOI, MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
    }
  }

  // Erase the fence pseudos queued during expansion.
  Changed |= removeAtomicPseudoMIs();
  return Changed;
}
26060b57cec5SDimitry Andric 
// Register the pass with LLVM's pass registry (no analysis dependencies,
// not a CFG-only or analysis pass).
INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

// Unique pass identity; the address of ID identifies the pass, and the
// exported reference lets other code refer to it (e.g. in pass pipelines).
char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
26110b57cec5SDimitry Andric 
createSIMemoryLegalizerPass()26120b57cec5SDimitry Andric FunctionPass *llvm::createSIMemoryLegalizerPass() {
26130b57cec5SDimitry Andric   return new SIMemoryLegalizer();
26140b57cec5SDimitry Andric }
2615