10b57cec5SDimitry Andric //===- SIMemoryLegalizer.cpp ----------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// Memory legalizer - implements memory model. More information can be
110b57cec5SDimitry Andric /// found here:
120b57cec5SDimitry Andric /// http://llvm.org/docs/AMDGPUUsage.html#memory-model
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric
160b57cec5SDimitry Andric #include "AMDGPU.h"
170b57cec5SDimitry Andric #include "AMDGPUMachineModuleInfo.h"
18e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
190b57cec5SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
200b57cec5SDimitry Andric #include "llvm/ADT/BitmaskEnum.h"
210b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
2281ad6265SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
230b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
240b57cec5SDimitry Andric #include "llvm/Support/AtomicOrdering.h"
2506c3fb27SDimitry Andric #include "llvm/TargetParser/TargetParser.h"
260b57cec5SDimitry Andric
270b57cec5SDimitry Andric using namespace llvm;
280b57cec5SDimitry Andric using namespace llvm::AMDGPU;
290b57cec5SDimitry Andric
300b57cec5SDimitry Andric #define DEBUG_TYPE "si-memory-legalizer"
310b57cec5SDimitry Andric #define PASS_NAME "SI Memory Legalizer"
320b57cec5SDimitry Andric
/// Hidden debugging option: when set, cache invalidating instructions are not
/// inserted by this pass.
static cl::opt<bool> AmdgcnSkipCacheInvalidations(
    "amdgcn-skip-cache-invalidations", cl::init(false), cl::Hidden,
    cl::desc("Use this to skip inserting cache invalidating instructions."));
36e8d8bef9SDimitry Andric
370b57cec5SDimitry Andric namespace {
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
400b57cec5SDimitry Andric
/// Memory operation flags. Can be ORed together.
enum class SIMemOp {
  NONE = 0u,
  LOAD = 1u << 0,
  STORE = 1u << 1,
  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ STORE)
};
480b57cec5SDimitry Andric
/// Position to insert a new instruction relative to an existing
/// instruction.
enum class Position {
  BEFORE,
  AFTER
};
550b57cec5SDimitry Andric
/// The atomic synchronization scopes supported by the AMDGPU target.
/// Enumerators are ordered from narrowest to widest scope so that scopes can
/// be clamped with std::min (see the SIMemOpInfo constructor).
enum class SIAtomicScope {
  NONE,
  SINGLETHREAD,
  WAVEFRONT,
  WORKGROUP,
  AGENT,
  SYSTEM
};
650b57cec5SDimitry Andric
/// The distinct address spaces supported by the AMDGPU target for
/// atomic memory operations. Can be ORed together.
enum class SIAtomicAddrSpace {
  NONE = 0u,
  GLOBAL = 1u << 0,
  LDS = 1u << 1,
  SCRATCH = 1u << 2,
  GDS = 1u << 3,
  /// Catch-all for accesses not classified into one of the above.
  OTHER = 1u << 4,

  /// The address spaces that can be accessed by a FLAT instruction.
  FLAT = GLOBAL | LDS | SCRATCH,

  /// The address spaces that support atomic instructions.
  ATOMIC = GLOBAL | LDS | SCRATCH | GDS,

  /// All address spaces.
  ALL = GLOBAL | LDS | SCRATCH | GDS | OTHER,

  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
870b57cec5SDimitry Andric
880b57cec5SDimitry Andric class SIMemOpInfo final {
890b57cec5SDimitry Andric private:
900b57cec5SDimitry Andric
910b57cec5SDimitry Andric friend class SIMemOpAccess;
920b57cec5SDimitry Andric
930b57cec5SDimitry Andric AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
940b57cec5SDimitry Andric AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
950b57cec5SDimitry Andric SIAtomicScope Scope = SIAtomicScope::SYSTEM;
960b57cec5SDimitry Andric SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
970b57cec5SDimitry Andric SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
980b57cec5SDimitry Andric bool IsCrossAddressSpaceOrdering = false;
99e8d8bef9SDimitry Andric bool IsVolatile = false;
1000b57cec5SDimitry Andric bool IsNonTemporal = false;
1010b57cec5SDimitry Andric
SIMemOpInfo(AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent,SIAtomicScope Scope=SIAtomicScope::SYSTEM,SIAtomicAddrSpace OrderingAddrSpace=SIAtomicAddrSpace::ATOMIC,SIAtomicAddrSpace InstrAddrSpace=SIAtomicAddrSpace::ALL,bool IsCrossAddressSpaceOrdering=true,AtomicOrdering FailureOrdering=AtomicOrdering::SequentiallyConsistent,bool IsVolatile=false,bool IsNonTemporal=false)1020b57cec5SDimitry Andric SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
1030b57cec5SDimitry Andric SIAtomicScope Scope = SIAtomicScope::SYSTEM,
1040b57cec5SDimitry Andric SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
1050b57cec5SDimitry Andric SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
1060b57cec5SDimitry Andric bool IsCrossAddressSpaceOrdering = true,
1070b57cec5SDimitry Andric AtomicOrdering FailureOrdering =
1080b57cec5SDimitry Andric AtomicOrdering::SequentiallyConsistent,
109e8d8bef9SDimitry Andric bool IsVolatile = false,
1100b57cec5SDimitry Andric bool IsNonTemporal = false)
1110b57cec5SDimitry Andric : Ordering(Ordering), FailureOrdering(FailureOrdering),
1120b57cec5SDimitry Andric Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
1130b57cec5SDimitry Andric InstrAddrSpace(InstrAddrSpace),
1140b57cec5SDimitry Andric IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
115e8d8bef9SDimitry Andric IsVolatile(IsVolatile),
1160b57cec5SDimitry Andric IsNonTemporal(IsNonTemporal) {
117fe6060f1SDimitry Andric
118fe6060f1SDimitry Andric if (Ordering == AtomicOrdering::NotAtomic) {
119fe6060f1SDimitry Andric assert(Scope == SIAtomicScope::NONE &&
120fe6060f1SDimitry Andric OrderingAddrSpace == SIAtomicAddrSpace::NONE &&
121fe6060f1SDimitry Andric !IsCrossAddressSpaceOrdering &&
122fe6060f1SDimitry Andric FailureOrdering == AtomicOrdering::NotAtomic);
123fe6060f1SDimitry Andric return;
124fe6060f1SDimitry Andric }
125fe6060f1SDimitry Andric
126fe6060f1SDimitry Andric assert(Scope != SIAtomicScope::NONE &&
127fe6060f1SDimitry Andric (OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
128fe6060f1SDimitry Andric SIAtomicAddrSpace::NONE &&
129fe6060f1SDimitry Andric (InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) !=
130349cc55cSDimitry Andric SIAtomicAddrSpace::NONE);
131fe6060f1SDimitry Andric
1320b57cec5SDimitry Andric // There is also no cross address space ordering if the ordering
1330b57cec5SDimitry Andric // address space is the same as the instruction address space and
1340b57cec5SDimitry Andric // only contains a single address space.
1350b57cec5SDimitry Andric if ((OrderingAddrSpace == InstrAddrSpace) &&
1360b57cec5SDimitry Andric isPowerOf2_32(uint32_t(InstrAddrSpace)))
1370b57cec5SDimitry Andric this->IsCrossAddressSpaceOrdering = false;
138fe6060f1SDimitry Andric
139fe6060f1SDimitry Andric // Limit the scope to the maximum supported by the instruction's address
140fe6060f1SDimitry Andric // spaces.
141fe6060f1SDimitry Andric if ((InstrAddrSpace & ~SIAtomicAddrSpace::SCRATCH) ==
142fe6060f1SDimitry Andric SIAtomicAddrSpace::NONE) {
143fe6060f1SDimitry Andric this->Scope = std::min(Scope, SIAtomicScope::SINGLETHREAD);
144fe6060f1SDimitry Andric } else if ((InstrAddrSpace &
145fe6060f1SDimitry Andric ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS)) ==
146fe6060f1SDimitry Andric SIAtomicAddrSpace::NONE) {
147fe6060f1SDimitry Andric this->Scope = std::min(Scope, SIAtomicScope::WORKGROUP);
148fe6060f1SDimitry Andric } else if ((InstrAddrSpace &
149fe6060f1SDimitry Andric ~(SIAtomicAddrSpace::SCRATCH | SIAtomicAddrSpace::LDS |
150fe6060f1SDimitry Andric SIAtomicAddrSpace::GDS)) == SIAtomicAddrSpace::NONE) {
151fe6060f1SDimitry Andric this->Scope = std::min(Scope, SIAtomicScope::AGENT);
152fe6060f1SDimitry Andric }
1530b57cec5SDimitry Andric }
1540b57cec5SDimitry Andric
1550b57cec5SDimitry Andric public:
1560b57cec5SDimitry Andric /// \returns Atomic synchronization scope of the machine instruction used to
1570b57cec5SDimitry Andric /// create this SIMemOpInfo.
getScope() const1580b57cec5SDimitry Andric SIAtomicScope getScope() const {
1590b57cec5SDimitry Andric return Scope;
1600b57cec5SDimitry Andric }
1610b57cec5SDimitry Andric
1620b57cec5SDimitry Andric /// \returns Ordering constraint of the machine instruction used to
1630b57cec5SDimitry Andric /// create this SIMemOpInfo.
getOrdering() const1640b57cec5SDimitry Andric AtomicOrdering getOrdering() const {
1650b57cec5SDimitry Andric return Ordering;
1660b57cec5SDimitry Andric }
1670b57cec5SDimitry Andric
1680b57cec5SDimitry Andric /// \returns Failure ordering constraint of the machine instruction used to
1690b57cec5SDimitry Andric /// create this SIMemOpInfo.
getFailureOrdering() const1700b57cec5SDimitry Andric AtomicOrdering getFailureOrdering() const {
1710b57cec5SDimitry Andric return FailureOrdering;
1720b57cec5SDimitry Andric }
1730b57cec5SDimitry Andric
1740b57cec5SDimitry Andric /// \returns The address spaces be accessed by the machine
175bdd1243dSDimitry Andric /// instruction used to create this SIMemOpInfo.
getInstrAddrSpace() const1760b57cec5SDimitry Andric SIAtomicAddrSpace getInstrAddrSpace() const {
1770b57cec5SDimitry Andric return InstrAddrSpace;
1780b57cec5SDimitry Andric }
1790b57cec5SDimitry Andric
1800b57cec5SDimitry Andric /// \returns The address spaces that must be ordered by the machine
181bdd1243dSDimitry Andric /// instruction used to create this SIMemOpInfo.
getOrderingAddrSpace() const1820b57cec5SDimitry Andric SIAtomicAddrSpace getOrderingAddrSpace() const {
1830b57cec5SDimitry Andric return OrderingAddrSpace;
1840b57cec5SDimitry Andric }
1850b57cec5SDimitry Andric
1860b57cec5SDimitry Andric /// \returns Return true iff memory ordering of operations on
1870b57cec5SDimitry Andric /// different address spaces is required.
getIsCrossAddressSpaceOrdering() const1880b57cec5SDimitry Andric bool getIsCrossAddressSpaceOrdering() const {
1890b57cec5SDimitry Andric return IsCrossAddressSpaceOrdering;
1900b57cec5SDimitry Andric }
1910b57cec5SDimitry Andric
1920b57cec5SDimitry Andric /// \returns True if memory access of the machine instruction used to
193e8d8bef9SDimitry Andric /// create this SIMemOpInfo is volatile, false otherwise.
isVolatile() const194e8d8bef9SDimitry Andric bool isVolatile() const {
195e8d8bef9SDimitry Andric return IsVolatile;
196e8d8bef9SDimitry Andric }
197e8d8bef9SDimitry Andric
198e8d8bef9SDimitry Andric /// \returns True if memory access of the machine instruction used to
199e8d8bef9SDimitry Andric /// create this SIMemOpInfo is nontemporal, false otherwise.
isNonTemporal() const2000b57cec5SDimitry Andric bool isNonTemporal() const {
2010b57cec5SDimitry Andric return IsNonTemporal;
2020b57cec5SDimitry Andric }
2030b57cec5SDimitry Andric
2040b57cec5SDimitry Andric /// \returns True if ordering constraint of the machine instruction used to
2050b57cec5SDimitry Andric /// create this SIMemOpInfo is unordered or higher, false otherwise.
isAtomic() const2060b57cec5SDimitry Andric bool isAtomic() const {
2070b57cec5SDimitry Andric return Ordering != AtomicOrdering::NotAtomic;
2080b57cec5SDimitry Andric }
2090b57cec5SDimitry Andric
2100b57cec5SDimitry Andric };
2110b57cec5SDimitry Andric
/// Builds SIMemOpInfo descriptions from the machine instructions of a
/// machine function.
class SIMemOpAccess final {
private:
  AMDGPUMachineModuleInfo *MMI = nullptr;

  /// Reports unsupported message \p Msg for \p MI to LLVM context.
  void reportUnsupported(const MachineBasicBlock::iterator &MI,
                         const char *Msg) const;

  /// Inspects the target synchronization scope \p SSID and determines
  /// the SI atomic scope it corresponds to, the address spaces it
  /// covers, and whether the memory ordering applies between address
  /// spaces. Returns std::nullopt if the scope is not supported.
  std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
  toSIAtomicScope(SyncScope::ID SSID, SIAtomicAddrSpace InstrAddrSpace) const;

  /// \return Return a bit set of the address spaces accessed by \p AS.
  SIAtomicAddrSpace toSIAtomicAddrSpace(unsigned AS) const;

  /// \returns Info constructed from \p MI, which has at least machine memory
  /// operand.
  std::optional<SIMemOpInfo>
  constructFromMIWithMMO(const MachineBasicBlock::iterator &MI) const;

public:
  /// Construct class to support accessing the machine memory operands
  /// of instructions in the machine function \p MF.
  SIMemOpAccess(MachineFunction &MF);

  /// \returns Load info if \p MI is a load operation, "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getLoadInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Store info if \p MI is a store operation, "std::nullopt"
  /// otherwise.
  std::optional<SIMemOpInfo>
  getStoreInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic fence info if \p MI is an atomic fence operation,
  /// "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const;

  /// \returns Atomic cmpxchg/rmw info if \p MI is an atomic cmpxchg or
  /// rmw operation, "std::nullopt" otherwise.
  std::optional<SIMemOpInfo>
  getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const;
};
2590b57cec5SDimitry Andric
/// Abstract interface for inserting the cache-control side of the memory
/// model: cache bypass bits, waits, and acquire/release sequences. Concrete
/// per-generation subclasses are selected by create().
class SICacheControl {
protected:

  /// AMDGPU subtarget info.
  const GCNSubtarget &ST;

  /// Instruction info.
  const SIInstrInfo *TII = nullptr;

  IsaVersion IV;

  /// Whether to insert cache invalidating instructions.
  bool InsertCacheInv;

  SICacheControl(const GCNSubtarget &ST);

  /// Sets named bit \p Bit to "true" if present in instruction \p MI.
  /// \returns Returns true if \p MI is modified, false otherwise.
  bool enableNamedBit(const MachineBasicBlock::iterator MI,
                      AMDGPU::CPol::CPol Bit) const;

public:

  /// Create a cache control for the subtarget \p ST.
  static std::unique_ptr<SICacheControl> create(const GCNSubtarget &ST);

  /// Update \p MI memory load instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p
  /// AddrSpace. Return true iff the instruction was modified.
  virtual bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory store instruction to bypass any caches up to
  /// the \p Scope memory scope for address spaces \p
  /// AddrSpace. Return true iff the instruction was modified.
  virtual bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory read-modify-write instruction to bypass any caches up
  /// to the \p Scope memory scope for address spaces \p AddrSpace. Return true
  /// iff the instruction was modified.
  virtual bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace) const = 0;

  /// Update \p MI memory instruction of kind \p Op associated with address
  /// spaces \p AddrSpace to indicate it is volatile and/or nontemporal. Return
  /// true iff the instruction was modified.
  virtual bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                              SIAtomicAddrSpace AddrSpace,
                                              SIMemOp Op, bool IsVolatile,
                                              bool IsNonTemporal) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative
  /// to instruction \p MI to ensure memory instructions before \p Pos of kind
  /// \p Op associated with address spaces \p AddrSpace have completed. Used
  /// between memory instructions to enforce the order they become visible as
  /// observed by other memory instructions executing in memory scope \p Scope.
  /// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between
  /// address spaces. Returns true iff any instructions inserted.
  virtual bool insertWait(MachineBasicBlock::iterator &MI,
                          SIAtomicScope Scope,
                          SIAtomicAddrSpace AddrSpace,
                          SIMemOp Op,
                          bool IsCrossAddrSpaceOrdering,
                          Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure any subsequent memory instructions of this
  /// thread with address spaces \p AddrSpace will observe the previous memory
  /// operations by any thread for memory scopes up to memory scope \p Scope .
  /// Returns true iff any instructions inserted.
  virtual bool insertAcquire(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             Position Pos) const = 0;

  /// Inserts any necessary instructions at position \p Pos relative to
  /// instruction \p MI to ensure previous memory instructions by this thread
  /// with address spaces \p AddrSpace have completed and can be observed by
  /// subsequent memory instructions by any thread executing in memory scope \p
  /// Scope. \p IsCrossAddrSpaceOrdering indicates if the memory ordering is
  /// between address spaces. Returns true iff any instructions inserted.
  virtual bool insertRelease(MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace,
                             bool IsCrossAddrSpaceOrdering,
                             Position Pos) const = 0;

  /// Virtual destructor to allow derivations to be deleted.
  virtual ~SICacheControl() = default;

  /// Hook for subtargets that must force the SC0 and SC1 bits on stores;
  /// the default implementation changes nothing. Returns true iff \p MI was
  /// modified.
  virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) const {
    return false;
  }
};
3590b57cec5SDimitry Andric
/// Baseline cache control implementation; the later per-generation controls
/// below derive from it.
class SIGfx6CacheControl : public SICacheControl {
protected:

  /// Sets GLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableGLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::GLC);
  }

  /// Sets SLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableSLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::SLC);
  }

public:

  SIGfx6CacheControl(const GCNSubtarget &ST) : SICacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile,
                                      bool IsNonTemporal) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
4140b57cec5SDimitry Andric
/// GFX7 refinement of the GFX6 cache control; overrides only the acquire
/// insertion.
class SIGfx7CacheControl : public SIGfx6CacheControl {
public:

  SIGfx7CacheControl(const GCNSubtarget &ST) : SIGfx6CacheControl(ST) {}

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

};
4260b57cec5SDimitry Andric
/// Cache control for GFX90A subtargets; overrides most of the GFX7 hooks.
class SIGfx90ACacheControl : public SIGfx7CacheControl {
public:

  SIGfx90ACacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
                              SIAtomicScope Scope,
                              SIAtomicAddrSpace AddrSpace) const override;

  bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
                            SIAtomicScope Scope,
                            SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile,
                                      bool IsNonTemporal) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;

  bool insertRelease(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     bool IsCrossAddrSpaceOrdering,
                     Position Pos) const override;
};
467fe6060f1SDimitry Andric
46881ad6265SDimitry Andric class SIGfx940CacheControl : public SIGfx90ACacheControl {
46981ad6265SDimitry Andric protected:
47081ad6265SDimitry Andric
47181ad6265SDimitry Andric /// Sets SC0 bit to "true" if present in \p MI. Returns true if \p MI
47281ad6265SDimitry Andric /// is modified, false otherwise.
enableSC0Bit(const MachineBasicBlock::iterator & MI) const47381ad6265SDimitry Andric bool enableSC0Bit(const MachineBasicBlock::iterator &MI) const {
47481ad6265SDimitry Andric return enableNamedBit(MI, AMDGPU::CPol::SC0);
47581ad6265SDimitry Andric }
47681ad6265SDimitry Andric
47781ad6265SDimitry Andric /// Sets SC1 bit to "true" if present in \p MI. Returns true if \p MI
47881ad6265SDimitry Andric /// is modified, false otherwise.
enableSC1Bit(const MachineBasicBlock::iterator & MI) const47981ad6265SDimitry Andric bool enableSC1Bit(const MachineBasicBlock::iterator &MI) const {
48081ad6265SDimitry Andric return enableNamedBit(MI, AMDGPU::CPol::SC1);
48181ad6265SDimitry Andric }
48281ad6265SDimitry Andric
48381ad6265SDimitry Andric /// Sets NT bit to "true" if present in \p MI. Returns true if \p MI
48481ad6265SDimitry Andric /// is modified, false otherwise.
enableNTBit(const MachineBasicBlock::iterator & MI) const48581ad6265SDimitry Andric bool enableNTBit(const MachineBasicBlock::iterator &MI) const {
48681ad6265SDimitry Andric return enableNamedBit(MI, AMDGPU::CPol::NT);
48781ad6265SDimitry Andric }
48881ad6265SDimitry Andric
48981ad6265SDimitry Andric public:
49081ad6265SDimitry Andric
SIGfx940CacheControl(const GCNSubtarget & ST)49181ad6265SDimitry Andric SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {};
49281ad6265SDimitry Andric
49381ad6265SDimitry Andric bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
49481ad6265SDimitry Andric SIAtomicScope Scope,
49581ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace) const override;
49681ad6265SDimitry Andric
49781ad6265SDimitry Andric bool enableStoreCacheBypass(const MachineBasicBlock::iterator &MI,
49881ad6265SDimitry Andric SIAtomicScope Scope,
49981ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace) const override;
50081ad6265SDimitry Andric
50181ad6265SDimitry Andric bool enableRMWCacheBypass(const MachineBasicBlock::iterator &MI,
50281ad6265SDimitry Andric SIAtomicScope Scope,
50381ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace) const override;
50481ad6265SDimitry Andric
50581ad6265SDimitry Andric bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
50681ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace, SIMemOp Op,
50781ad6265SDimitry Andric bool IsVolatile,
50881ad6265SDimitry Andric bool IsNonTemporal) const override;
50981ad6265SDimitry Andric
51081ad6265SDimitry Andric bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
51181ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace, Position Pos) const override;
51281ad6265SDimitry Andric
51381ad6265SDimitry Andric bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
51481ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
51581ad6265SDimitry Andric Position Pos) const override;
51606c3fb27SDimitry Andric
tryForceStoreSC0SC1(const SIMemOpInfo & MOI,MachineBasicBlock::iterator & MI) const51706c3fb27SDimitry Andric bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
51806c3fb27SDimitry Andric MachineBasicBlock::iterator &MI) const override {
51906c3fb27SDimitry Andric bool Changed = false;
52006c3fb27SDimitry Andric if (ST.hasForceStoreSC0SC1() &&
52106c3fb27SDimitry Andric (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
52206c3fb27SDimitry Andric SIAtomicAddrSpace::GLOBAL |
52306c3fb27SDimitry Andric SIAtomicAddrSpace::OTHER)) !=
52406c3fb27SDimitry Andric SIAtomicAddrSpace::NONE) {
52506c3fb27SDimitry Andric Changed |= enableSC0Bit(MI);
52606c3fb27SDimitry Andric Changed |= enableSC1Bit(MI);
52706c3fb27SDimitry Andric }
52806c3fb27SDimitry Andric return Changed;
52906c3fb27SDimitry Andric }
53081ad6265SDimitry Andric };
53181ad6265SDimitry Andric
/// Cache control for GFX10: extends SIGfx7CacheControl with the DLC
/// cache-policy bit and GFX10-specific overrides for load cache bypass,
/// volatile/nontemporal handling, waits, and acquire insertion.
class SIGfx10CacheControl : public SIGfx7CacheControl {
protected:

  /// Sets DLC bit to "true" if present in \p MI. Returns true if \p MI
  /// is modified, false otherwise.
  bool enableDLCBit(const MachineBasicBlock::iterator &MI) const {
    return enableNamedBit(MI, AMDGPU::CPol::DLC);
  }

public:

  SIGfx10CacheControl(const GCNSubtarget &ST) : SIGfx7CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile,
                                      bool IsNonTemporal) const override;

  bool insertWait(MachineBasicBlock::iterator &MI,
                  SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace,
                  SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering,
                  Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI,
                     SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace,
                     Position Pos) const override;
};
5660b57cec5SDimitry Andric
/// Cache control for GFX11: inherits GFX10 behavior, overriding only load
/// cache bypass and volatile/nontemporal handling.
class SIGfx11CacheControl : public SIGfx10CacheControl {
public:
  SIGfx11CacheControl(const GCNSubtarget &ST) : SIGfx10CacheControl(ST) {}

  bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
                             SIAtomicScope Scope,
                             SIAtomicAddrSpace AddrSpace) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile,
                                      bool IsNonTemporal) const override;
};
58081ad6265SDimitry Andric
/// Cache control for GFX12: uses the TH (temporal hint) and Scope fields of
/// the CPol operand instead of the legacy GLC/SLC/DLC bits.
class SIGfx12CacheControl : public SIGfx11CacheControl {
protected:
  // Sets TH policy to \p Value if CPol operand is present in instruction \p MI.
  // \returns Returns true if \p MI is modified, false otherwise.
  bool setTH(const MachineBasicBlock::iterator MI,
             AMDGPU::CPol::CPol Value) const;
  // Sets Scope policy to \p Value if CPol operand is present in instruction \p
  // MI. \returns Returns true if \p MI is modified, false otherwise.
  bool setScope(const MachineBasicBlock::iterator MI,
                AMDGPU::CPol::CPol Value) const;

public:
  SIGfx12CacheControl(const GCNSubtarget &ST) : SIGfx11CacheControl(ST) {}

  bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                  SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                  bool IsCrossAddrSpaceOrdering, Position Pos) const override;

  bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
                     SIAtomicAddrSpace AddrSpace, Position Pos) const override;

  bool enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator &MI,
                                      SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                      bool IsVolatile,
                                      bool IsNonTemporal) const override;
};
6071db9f3b2SDimitry Andric
/// Machine-function pass that implements the AMDGPU memory model by expanding
/// atomic/fence pseudo operations into the required cache-control bits, waits,
/// and invalidation instructions (delegated to a SICacheControl instance).
class SIMemoryLegalizer final : public MachineFunctionPass {
private:

  /// Cache Control.
  std::unique_ptr<SICacheControl> CC = nullptr;

  /// List of atomic pseudo instructions.
  std::list<MachineBasicBlock::iterator> AtomicPseudoMIs;

  /// Return true iff instruction \p MI is a atomic instruction that
  /// returns a result.
  bool isAtomicRet(const MachineInstr &MI) const {
    return SIInstrInfo::isAtomicRet(MI);
  }

  /// Removes all processed atomic pseudo instructions from the current
  /// function. Returns true if current function is modified, false otherwise.
  bool removeAtomicPseudoMIs();

  /// Expands load operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandLoad(const SIMemOpInfo &MOI,
                  MachineBasicBlock::iterator &MI);
  /// Expands store operation \p MI. Returns true if instructions are
  /// added/deleted or \p MI is modified, false otherwise.
  bool expandStore(const SIMemOpInfo &MOI,
                   MachineBasicBlock::iterator &MI);
  /// Expands atomic fence operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicFence(const SIMemOpInfo &MOI,
                         MachineBasicBlock::iterator &MI);
  /// Expands atomic cmpxchg or rmw operation \p MI. Returns true if
  /// instructions are added/deleted or \p MI is modified, false otherwise.
  bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                MachineBasicBlock::iterator &MI);

public:
  static char ID;

  SIMemoryLegalizer() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // This pass only rewrites/inserts instructions; it never alters the CFG.
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return PASS_NAME;
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};
6600b57cec5SDimitry Andric
6610b57cec5SDimitry Andric } // end namespace anonymous
6620b57cec5SDimitry Andric
reportUnsupported(const MachineBasicBlock::iterator & MI,const char * Msg) const6630b57cec5SDimitry Andric void SIMemOpAccess::reportUnsupported(const MachineBasicBlock::iterator &MI,
6640b57cec5SDimitry Andric const char *Msg) const {
6650b57cec5SDimitry Andric const Function &Func = MI->getParent()->getParent()->getFunction();
6660b57cec5SDimitry Andric DiagnosticInfoUnsupported Diag(Func, Msg, MI->getDebugLoc());
6670b57cec5SDimitry Andric Func.getContext().diagnose(Diag);
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric
/// Map a sync-scope ID to a (scope, ordering address spaces,
/// cross-address-space-ordering) triple. The plain scopes order all ATOMIC
/// address spaces (third element true); the "one address space" variants
/// restrict ordering to the address spaces the instruction itself accesses
/// (third element false). Returns std::nullopt for an unrecognized SSID.
std::optional<std::tuple<SIAtomicScope, SIAtomicAddrSpace, bool>>
SIMemOpAccess::toSIAtomicScope(SyncScope::ID SSID,
                               SIAtomicAddrSpace InstrAddrSpace) const {
  if (SSID == SyncScope::System)
    return std::tuple(SIAtomicScope::SYSTEM, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getAgentSSID())
    return std::tuple(SIAtomicScope::AGENT, SIAtomicAddrSpace::ATOMIC, true);
  if (SSID == MMI->getWorkgroupSSID())
    return std::tuple(SIAtomicScope::WORKGROUP, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getWavefrontSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == SyncScope::SingleThread)
    return std::tuple(SIAtomicScope::SINGLETHREAD, SIAtomicAddrSpace::ATOMIC,
                      true);
  if (SSID == MMI->getSystemOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SYSTEM,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getAgentOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::AGENT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWorkgroupOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WORKGROUP,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getWavefrontOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::WAVEFRONT,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  if (SSID == MMI->getSingleThreadOneAddressSpaceSSID())
    return std::tuple(SIAtomicScope::SINGLETHREAD,
                      SIAtomicAddrSpace::ATOMIC & InstrAddrSpace, false);
  return std::nullopt;
}
7030b57cec5SDimitry Andric
toSIAtomicAddrSpace(unsigned AS) const7040b57cec5SDimitry Andric SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const {
7050b57cec5SDimitry Andric if (AS == AMDGPUAS::FLAT_ADDRESS)
7060b57cec5SDimitry Andric return SIAtomicAddrSpace::FLAT;
7070b57cec5SDimitry Andric if (AS == AMDGPUAS::GLOBAL_ADDRESS)
7080b57cec5SDimitry Andric return SIAtomicAddrSpace::GLOBAL;
7090b57cec5SDimitry Andric if (AS == AMDGPUAS::LOCAL_ADDRESS)
7100b57cec5SDimitry Andric return SIAtomicAddrSpace::LDS;
7110b57cec5SDimitry Andric if (AS == AMDGPUAS::PRIVATE_ADDRESS)
7120b57cec5SDimitry Andric return SIAtomicAddrSpace::SCRATCH;
7130b57cec5SDimitry Andric if (AS == AMDGPUAS::REGION_ADDRESS)
7140b57cec5SDimitry Andric return SIAtomicAddrSpace::GDS;
7150b57cec5SDimitry Andric
7160b57cec5SDimitry Andric return SIAtomicAddrSpace::OTHER;
7170b57cec5SDimitry Andric }
7180b57cec5SDimitry Andric
// Cache the AMDGPU machine-module info for sync-scope queries.
SIMemOpAccess::SIMemOpAccess(MachineFunction &MF) {
  MMI = &MF.getMMI().getObjFileInfo<AMDGPUMachineModuleInfo>();
}
7220b57cec5SDimitry Andric
/// Build a SIMemOpInfo by merging all memory operands of \p MI: orderings are
/// merged to the strongest, address spaces are unioned, and the sync scope is
/// widened to the most inclusive one seen. Emits an "unsupported" diagnostic
/// and returns std::nullopt if the scopes are not mutually inclusive or the
/// resulting atomic address space is unsupported.
std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
    const MachineBasicBlock::iterator &MI) const {
  assert(MI->getNumMemOperands() > 0);

  SyncScope::ID SSID = SyncScope::SingleThread;
  AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
  AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
  SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
  // Starts true and is AND-folded: nontemporal only if every MMO is.
  bool IsNonTemporal = true;
  // Starts false and is OR-folded: volatile if any MMO is.
  bool IsVolatile = false;

  // Validator should check whether or not MMOs cover the entire set of
  // locations accessed by the memory instruction.
  for (const auto &MMO : MI->memoperands()) {
    IsNonTemporal &= MMO->isNonTemporal();
    IsVolatile |= MMO->isVolatile();
    InstrAddrSpace |=
      toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
    AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
    if (OpOrdering != AtomicOrdering::NotAtomic) {
      const auto &IsSyncScopeInclusion =
          MMI->isSyncScopeInclusion(SSID, MMO->getSyncScopeID());
      if (!IsSyncScopeInclusion) {
        reportUnsupported(MI,
          "Unsupported non-inclusive atomic synchronization scope");
        return std::nullopt;
      }

      // Keep the wider (more inclusive) of the two scopes.
      SSID = *IsSyncScopeInclusion ? SSID : MMO->getSyncScopeID();
      Ordering = getMergedAtomicOrdering(Ordering, OpOrdering);
      assert(MMO->getFailureOrdering() != AtomicOrdering::Release &&
             MMO->getFailureOrdering() != AtomicOrdering::AcquireRelease);
      FailureOrdering =
          getMergedAtomicOrdering(FailureOrdering, MMO->getFailureOrdering());
    }
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  if (Ordering != AtomicOrdering::NotAtomic) {
    auto ScopeOrNone = toSIAtomicScope(SSID, InstrAddrSpace);
    if (!ScopeOrNone) {
      reportUnsupported(MI, "Unsupported atomic synchronization scope");
      return std::nullopt;
    }
    std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
        *ScopeOrNone;
    // The ordering spaces must be a non-empty subset of the atomic spaces,
    // and the instruction must access at least one atomic address space.
    if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
        ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace) ||
        ((InstrAddrSpace & SIAtomicAddrSpace::ATOMIC) == SIAtomicAddrSpace::NONE)) {
      reportUnsupported(MI, "Unsupported atomic address space");
      return std::nullopt;
    }
  }
  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
                     IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
                     IsNonTemporal);
}
7820b57cec5SDimitry Andric
783bdd1243dSDimitry Andric std::optional<SIMemOpInfo>
getLoadInfo(const MachineBasicBlock::iterator & MI) const784bdd1243dSDimitry Andric SIMemOpAccess::getLoadInfo(const MachineBasicBlock::iterator &MI) const {
7850b57cec5SDimitry Andric assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
7860b57cec5SDimitry Andric
7870b57cec5SDimitry Andric if (!(MI->mayLoad() && !MI->mayStore()))
788bdd1243dSDimitry Andric return std::nullopt;
7890b57cec5SDimitry Andric
7900b57cec5SDimitry Andric // Be conservative if there are no memory operands.
7910b57cec5SDimitry Andric if (MI->getNumMemOperands() == 0)
7920b57cec5SDimitry Andric return SIMemOpInfo();
7930b57cec5SDimitry Andric
7940b57cec5SDimitry Andric return constructFromMIWithMMO(MI);
7950b57cec5SDimitry Andric }
7960b57cec5SDimitry Andric
797bdd1243dSDimitry Andric std::optional<SIMemOpInfo>
getStoreInfo(const MachineBasicBlock::iterator & MI) const798bdd1243dSDimitry Andric SIMemOpAccess::getStoreInfo(const MachineBasicBlock::iterator &MI) const {
7990b57cec5SDimitry Andric assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
8000b57cec5SDimitry Andric
8010b57cec5SDimitry Andric if (!(!MI->mayLoad() && MI->mayStore()))
802bdd1243dSDimitry Andric return std::nullopt;
8030b57cec5SDimitry Andric
8040b57cec5SDimitry Andric // Be conservative if there are no memory operands.
8050b57cec5SDimitry Andric if (MI->getNumMemOperands() == 0)
8060b57cec5SDimitry Andric return SIMemOpInfo();
8070b57cec5SDimitry Andric
8080b57cec5SDimitry Andric return constructFromMIWithMMO(MI);
8090b57cec5SDimitry Andric }
8100b57cec5SDimitry Andric
/// Return the memory-op info for \p MI if it is an ATOMIC_FENCE pseudo,
/// decoding the ordering and sync-scope immediates; std::nullopt otherwise or
/// on an unsupported scope/address space (after emitting a diagnostic).
std::optional<SIMemOpInfo>
SIMemOpAccess::getAtomicFenceInfo(const MachineBasicBlock::iterator &MI) const {
  assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);

  if (MI->getOpcode() != AMDGPU::ATOMIC_FENCE)
    return std::nullopt;

  // Operand 0 carries the atomic ordering, operand 1 the sync scope.
  AtomicOrdering Ordering =
    static_cast<AtomicOrdering>(MI->getOperand(0).getImm());

  SyncScope::ID SSID = static_cast<SyncScope::ID>(MI->getOperand(1).getImm());
  auto ScopeOrNone = toSIAtomicScope(SSID, SIAtomicAddrSpace::ATOMIC);
  if (!ScopeOrNone) {
    reportUnsupported(MI, "Unsupported atomic synchronization scope");
    return std::nullopt;
  }

  SIAtomicScope Scope = SIAtomicScope::NONE;
  SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
  bool IsCrossAddressSpaceOrdering = false;
  std::tie(Scope, OrderingAddrSpace, IsCrossAddressSpaceOrdering) =
    *ScopeOrNone;

  // The ordering spaces must be a non-empty subset of the atomic spaces.
  if ((OrderingAddrSpace == SIAtomicAddrSpace::NONE) ||
      ((OrderingAddrSpace & SIAtomicAddrSpace::ATOMIC) != OrderingAddrSpace)) {
    reportUnsupported(MI, "Unsupported atomic address space");
    return std::nullopt;
  }

  return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, SIAtomicAddrSpace::ATOMIC,
                     IsCrossAddressSpaceOrdering, AtomicOrdering::NotAtomic);
}
8430b57cec5SDimitry Andric
getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator & MI) const844bdd1243dSDimitry Andric std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo(
8450b57cec5SDimitry Andric const MachineBasicBlock::iterator &MI) const {
8460b57cec5SDimitry Andric assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic);
8470b57cec5SDimitry Andric
8480b57cec5SDimitry Andric if (!(MI->mayLoad() && MI->mayStore()))
849bdd1243dSDimitry Andric return std::nullopt;
8500b57cec5SDimitry Andric
8510b57cec5SDimitry Andric // Be conservative if there are no memory operands.
8520b57cec5SDimitry Andric if (MI->getNumMemOperands() == 0)
8530b57cec5SDimitry Andric return SIMemOpInfo();
8540b57cec5SDimitry Andric
8550b57cec5SDimitry Andric return constructFromMIWithMMO(MI);
8560b57cec5SDimitry Andric }
8570b57cec5SDimitry Andric
// Cache the instruction info and ISA version; honor the command-line option
// that suppresses cache-invalidation instructions (for testing).
SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) {
  TII = ST.getInstrInfo();
  IV = getIsaVersion(ST.getCPU());
  InsertCacheInv = !AmdgcnSkipCacheInvalidations;
}
8630b57cec5SDimitry Andric
enableNamedBit(const MachineBasicBlock::iterator MI,AMDGPU::CPol::CPol Bit) const864fe6060f1SDimitry Andric bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
865fe6060f1SDimitry Andric AMDGPU::CPol::CPol Bit) const {
866fe6060f1SDimitry Andric MachineOperand *CPol = TII->getNamedOperand(*MI, AMDGPU::OpName::cpol);
867fe6060f1SDimitry Andric if (!CPol)
868fe6060f1SDimitry Andric return false;
869fe6060f1SDimitry Andric
870fe6060f1SDimitry Andric CPol->setImm(CPol->getImm() | Bit);
871fe6060f1SDimitry Andric return true;
872fe6060f1SDimitry Andric }
873fe6060f1SDimitry Andric
/* static */
/// Factory: pick the cache-control implementation matching subtarget \p ST.
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
  GCNSubtarget::Generation Generation = ST.getGeneration();
  // Feature-based checks must come before the generation-based dispatch:
  // GFX940 derives from GFX90A (see SIGfx940CacheControl above), and both
  // would otherwise fall into one of the generation buckets below.
  if (ST.hasGFX940Insts())
    return std::make_unique<SIGfx940CacheControl>(ST);
  if (ST.hasGFX90AInsts())
    return std::make_unique<SIGfx90ACacheControl>(ST);
  if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
    return std::make_unique<SIGfx6CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX10)
    return std::make_unique<SIGfx7CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX11)
    return std::make_unique<SIGfx10CacheControl>(ST);
  if (Generation < AMDGPUSubtarget::GFX12)
    return std::make_unique<SIGfx11CacheControl>(ST);
  return std::make_unique<SIGfx12CacheControl>(ST);
}
8910b57cec5SDimitry Andric
/// GFX6: make an atomic load bypass the L1 cache when its scope requires
/// visibility beyond the workgroup. \returns true if \p MI was modified.
bool SIGfx6CacheControl::enableLoadCacheBypass(
    const MachineBasicBlock::iterator &MI,
    SIAtomicScope Scope,
    SIAtomicAddrSpace AddrSpace) const {
  assert(MI->mayLoad() && !MI->mayStore());
  bool Changed = false;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Set L1 cache policy to MISS_EVICT.
      // Note: there is no L2 cache bypass policy at the ISA level.
      Changed |= enableGLCBit(MI);
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to bypass.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory caches
  /// to be bypassed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  return Changed;
}
9260b57cec5SDimitry Andric
enableStoreCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const927fe6060f1SDimitry Andric bool SIGfx6CacheControl::enableStoreCacheBypass(
928fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
929fe6060f1SDimitry Andric SIAtomicScope Scope,
930fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
931fe6060f1SDimitry Andric assert(!MI->mayLoad() && MI->mayStore());
932fe6060f1SDimitry Andric bool Changed = false;
933fe6060f1SDimitry Andric
934fe6060f1SDimitry Andric /// The L1 cache is write through so does not need to be bypassed. There is no
935fe6060f1SDimitry Andric /// bypass control for the L2 cache at the isa level.
936fe6060f1SDimitry Andric
937fe6060f1SDimitry Andric return Changed;
938fe6060f1SDimitry Andric }
939fe6060f1SDimitry Andric
enableRMWCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const940fe6060f1SDimitry Andric bool SIGfx6CacheControl::enableRMWCacheBypass(
941fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
942fe6060f1SDimitry Andric SIAtomicScope Scope,
943fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
944fe6060f1SDimitry Andric assert(MI->mayLoad() && MI->mayStore());
945fe6060f1SDimitry Andric bool Changed = false;
946fe6060f1SDimitry Andric
9474824e7fdSDimitry Andric /// Do not set GLC for RMW atomic operations as L0/L1 cache is automatically
9484824e7fdSDimitry Andric /// bypassed, and the GLC bit is instead used to indicate if they are
9494824e7fdSDimitry Andric /// return or no-return.
9504824e7fdSDimitry Andric /// Note: there is no L2 cache coherent bypass control at the ISA level.
951fe6060f1SDimitry Andric
952fe6060f1SDimitry Andric return Changed;
953fe6060f1SDimitry Andric }
954fe6060f1SDimitry Andric
/// GFX6: apply cache policy for volatile and/or nontemporal loads and stores,
/// and for volatile accesses insert a system-scope wait so the operation is
/// globally visible. \returns true if \p MI or the instruction stream changed.
bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal) const {
  // Only handle load and store, not atomic read-modify-write insructions. The
  // latter use glc to indicate if the atomic returns a result and so must not
  // be used for cache control.
  assert(MI->mayLoad() ^ MI->mayStore());

  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions. The latter are always marked as volatile so cannot sensibly
  // handle it as do not want to pessimize all atomics. Also they do not support
  // the nontemporal attribute.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set L1 cache policy to be MISS_EVICT for load instructions
    // and MISS_LRU for store instructions.
    // Note: there is no L2 cache bypass policy at the ISA level.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure operation has completed at system scope to cause all volatile
    // operations to be visible outside the program in a global order. Do not
    // request cross address space as only the global address space can be
    // observable outside the program, so no need to cause a waitcnt for LDS
    // address space operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);

    return Changed;
  }

  if (IsNonTemporal) {
    // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
    // for both loads and stores, and the L2 cache policy to STREAM.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
999e8d8bef9SDimitry Andric
/// Insert an S_WAITCNT relative to \p MI (before it, or after it when \p Pos
/// is Position::AFTER) that waits for outstanding memory operations to the
/// address spaces in \p AddrSpace to complete, as required by synchronization
/// scope \p Scope.
///
/// \p Op is not consulted by this GFX6 implementation: the vmcnt/lgkmcnt
/// counters cover loads and stores alike. \p IsCrossAddrSpaceOrdering
/// requests ordering between LDS/GDS and global memory, which forces an
/// lgkmcnt(0) wait. Returns true if an instruction was inserted.
bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                    SIAtomicScope Scope,
                                    SIAtomicAddrSpace AddrSpace,
                                    SIMemOp Op,
                                    bool IsCrossAddrSpaceOrdering,
                                    Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Temporarily step past MI so BuildMI inserts after it; the iterator is
  // restored before returning.
  if (Pos == Position::AFTER)
    ++MI;

  // Which hardware counters must be drained to zero.
  bool VMCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      VMCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L1 cache keeps all memory operations in order for
      // wavefronts in the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then an GDS "S_WAITCNT lgkmcnt(0)"
      // is not needed as GDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/LDS memory as GDS operations could be
      // reordered with respect to later global/LDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for
      // the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (VMCnt || LGKMCnt) {
    // A counter that must be waited on encodes as 0; one that need not be
    // waited on encodes as its all-ones bit mask (i.e. "no wait"). expcnt is
    // always encoded as "no wait".
    unsigned WaitCntImmediate =
      AMDGPU::encodeWaitcnt(IV,
                            VMCnt ? 0 : getVmcntBitMask(IV),
                            getExpcntBitMask(IV),
                            LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    // NOTE(review): the "_soft" pseudo presumably lets a later waitcnt pass
    // merge, strengthen, or drop this wait -- confirm against
    // SIInsertWaitcnts.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
1097e8d8bef9SDimitry Andric
/// Insert the cache invalidation implementing an acquire operation at
/// \p Scope over the address spaces \p AddrSpace, positioned before or after
/// \p MI according to \p Pos. Returns true if an instruction was inserted.
bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  // Nothing to do when cache-invalidation insertion is disabled (presumably
  // via the amdgcn-skip-cache-invalidations option -- confirm where
  // InsertCacheInv is initialized).
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Temporarily step past MI so BuildMI inserts after it; restored below.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Invalidate the L1 so following loads do not observe stale data.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBINVL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
11420b57cec5SDimitry Andric
insertRelease(MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace,bool IsCrossAddrSpaceOrdering,Position Pos) const1143e8d8bef9SDimitry Andric bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
11440b57cec5SDimitry Andric SIAtomicScope Scope,
11450b57cec5SDimitry Andric SIAtomicAddrSpace AddrSpace,
11460b57cec5SDimitry Andric bool IsCrossAddrSpaceOrdering,
11470b57cec5SDimitry Andric Position Pos) const {
1148e8d8bef9SDimitry Andric return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
1149e8d8bef9SDimitry Andric IsCrossAddrSpaceOrdering, Pos);
11500b57cec5SDimitry Andric }
11510b57cec5SDimitry Andric
/// Insert the cache invalidation implementing an acquire operation at
/// \p Scope over the address spaces \p AddrSpace, positioned before or after
/// \p MI according to \p Pos. Returns true if an instruction was inserted.
bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                       SIAtomicScope Scope,
                                       SIAtomicAddrSpace AddrSpace,
                                       Position Pos) const {
  // Nothing to do when cache-invalidation insertion is disabled.
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  const GCNSubtarget &STM = MBB.getParent()->getSubtarget<GCNSubtarget>();

  // Graphics OSes (PAL, Mesa) use BUFFER_WBINVL1; other targets use the
  // volatile variant. NOTE(review): reason for the OS split is not visible
  // here -- confirm against the subtarget documentation.
  const unsigned InvalidateL1 = STM.isAmdPalOS() || STM.isMesa3DOS()
                                    ? AMDGPU::BUFFER_WBINVL1
                                    : AMDGPU::BUFFER_WBINVL1_VOL;

  // Temporarily step past MI so BuildMI inserts after it; restored below.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(InvalidateL1));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
12020b57cec5SDimitry Andric
enableLoadCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const1203fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableLoadCacheBypass(
1204fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
1205fe6060f1SDimitry Andric SIAtomicScope Scope,
1206fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
1207fe6060f1SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
1208fe6060f1SDimitry Andric bool Changed = false;
1209fe6060f1SDimitry Andric
1210fe6060f1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1211fe6060f1SDimitry Andric switch (Scope) {
1212fe6060f1SDimitry Andric case SIAtomicScope::SYSTEM:
1213fe6060f1SDimitry Andric case SIAtomicScope::AGENT:
12144824e7fdSDimitry Andric // Set the L1 cache policy to MISS_LRU.
12154824e7fdSDimitry Andric // Note: there is no L2 cache bypass policy at the ISA level.
1216fe6060f1SDimitry Andric Changed |= enableGLCBit(MI);
1217fe6060f1SDimitry Andric break;
1218fe6060f1SDimitry Andric case SIAtomicScope::WORKGROUP:
1219fe6060f1SDimitry Andric // In threadgroup split mode the waves of a work-group can be executing on
1220fe6060f1SDimitry Andric // different CUs. Therefore need to bypass the L1 which is per CU.
1221fe6060f1SDimitry Andric // Otherwise in non-threadgroup split mode all waves of a work-group are
1222fe6060f1SDimitry Andric // on the same CU, and so the L1 does not need to be bypassed.
1223349cc55cSDimitry Andric if (ST.isTgSplitEnabled())
1224349cc55cSDimitry Andric Changed |= enableGLCBit(MI);
1225fe6060f1SDimitry Andric break;
1226fe6060f1SDimitry Andric case SIAtomicScope::WAVEFRONT:
1227fe6060f1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1228fe6060f1SDimitry Andric // No cache to bypass.
1229fe6060f1SDimitry Andric break;
1230fe6060f1SDimitry Andric default:
1231fe6060f1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1232fe6060f1SDimitry Andric }
1233fe6060f1SDimitry Andric }
1234fe6060f1SDimitry Andric
1235fe6060f1SDimitry Andric /// The scratch address space does not need the global memory caches
1236fe6060f1SDimitry Andric /// to be bypassed as all memory operations by the same thread are
1237fe6060f1SDimitry Andric /// sequentially consistent, and no other thread can access scratch
1238fe6060f1SDimitry Andric /// memory.
1239fe6060f1SDimitry Andric
1240fe6060f1SDimitry Andric /// Other address spaces do not have a cache.
1241fe6060f1SDimitry Andric
1242fe6060f1SDimitry Andric return Changed;
1243fe6060f1SDimitry Andric }
1244fe6060f1SDimitry Andric
enableStoreCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const1245fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableStoreCacheBypass(
1246fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
1247fe6060f1SDimitry Andric SIAtomicScope Scope,
1248fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
1249fe6060f1SDimitry Andric assert(!MI->mayLoad() && MI->mayStore());
1250fe6060f1SDimitry Andric bool Changed = false;
1251fe6060f1SDimitry Andric
1252fe6060f1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1253fe6060f1SDimitry Andric switch (Scope) {
1254fe6060f1SDimitry Andric case SIAtomicScope::SYSTEM:
1255fe6060f1SDimitry Andric case SIAtomicScope::AGENT:
1256fe6060f1SDimitry Andric /// Do not set glc for store atomic operations as they implicitly write
1257fe6060f1SDimitry Andric /// through the L1 cache.
1258fe6060f1SDimitry Andric break;
1259fe6060f1SDimitry Andric case SIAtomicScope::WORKGROUP:
1260fe6060f1SDimitry Andric case SIAtomicScope::WAVEFRONT:
1261fe6060f1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1262fe6060f1SDimitry Andric // No cache to bypass. Store atomics implicitly write through the L1
1263fe6060f1SDimitry Andric // cache.
1264fe6060f1SDimitry Andric break;
1265fe6060f1SDimitry Andric default:
1266fe6060f1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1267fe6060f1SDimitry Andric }
1268fe6060f1SDimitry Andric }
1269fe6060f1SDimitry Andric
1270fe6060f1SDimitry Andric /// The scratch address space does not need the global memory caches
1271fe6060f1SDimitry Andric /// to be bypassed as all memory operations by the same thread are
1272fe6060f1SDimitry Andric /// sequentially consistent, and no other thread can access scratch
1273fe6060f1SDimitry Andric /// memory.
1274fe6060f1SDimitry Andric
1275fe6060f1SDimitry Andric /// Other address spaces do not have a cache.
1276fe6060f1SDimitry Andric
1277fe6060f1SDimitry Andric return Changed;
1278fe6060f1SDimitry Andric }
1279fe6060f1SDimitry Andric
enableRMWCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const1280fe6060f1SDimitry Andric bool SIGfx90ACacheControl::enableRMWCacheBypass(
1281fe6060f1SDimitry Andric const MachineBasicBlock::iterator &MI,
1282fe6060f1SDimitry Andric SIAtomicScope Scope,
1283fe6060f1SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
1284fe6060f1SDimitry Andric assert(MI->mayLoad() && MI->mayStore());
1285fe6060f1SDimitry Andric bool Changed = false;
1286fe6060f1SDimitry Andric
1287fe6060f1SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1288fe6060f1SDimitry Andric switch (Scope) {
1289fe6060f1SDimitry Andric case SIAtomicScope::SYSTEM:
1290fe6060f1SDimitry Andric case SIAtomicScope::AGENT:
1291fe6060f1SDimitry Andric /// Do not set glc for RMW atomic operations as they implicitly bypass
1292fe6060f1SDimitry Andric /// the L1 cache, and the glc bit is instead used to indicate if they are
1293fe6060f1SDimitry Andric /// return or no-return.
1294fe6060f1SDimitry Andric break;
1295fe6060f1SDimitry Andric case SIAtomicScope::WORKGROUP:
1296fe6060f1SDimitry Andric case SIAtomicScope::WAVEFRONT:
1297fe6060f1SDimitry Andric case SIAtomicScope::SINGLETHREAD:
1298fe6060f1SDimitry Andric // No cache to bypass. RMW atomics implicitly bypass the L1 cache.
1299fe6060f1SDimitry Andric break;
1300fe6060f1SDimitry Andric default:
1301fe6060f1SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
1302fe6060f1SDimitry Andric }
1303fe6060f1SDimitry Andric }
1304fe6060f1SDimitry Andric
1305fe6060f1SDimitry Andric return Changed;
1306fe6060f1SDimitry Andric }
1307fe6060f1SDimitry Andric
/// Apply the cache policy required for a volatile and/or nontemporal load or
/// store \p MI on GFX90A. Volatile loads bypass the L1 and every volatile
/// access is followed by a system-scope wait so it becomes visible in a
/// global order; nontemporal accesses select the streaming cache policies.
/// Returns true if the instruction was modified or a wait was inserted.
bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal) const {
  // Only handle load and store, not atomic read-modify-write instructions. The
  // latter use glc to indicate if the atomic returns a result and so must not
  // be used for cache control.
  assert(MI->mayLoad() ^ MI->mayStore());

  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions. The latter are always marked as volatile so cannot sensibly
  // handle it as do not want to pessimize all atomics. Also they do not support
  // the nontemporal attribute.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set L1 cache policy to be MISS_EVICT for load instructions
    // and MISS_LRU for store instructions.
    // Note: there is no L2 cache bypass policy at the ISA level.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Ensure operation has completed at system scope to cause all volatile
    // operations to be visible outside the program in a global order. Do not
    // request cross address space as only the global address space can be
    // observable outside the program, so no need to cause a waitcnt for LDS
    // address space operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);

    // Volatile takes precedence: nontemporal policy is not also applied.
    return Changed;
  }

  if (IsNonTemporal) {
    // Setting both GLC and SLC configures L1 cache policy to MISS_EVICT
    // for both loads and stores, and the L2 cache policy to STREAM.
    Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);
    return Changed;
  }

  return Changed;
}
1352fe6060f1SDimitry Andric
/// GFX90A wait insertion. In threadgroup-split (TgSplit) mode, work-group
/// scope waits on global/scratch/GDS are widened to agent scope (waves of a
/// work-group may run on different CUs), and LDS is dropped from
/// \p AddrSpace since it cannot be allocated in that mode. The actual wait
/// emission is delegated to the GFX7 implementation.
bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                      SIAtomicScope Scope,
                                      SIAtomicAddrSpace AddrSpace,
                                      SIMemOp Op,
                                      bool IsCrossAddrSpaceOrdering,
                                      Position Pos) const {
  if (ST.isTgSplitEnabled()) {
    // In threadgroup split mode the waves of a work-group can be executing on
    // different CUs. Therefore need to wait for global or GDS memory operations
    // to complete to ensure they are visible to waves in the other CUs.
    // Otherwise in non-threadgroup split mode all waves of a work-group are on
    // the same CU, so no need to wait for global memory as all waves in the
    // work-group access the same the L1, nor wait for GDS as access are ordered
    // on a CU.
    if (((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH |
                       SIAtomicAddrSpace::GDS)) != SIAtomicAddrSpace::NONE) &&
        (Scope == SIAtomicScope::WORKGROUP)) {
      // Same as GFX7 using agent scope.
      Scope = SIAtomicScope::AGENT;
    }
    // In threadgroup split mode LDS cannot be allocated so no need to wait for
    // LDS memory operations.
    AddrSpace &= ~SIAtomicAddrSpace::LDS;
  }
  return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op,
                                        IsCrossAddrSpaceOrdering, Pos);
}
1380fe6060f1SDimitry Andric
/// Insert the cache invalidations implementing an acquire at \p Scope for
/// GFX90A. System scope additionally invalidates the L2 (BUFFER_INVL2);
/// work-group scope is widened to agent scope in threadgroup-split mode.
/// The L1 invalidation itself is delegated to the GFX7 implementation, which
/// is called last with the (possibly widened) scope. Returns true if any
/// instruction was inserted.
bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  // Nothing to do when cache-invalidation insertion is disabled.
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Temporarily step past MI so BuildMI inserts after it; restored before
  // delegating to the GFX7 implementation so it sees the original position.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Ensures that following loads will not see stale remote VMEM data or
      // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW and
      // CC will never be stale due to the local memory probes.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INVL2));
      // Inserting a "S_WAITCNT vmcnt(0)" after is not required because the
      // hardware does not reorder memory operations by the same wave with
      // respect to a preceding "BUFFER_INVL2". The invalidate is guaranteed to
      // remove any cache lines of earlier writes by the same wave and ensures
      // later reads by the same wave will refetch the cache lines.
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // Same as GFX7.
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing on
      // different CUs. Therefore need to invalidate the L1 which is per CU.
      // Otherwise in non-threadgroup split mode all waves of a work-group are
      // on the same CU, and so the L1 does not need to be invalidated.
      if (ST.isTgSplitEnabled()) {
        // Same as GFX7 using agent scope.
        Scope = SIAtomicScope::AGENT;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Same as GFX7.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  if (Pos == Position::AFTER)
    --MI;

  Changed |= SIGfx7CacheControl::insertAcquire(MI, Scope, AddrSpace, Pos);

  return Changed;
}
1446fe6060f1SDimitry Andric
/// Insert the cache write-backs implementing a release at \p Scope for
/// GFX90A. System scope initiates an L2 writeback (BUFFER_WBL2) before
/// delegating to the GFX7 implementation, whose S_WAITCNT vmcnt(0) also
/// covers completion of the writeback. Returns true if any instruction was
/// inserted.
bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  const DebugLoc &DL = MI->getDebugLoc();

  // Temporarily step past MI so BuildMI inserts after it; restored before
  // delegating so the GFX7 implementation sees the original position.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
      // hardware does not reorder memory operations by the same wave with
      // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is guaranteed
      // to initiate writeback of any dirty cache lines of earlier writes by the
      // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
      // writeback has completed.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
          // Set SC bits to indicate system scope.
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      // Followed by same as GFX7, which will ensure the necessary "S_WAITCNT
      // vmcnt(0)" needed by the "BUFFER_WBL2".
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Same as GFX7.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (Pos == Position::AFTER)
    --MI;

  Changed |=
      SIGfx7CacheControl::insertRelease(MI, Scope, AddrSpace,
                                        IsCrossAddrSpaceOrdering, Pos);

  return Changed;
}
1496fe6060f1SDimitry Andric
enableLoadCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const149781ad6265SDimitry Andric bool SIGfx940CacheControl::enableLoadCacheBypass(
149881ad6265SDimitry Andric const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
149981ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
150081ad6265SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
150181ad6265SDimitry Andric bool Changed = false;
150281ad6265SDimitry Andric
150381ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
150481ad6265SDimitry Andric switch (Scope) {
150581ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
150681ad6265SDimitry Andric // Set SC bits to indicate system scope.
150781ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
150881ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
150981ad6265SDimitry Andric break;
151081ad6265SDimitry Andric case SIAtomicScope::AGENT:
151181ad6265SDimitry Andric // Set SC bits to indicate agent scope.
151281ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
151381ad6265SDimitry Andric break;
151481ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
151581ad6265SDimitry Andric // In threadgroup split mode the waves of a work-group can be executing on
151681ad6265SDimitry Andric // different CUs. Therefore need to bypass the L1 which is per CU.
151781ad6265SDimitry Andric // Otherwise in non-threadgroup split mode all waves of a work-group are
151881ad6265SDimitry Andric // on the same CU, and so the L1 does not need to be bypassed. Setting SC
151981ad6265SDimitry Andric // bits to indicate work-group scope will do this automatically.
152081ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
152181ad6265SDimitry Andric break;
152281ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
152381ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
152481ad6265SDimitry Andric // Leave SC bits unset to indicate wavefront scope.
152581ad6265SDimitry Andric break;
152681ad6265SDimitry Andric default:
152781ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
152881ad6265SDimitry Andric }
152981ad6265SDimitry Andric }
153081ad6265SDimitry Andric
153181ad6265SDimitry Andric /// The scratch address space does not need the global memory caches
153281ad6265SDimitry Andric /// to be bypassed as all memory operations by the same thread are
153381ad6265SDimitry Andric /// sequentially consistent, and no other thread can access scratch
153481ad6265SDimitry Andric /// memory.
153581ad6265SDimitry Andric
153681ad6265SDimitry Andric /// Other address spaces do not have a cache.
153781ad6265SDimitry Andric
153881ad6265SDimitry Andric return Changed;
153981ad6265SDimitry Andric }
154081ad6265SDimitry Andric
enableStoreCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const154181ad6265SDimitry Andric bool SIGfx940CacheControl::enableStoreCacheBypass(
154281ad6265SDimitry Andric const MachineBasicBlock::iterator &MI,
154381ad6265SDimitry Andric SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const {
154481ad6265SDimitry Andric assert(!MI->mayLoad() && MI->mayStore());
154581ad6265SDimitry Andric bool Changed = false;
154681ad6265SDimitry Andric
154781ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
154881ad6265SDimitry Andric switch (Scope) {
154981ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
155081ad6265SDimitry Andric // Set SC bits to indicate system scope.
155181ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
155281ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
155381ad6265SDimitry Andric break;
155481ad6265SDimitry Andric case SIAtomicScope::AGENT:
155581ad6265SDimitry Andric // Set SC bits to indicate agent scope.
155681ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
155781ad6265SDimitry Andric break;
155881ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
155981ad6265SDimitry Andric // Set SC bits to indicate workgroup scope.
156081ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
156181ad6265SDimitry Andric break;
156281ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
156381ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
156481ad6265SDimitry Andric // Leave SC bits unset to indicate wavefront scope.
156581ad6265SDimitry Andric break;
156681ad6265SDimitry Andric default:
156781ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
156881ad6265SDimitry Andric }
156981ad6265SDimitry Andric }
157081ad6265SDimitry Andric
157181ad6265SDimitry Andric /// The scratch address space does not need the global memory caches
157281ad6265SDimitry Andric /// to be bypassed as all memory operations by the same thread are
157381ad6265SDimitry Andric /// sequentially consistent, and no other thread can access scratch
157481ad6265SDimitry Andric /// memory.
157581ad6265SDimitry Andric
157681ad6265SDimitry Andric /// Other address spaces do not have a cache.
157781ad6265SDimitry Andric
157881ad6265SDimitry Andric return Changed;
157981ad6265SDimitry Andric }
158081ad6265SDimitry Andric
enableRMWCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const158181ad6265SDimitry Andric bool SIGfx940CacheControl::enableRMWCacheBypass(
158281ad6265SDimitry Andric const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
158381ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
158481ad6265SDimitry Andric assert(MI->mayLoad() && MI->mayStore());
158581ad6265SDimitry Andric bool Changed = false;
158681ad6265SDimitry Andric
158781ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
158881ad6265SDimitry Andric switch (Scope) {
158981ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
159081ad6265SDimitry Andric // Set SC1 bit to indicate system scope.
159181ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
159281ad6265SDimitry Andric break;
159381ad6265SDimitry Andric case SIAtomicScope::AGENT:
159481ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
159581ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
159681ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
159781ad6265SDimitry Andric // RMW atomic operations implicitly bypass the L1 cache and only use SC1
159881ad6265SDimitry Andric // to indicate system or agent scope. The SC0 bit is used to indicate if
159981ad6265SDimitry Andric // they are return or no-return. Leave SC1 bit unset to indicate agent
160081ad6265SDimitry Andric // scope.
160181ad6265SDimitry Andric break;
160281ad6265SDimitry Andric default:
160381ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
160481ad6265SDimitry Andric }
160581ad6265SDimitry Andric }
160681ad6265SDimitry Andric
160781ad6265SDimitry Andric return Changed;
160881ad6265SDimitry Andric }
160981ad6265SDimitry Andric
enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator & MI,SIAtomicAddrSpace AddrSpace,SIMemOp Op,bool IsVolatile,bool IsNonTemporal) const161081ad6265SDimitry Andric bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
161181ad6265SDimitry Andric MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
161281ad6265SDimitry Andric bool IsVolatile, bool IsNonTemporal) const {
161381ad6265SDimitry Andric // Only handle load and store, not atomic read-modify-write insructions. The
161481ad6265SDimitry Andric // latter use glc to indicate if the atomic returns a result and so must not
161581ad6265SDimitry Andric // be used for cache control.
161681ad6265SDimitry Andric assert(MI->mayLoad() ^ MI->mayStore());
161781ad6265SDimitry Andric
161881ad6265SDimitry Andric // Only update load and store, not LLVM IR atomic read-modify-write
161981ad6265SDimitry Andric // instructions. The latter are always marked as volatile so cannot sensibly
162081ad6265SDimitry Andric // handle it as do not want to pessimize all atomics. Also they do not support
162181ad6265SDimitry Andric // the nontemporal attribute.
162281ad6265SDimitry Andric assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
162381ad6265SDimitry Andric
162481ad6265SDimitry Andric bool Changed = false;
162581ad6265SDimitry Andric
162681ad6265SDimitry Andric if (IsVolatile) {
162781ad6265SDimitry Andric // Set SC bits to indicate system scope.
162881ad6265SDimitry Andric Changed |= enableSC0Bit(MI);
162981ad6265SDimitry Andric Changed |= enableSC1Bit(MI);
163081ad6265SDimitry Andric
163181ad6265SDimitry Andric // Ensure operation has completed at system scope to cause all volatile
163281ad6265SDimitry Andric // operations to be visible outside the program in a global order. Do not
163381ad6265SDimitry Andric // request cross address space as only the global address space can be
163481ad6265SDimitry Andric // observable outside the program, so no need to cause a waitcnt for LDS
163581ad6265SDimitry Andric // address space operations.
163681ad6265SDimitry Andric Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
163781ad6265SDimitry Andric Position::AFTER);
163881ad6265SDimitry Andric
163981ad6265SDimitry Andric return Changed;
164081ad6265SDimitry Andric }
164181ad6265SDimitry Andric
164281ad6265SDimitry Andric if (IsNonTemporal) {
164381ad6265SDimitry Andric Changed |= enableNTBit(MI);
164481ad6265SDimitry Andric return Changed;
164581ad6265SDimitry Andric }
164681ad6265SDimitry Andric
164781ad6265SDimitry Andric return Changed;
164881ad6265SDimitry Andric }
164981ad6265SDimitry Andric
/// Insert the cache invalidation required by an acquire operation on GFX940.
/// Emits a BUFFER_INV with SC bits selecting the given scope, before MI (or
/// after it when Pos is Position::AFTER). Returns true if any instruction
/// was inserted. No-op when cache invalidations are disabled.
bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // BuildMI inserts before the iterator, so step past MI to insert after it;
  // the iterator is restored before returning.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Ensures that following loads will not see stale remote VMEM data or
      // stale local VMEM data with MTYPE NC. Local VMEM data with MTYPE RW and
      // CC will never be stale due to the local memory probes.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
          // Set SC bits to indicate system scope.
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      // Inserting a "S_WAITCNT vmcnt(0)" after is not required because the
      // hardware does not reorder memory operations by the same wave with
      // respect to a preceding "BUFFER_INV". The invalidate is guaranteed to
      // remove any cache lines of earlier writes by the same wave and ensures
      // later reads by the same wave will refetch the cache lines.
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      // Ensures that following loads will not see stale remote data or local
      // MTYPE NC global data. Local MTYPE RW and CC memory will never be stale
      // due to the memory probes.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
          // Set SC bits to indicate agent scope.
          .addImm(AMDGPU::CPol::SC1);
      // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
      // does not reorder memory operations with respect to a preceding buffer
      // invalidate. The invalidate is guaranteed to remove any cache lines of
      // earlier writes and ensures later writes will refetch the cache lines.
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In threadgroup split mode the waves of a work-group can be executing on
      // different CUs. Therefore need to invalidate the L1 which is per CU.
      // Otherwise in non-threadgroup split mode all waves of a work-group are
      // on the same CU, and so the L1 does not need to be invalidated.
      if (ST.isTgSplitEnabled()) {
        // Ensures L1 is invalidated if in threadgroup split mode. In
        // non-threadgroup split mode it is a NOP, but no point generating it in
        // that case if know not in that mode.
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_INV))
            // Set SC bits to indicate work-group scope.
            .addImm(AMDGPU::CPol::SC0);
        // Inserting "S_WAITCNT vmcnt(0)" is not required because the hardware
        // does not reorder memory operations with respect to a preceding buffer
        // invalidate. The invalidate is guaranteed to remove any cache lines of
        // earlier writes and ensures later writes will refetch the cache lines.
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Could generate "BUFFER_INV" but it would do nothing as there are no
      // caches to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  // Restore the caller's iterator position.
  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
173581ad6265SDimitry Andric
/// Insert the cache writeback and waits required by a release operation on
/// GFX940. Emits a BUFFER_WBL2 for system/agent scope and then delegates to
/// insertWait, which also produces the S_WAITCNT the writeback needs.
/// Returns true if any instruction was inserted.
bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
                                         SIAtomicScope Scope,
                                         SIAtomicAddrSpace AddrSpace,
                                         bool IsCrossAddrSpaceOrdering,
                                         Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // BuildMI inserts before the iterator, so step past MI to insert after it;
  // the iterator is restored before the final insertWait.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
      // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
      // hardware does not reorder memory operations by the same wave with
      // respect to a following "BUFFER_WBL2". The "BUFFER_WBL2" is guaranteed
      // to initiate writeback of any dirty cache lines of earlier writes by the
      // same wave. A "S_WAITCNT vmcnt(0)" is needed after to ensure the
      // writeback has completed.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
          // Set SC bits to indicate system scope.
          .addImm(AMDGPU::CPol::SC0 | AMDGPU::CPol::SC1);
      // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is
      // SIAtomicScope::SYSTEM, the following insertWait will generate the
      // required "S_WAITCNT vmcnt(0)" needed by the "BUFFER_WBL2".
      Changed = true;
      break;
    case SIAtomicScope::AGENT:
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_WBL2))
          // Set SC bits to indicate agent scope.
          .addImm(AMDGPU::CPol::SC1);

      // Since AddrSpace contains SIAtomicAddrSpace::GLOBAL and Scope is
      // SIAtomicScope::AGENT, the following insertWait will generate the
      // required "S_WAITCNT vmcnt(0)".
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // Do not generate "BUFFER_WBL2" as there are no caches it would
      // writeback, and would require an otherwise unnecessary
      // "S_WAITCNT vmcnt(0)".
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // Restore the caller's iterator position before delegating to insertWait,
  // which performs its own Position handling.
  if (Pos == Position::AFTER)
    --MI;

  // Ensure the necessary S_WAITCNT needed by any "BUFFER_WBL2" as well as other
  // S_WAITCNT needed.
  Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE,
                        IsCrossAddrSpaceOrdering, Pos);

  return Changed;
}
179881ad6265SDimitry Andric
enableLoadCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const17990b57cec5SDimitry Andric bool SIGfx10CacheControl::enableLoadCacheBypass(
18000b57cec5SDimitry Andric const MachineBasicBlock::iterator &MI,
18010b57cec5SDimitry Andric SIAtomicScope Scope,
18020b57cec5SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
18030b57cec5SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
18040b57cec5SDimitry Andric bool Changed = false;
18050b57cec5SDimitry Andric
18060b57cec5SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
18070b57cec5SDimitry Andric switch (Scope) {
18080b57cec5SDimitry Andric case SIAtomicScope::SYSTEM:
18090b57cec5SDimitry Andric case SIAtomicScope::AGENT:
18104824e7fdSDimitry Andric // Set the L0 and L1 cache policies to MISS_EVICT.
18114824e7fdSDimitry Andric // Note: there is no L2 cache coherent bypass control at the ISA level.
18120b57cec5SDimitry Andric Changed |= enableGLCBit(MI);
18130b57cec5SDimitry Andric Changed |= enableDLCBit(MI);
18140b57cec5SDimitry Andric break;
18150b57cec5SDimitry Andric case SIAtomicScope::WORKGROUP:
18160b57cec5SDimitry Andric // In WGP mode the waves of a work-group can be executing on either CU of
18170b57cec5SDimitry Andric // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
1818e8d8bef9SDimitry Andric // CU mode all waves of a work-group are on the same CU, and so the L0
1819e8d8bef9SDimitry Andric // does not need to be bypassed.
1820349cc55cSDimitry Andric if (!ST.isCuModeEnabled())
1821349cc55cSDimitry Andric Changed |= enableGLCBit(MI);
18220b57cec5SDimitry Andric break;
18230b57cec5SDimitry Andric case SIAtomicScope::WAVEFRONT:
18240b57cec5SDimitry Andric case SIAtomicScope::SINGLETHREAD:
18250b57cec5SDimitry Andric // No cache to bypass.
18260b57cec5SDimitry Andric break;
18270b57cec5SDimitry Andric default:
18280b57cec5SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
18290b57cec5SDimitry Andric }
18300b57cec5SDimitry Andric }
18310b57cec5SDimitry Andric
18320b57cec5SDimitry Andric /// The scratch address space does not need the global memory caches
18330b57cec5SDimitry Andric /// to be bypassed as all memory operations by the same thread are
18340b57cec5SDimitry Andric /// sequentially consistent, and no other thread can access scratch
18350b57cec5SDimitry Andric /// memory.
18360b57cec5SDimitry Andric
1837e8d8bef9SDimitry Andric /// Other address spaces do not have a cache.
18380b57cec5SDimitry Andric
18390b57cec5SDimitry Andric return Changed;
18400b57cec5SDimitry Andric }
18410b57cec5SDimitry Andric
enableVolatileAndOrNonTemporal(MachineBasicBlock::iterator & MI,SIAtomicAddrSpace AddrSpace,SIMemOp Op,bool IsVolatile,bool IsNonTemporal) const1842e8d8bef9SDimitry Andric bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal(
1843e8d8bef9SDimitry Andric MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1844e8d8bef9SDimitry Andric bool IsVolatile, bool IsNonTemporal) const {
1845e8d8bef9SDimitry Andric
1846e8d8bef9SDimitry Andric // Only handle load and store, not atomic read-modify-write insructions. The
1847e8d8bef9SDimitry Andric // latter use glc to indicate if the atomic returns a result and so must not
1848e8d8bef9SDimitry Andric // be used for cache control.
18490b57cec5SDimitry Andric assert(MI->mayLoad() ^ MI->mayStore());
1850e8d8bef9SDimitry Andric
1851e8d8bef9SDimitry Andric // Only update load and store, not LLVM IR atomic read-modify-write
1852e8d8bef9SDimitry Andric // instructions. The latter are always marked as volatile so cannot sensibly
1853e8d8bef9SDimitry Andric // handle it as do not want to pessimize all atomics. Also they do not support
1854e8d8bef9SDimitry Andric // the nontemporal attribute.
1855e8d8bef9SDimitry Andric assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
1856e8d8bef9SDimitry Andric
18570b57cec5SDimitry Andric bool Changed = false;
18580b57cec5SDimitry Andric
1859e8d8bef9SDimitry Andric if (IsVolatile) {
18604824e7fdSDimitry Andric // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
18614824e7fdSDimitry Andric // and MISS_LRU for store instructions.
18624824e7fdSDimitry Andric // Note: there is no L2 cache coherent bypass control at the ISA level.
1863e8d8bef9SDimitry Andric if (Op == SIMemOp::LOAD) {
1864e8d8bef9SDimitry Andric Changed |= enableGLCBit(MI);
1865e8d8bef9SDimitry Andric Changed |= enableDLCBit(MI);
1866e8d8bef9SDimitry Andric }
1867e8d8bef9SDimitry Andric
1868e8d8bef9SDimitry Andric // Ensure operation has completed at system scope to cause all volatile
1869e8d8bef9SDimitry Andric // operations to be visible outside the program in a global order. Do not
1870e8d8bef9SDimitry Andric // request cross address space as only the global address space can be
1871e8d8bef9SDimitry Andric // observable outside the program, so no need to cause a waitcnt for LDS
1872e8d8bef9SDimitry Andric // address space operations.
1873e8d8bef9SDimitry Andric Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
1874e8d8bef9SDimitry Andric Position::AFTER);
18750b57cec5SDimitry Andric return Changed;
18760b57cec5SDimitry Andric }
18770b57cec5SDimitry Andric
1878e8d8bef9SDimitry Andric if (IsNonTemporal) {
18794824e7fdSDimitry Andric // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
18804824e7fdSDimitry Andric // and L2 cache policy to STREAM.
18814824e7fdSDimitry Andric // For stores setting both GLC and SLC configures L0 and L1 cache policy
18824824e7fdSDimitry Andric // to MISS_EVICT and the L2 cache policy to STREAM.
18834824e7fdSDimitry Andric if (Op == SIMemOp::STORE)
18844824e7fdSDimitry Andric Changed |= enableGLCBit(MI);
1885e8d8bef9SDimitry Andric Changed |= enableSLCBit(MI);
18864824e7fdSDimitry Andric
1887e8d8bef9SDimitry Andric return Changed;
18880b57cec5SDimitry Andric }
18890b57cec5SDimitry Andric
18900b57cec5SDimitry Andric return Changed;
18910b57cec5SDimitry Andric }
18920b57cec5SDimitry Andric
/// Insert the S_WAITCNT instructions required on GFX10 to order memory
/// operations on the given address spaces at the given scope. Op selects
/// whether loads (vmcnt), stores (vscnt), or both must be waited on;
/// IsCrossAddrSpaceOrdering additionally requests lgkmcnt/vmcnt waits when
/// LDS/GDS must be ordered against other address spaces. The wait is
/// inserted before MI (or after it when Pos is Position::AFTER). Returns
/// true if any instruction was inserted.
bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace,
                                     SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // BuildMI inserts before the iterator, so step past MI to insert after it;
  // the iterator is restored before returning.
  if (Pos == Position::AFTER)
    ++MI;

  // Each address-space/scope pass below accumulates which counters must be
  // waited on; the instructions are emitted once at the end.
  bool VMCnt = false;
  bool VSCnt = false;
  bool LGKMCnt = false;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        VMCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        VSCnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to wait for operations to complete to ensure
      // they are visible to waves in the other CU as the L0 is per CU.
      // Otherwise in CU mode and all waves of a work-group are on the same CU
      // which shares the same L0.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          VMCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          VSCnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations in order for
      // work-items in the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::GDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // If no cross address space ordering then an GDS "S_WAITCNT lgkmcnt(0)"
      // is not needed as GDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/LDS memory as GDS operations could be
      // reordered with respect to later global/LDS memory operations of the
      // same wave.
      LGKMCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WORKGROUP:
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The GDS keeps all memory operations in order for
      // the same work-group.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  // vmcnt and lgkmcnt share one S_WAITCNT encoding; a counter not being
  // waited on keeps its full bitmask (i.e. "don't wait").
  if (VMCnt || LGKMCnt) {
    unsigned WaitCntImmediate =
      AMDGPU::encodeWaitcnt(IV,
                            VMCnt ? 0 : getVmcntBitMask(IV),
                            getExpcntBitMask(IV),
                            LGKMCnt ? 0 : getLgkmcntBitMask(IV));
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_soft))
        .addImm(WaitCntImmediate);
    Changed = true;
  }

  // vscnt has its own instruction on GFX10.
  if (VSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT_soft))
        .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
        .addImm(0);
    Changed = true;
  }

  // Restore the caller's iterator position.
  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
20130b57cec5SDimitry Andric
/// Insert the cache invalidations required by an acquire operation on GFX10.
/// Emits BUFFER_GL0_INV/BUFFER_GL1_INV before MI (or after it when Pos is
/// Position::AFTER) according to the synchronization scope. Returns true if
/// any instruction was inserted. No-op when cache invalidations are
/// disabled.
bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  if (!InsertCacheInv)
    return false;

  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // BuildMI inserts before the iterator, so step past MI to insert after it;
  // the iterator is restored before returning.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      // Invalidate both the per-CU L0 and the per-WGP L1.
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
      BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL1_INV));
      Changed = true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to invalidate the L0 which is per CU. Otherwise
      // in CU mode and all waves of a work-group are on the same CU, and so the
      // L0 does not need to be invalidated.
      if (!ST.isCuModeEnabled()) {
        BuildMI(MBB, MI, DL, TII->get(AMDGPU::BUFFER_GL0_INV));
        Changed = true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // No cache to invalidate.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.

  // Restore the caller's iterator position.
  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
2068e8d8bef9SDimitry Andric
enableLoadCacheBypass(const MachineBasicBlock::iterator & MI,SIAtomicScope Scope,SIAtomicAddrSpace AddrSpace) const206981ad6265SDimitry Andric bool SIGfx11CacheControl::enableLoadCacheBypass(
207081ad6265SDimitry Andric const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
207181ad6265SDimitry Andric SIAtomicAddrSpace AddrSpace) const {
207281ad6265SDimitry Andric assert(MI->mayLoad() && !MI->mayStore());
207381ad6265SDimitry Andric bool Changed = false;
207481ad6265SDimitry Andric
207581ad6265SDimitry Andric if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
207681ad6265SDimitry Andric switch (Scope) {
207781ad6265SDimitry Andric case SIAtomicScope::SYSTEM:
207881ad6265SDimitry Andric case SIAtomicScope::AGENT:
207981ad6265SDimitry Andric // Set the L0 and L1 cache policies to MISS_EVICT.
208081ad6265SDimitry Andric // Note: there is no L2 cache coherent bypass control at the ISA level.
208181ad6265SDimitry Andric Changed |= enableGLCBit(MI);
208281ad6265SDimitry Andric break;
208381ad6265SDimitry Andric case SIAtomicScope::WORKGROUP:
208481ad6265SDimitry Andric // In WGP mode the waves of a work-group can be executing on either CU of
208581ad6265SDimitry Andric // the WGP. Therefore need to bypass the L0 which is per CU. Otherwise in
208681ad6265SDimitry Andric // CU mode all waves of a work-group are on the same CU, and so the L0
208781ad6265SDimitry Andric // does not need to be bypassed.
208881ad6265SDimitry Andric if (!ST.isCuModeEnabled())
208981ad6265SDimitry Andric Changed |= enableGLCBit(MI);
209081ad6265SDimitry Andric break;
209181ad6265SDimitry Andric case SIAtomicScope::WAVEFRONT:
209281ad6265SDimitry Andric case SIAtomicScope::SINGLETHREAD:
209381ad6265SDimitry Andric // No cache to bypass.
209481ad6265SDimitry Andric break;
209581ad6265SDimitry Andric default:
209681ad6265SDimitry Andric llvm_unreachable("Unsupported synchronization scope");
209781ad6265SDimitry Andric }
209881ad6265SDimitry Andric }
209981ad6265SDimitry Andric
210081ad6265SDimitry Andric /// The scratch address space does not need the global memory caches
210181ad6265SDimitry Andric /// to be bypassed as all memory operations by the same thread are
210281ad6265SDimitry Andric /// sequentially consistent, and no other thread can access scratch
210381ad6265SDimitry Andric /// memory.
210481ad6265SDimitry Andric
210581ad6265SDimitry Andric /// Other address spaces do not have a cache.
210681ad6265SDimitry Andric
210781ad6265SDimitry Andric return Changed;
210881ad6265SDimitry Andric }
210981ad6265SDimitry Andric
bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal) const {

  // Only handle load and store, not atomic read-modify-write instructions. The
  // latter use glc to indicate if the atomic returns a result and so must not
  // be used for cache control.
  assert(MI->mayLoad() ^ MI->mayStore());

  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions. The latter are always marked as volatile so cannot sensibly
  // handle it as do not want to pessimize all atomics. Also they do not support
  // the nontemporal attribute.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  if (IsVolatile) {
    // Set L0 and L1 cache policy to be MISS_EVICT for load instructions
    // and MISS_LRU for store instructions.
    // Note: there is no L2 cache coherent bypass control at the ISA level.
    if (Op == SIMemOp::LOAD)
      Changed |= enableGLCBit(MI);

    // Set MALL NOALLOC for load and store instructions.
    Changed |= enableDLCBit(MI);

    // Ensure operation has completed at system scope to cause all volatile
    // operations to be visible outside the program in a global order. Do not
    // request cross address space as only the global address space can be
    // observable outside the program, so no need to cause a waitcnt for LDS
    // address space operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);
    // Volatile takes precedence; nontemporal handling below is intentionally
    // skipped for volatile accesses.
    return Changed;
  }

  if (IsNonTemporal) {
    // For loads setting SLC configures L0 and L1 cache policy to HIT_EVICT
    // and L2 cache policy to STREAM.
    // For stores setting both GLC and SLC configures L0 and L1 cache policy
    // to MISS_EVICT and the L2 cache policy to STREAM.
    if (Op == SIMemOp::STORE)
      Changed |= enableGLCBit(MI);
    Changed |= enableSLCBit(MI);

    // Set MALL NOALLOC for load and store instructions.
    Changed |= enableDLCBit(MI);
    return Changed;
  }

  return Changed;
}
216381ad6265SDimitry Andric
setTH(const MachineBasicBlock::iterator MI,AMDGPU::CPol::CPol Value) const21647a6dacacSDimitry Andric bool SIGfx12CacheControl::setTH(const MachineBasicBlock::iterator MI,
21657a6dacacSDimitry Andric AMDGPU::CPol::CPol Value) const {
21667a6dacacSDimitry Andric MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
21677a6dacacSDimitry Andric if (!CPol)
21687a6dacacSDimitry Andric return false;
21697a6dacacSDimitry Andric
21707a6dacacSDimitry Andric uint64_t NewTH = Value & AMDGPU::CPol::TH;
21717a6dacacSDimitry Andric if ((CPol->getImm() & AMDGPU::CPol::TH) != NewTH) {
21727a6dacacSDimitry Andric CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::TH) | NewTH);
21737a6dacacSDimitry Andric return true;
21747a6dacacSDimitry Andric }
21757a6dacacSDimitry Andric
21767a6dacacSDimitry Andric return false;
21777a6dacacSDimitry Andric }
21787a6dacacSDimitry Andric
setScope(const MachineBasicBlock::iterator MI,AMDGPU::CPol::CPol Value) const21797a6dacacSDimitry Andric bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
21807a6dacacSDimitry Andric AMDGPU::CPol::CPol Value) const {
21817a6dacacSDimitry Andric MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
21827a6dacacSDimitry Andric if (!CPol)
21837a6dacacSDimitry Andric return false;
21847a6dacacSDimitry Andric
21857a6dacacSDimitry Andric uint64_t NewScope = Value & AMDGPU::CPol::SCOPE;
21867a6dacacSDimitry Andric if ((CPol->getImm() & AMDGPU::CPol::SCOPE) != NewScope) {
21877a6dacacSDimitry Andric CPol->setImm((CPol->getImm() & ~AMDGPU::CPol::SCOPE) | NewScope);
21887a6dacacSDimitry Andric return true;
21897a6dacacSDimitry Andric }
21907a6dacacSDimitry Andric
21917a6dacacSDimitry Andric return false;
21927a6dacacSDimitry Andric }
21937a6dacacSDimitry Andric
bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
                                     SIAtomicScope Scope,
                                     SIAtomicAddrSpace AddrSpace, SIMemOp Op,
                                     bool IsCrossAddrSpaceOrdering,
                                     Position Pos) const {
  bool Changed = false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  // Which GFX12 "soft" wait counters are required for this scope/op
  // combination.
  bool LOADCnt = false;
  bool DSCnt = false;
  bool STORECnt = false;

  // When inserting after MI, temporarily advance the iterator so BuildMI
  // places the waits after the instruction; restored at the end.
  if (Pos == Position::AFTER)
    ++MI;

  if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
      SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
      if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
        LOADCnt |= true;
      if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
        STORECnt |= true;
      break;
    case SIAtomicScope::WORKGROUP:
      // In WGP mode the waves of a work-group can be executing on either CU of
      // the WGP. Therefore need to wait for operations to complete to ensure
      // they are visible to waves in the other CU as the L0 is per CU.
      // Otherwise in CU mode and all waves of a work-group are on the same CU
      // which shares the same L0.
      if (!ST.isCuModeEnabled()) {
        if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
          LOADCnt |= true;
        if ((Op & SIMemOp::STORE) != SIMemOp::NONE)
          STORECnt |= true;
      }
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The L0 cache keeps all memory operations in order for
      // work-items in the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if ((AddrSpace & SIAtomicAddrSpace::LDS) != SIAtomicAddrSpace::NONE) {
    switch (Scope) {
    case SIAtomicScope::SYSTEM:
    case SIAtomicScope::AGENT:
    case SIAtomicScope::WORKGROUP:
      // If no cross address space ordering then an "S_WAITCNT lgkmcnt(0)" is
      // not needed as LDS operations for all waves are executed in a total
      // global ordering as observed by all waves. Required if also
      // synchronizing with global/GDS memory as LDS operations could be
      // reordered with respect to later global/GDS memory operations of the
      // same wave.
      DSCnt |= IsCrossAddrSpaceOrdering;
      break;
    case SIAtomicScope::WAVEFRONT:
    case SIAtomicScope::SINGLETHREAD:
      // The LDS keeps all memory operations in order for
      // the same wavefront.
      break;
    default:
      llvm_unreachable("Unsupported synchronization scope");
    }
  }

  if (LOADCnt) {
    // Loads may complete via the BVH, sample, or load counters, so drain all
    // three.
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0);
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0);
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_LOADCNT_soft)).addImm(0);
    Changed = true;
  }

  if (STORECnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_STORECNT_soft)).addImm(0);
    Changed = true;
  }

  if (DSCnt) {
    BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_DSCNT_soft)).addImm(0);
    Changed = true;
  }

  // Restore the caller's iterator position.
  if (Pos == Position::AFTER)
    --MI;

  return Changed;
}
22897a6dacacSDimitry Andric
bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
                                        SIAtomicScope Scope,
                                        SIAtomicAddrSpace AddrSpace,
                                        Position Pos) const {
  // Cache invalidations may be globally disabled (e.g. for debugging).
  if (!InsertCacheInv)
    return false;

  MachineBasicBlock &MBB = *MI->getParent();
  DebugLoc DL = MI->getDebugLoc();

  /// The scratch address space does not need the global memory cache
  /// to be flushed as all memory operations by the same thread are
  /// sequentially consistent, and no other thread can access scratch
  /// memory.

  /// Other address spaces do not have a cache.
  if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
    return false;

  // Map the synchronization scope to the GLOBAL_INV scope immediate.
  AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
  switch (Scope) {
  case SIAtomicScope::SYSTEM:
    ScopeImm = AMDGPU::CPol::SCOPE_SYS;
    break;
  case SIAtomicScope::AGENT:
    ScopeImm = AMDGPU::CPol::SCOPE_DEV;
    break;
  case SIAtomicScope::WORKGROUP:
    // In WGP mode the waves of a work-group can be executing on either CU of
    // the WGP. Therefore we need to invalidate the L0 which is per CU.
    // Otherwise in CU mode all waves of a work-group are on the same CU, and so
    // the L0 does not need to be invalidated.
    if (ST.isCuModeEnabled())
      return false;

    ScopeImm = AMDGPU::CPol::SCOPE_SE;
    break;
  case SIAtomicScope::WAVEFRONT:
  case SIAtomicScope::SINGLETHREAD:
    // No cache to invalidate.
    return false;
  default:
    llvm_unreachable("Unsupported synchronization scope");
  }

  // Temporarily advance the iterator so the invalidate is emitted after MI;
  // restored below so the caller's iterator is unchanged.
  if (Pos == Position::AFTER)
    ++MI;

  BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_INV)).addImm(ScopeImm);

  if (Pos == Position::AFTER)
    --MI;

  return true;
}
23451db9f3b2SDimitry Andric
bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
    MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
    bool IsVolatile, bool IsNonTemporal) const {

  // Only handle load and store, not atomic read-modify-write instructions.
  assert(MI->mayLoad() ^ MI->mayStore());

  // Only update load and store, not LLVM IR atomic read-modify-write
  // instructions. The latter are always marked as volatile so cannot sensibly
  // handle it as do not want to pessimize all atomics. Also they do not support
  // the nontemporal attribute.
  assert(Op == SIMemOp::LOAD || Op == SIMemOp::STORE);

  bool Changed = false;

  // Unlike earlier generations, volatile and nontemporal are not mutually
  // exclusive here: TH and SCOPE are independent CPol fields, so both may be
  // applied to the same instruction.
  if (IsNonTemporal) {
    // Set non-temporal hint for all cache levels.
    Changed |= setTH(MI, AMDGPU::CPol::TH_NT);
  }

  if (IsVolatile) {
    Changed |= setScope(MI, AMDGPU::CPol::SCOPE_SYS);

    // Ensure operation has completed at system scope to cause all volatile
    // operations to be visible outside the program in a global order. Do not
    // request cross address space as only the global address space can be
    // observable outside the program, so no need to cause a waitcnt for LDS
    // address space operations.
    Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false,
                          Position::AFTER);
  }

  return Changed;
}
23807a6dacacSDimitry Andric
removeAtomicPseudoMIs()23810b57cec5SDimitry Andric bool SIMemoryLegalizer::removeAtomicPseudoMIs() {
23820b57cec5SDimitry Andric if (AtomicPseudoMIs.empty())
23830b57cec5SDimitry Andric return false;
23840b57cec5SDimitry Andric
23850b57cec5SDimitry Andric for (auto &MI : AtomicPseudoMIs)
23860b57cec5SDimitry Andric MI->eraseFromParent();
23870b57cec5SDimitry Andric
23880b57cec5SDimitry Andric AtomicPseudoMIs.clear();
23890b57cec5SDimitry Andric return true;
23900b57cec5SDimitry Andric }
23910b57cec5SDimitry Andric
bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
                                   MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && !MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    // Monotonic or stronger atomic loads must bypass caches up to the
    // requested scope.
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableLoadCacheBypass(MI, MOI.getScope(),
                                           MOI.getOrderingAddrSpace());
    }

    // seq_cst additionally requires all prior memory operations to have
    // completed before the load executes.
    if (MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    // Acquire semantics: wait for the load itself to complete, then
    // invalidate caches so subsequent accesses see other agents' writes.
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getInstrAddrSpace(),
                                SIMemOp::LOAD,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand. Only non-atomic volatile and nontemporal instructions
  // need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(MI, MOI.getInstrAddrSpace(),
                                                SIMemOp::LOAD, MOI.isVolatile(),
                                                MOI.isNonTemporal());
  return Changed;
}
24360b57cec5SDimitry Andric
bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
                                    MachineBasicBlock::iterator &MI) {
  assert(!MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    // Monotonic or stronger atomic stores must bypass caches up to the
    // requested scope.
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableStoreCacheBypass(MI, MOI.getScope(),
                                            MOI.getOrderingAddrSpace());
    }

    // Release semantics: make all prior memory operations visible before the
    // store executes.
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    return Changed;
  }

  // Atomic instructions already bypass caches to the scope specified by the
  // SyncScope operand. Only non-atomic volatile and nontemporal instructions
  // need additional treatment.
  Changed |= CC->enableVolatileAndOrNonTemporal(
      MI, MOI.getInstrAddrSpace(), SIMemOp::STORE, MOI.isVolatile(),
      MOI.isNonTemporal());
  return Changed;
}
24690b57cec5SDimitry Andric
bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
                                          MachineBasicBlock::iterator &MI) {
  assert(MI->getOpcode() == AMDGPU::ATOMIC_FENCE);

  // The fence pseudo itself is removed after expansion; record it for
  // removeAtomicPseudoMIs().
  AtomicPseudoMIs.push_back(MI);
  bool Changed = false;

  if (MOI.isAtomic()) {
    // Acquire fences wait for all prior memory operations to complete.
    if (MOI.getOrdering() == AtomicOrdering::Acquire)
      Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
                                SIMemOp::LOAD | SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::BEFORE);

    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      /// TODO: This relies on a barrier always generating a waitcnt
      /// for LDS to ensure it is not reordered with the completion of
      /// the proceeding LDS operations. If barrier had a memory
      /// ordering and memory scope, then library does not need to
      /// generate a fence. Could add support in this file for
      /// barrier. SIInsertWaitcnt.cpp could then stop unconditionally
      /// adding S_WAITCNT before a S_BARRIER.
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    // TODO: If both release and invalidate are happening they could be combined
    // to use the single "BUFFER_WBINV*" instruction. This could be done by
    // reorganizing this code or as part of optimizing SIInsertWaitcnt pass to
    // track cache invalidate and write back instructions.

    // Acquire-flavored fences also invalidate caches so later loads observe
    // other agents' writes.
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::BEFORE);

    return Changed;
  }

  return Changed;
}
25160b57cec5SDimitry Andric
bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
                                                 MachineBasicBlock::iterator &MI) {
  assert(MI->mayLoad() && MI->mayStore());

  bool Changed = false;

  if (MOI.isAtomic()) {
    // Every atomic ordering requires the RMW to bypass caches up to the
    // requested scope.
    if (MOI.getOrdering() == AtomicOrdering::Monotonic ||
        MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->enableRMWCacheBypass(MI, MOI.getScope(),
                                          MOI.getInstrAddrSpace());
    }

    // Release semantics (including cmpxchg seq_cst failure ordering): make
    // prior memory operations visible before the RMW executes.
    if (MOI.getOrdering() == AtomicOrdering::Release ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent)
      Changed |= CC->insertRelease(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   MOI.getIsCrossAddressSpaceOrdering(),
                                   Position::BEFORE);

    // Acquire semantics (on either success or failure ordering): wait for the
    // RMW to complete, then invalidate caches. A returning atomic completes
    // via the load path, a non-returning one via the store path.
    if (MOI.getOrdering() == AtomicOrdering::Acquire ||
        MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
        MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent ||
        MOI.getFailureOrdering() == AtomicOrdering::Acquire ||
        MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) {
      Changed |= CC->insertWait(MI, MOI.getScope(),
                                MOI.getInstrAddrSpace(),
                                isAtomicRet(*MI) ? SIMemOp::LOAD :
                                                   SIMemOp::STORE,
                                MOI.getIsCrossAddressSpaceOrdering(),
                                Position::AFTER);
      Changed |= CC->insertAcquire(MI, MOI.getScope(),
                                   MOI.getOrderingAddrSpace(),
                                   Position::AFTER);
    }

    return Changed;
  }

  return Changed;
}
25630b57cec5SDimitry Andric
bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  SIMemOpAccess MOA(MF);
  // Select the cache-control implementation matching this subtarget's
  // generation.
  CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());

  for (auto &MBB : MF) {
    for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) {

      // Unbundle instructions after the post-RA scheduler.
      if (MI->isBundle() && MI->mayLoadOrStore()) {
        MachineBasicBlock::instr_iterator II(MI->getIterator());
        // Detach every instruction bundled with the header and clear the
        // internal-read flags that only make sense inside a bundle.
        for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end();
             I != E && I->isBundledWithPred(); ++I) {
          I->unbundleFromPred();
          for (MachineOperand &MO : I->operands())
            if (MO.isReg())
              MO.setIsInternalRead(false);
        }

        // Erase the now-empty bundle header and continue from the first
        // unbundled instruction.
        MI->eraseFromParent();
        MI = II->getIterator();
      }

      // Skip instructions the memory model never affects.
      if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic))
        continue;

      // Classify the instruction and expand it; exactly one of the getters
      // returns a value for a given instruction.
      if (const auto &MOI = MOA.getLoadInfo(MI))
        Changed |= expandLoad(*MOI, MI);
      else if (const auto &MOI = MOA.getStoreInfo(MI)) {
        Changed |= expandStore(*MOI, MI);
        Changed |= CC->tryForceStoreSC0SC1(*MOI, MI);
      } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
        Changed |= expandAtomicFence(*MOI, MI);
      else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
        Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
    }
  }

  // Delete the fence pseudos recorded during expansion.
  Changed |= removeAtomicPseudoMIs();
  return Changed;
}
26060b57cec5SDimitry Andric
// Register the pass with the LLVM pass infrastructure (no analysis
// dependencies, not a CFG-only pass).
INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)

// Pass identity: the address of ID uniquely identifies the pass.
char SIMemoryLegalizer::ID = 0;
char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
26110b57cec5SDimitry Andric
// Factory used by the AMDGPU target pass pipeline to instantiate this pass.
FunctionPass *llvm::createSIMemoryLegalizerPass() {
  return new SIMemoryLegalizer();
}
2615