xref: /src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//===-- AMDGPUMachineFunction.cpp -------------------------------------------=//
2b915e9e0SDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b915e9e0SDimitry Andric //
7b915e9e0SDimitry Andric //===----------------------------------------------------------------------===//
8b915e9e0SDimitry Andric 
94a16efa3SDimitry Andric #include "AMDGPUMachineFunction.h"
10145449b1SDimitry Andric #include "AMDGPU.h"
11eb11fae6SDimitry Andric #include "AMDGPUPerfHintAnalysis.h"
12b60736ecSDimitry Andric #include "AMDGPUSubtarget.h"
137fa27ce4SDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
14eb11fae6SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
157fa27ce4SDimitry Andric #include "llvm/IR/ConstantRange.h"
164b4fe385SDimitry Andric #include "llvm/IR/Constants.h"
177fa27ce4SDimitry Andric #include "llvm/IR/Metadata.h"
18b60736ecSDimitry Andric #include "llvm/Target/TargetMachine.h"
1901095a5dSDimitry Andric 
20f8af5cf6SDimitry Andric using namespace llvm;
214a16efa3SDimitry Andric 
22aca2e42cSDimitry Andric static const GlobalVariable *
getKernelDynLDSGlobalFromFunction(const Function & F)23aca2e42cSDimitry Andric getKernelDynLDSGlobalFromFunction(const Function &F) {
24aca2e42cSDimitry Andric   const Module *M = F.getParent();
25aca2e42cSDimitry Andric   SmallString<64> KernelDynLDSName("llvm.amdgcn.");
26aca2e42cSDimitry Andric   KernelDynLDSName += F.getName();
27aca2e42cSDimitry Andric   KernelDynLDSName += ".dynlds";
28aca2e42cSDimitry Andric   return M->getNamedGlobal(KernelDynLDSName);
29aca2e42cSDimitry Andric }
30aca2e42cSDimitry Andric 
hasLDSKernelArgument(const Function & F)31aca2e42cSDimitry Andric static bool hasLDSKernelArgument(const Function &F) {
32aca2e42cSDimitry Andric   for (const Argument &Arg : F.args()) {
33aca2e42cSDimitry Andric     Type *ArgTy = Arg.getType();
34aca2e42cSDimitry Andric     if (auto PtrTy = dyn_cast<PointerType>(ArgTy)) {
35aca2e42cSDimitry Andric       if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
36aca2e42cSDimitry Andric         return true;
37aca2e42cSDimitry Andric     }
38aca2e42cSDimitry Andric   }
39aca2e42cSDimitry Andric   return false;
40aca2e42cSDimitry Andric }
41aca2e42cSDimitry Andric 
AMDGPUMachineFunction(const Function & F,const AMDGPUSubtarget & ST)42e3b55780SDimitry Andric AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
43e3b55780SDimitry Andric                                              const AMDGPUSubtarget &ST)
44e3b55780SDimitry Andric     : IsEntryFunction(AMDGPU::isEntryFunctionCC(F.getCallingConv())),
45b60736ecSDimitry Andric       IsModuleEntryFunction(
46e3b55780SDimitry Andric           AMDGPU::isModuleEntryFunctionCC(F.getCallingConv())),
47ac9a064cSDimitry Andric       IsChainFunction(AMDGPU::isChainCC(F.getCallingConv())) {
48eb11fae6SDimitry Andric 
49b915e9e0SDimitry Andric   // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
50b915e9e0SDimitry Andric   // except reserved size is not correctly aligned.
51eb11fae6SDimitry Andric 
52e6d15924SDimitry Andric   Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
53344a3780SDimitry Andric   MemoryBound = MemBoundAttr.getValueAsBool();
54e6d15924SDimitry Andric 
55e6d15924SDimitry Andric   Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
56344a3780SDimitry Andric   WaveLimiter = WaveLimitAttr.getValueAsBool();
57eb11fae6SDimitry Andric 
58145449b1SDimitry Andric   // FIXME: How is this attribute supposed to interact with statically known
59145449b1SDimitry Andric   // global sizes?
60145449b1SDimitry Andric   StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
61145449b1SDimitry Andric   if (!S.empty())
62145449b1SDimitry Andric     S.consumeInteger(0, GDSSize);
63145449b1SDimitry Andric 
64145449b1SDimitry Andric   // Assume the attribute allocates before any known GDS globals.
65145449b1SDimitry Andric   StaticGDSSize = GDSSize;
66145449b1SDimitry Andric 
677fa27ce4SDimitry Andric   // Second value, if present, is the maximum value that can be assigned.
687fa27ce4SDimitry Andric   // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
697fa27ce4SDimitry Andric   // during codegen.
707fa27ce4SDimitry Andric   std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
717fa27ce4SDimitry Andric       F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
727fa27ce4SDimitry Andric 
737fa27ce4SDimitry Andric   // The two separate variables are only profitable when the LDS module lowering
747fa27ce4SDimitry Andric   // pass is disabled. If graphics does not use dynamic LDS, this is never
757fa27ce4SDimitry Andric   // profitable. Leaving cleanup for a later change.
767fa27ce4SDimitry Andric   LDSSize = LDSSizeRange.first;
777fa27ce4SDimitry Andric   StaticLDSSize = LDSSize;
787fa27ce4SDimitry Andric 
79eb11fae6SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
80eb11fae6SDimitry Andric   if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
81eb11fae6SDimitry Andric     ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
82e3b55780SDimitry Andric 
83e3b55780SDimitry Andric   // FIXME: Shouldn't be target specific
84e3b55780SDimitry Andric   Attribute NSZAttr = F.getFnAttribute("no-signed-zeros-fp-math");
85e3b55780SDimitry Andric   NoSignedZerosFPMath =
86e3b55780SDimitry Andric       NSZAttr.isStringAttribute() && NSZAttr.getValueAsString() == "true";
87aca2e42cSDimitry Andric 
88aca2e42cSDimitry Andric   const GlobalVariable *DynLdsGlobal = getKernelDynLDSGlobalFromFunction(F);
89aca2e42cSDimitry Andric   if (DynLdsGlobal || hasLDSKernelArgument(F))
90aca2e42cSDimitry Andric     UsesDynamicLDS = true;
9101095a5dSDimitry Andric }
924a16efa3SDimitry Andric 
allocateLDSGlobal(const DataLayout & DL,const GlobalVariable & GV,Align Trailing)93b915e9e0SDimitry Andric unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
94e3b55780SDimitry Andric                                                   const GlobalVariable &GV,
95e3b55780SDimitry Andric                                                   Align Trailing) {
96e3b55780SDimitry Andric   auto Entry = LocalMemoryObjects.insert(std::pair(&GV, 0));
97b915e9e0SDimitry Andric   if (!Entry.second)
98b915e9e0SDimitry Andric     return Entry.first->second;
99b915e9e0SDimitry Andric 
100cfca06d7SDimitry Andric   Align Alignment =
101cfca06d7SDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
102b915e9e0SDimitry Andric 
103145449b1SDimitry Andric   unsigned Offset;
104145449b1SDimitry Andric   if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
1057fa27ce4SDimitry Andric 
1067fa27ce4SDimitry Andric     std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
1077fa27ce4SDimitry Andric     if (MaybeAbs) {
1087fa27ce4SDimitry Andric       // Absolute address LDS variables that exist prior to the LDS lowering
1097fa27ce4SDimitry Andric       // pass raise a fatal error in that pass. These failure modes are only
1107fa27ce4SDimitry Andric       // reachable if that lowering pass is disabled or broken. If/when adding
1117fa27ce4SDimitry Andric       // support for absolute addresses on user specified variables, the
1127fa27ce4SDimitry Andric       // alignment check moves to the lowering pass and the frame calculation
1137fa27ce4SDimitry Andric       // needs to take the user variables into consideration.
1147fa27ce4SDimitry Andric 
1157fa27ce4SDimitry Andric       uint32_t ObjectStart = *MaybeAbs;
1167fa27ce4SDimitry Andric 
1177fa27ce4SDimitry Andric       if (ObjectStart != alignTo(ObjectStart, Alignment)) {
1187fa27ce4SDimitry Andric         report_fatal_error("Absolute address LDS variable inconsistent with "
1197fa27ce4SDimitry Andric                            "variable alignment");
1207fa27ce4SDimitry Andric       }
1217fa27ce4SDimitry Andric 
1227fa27ce4SDimitry Andric       if (isModuleEntryFunction()) {
1237fa27ce4SDimitry Andric         // If this is a module entry function, we can also sanity check against
1247fa27ce4SDimitry Andric         // the static frame. Strictly it would be better to check against the
1257fa27ce4SDimitry Andric         // attribute, i.e. that the variable is within the always-allocated
1267fa27ce4SDimitry Andric         // section, and not within some other non-absolute-address object
1277fa27ce4SDimitry Andric         // allocated here, but the extra error detection is minimal and we would
1287fa27ce4SDimitry Andric         // have to pass the Function around or cache the attribute value.
1297fa27ce4SDimitry Andric         uint32_t ObjectEnd =
1307fa27ce4SDimitry Andric             ObjectStart + DL.getTypeAllocSize(GV.getValueType());
1317fa27ce4SDimitry Andric         if (ObjectEnd > StaticLDSSize) {
1327fa27ce4SDimitry Andric           report_fatal_error(
1337fa27ce4SDimitry Andric               "Absolute address LDS variable outside of static frame");
1347fa27ce4SDimitry Andric         }
1357fa27ce4SDimitry Andric       }
1367fa27ce4SDimitry Andric 
1377fa27ce4SDimitry Andric       Entry.first->second = ObjectStart;
1387fa27ce4SDimitry Andric       return ObjectStart;
1397fa27ce4SDimitry Andric     }
1407fa27ce4SDimitry Andric 
141b915e9e0SDimitry Andric     /// TODO: We should sort these to minimize wasted space due to alignment
142b915e9e0SDimitry Andric     /// padding. Currently the padding is decided by the first encountered use
143b915e9e0SDimitry Andric     /// during lowering.
144145449b1SDimitry Andric     Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
145b915e9e0SDimitry Andric 
146b60736ecSDimitry Andric     StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
147b60736ecSDimitry Andric 
148e3b55780SDimitry Andric     // Align LDS size to trailing, e.g. for aligning dynamic shared memory
149e3b55780SDimitry Andric     LDSSize = alignTo(StaticLDSSize, Trailing);
150145449b1SDimitry Andric   } else {
151145449b1SDimitry Andric     assert(GV.getAddressSpace() == AMDGPUAS::REGION_ADDRESS &&
152145449b1SDimitry Andric            "expected region address space");
153b915e9e0SDimitry Andric 
154145449b1SDimitry Andric     Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
155145449b1SDimitry Andric     StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
156145449b1SDimitry Andric 
157145449b1SDimitry Andric     // FIXME: Apply alignment of dynamic GDS
158145449b1SDimitry Andric     GDSSize = StaticGDSSize;
159145449b1SDimitry Andric   }
160145449b1SDimitry Andric 
161145449b1SDimitry Andric   Entry.first->second = Offset;
162b915e9e0SDimitry Andric   return Offset;
1634a16efa3SDimitry Andric }
164b60736ecSDimitry Andric 
165e3b55780SDimitry Andric std::optional<uint32_t>
getLDSKernelIdMetadata(const Function & F)1664b4fe385SDimitry Andric AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
1677fa27ce4SDimitry Andric   // TODO: Would be more consistent with the abs symbols to use a range
1687fa27ce4SDimitry Andric   MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
1694b4fe385SDimitry Andric   if (MD && MD->getNumOperands() == 1) {
1707fa27ce4SDimitry Andric     if (ConstantInt *KnownSize =
1717fa27ce4SDimitry Andric             mdconst::extract<ConstantInt>(MD->getOperand(0))) {
1727fa27ce4SDimitry Andric       uint64_t ZExt = KnownSize->getZExtValue();
1737fa27ce4SDimitry Andric       if (ZExt <= UINT32_MAX) {
1747fa27ce4SDimitry Andric         return ZExt;
1754b4fe385SDimitry Andric       }
1764b4fe385SDimitry Andric     }
1774b4fe385SDimitry Andric   }
1784b4fe385SDimitry Andric   return {};
1794b4fe385SDimitry Andric }
1804b4fe385SDimitry Andric 
1817fa27ce4SDimitry Andric std::optional<uint32_t>
getLDSAbsoluteAddress(const GlobalValue & GV)1827fa27ce4SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
1837fa27ce4SDimitry Andric   if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
1847fa27ce4SDimitry Andric     return {};
1857fa27ce4SDimitry Andric 
1867fa27ce4SDimitry Andric   std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
1877fa27ce4SDimitry Andric   if (!AbsSymRange)
1887fa27ce4SDimitry Andric     return {};
1897fa27ce4SDimitry Andric 
1907fa27ce4SDimitry Andric   if (const APInt *V = AbsSymRange->getSingleElement()) {
1917fa27ce4SDimitry Andric     std::optional<uint64_t> ZExt = V->tryZExtValue();
1927fa27ce4SDimitry Andric     if (ZExt && (*ZExt <= UINT32_MAX)) {
1937fa27ce4SDimitry Andric       return *ZExt;
1947fa27ce4SDimitry Andric     }
1957fa27ce4SDimitry Andric   }
1967fa27ce4SDimitry Andric 
1977fa27ce4SDimitry Andric   return {};
1987fa27ce4SDimitry Andric }
1997fa27ce4SDimitry Andric 
setDynLDSAlign(const Function & F,const GlobalVariable & GV)2007fa27ce4SDimitry Andric void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
201b60736ecSDimitry Andric                                            const GlobalVariable &GV) {
2027fa27ce4SDimitry Andric   const Module *M = F.getParent();
2037fa27ce4SDimitry Andric   const DataLayout &DL = M->getDataLayout();
204b60736ecSDimitry Andric   assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
205b60736ecSDimitry Andric 
206b60736ecSDimitry Andric   Align Alignment =
207b60736ecSDimitry Andric       DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
208b60736ecSDimitry Andric   if (Alignment <= DynLDSAlign)
209b60736ecSDimitry Andric     return;
210b60736ecSDimitry Andric 
211b60736ecSDimitry Andric   LDSSize = alignTo(StaticLDSSize, Alignment);
212b60736ecSDimitry Andric   DynLDSAlign = Alignment;
2137fa27ce4SDimitry Andric 
2147fa27ce4SDimitry Andric   // If there is a dynamic LDS variable associated with this function F, every
2157fa27ce4SDimitry Andric   // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
2167fa27ce4SDimitry Andric   // map to the same address. This holds because no LDS is allocated after the
2177fa27ce4SDimitry Andric   // lowering pass if there are dynamic LDS variables present.
2187fa27ce4SDimitry Andric   const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
2197fa27ce4SDimitry Andric   if (Dyn) {
2207fa27ce4SDimitry Andric     unsigned Offset = LDSSize; // return this?
2217fa27ce4SDimitry Andric     std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
2227fa27ce4SDimitry Andric     if (!Expect || (Offset != *Expect)) {
2237fa27ce4SDimitry Andric       report_fatal_error("Inconsistent metadata on dynamic LDS variable");
2247fa27ce4SDimitry Andric     }
2257fa27ce4SDimitry Andric   }
226b60736ecSDimitry Andric }
227aca2e42cSDimitry Andric 
setUsesDynamicLDS(bool DynLDS)228aca2e42cSDimitry Andric void AMDGPUMachineFunction::setUsesDynamicLDS(bool DynLDS) {
229aca2e42cSDimitry Andric   UsesDynamicLDS = DynLDS;
230aca2e42cSDimitry Andric }
231aca2e42cSDimitry Andric 
isDynamicLDSUsed() const232aca2e42cSDimitry Andric bool AMDGPUMachineFunction::isDynamicLDSUsed() const { return UsesDynamicLDS; }
233