xref: /src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
//===-- AMDGPUPromoteKernelArguments.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass recursively promotes generic pointer arguments of a kernel
/// into the global address space.
///
/// The pass walks the kernel's pointer arguments, then the loads from them. If
/// a loaded value is a pointer and the loaded pointer is unmodified in the
/// kernel before the load, then the loaded pointer is promoted to global. The
/// process then continues recursively on the promoted pointer.
//
//===----------------------------------------------------------------------===//
17c0981da4SDimitry Andric 
18c0981da4SDimitry Andric #include "AMDGPU.h"
19145449b1SDimitry Andric #include "Utils/AMDGPUMemoryUtils.h"
20c0981da4SDimitry Andric #include "llvm/ADT/SmallVector.h"
21145449b1SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h"
22c0981da4SDimitry Andric #include "llvm/Analysis/MemorySSA.h"
23c0981da4SDimitry Andric #include "llvm/IR/IRBuilder.h"
24c0981da4SDimitry Andric #include "llvm/InitializePasses.h"
25c0981da4SDimitry Andric 
26c0981da4SDimitry Andric #define DEBUG_TYPE "amdgpu-promote-kernel-arguments"
27c0981da4SDimitry Andric 
28c0981da4SDimitry Andric using namespace llvm;
29c0981da4SDimitry Andric 
30c0981da4SDimitry Andric namespace {
31c0981da4SDimitry Andric 
32c0981da4SDimitry Andric class AMDGPUPromoteKernelArguments : public FunctionPass {
33c0981da4SDimitry Andric   MemorySSA *MSSA;
34c0981da4SDimitry Andric 
35145449b1SDimitry Andric   AliasAnalysis *AA;
36145449b1SDimitry Andric 
37c0981da4SDimitry Andric   Instruction *ArgCastInsertPt;
38c0981da4SDimitry Andric 
39c0981da4SDimitry Andric   SmallVector<Value *> Ptrs;
40c0981da4SDimitry Andric 
41c0981da4SDimitry Andric   void enqueueUsers(Value *Ptr);
42c0981da4SDimitry Andric 
43c0981da4SDimitry Andric   bool promotePointer(Value *Ptr);
44c0981da4SDimitry Andric 
45145449b1SDimitry Andric   bool promoteLoad(LoadInst *LI);
46145449b1SDimitry Andric 
47c0981da4SDimitry Andric public:
48c0981da4SDimitry Andric   static char ID;
49c0981da4SDimitry Andric 
AMDGPUPromoteKernelArguments()50c0981da4SDimitry Andric   AMDGPUPromoteKernelArguments() : FunctionPass(ID) {}
51c0981da4SDimitry Andric 
52145449b1SDimitry Andric   bool run(Function &F, MemorySSA &MSSA, AliasAnalysis &AA);
53c0981da4SDimitry Andric 
54c0981da4SDimitry Andric   bool runOnFunction(Function &F) override;
55c0981da4SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const56c0981da4SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
57145449b1SDimitry Andric     AU.addRequired<AAResultsWrapperPass>();
58c0981da4SDimitry Andric     AU.addRequired<MemorySSAWrapperPass>();
59c0981da4SDimitry Andric     AU.setPreservesAll();
60c0981da4SDimitry Andric   }
61c0981da4SDimitry Andric };
62c0981da4SDimitry Andric 
63c0981da4SDimitry Andric } // end anonymous namespace
64c0981da4SDimitry Andric 
enqueueUsers(Value * Ptr)65c0981da4SDimitry Andric void AMDGPUPromoteKernelArguments::enqueueUsers(Value *Ptr) {
66c0981da4SDimitry Andric   SmallVector<User *> PtrUsers(Ptr->users());
67c0981da4SDimitry Andric 
68c0981da4SDimitry Andric   while (!PtrUsers.empty()) {
69c0981da4SDimitry Andric     Instruction *U = dyn_cast<Instruction>(PtrUsers.pop_back_val());
70c0981da4SDimitry Andric     if (!U)
71c0981da4SDimitry Andric       continue;
72c0981da4SDimitry Andric 
73c0981da4SDimitry Andric     switch (U->getOpcode()) {
74c0981da4SDimitry Andric     default:
75c0981da4SDimitry Andric       break;
76c0981da4SDimitry Andric     case Instruction::Load: {
77c0981da4SDimitry Andric       LoadInst *LD = cast<LoadInst>(U);
78145449b1SDimitry Andric       if (LD->getPointerOperand()->stripInBoundsOffsets() == Ptr &&
79145449b1SDimitry Andric           !AMDGPU::isClobberedInFunction(LD, MSSA, AA))
80c0981da4SDimitry Andric         Ptrs.push_back(LD);
81145449b1SDimitry Andric 
82c0981da4SDimitry Andric       break;
83c0981da4SDimitry Andric     }
84c0981da4SDimitry Andric     case Instruction::GetElementPtr:
85c0981da4SDimitry Andric     case Instruction::AddrSpaceCast:
86c0981da4SDimitry Andric     case Instruction::BitCast:
87c0981da4SDimitry Andric       if (U->getOperand(0)->stripInBoundsOffsets() == Ptr)
88c0981da4SDimitry Andric         PtrUsers.append(U->user_begin(), U->user_end());
89c0981da4SDimitry Andric       break;
90c0981da4SDimitry Andric     }
91c0981da4SDimitry Andric   }
92c0981da4SDimitry Andric }
93c0981da4SDimitry Andric 
promotePointer(Value * Ptr)94c0981da4SDimitry Andric bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
95145449b1SDimitry Andric   bool Changed = false;
96145449b1SDimitry Andric 
97145449b1SDimitry Andric   LoadInst *LI = dyn_cast<LoadInst>(Ptr);
98145449b1SDimitry Andric   if (LI)
99145449b1SDimitry Andric     Changed |= promoteLoad(LI);
100145449b1SDimitry Andric 
101145449b1SDimitry Andric   PointerType *PT = dyn_cast<PointerType>(Ptr->getType());
102145449b1SDimitry Andric   if (!PT)
103145449b1SDimitry Andric     return Changed;
104145449b1SDimitry Andric 
105145449b1SDimitry Andric   if (PT->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
106145449b1SDimitry Andric       PT->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
107145449b1SDimitry Andric       PT->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS)
108c0981da4SDimitry Andric     enqueueUsers(Ptr);
109c0981da4SDimitry Andric 
110c0981da4SDimitry Andric   if (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
111145449b1SDimitry Andric     return Changed;
112c0981da4SDimitry Andric 
113145449b1SDimitry Andric   IRBuilder<> B(LI ? &*std::next(cast<Instruction>(Ptr)->getIterator())
114145449b1SDimitry Andric                    : ArgCastInsertPt);
115c0981da4SDimitry Andric 
116c0981da4SDimitry Andric   // Cast pointer to global address space and back to flat and let
117c0981da4SDimitry Andric   // Infer Address Spaces pass to do all necessary rewriting.
118c0981da4SDimitry Andric   PointerType *NewPT =
1197fa27ce4SDimitry Andric       PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
120c0981da4SDimitry Andric   Value *Cast =
121c0981da4SDimitry Andric       B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
122c0981da4SDimitry Andric   Value *CastBack =
123c0981da4SDimitry Andric       B.CreateAddrSpaceCast(Cast, PT, Twine(Ptr->getName(), ".flat"));
124c0981da4SDimitry Andric   Ptr->replaceUsesWithIf(CastBack,
125c0981da4SDimitry Andric                          [Cast](Use &U) { return U.getUser() != Cast; });
126c0981da4SDimitry Andric 
127c0981da4SDimitry Andric   return true;
128c0981da4SDimitry Andric }
129c0981da4SDimitry Andric 
promoteLoad(LoadInst * LI)130145449b1SDimitry Andric bool AMDGPUPromoteKernelArguments::promoteLoad(LoadInst *LI) {
131145449b1SDimitry Andric   if (!LI->isSimple())
132145449b1SDimitry Andric     return false;
133145449b1SDimitry Andric 
134145449b1SDimitry Andric   LI->setMetadata("amdgpu.noclobber", MDNode::get(LI->getContext(), {}));
135145449b1SDimitry Andric   return true;
136145449b1SDimitry Andric }
137145449b1SDimitry Andric 
138c0981da4SDimitry Andric // skip allocas
getInsertPt(BasicBlock & BB)139c0981da4SDimitry Andric static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
140c0981da4SDimitry Andric   BasicBlock::iterator InsPt = BB.getFirstInsertionPt();
141c0981da4SDimitry Andric   for (BasicBlock::iterator E = BB.end(); InsPt != E; ++InsPt) {
142c0981da4SDimitry Andric     AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
143c0981da4SDimitry Andric 
144c0981da4SDimitry Andric     // If this is a dynamic alloca, the value may depend on the loaded kernargs,
145c0981da4SDimitry Andric     // so loads will need to be inserted before it.
146c0981da4SDimitry Andric     if (!AI || !AI->isStaticAlloca())
147c0981da4SDimitry Andric       break;
148c0981da4SDimitry Andric   }
149c0981da4SDimitry Andric 
150c0981da4SDimitry Andric   return InsPt;
151c0981da4SDimitry Andric }
152c0981da4SDimitry Andric 
run(Function & F,MemorySSA & MSSA,AliasAnalysis & AA)153145449b1SDimitry Andric bool AMDGPUPromoteKernelArguments::run(Function &F, MemorySSA &MSSA,
154145449b1SDimitry Andric                                        AliasAnalysis &AA) {
155c0981da4SDimitry Andric   if (skipFunction(F))
156c0981da4SDimitry Andric     return false;
157c0981da4SDimitry Andric 
158c0981da4SDimitry Andric   CallingConv::ID CC = F.getCallingConv();
159c0981da4SDimitry Andric   if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
160c0981da4SDimitry Andric     return false;
161c0981da4SDimitry Andric 
162c0981da4SDimitry Andric   ArgCastInsertPt = &*getInsertPt(*F.begin());
163c0981da4SDimitry Andric   this->MSSA = &MSSA;
164145449b1SDimitry Andric   this->AA = &AA;
165c0981da4SDimitry Andric 
166c0981da4SDimitry Andric   for (Argument &Arg : F.args()) {
167c0981da4SDimitry Andric     if (Arg.use_empty())
168c0981da4SDimitry Andric       continue;
169c0981da4SDimitry Andric 
170c0981da4SDimitry Andric     PointerType *PT = dyn_cast<PointerType>(Arg.getType());
171c0981da4SDimitry Andric     if (!PT || (PT->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS &&
172c0981da4SDimitry Andric                 PT->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS &&
173c0981da4SDimitry Andric                 PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS))
174c0981da4SDimitry Andric       continue;
175c0981da4SDimitry Andric 
176c0981da4SDimitry Andric     Ptrs.push_back(&Arg);
177c0981da4SDimitry Andric   }
178c0981da4SDimitry Andric 
179c0981da4SDimitry Andric   bool Changed = false;
180c0981da4SDimitry Andric   while (!Ptrs.empty()) {
181c0981da4SDimitry Andric     Value *Ptr = Ptrs.pop_back_val();
182c0981da4SDimitry Andric     Changed |= promotePointer(Ptr);
183c0981da4SDimitry Andric   }
184c0981da4SDimitry Andric 
185c0981da4SDimitry Andric   return Changed;
186c0981da4SDimitry Andric }
187c0981da4SDimitry Andric 
runOnFunction(Function & F)188c0981da4SDimitry Andric bool AMDGPUPromoteKernelArguments::runOnFunction(Function &F) {
189c0981da4SDimitry Andric   MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
190145449b1SDimitry Andric   AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
191145449b1SDimitry Andric   return run(F, MSSA, AA);
192c0981da4SDimitry Andric }
193c0981da4SDimitry Andric 
194c0981da4SDimitry Andric INITIALIZE_PASS_BEGIN(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
195c0981da4SDimitry Andric                       "AMDGPU Promote Kernel Arguments", false, false)
196145449b1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
197c0981da4SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
198c0981da4SDimitry Andric INITIALIZE_PASS_END(AMDGPUPromoteKernelArguments, DEBUG_TYPE,
199c0981da4SDimitry Andric                     "AMDGPU Promote Kernel Arguments", false, false)
200c0981da4SDimitry Andric 
201c0981da4SDimitry Andric char AMDGPUPromoteKernelArguments::ID = 0;
202c0981da4SDimitry Andric 
createAMDGPUPromoteKernelArgumentsPass()203c0981da4SDimitry Andric FunctionPass *llvm::createAMDGPUPromoteKernelArgumentsPass() {
204c0981da4SDimitry Andric   return new AMDGPUPromoteKernelArguments();
205c0981da4SDimitry Andric }
206c0981da4SDimitry Andric 
207c0981da4SDimitry Andric PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)208c0981da4SDimitry Andric AMDGPUPromoteKernelArgumentsPass::run(Function &F,
209c0981da4SDimitry Andric                                       FunctionAnalysisManager &AM) {
210c0981da4SDimitry Andric   MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
211145449b1SDimitry Andric   AliasAnalysis &AA = AM.getResult<AAManager>(F);
212145449b1SDimitry Andric   if (AMDGPUPromoteKernelArguments().run(F, MSSA, AA)) {
213c0981da4SDimitry Andric     PreservedAnalyses PA;
214c0981da4SDimitry Andric     PA.preserveSet<CFGAnalyses>();
215c0981da4SDimitry Andric     PA.preserve<MemorySSAAnalysis>();
216c0981da4SDimitry Andric     return PA;
217c0981da4SDimitry Andric   }
218c0981da4SDimitry Andric   return PreservedAnalyses::all();
219c0981da4SDimitry Andric }
220