xref: /src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
1044eb2f6SDimitry Andric //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
2dd58ef01SDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6dd58ef01SDimitry Andric //
7dd58ef01SDimitry Andric //===----------------------------------------------------------------------===//
8dd58ef01SDimitry Andric //
9c0981da4SDimitry Andric /// \file This pass propagates the uniform-work-group-size attribute from
10c0981da4SDimitry Andric /// kernels to leaf functions when possible. It also adds additional attributes
11c0981da4SDimitry Andric /// to hint ABI lowering optimizations later.
12dd58ef01SDimitry Andric //
13dd58ef01SDimitry Andric //===----------------------------------------------------------------------===//
14dd58ef01SDimitry Andric 
15dd58ef01SDimitry Andric #include "AMDGPU.h"
16b60736ecSDimitry Andric #include "GCNSubtarget.h"
17044eb2f6SDimitry Andric #include "llvm/Analysis/CallGraph.h"
1893c91e39SDimitry Andric #include "llvm/Analysis/CallGraphSCCPass.h"
19b5630dbaSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
20b60736ecSDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
21b60736ecSDimitry Andric #include "llvm/IR/IntrinsicsR600.h"
22044eb2f6SDimitry Andric #include "llvm/Target/TargetMachine.h"
23dd58ef01SDimitry Andric 
24dd58ef01SDimitry Andric #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
25dd58ef01SDimitry Andric 
26dd58ef01SDimitry Andric using namespace llvm;
27dd58ef01SDimitry Andric 
28dd58ef01SDimitry Andric namespace {
2993c91e39SDimitry Andric class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
30dd58ef01SDimitry Andric private:
3193c91e39SDimitry Andric   const TargetMachine *TM = nullptr;
3201095a5dSDimitry Andric 
3393c91e39SDimitry Andric   bool addFeatureAttributes(Function &F);
34dd58ef01SDimitry Andric 
35dd58ef01SDimitry Andric public:
36dd58ef01SDimitry Andric   static char ID;
37dd58ef01SDimitry Andric 
AMDGPUAnnotateKernelFeatures()3893c91e39SDimitry Andric   AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
3993c91e39SDimitry Andric 
4093c91e39SDimitry Andric   bool doInitialization(CallGraph &CG) override;
4193c91e39SDimitry Andric   bool runOnSCC(CallGraphSCC &SCC) override;
42044eb2f6SDimitry Andric 
getPassName() const43b915e9e0SDimitry Andric   StringRef getPassName() const override {
44dd58ef01SDimitry Andric     return "AMDGPU Annotate Kernel Features";
45dd58ef01SDimitry Andric   }
46dd58ef01SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const47dd58ef01SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
48dd58ef01SDimitry Andric     AU.setPreservesAll();
4993c91e39SDimitry Andric     CallGraphSCCPass::getAnalysisUsage(AU);
50dd58ef01SDimitry Andric   }
51dd58ef01SDimitry Andric };
52dd58ef01SDimitry Andric 
53044eb2f6SDimitry Andric } // end anonymous namespace
54dd58ef01SDimitry Andric 
55dd58ef01SDimitry Andric char AMDGPUAnnotateKernelFeatures::ID = 0;
56dd58ef01SDimitry Andric 
57dd58ef01SDimitry Andric char &llvm::AMDGPUAnnotateKernelFeaturesID = AMDGPUAnnotateKernelFeatures::ID;
58dd58ef01SDimitry Andric 
5901095a5dSDimitry Andric INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
60dd58ef01SDimitry Andric                 "Add AMDGPU function attributes", false, false)
61dd58ef01SDimitry Andric 
addFeatureAttributes(Function & F)6293c91e39SDimitry Andric bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
63cfca06d7SDimitry Andric   bool HaveStackObjects = false;
6493c91e39SDimitry Andric   bool Changed = false;
6593c91e39SDimitry Andric   bool HaveCall = false;
6693c91e39SDimitry Andric   bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
6793c91e39SDimitry Andric 
6893c91e39SDimitry Andric   for (BasicBlock &BB : F) {
6993c91e39SDimitry Andric     for (Instruction &I : BB) {
70cfca06d7SDimitry Andric       if (isa<AllocaInst>(I)) {
71cfca06d7SDimitry Andric         HaveStackObjects = true;
72cfca06d7SDimitry Andric         continue;
73cfca06d7SDimitry Andric       }
74cfca06d7SDimitry Andric 
75cfca06d7SDimitry Andric       if (auto *CB = dyn_cast<CallBase>(&I)) {
76cfca06d7SDimitry Andric         const Function *Callee =
77cfca06d7SDimitry Andric             dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
7893c91e39SDimitry Andric 
79c0981da4SDimitry Andric         // Note the occurrence of indirect call.
8093c91e39SDimitry Andric         if (!Callee) {
81c0981da4SDimitry Andric           if (!CB->isInlineAsm())
8293c91e39SDimitry Andric             HaveCall = true;
83c0981da4SDimitry Andric 
8493c91e39SDimitry Andric           continue;
8593c91e39SDimitry Andric         }
8693c91e39SDimitry Andric 
8793c91e39SDimitry Andric         Intrinsic::ID IID = Callee->getIntrinsicID();
8893c91e39SDimitry Andric         if (IID == Intrinsic::not_intrinsic) {
8993c91e39SDimitry Andric           HaveCall = true;
9093c91e39SDimitry Andric           Changed = true;
9193c91e39SDimitry Andric         }
9293c91e39SDimitry Andric       }
9393c91e39SDimitry Andric     }
94cfca06d7SDimitry Andric   }
9593c91e39SDimitry Andric 
9693c91e39SDimitry Andric   // TODO: We could refine this to captured pointers that could possibly be
9793c91e39SDimitry Andric   // accessed by flat instructions. For now this is mostly a poor way of
9893c91e39SDimitry Andric   // estimating whether there are calls before argument lowering.
99cfca06d7SDimitry Andric   if (!IsFunc && HaveCall) {
100cfca06d7SDimitry Andric     F.addFnAttr("amdgpu-calls");
101cfca06d7SDimitry Andric     Changed = true;
102cfca06d7SDimitry Andric   }
103cfca06d7SDimitry Andric 
104cfca06d7SDimitry Andric   if (HaveStackObjects) {
105cfca06d7SDimitry Andric     F.addFnAttr("amdgpu-stack-objects");
10693c91e39SDimitry Andric     Changed = true;
10793c91e39SDimitry Andric   }
10893c91e39SDimitry Andric 
10993c91e39SDimitry Andric   return Changed;
11093c91e39SDimitry Andric }
11193c91e39SDimitry Andric 
runOnSCC(CallGraphSCC & SCC)11293c91e39SDimitry Andric bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
11393c91e39SDimitry Andric   bool Changed = false;
114e6d15924SDimitry Andric 
11593c91e39SDimitry Andric   for (CallGraphNode *I : SCC) {
11693c91e39SDimitry Andric     Function *F = I->getFunction();
117344a3780SDimitry Andric     // Ignore functions with graphics calling conventions, these are currently
118344a3780SDimitry Andric     // not allowed to have kernel arguments.
119344a3780SDimitry Andric     if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
12093c91e39SDimitry Andric       continue;
121344a3780SDimitry Andric     // Add feature attributes
12293c91e39SDimitry Andric     Changed |= addFeatureAttributes(*F);
12393c91e39SDimitry Andric   }
12493c91e39SDimitry Andric 
12593c91e39SDimitry Andric   return Changed;
12693c91e39SDimitry Andric }
12793c91e39SDimitry Andric 
doInitialization(CallGraph & CG)12893c91e39SDimitry Andric bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
12993c91e39SDimitry Andric   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
13093c91e39SDimitry Andric   if (!TPC)
13193c91e39SDimitry Andric     report_fatal_error("TargetMachine is required");
13293c91e39SDimitry Andric 
13393c91e39SDimitry Andric   TM = &TPC->getTM<TargetMachine>();
13401095a5dSDimitry Andric   return false;
13501095a5dSDimitry Andric }
13601095a5dSDimitry Andric 
createAMDGPUAnnotateKernelFeaturesPass()13793c91e39SDimitry Andric Pass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
138b5630dbaSDimitry Andric   return new AMDGPUAnnotateKernelFeatures();
139dd58ef01SDimitry Andric }
140