xref: /src/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1b1c73532SDimitry Andric //===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
2b1c73532SDimitry Andric //
3b1c73532SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4b1c73532SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5b1c73532SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6b1c73532SDimitry Andric //
7b1c73532SDimitry Andric //===----------------------------------------------------------------------===//
8b1c73532SDimitry Andric //
9b1c73532SDimitry Andric /// \file
10b1c73532SDimitry Andric /// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
11b1c73532SDimitry Andric /// instructions that produce single-use VGPR values. If the value is forwarded
12b1c73532SDimitry Andric /// to the consumer instruction prior to VGPR writeback, the hardware can
13b1c73532SDimitry Andric /// then skip (kill) the VGPR write.
14b1c73532SDimitry Andric //
15b1c73532SDimitry Andric //===----------------------------------------------------------------------===//
16b1c73532SDimitry Andric 
17b1c73532SDimitry Andric #include "AMDGPU.h"
18ac9a064cSDimitry Andric #include "AMDGPUGenSearchableTables.inc"
19b1c73532SDimitry Andric #include "GCNSubtarget.h"
20b1c73532SDimitry Andric #include "SIInstrInfo.h"
21ac9a064cSDimitry Andric #include "SIRegisterInfo.h"
22b1c73532SDimitry Andric #include "llvm/ADT/DenseMap.h"
23b1c73532SDimitry Andric #include "llvm/ADT/STLExtras.h"
24ac9a064cSDimitry Andric #include "llvm/ADT/SmallVector.h"
25b1c73532SDimitry Andric #include "llvm/ADT/StringRef.h"
26b1c73532SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
27b1c73532SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
28b1c73532SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
29b1c73532SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
30b1c73532SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
31b1c73532SDimitry Andric #include "llvm/CodeGen/MachineOperand.h"
32b1c73532SDimitry Andric #include "llvm/CodeGen/Register.h"
33b1c73532SDimitry Andric #include "llvm/IR/DebugLoc.h"
34b1c73532SDimitry Andric #include "llvm/MC/MCRegister.h"
35ac9a064cSDimitry Andric #include "llvm/MC/MCRegisterInfo.h"
36b1c73532SDimitry Andric #include "llvm/Pass.h"
37ac9a064cSDimitry Andric #include <array>
38b1c73532SDimitry Andric 
39b1c73532SDimitry Andric using namespace llvm;
40b1c73532SDimitry Andric 
41b1c73532SDimitry Andric #define DEBUG_TYPE "amdgpu-insert-single-use-vdst"
42b1c73532SDimitry Andric 
43b1c73532SDimitry Andric namespace {
44b1c73532SDimitry Andric class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
45b1c73532SDimitry Andric private:
46b1c73532SDimitry Andric   const SIInstrInfo *SII;
47ac9a064cSDimitry Andric   class SingleUseInstruction {
48ac9a064cSDimitry Andric   private:
49ac9a064cSDimitry Andric     static const unsigned MaxSkipRange = 0b111;
50ac9a064cSDimitry Andric     static const unsigned MaxNumberOfSkipRegions = 2;
51ac9a064cSDimitry Andric 
52ac9a064cSDimitry Andric     unsigned LastEncodedPositionEnd;
53ac9a064cSDimitry Andric     MachineInstr *ProducerInstr;
54ac9a064cSDimitry Andric 
55ac9a064cSDimitry Andric     std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
56ac9a064cSDimitry Andric     SmallVector<unsigned, MaxNumberOfSkipRegions> SkipRegions;
57ac9a064cSDimitry Andric 
58ac9a064cSDimitry Andric     // Adds a skip region into the instruction.
skip(const unsigned ProducerPosition)59ac9a064cSDimitry Andric     void skip(const unsigned ProducerPosition) {
60ac9a064cSDimitry Andric       while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
61ac9a064cSDimitry Andric         SkipRegions.push_back(MaxSkipRange);
62ac9a064cSDimitry Andric         LastEncodedPositionEnd += MaxSkipRange;
63ac9a064cSDimitry Andric       }
64ac9a064cSDimitry Andric       SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd);
65ac9a064cSDimitry Andric       LastEncodedPositionEnd = ProducerPosition;
66ac9a064cSDimitry Andric     }
67ac9a064cSDimitry Andric 
currentRegionHasSpace()68ac9a064cSDimitry Andric     bool currentRegionHasSpace() {
69ac9a064cSDimitry Andric       const auto Region = SkipRegions.size();
70ac9a064cSDimitry Andric       // The first region has an extra bit of encoding space.
71ac9a064cSDimitry Andric       return SingleUseRegions[Region] <
72ac9a064cSDimitry Andric              ((Region == MaxNumberOfSkipRegions) ? 0b1111U : 0b111U);
73ac9a064cSDimitry Andric     }
74ac9a064cSDimitry Andric 
encodeImm()75ac9a064cSDimitry Andric     unsigned encodeImm() {
76ac9a064cSDimitry Andric       // Handle the first Single Use Region separately as it has an extra bit
77ac9a064cSDimitry Andric       // of encoding space.
78ac9a064cSDimitry Andric       unsigned Imm = SingleUseRegions[SkipRegions.size()];
79ac9a064cSDimitry Andric       unsigned ShiftAmount = 4;
80ac9a064cSDimitry Andric       for (unsigned i = SkipRegions.size(); i > 0; i--) {
81ac9a064cSDimitry Andric         Imm |= SkipRegions[i - 1] << ShiftAmount;
82ac9a064cSDimitry Andric         ShiftAmount += 3;
83ac9a064cSDimitry Andric         Imm |= SingleUseRegions[i - 1] << ShiftAmount;
84ac9a064cSDimitry Andric         ShiftAmount += 3;
85ac9a064cSDimitry Andric       }
86ac9a064cSDimitry Andric       return Imm;
87ac9a064cSDimitry Andric     }
88ac9a064cSDimitry Andric 
89ac9a064cSDimitry Andric   public:
SingleUseInstruction(const unsigned ProducerPosition,MachineInstr * Producer)90ac9a064cSDimitry Andric     SingleUseInstruction(const unsigned ProducerPosition,
91ac9a064cSDimitry Andric                          MachineInstr *Producer)
92ac9a064cSDimitry Andric         : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
93ac9a064cSDimitry Andric           SingleUseRegions({1, 0, 0}) {}
94ac9a064cSDimitry Andric 
95ac9a064cSDimitry Andric     // Returns false if adding a new single use producer failed. This happens
96ac9a064cSDimitry Andric     // because it could not be encoded, either because there is no room to
97ac9a064cSDimitry Andric     // encode another single use producer region or that this single use
98ac9a064cSDimitry Andric     // producer is too far away to encode the amount of instructions to skip.
tryAddProducer(const unsigned ProducerPosition,MachineInstr * MI)99ac9a064cSDimitry Andric     bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) {
100ac9a064cSDimitry Andric       // Producer is too far away to encode into this instruction or another
101ac9a064cSDimitry Andric       // skip region is needed and SkipRegions.size() = 2 so there's no room for
102ac9a064cSDimitry Andric       // another skip region, therefore a new instruction is needed.
103ac9a064cSDimitry Andric       if (LastEncodedPositionEnd +
104ac9a064cSDimitry Andric               (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) <
105ac9a064cSDimitry Andric           ProducerPosition)
106ac9a064cSDimitry Andric         return false;
107ac9a064cSDimitry Andric 
108ac9a064cSDimitry Andric       // If a skip region is needed.
109ac9a064cSDimitry Andric       if (LastEncodedPositionEnd != ProducerPosition ||
110ac9a064cSDimitry Andric           !currentRegionHasSpace()) {
111ac9a064cSDimitry Andric         // If the current region is out of space therefore a skip region would
112ac9a064cSDimitry Andric         // be needed, but there is no room for another skip region.
113ac9a064cSDimitry Andric         if (SkipRegions.size() == MaxNumberOfSkipRegions)
114ac9a064cSDimitry Andric           return false;
115ac9a064cSDimitry Andric         skip(ProducerPosition);
116ac9a064cSDimitry Andric       }
117ac9a064cSDimitry Andric 
118ac9a064cSDimitry Andric       SingleUseRegions[SkipRegions.size()]++;
119ac9a064cSDimitry Andric       LastEncodedPositionEnd = ProducerPosition + 1;
120ac9a064cSDimitry Andric       ProducerInstr = MI;
121ac9a064cSDimitry Andric       return true;
122ac9a064cSDimitry Andric     }
123ac9a064cSDimitry Andric 
emit(const SIInstrInfo * SII)124ac9a064cSDimitry Andric     auto emit(const SIInstrInfo *SII) {
125ac9a064cSDimitry Andric       return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(),
126ac9a064cSDimitry Andric                      SII->get(AMDGPU::S_SINGLEUSE_VDST))
127ac9a064cSDimitry Andric           .addImm(encodeImm());
128ac9a064cSDimitry Andric     }
129ac9a064cSDimitry Andric   };
130b1c73532SDimitry Andric 
131b1c73532SDimitry Andric public:
132b1c73532SDimitry Andric   static char ID;
133b1c73532SDimitry Andric 
AMDGPUInsertSingleUseVDST()134b1c73532SDimitry Andric   AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}
135b1c73532SDimitry Andric 
insertSingleUseInstructions(ArrayRef<std::pair<unsigned,MachineInstr * >> SingleUseProducers) const136ac9a064cSDimitry Andric   void insertSingleUseInstructions(
137ac9a064cSDimitry Andric       ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers) const {
138ac9a064cSDimitry Andric     SmallVector<SingleUseInstruction> Instructions;
139ac9a064cSDimitry Andric 
140ac9a064cSDimitry Andric     for (auto &[Position, MI] : SingleUseProducers) {
141ac9a064cSDimitry Andric       // Encode this position into the last single use instruction if possible.
142ac9a064cSDimitry Andric       if (Instructions.empty() ||
143ac9a064cSDimitry Andric           !Instructions.back().tryAddProducer(Position, MI)) {
144ac9a064cSDimitry Andric         // If not, add a new instruction.
145ac9a064cSDimitry Andric         Instructions.push_back(SingleUseInstruction(Position, MI));
146ac9a064cSDimitry Andric       }
147ac9a064cSDimitry Andric     }
148ac9a064cSDimitry Andric 
149ac9a064cSDimitry Andric     for (auto &Instruction : Instructions)
150ac9a064cSDimitry Andric       Instruction.emit(SII);
151b1c73532SDimitry Andric   }
152b1c73532SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)153b1c73532SDimitry Andric   bool runOnMachineFunction(MachineFunction &MF) override {
154b1c73532SDimitry Andric     const auto &ST = MF.getSubtarget<GCNSubtarget>();
155b1c73532SDimitry Andric     if (!ST.hasVGPRSingleUseHintInsts())
156b1c73532SDimitry Andric       return false;
157b1c73532SDimitry Andric 
158b1c73532SDimitry Andric     SII = ST.getInstrInfo();
159b1c73532SDimitry Andric     const auto *TRI = &SII->getRegisterInfo();
160b1c73532SDimitry Andric     bool InstructionEmitted = false;
161b1c73532SDimitry Andric 
162b1c73532SDimitry Andric     for (MachineBasicBlock &MBB : MF) {
163ac9a064cSDimitry Andric       DenseMap<MCRegUnit, unsigned> RegisterUseCount;
164b1c73532SDimitry Andric 
165b1c73532SDimitry Andric       // Handle boundaries at the end of basic block separately to avoid
166b1c73532SDimitry Andric       // false positives. If they are live at the end of a basic block then
167b1c73532SDimitry Andric       // assume it has more uses later on.
168ac9a064cSDimitry Andric       for (const auto &Liveout : MBB.liveouts()) {
169ac9a064cSDimitry Andric         for (MCRegUnitMaskIterator Units(Liveout.PhysReg, TRI); Units.isValid();
170ac9a064cSDimitry Andric              ++Units) {
171ac9a064cSDimitry Andric           const auto [Unit, Mask] = *Units;
172ac9a064cSDimitry Andric           if ((Mask & Liveout.LaneMask).any())
173ac9a064cSDimitry Andric             RegisterUseCount[Unit] = 2;
174ac9a064cSDimitry Andric         }
175ac9a064cSDimitry Andric       }
176b1c73532SDimitry Andric 
177ac9a064cSDimitry Andric       SmallVector<std::pair<unsigned, MachineInstr *>>
178ac9a064cSDimitry Andric           SingleUseProducerPositions;
179ac9a064cSDimitry Andric 
180ac9a064cSDimitry Andric       unsigned VALUInstrCount = 0;
181b1c73532SDimitry Andric       for (MachineInstr &MI : reverse(MBB.instrs())) {
182b1c73532SDimitry Andric         // All registers in all operands need to be single use for an
183b1c73532SDimitry Andric         // instruction to be marked as a single use producer.
184b1c73532SDimitry Andric         bool AllProducerOperandsAreSingleUse = true;
185b1c73532SDimitry Andric 
186ac9a064cSDimitry Andric         // Gather a list of Registers used before updating use counts to avoid
187ac9a064cSDimitry Andric         // double counting registers that appear multiple times in a single
188ac9a064cSDimitry Andric         // MachineInstr.
189ac9a064cSDimitry Andric         SmallVector<MCRegUnit> RegistersUsed;
190ac9a064cSDimitry Andric 
191ac9a064cSDimitry Andric         for (const auto &Operand : MI.all_defs()) {
192ac9a064cSDimitry Andric           const auto Reg = Operand.getReg();
193ac9a064cSDimitry Andric 
194ac9a064cSDimitry Andric           const auto RegUnits = TRI->regunits(Reg);
195ac9a064cSDimitry Andric           if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {
196ac9a064cSDimitry Andric                 return RegisterUseCount[Unit] > 1;
197ac9a064cSDimitry Andric               }))
198ac9a064cSDimitry Andric             AllProducerOperandsAreSingleUse = false;
199ac9a064cSDimitry Andric 
200ac9a064cSDimitry Andric           // Reset uses count when a register is no longer live.
201ac9a064cSDimitry Andric           for (const MCRegUnit Unit : RegUnits)
202ac9a064cSDimitry Andric             RegisterUseCount.erase(Unit);
203ac9a064cSDimitry Andric         }
204ac9a064cSDimitry Andric 
205ac9a064cSDimitry Andric         for (const auto &Operand : MI.all_uses()) {
206b1c73532SDimitry Andric           const auto Reg = Operand.getReg();
207b1c73532SDimitry Andric 
208b1c73532SDimitry Andric           // Count the number of times each register is read.
209ac9a064cSDimitry Andric           for (const MCRegUnit Unit : TRI->regunits(Reg)) {
210ac9a064cSDimitry Andric             if (!is_contained(RegistersUsed, Unit))
211ac9a064cSDimitry Andric               RegistersUsed.push_back(Unit);
212ac9a064cSDimitry Andric           }
213ac9a064cSDimitry Andric         }
214ac9a064cSDimitry Andric         for (const MCRegUnit Unit : RegistersUsed)
215ac9a064cSDimitry Andric           RegisterUseCount[Unit]++;
216b1c73532SDimitry Andric 
217b1c73532SDimitry Andric         // Do not attempt to optimise across exec mask changes.
218ac9a064cSDimitry Andric         if (MI.modifiesRegister(AMDGPU::EXEC, TRI) ||
219ac9a064cSDimitry Andric             AMDGPU::isInvalidSingleUseConsumerInst(MI.getOpcode())) {
220b1c73532SDimitry Andric           for (auto &UsedReg : RegisterUseCount)
221b1c73532SDimitry Andric             UsedReg.second = 2;
222b1c73532SDimitry Andric         }
223b1c73532SDimitry Andric 
224ac9a064cSDimitry Andric         if (!SIInstrInfo::isVALU(MI) ||
225ac9a064cSDimitry Andric             AMDGPU::isInvalidSingleUseProducerInst(MI.getOpcode()))
226b1c73532SDimitry Andric           continue;
227ac9a064cSDimitry Andric         if (AllProducerOperandsAreSingleUse) {
228ac9a064cSDimitry Andric           SingleUseProducerPositions.push_back({VALUInstrCount, &MI});
229b1c73532SDimitry Andric           InstructionEmitted = true;
230b1c73532SDimitry Andric         }
231ac9a064cSDimitry Andric         VALUInstrCount++;
232b1c73532SDimitry Andric       }
233ac9a064cSDimitry Andric       insertSingleUseInstructions(SingleUseProducerPositions);
234b1c73532SDimitry Andric     }
235b1c73532SDimitry Andric     return InstructionEmitted;
236b1c73532SDimitry Andric   }
237b1c73532SDimitry Andric };
238b1c73532SDimitry Andric } // namespace
239b1c73532SDimitry Andric 
// Out-of-class definition of the static pass identifier declared in the
// class; the constructor passes it to MachineFunctionPass.
240b1c73532SDimitry Andric char AMDGPUInsertSingleUseVDST::ID = 0;
241b1c73532SDimitry Andric 
// Reference bound to the pass identifier so code outside this file can name
// the pass (presumably declared in AMDGPU.h -- confirm).
242b1c73532SDimitry Andric char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;
243b1c73532SDimitry Andric 
// Pass registration boilerplate: DEBUG_TYPE is supplied as the pass argument
// and the string literal as its name. NOTE(review): the two trailing `false`
// values are the macro's remaining flags -- see the INITIALIZE_PASS
// definition for their meaning.
244b1c73532SDimitry Andric INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
245b1c73532SDimitry Andric                 "AMDGPU Insert SingleUseVDST", false, false)
246