xref: /src/contrib/llvm-project/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1044eb2f6SDimitry Andric //===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
201095a5dSDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
601095a5dSDimitry Andric //
701095a5dSDimitry Andric //===----------------------------------------------------------------------===//
801095a5dSDimitry Andric //
901095a5dSDimitry Andric //===----------------------------------------------------------------------===//
1001095a5dSDimitry Andric //
1101095a5dSDimitry Andric /// \file
1201095a5dSDimitry Andric ///
/// This file contains the definition of the AMDGPU ISA disassembler.
1401095a5dSDimitry Andric //
1501095a5dSDimitry Andric //===----------------------------------------------------------------------===//
1601095a5dSDimitry Andric 
1701095a5dSDimitry Andric // ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
1801095a5dSDimitry Andric 
19044eb2f6SDimitry Andric #include "Disassembler/AMDGPUDisassembler.h"
20eb11fae6SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21145449b1SDimitry Andric #include "SIDefines.h"
22145449b1SDimitry Andric #include "SIRegisterInfo.h"
23e6d15924SDimitry Andric #include "TargetInfo/AMDGPUTargetInfo.h"
24ac9a064cSDimitry Andric #include "Utils/AMDGPUAsmUtils.h"
2501095a5dSDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
26b60736ecSDimitry Andric #include "llvm-c/DisassemblerTypes.h"
27145449b1SDimitry Andric #include "llvm/BinaryFormat/ELF.h"
28e6d15924SDimitry Andric #include "llvm/MC/MCAsmInfo.h"
2901095a5dSDimitry Andric #include "llvm/MC/MCContext.h"
30145449b1SDimitry Andric #include "llvm/MC/MCDecoderOps.h"
31044eb2f6SDimitry Andric #include "llvm/MC/MCExpr.h"
32c0981da4SDimitry Andric #include "llvm/MC/MCInstrDesc.h"
33145449b1SDimitry Andric #include "llvm/MC/MCRegisterInfo.h"
34145449b1SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
35145449b1SDimitry Andric #include "llvm/MC/TargetRegistry.h"
36b60736ecSDimitry Andric #include "llvm/Support/AMDHSAKernelDescriptor.h"
3701095a5dSDimitry Andric 
3801095a5dSDimitry Andric using namespace llvm;
3901095a5dSDimitry Andric 
4001095a5dSDimitry Andric #define DEBUG_TYPE "amdgpu-disassembler"
4101095a5dSDimitry Andric 
// Highest SGPR encoding value for the current subtarget. Expands to an
// expression that calls isGFX10Plus(), so it may only be used where that
// name is in scope.
#define SGPR_MAX                                                               \
  (!isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_SI                             \
                  : AMDGPU::EncValues::SGPR_MAX_GFX10)
45e6d15924SDimitry Andric 
46044eb2f6SDimitry Andric using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
4701095a5dSDimitry Andric 
/// Returns a subtarget description that carries a wavefront-size feature.
///
/// If \p STI already has FeatureWavefrontSize64 or FeatureWavefrontSize32 it
/// is returned unchanged. Otherwise a copy obtained from \p Ctx is returned
/// with FeatureWavefrontSize32 toggled.
static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
                                                 MCContext &Ctx) {
  const bool HasWaveSize = STI.hasFeature(AMDGPU::FeatureWavefrontSize64) ||
                           STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
  if (HasWaveSize)
    return STI;

  // Generations before gfx10 already have FeatureWavefrontSize64 in their
  // definition, so a subtarget with no wave size at all has to be gfx10+.
  // Those default to wave32.
  MCSubtargetInfo &WithWave32 = Ctx.getSubtargetCopy(STI);
  WithWave32.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
  return WithWave32;
}
62ac9a064cSDimitry Andric 
AMDGPUDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx,MCInstrInfo const * MCII)63e6d15924SDimitry Andric AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
647fa27ce4SDimitry Andric                                        MCContext &Ctx, MCInstrInfo const *MCII)
65ac9a064cSDimitry Andric     : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
66ac9a064cSDimitry Andric       MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
67ac9a064cSDimitry Andric       TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
68ac9a064cSDimitry Andric       CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
69e6d15924SDimitry Andric   // ToDo: AMDGPUDisassembler supports only VI ISA.
707fa27ce4SDimitry Andric   if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
71e6d15924SDimitry Andric     report_fatal_error("Disassembly not yet supported for subtarget");
72ac9a064cSDimitry Andric 
73ac9a064cSDimitry Andric   for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
74ac9a064cSDimitry Andric     createConstantSymbolExpr(Symbol, Code);
75ac9a064cSDimitry Andric 
76ac9a064cSDimitry Andric   UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
77ac9a064cSDimitry Andric   UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
78ac9a064cSDimitry Andric   UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
79ac9a064cSDimitry Andric }
80ac9a064cSDimitry Andric 
setABIVersion(unsigned Version)81ac9a064cSDimitry Andric void AMDGPUDisassembler::setABIVersion(unsigned Version) {
82ac9a064cSDimitry Andric   CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
83e6d15924SDimitry Andric }
84e6d15924SDimitry Andric 
8501095a5dSDimitry Andric inline static MCDisassembler::DecodeStatus
addOperand(MCInst & Inst,const MCOperand & Opnd)8601095a5dSDimitry Andric addOperand(MCInst &Inst, const MCOperand& Opnd) {
8701095a5dSDimitry Andric   Inst.addOperand(Opnd);
8801095a5dSDimitry Andric   return Opnd.isValid() ?
8901095a5dSDimitry Andric     MCDisassembler::Success :
90706b4fc4SDimitry Andric     MCDisassembler::Fail;
9101095a5dSDimitry Andric }
9201095a5dSDimitry Andric 
insertNamedMCOperand(MCInst & MI,const MCOperand & Op,uint16_t NameIdx)9308bbd35aSDimitry Andric static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
9408bbd35aSDimitry Andric                                 uint16_t NameIdx) {
9508bbd35aSDimitry Andric   int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
9608bbd35aSDimitry Andric   if (OpIdx != -1) {
9708bbd35aSDimitry Andric     auto I = MI.begin();
9808bbd35aSDimitry Andric     std::advance(I, OpIdx);
9908bbd35aSDimitry Andric     MI.insert(I, Op);
10008bbd35aSDimitry Andric   }
10108bbd35aSDimitry Andric   return OpIdx;
10208bbd35aSDimitry Andric }
10308bbd35aSDimitry Andric 
decodeSOPPBrTarget(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)1047fa27ce4SDimitry Andric static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
105145449b1SDimitry Andric                                        uint64_t Addr,
106145449b1SDimitry Andric                                        const MCDisassembler *Decoder) {
107b915e9e0SDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
108b915e9e0SDimitry Andric 
109e6d15924SDimitry Andric   // Our branches take a simm16, but we need two extra bits to account for the
110e6d15924SDimitry Andric   // factor of 4.
111b915e9e0SDimitry Andric   APInt SignedOffset(18, Imm * 4, true);
112b915e9e0SDimitry Andric   int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
113b915e9e0SDimitry Andric 
114145449b1SDimitry Andric   if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
115b915e9e0SDimitry Andric     return MCDisassembler::Success;
116b915e9e0SDimitry Andric   return addOperand(Inst, MCOperand::createImm(Imm));
117b915e9e0SDimitry Andric }
118b915e9e0SDimitry Andric 
decodeSMEMOffset(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)119145449b1SDimitry Andric static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
120145449b1SDimitry Andric                                      const MCDisassembler *Decoder) {
121cfca06d7SDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
122cfca06d7SDimitry Andric   int64_t Offset;
123b1c73532SDimitry Andric   if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
124b1c73532SDimitry Andric     Offset = SignExtend64<24>(Imm);
125b1c73532SDimitry Andric   } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
126cfca06d7SDimitry Andric     Offset = Imm & 0xFFFFF;
127cfca06d7SDimitry Andric   } else { // GFX9+ supports 21-bit signed offsets.
128cfca06d7SDimitry Andric     Offset = SignExtend64<21>(Imm);
129cfca06d7SDimitry Andric   }
130cfca06d7SDimitry Andric   return addOperand(Inst, MCOperand::createImm(Offset));
131cfca06d7SDimitry Andric }
132cfca06d7SDimitry Andric 
decodeBoolReg(MCInst & Inst,unsigned Val,uint64_t Addr,const MCDisassembler * Decoder)133145449b1SDimitry Andric static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
134145449b1SDimitry Andric                                   const MCDisassembler *Decoder) {
135e6d15924SDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
136e6d15924SDimitry Andric   return addOperand(Inst, DAsm->decodeBoolReg(Val));
137e6d15924SDimitry Andric }
138e6d15924SDimitry Andric 
decodeSplitBarrier(MCInst & Inst,unsigned Val,uint64_t Addr,const MCDisassembler * Decoder)139312c0ed1SDimitry Andric static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
140312c0ed1SDimitry Andric                                        uint64_t Addr,
141312c0ed1SDimitry Andric                                        const MCDisassembler *Decoder) {
142312c0ed1SDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
143312c0ed1SDimitry Andric   return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
144312c0ed1SDimitry Andric }
145312c0ed1SDimitry Andric 
decodeDpp8FI(MCInst & Inst,unsigned Val,uint64_t Addr,const MCDisassembler * Decoder)146ac9a064cSDimitry Andric static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
147ac9a064cSDimitry Andric                                  const MCDisassembler *Decoder) {
148ac9a064cSDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
149ac9a064cSDimitry Andric   return addOperand(Inst, DAsm->decodeDpp8FI(Val));
150ac9a064cSDimitry Andric }
151ac9a064cSDimitry Andric 
// Defines a static decoder callback named StaticDecoderName that forwards the
// encoded value to the AMDGPUDisassembler member function DecoderName and
// appends the resulting operand to the instruction.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);     \
    const MCOperand Operand = Disasm->DecoderName(Imm);                        \
    return addOperand(Inst, Operand);                                          \
  }
15901095a5dSDimitry Andric 
// Defines Decode<RegClass>RegisterClass, which decodes a register directly
// through AMDGPU::<RegClass>RegClassID. Imm is the 8-bit register number.
// Used by VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);     \
    const MCOperand RegOp =                                                    \
        Disasm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm);           \
    return addOperand(Inst, RegOp);                                            \
  }
171b915e9e0SDimitry Andric 
// Defines a static decoder callback called Name. It asserts that Imm fits in
// EncSize bits and appends the operand returned by
// AMDGPUDisassembler::decodeSrcOp(OpWidth, EncImm, MandatoryLiteral, ImmWidth).
// EncImm is an expression evaluated inside the callback, so it may refer to
// the callback's Imm parameter.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);     \
    const MCOperand SrcOperand = Disasm->decodeSrcOp(                          \
        AMDGPUDisassembler::OpWidth, EncImm, MandatoryLiteral, ImmWidth);      \
    return addOperand(Inst, SrcOperand);                                       \
  }
18201095a5dSDimitry Andric 
decodeSrcOp(MCInst & Inst,unsigned EncSize,AMDGPUDisassembler::OpWidthTy OpWidth,unsigned Imm,unsigned EncImm,bool MandatoryLiteral,unsigned ImmWidth,AMDGPU::OperandSemantics Sema,const MCDisassembler * Decoder)183ac9a064cSDimitry Andric static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
184ac9a064cSDimitry Andric                                 AMDGPUDisassembler::OpWidthTy OpWidth,
185ac9a064cSDimitry Andric                                 unsigned Imm, unsigned EncImm,
186ac9a064cSDimitry Andric                                 bool MandatoryLiteral, unsigned ImmWidth,
187ac9a064cSDimitry Andric                                 AMDGPU::OperandSemantics Sema,
188ac9a064cSDimitry Andric                                 const MCDisassembler *Decoder) {
189ac9a064cSDimitry Andric   assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
190ac9a064cSDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
191ac9a064cSDimitry Andric   return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
192ac9a064cSDimitry Andric                                             ImmWidth, Sema));
193ac9a064cSDimitry Andric }
194ac9a064cSDimitry Andric 
// Defines Decode<RegClass>RegisterClass for SGPR-only operands. Imm is the
// 7-bit register number; the register class is picked by decodeSrcOp from
// OpWidth. No literal is mandatory and the immediate width is 0.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
199344a3780SDimitry Andric 
2007fa27ce4SDimitry Andric // Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
2017fa27ce4SDimitry Andric // Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
2027fa27ce4SDimitry Andric // Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
2037fa27ce4SDimitry Andric // Used by AV_ register classes (AGPR or VGPR only register operands).
204ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth>
decodeAV10(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)205ac9a064cSDimitry Andric static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
206ac9a064cSDimitry Andric                                const MCDisassembler *Decoder) {
207ac9a064cSDimitry Andric   return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
208ac9a064cSDimitry Andric                      false, 0, AMDGPU::OperandSemantics::INT, Decoder);
209ac9a064cSDimitry Andric }
210e6d15924SDimitry Andric 
2117fa27ce4SDimitry Andric // Decoder for Src(9-bit encoding) registers only.
212ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth>
decodeSrcReg9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)213ac9a064cSDimitry Andric static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
214ac9a064cSDimitry Andric                                   uint64_t /* Addr */,
215ac9a064cSDimitry Andric                                   const MCDisassembler *Decoder) {
216ac9a064cSDimitry Andric   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
217ac9a064cSDimitry Andric                      AMDGPU::OperandSemantics::INT, Decoder);
218ac9a064cSDimitry Andric }
219e6d15924SDimitry Andric 
2207fa27ce4SDimitry Andric // Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
2217fa27ce4SDimitry Andric // Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
2227fa27ce4SDimitry Andric // only.
223ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth>
decodeSrcA9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)224ac9a064cSDimitry Andric static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
225ac9a064cSDimitry Andric                                 const MCDisassembler *Decoder) {
226ac9a064cSDimitry Andric   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
227ac9a064cSDimitry Andric                      AMDGPU::OperandSemantics::INT, Decoder);
228ac9a064cSDimitry Andric }
229344a3780SDimitry Andric 
2307fa27ce4SDimitry Andric // Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
2317fa27ce4SDimitry Andric // Imm{9} is acc, registers only.
232ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth>
decodeSrcAV10(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)233ac9a064cSDimitry Andric static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
234ac9a064cSDimitry Andric                                   uint64_t /* Addr */,
235ac9a064cSDimitry Andric                                   const MCDisassembler *Decoder) {
236ac9a064cSDimitry Andric   return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
237ac9a064cSDimitry Andric                      AMDGPU::OperandSemantics::INT, Decoder);
238ac9a064cSDimitry Andric }
239e6d15924SDimitry Andric 
2407fa27ce4SDimitry Andric // Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
2417fa27ce4SDimitry Andric // register from RegClass or immediate. Registers that don't belong to RegClass
2427fa27ce4SDimitry Andric // will be decoded and InstPrinter will report warning. Immediate will be
2437fa27ce4SDimitry Andric // decoded into constant of size ImmWidth, should match width of immediate used
2447fa27ce4SDimitry Andric // by OperandType (important for floating point types).
245ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
246ac9a064cSDimitry Andric           unsigned OperandSemantics>
decodeSrcRegOrImm9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)247ac9a064cSDimitry Andric static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
248ac9a064cSDimitry Andric                                        uint64_t /* Addr */,
249ac9a064cSDimitry Andric                                        const MCDisassembler *Decoder) {
250ac9a064cSDimitry Andric   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
251ac9a064cSDimitry Andric                      (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
252ac9a064cSDimitry Andric }
253aca2e42cSDimitry Andric 
2547fa27ce4SDimitry Andric // Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
2557fa27ce4SDimitry Andric // and decode using 'enum10' from decodeSrcOp.
256ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
257ac9a064cSDimitry Andric           unsigned OperandSemantics>
decodeSrcRegOrImmA9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)258ac9a064cSDimitry Andric static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
259ac9a064cSDimitry Andric                                         uint64_t /* Addr */,
260ac9a064cSDimitry Andric                                         const MCDisassembler *Decoder) {
261ac9a064cSDimitry Andric   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
262ac9a064cSDimitry Andric                      (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
263ac9a064cSDimitry Andric }
264e6d15924SDimitry Andric 
265ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
266ac9a064cSDimitry Andric           unsigned OperandSemantics>
decodeSrcRegOrImmDeferred9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)267ac9a064cSDimitry Andric static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
268ac9a064cSDimitry Andric                                                uint64_t /* Addr */,
269ac9a064cSDimitry Andric                                                const MCDisassembler *Decoder) {
270ac9a064cSDimitry Andric   return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
271ac9a064cSDimitry Andric                      (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
272ac9a064cSDimitry Andric }
273e6d15924SDimitry Andric 
2747fa27ce4SDimitry Andric // Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
2757fa27ce4SDimitry Andric // when RegisterClass is used as an operand. Most often used for destination
2767fa27ce4SDimitry Andric // operands.
277344a3780SDimitry Andric 
2787fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)2797fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VGPR_32_Lo128)
2807fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_64)
2817fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_96)
2827fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_128)
2837fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_256)
2847fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_288)
2857fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_352)
2867fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_384)
2877fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_512)
2887fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_1024)
289344a3780SDimitry Andric 
2907fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_32, OPW32)
291312c0ed1SDimitry Andric DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
2927fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
2937fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
2947fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_64, OPW64)
2957fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
296312c0ed1SDimitry Andric DECODE_OPERAND_REG_7(SReg_96, OPW96)
2977fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_128, OPW128)
2987fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_256, OPW256)
2997fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_512, OPW512)
300344a3780SDimitry Andric 
3017fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AGPR_32)
3027fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_64)
3037fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_128)
3047fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_256)
3057fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_512)
3067fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_1024)
307344a3780SDimitry Andric 
308b1c73532SDimitry Andric static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
309b1c73532SDimitry Andric                                                uint64_t /*Addr*/,
310b1c73532SDimitry Andric                                                const MCDisassembler *Decoder) {
311b1c73532SDimitry Andric   assert(isUInt<10>(Imm) && "10-bit encoding expected");
312b1c73532SDimitry Andric   assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
313b1c73532SDimitry Andric 
314b1c73532SDimitry Andric   bool IsHi = Imm & (1 << 9);
315b1c73532SDimitry Andric   unsigned RegIdx = Imm & 0xff;
316b1c73532SDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
317b1c73532SDimitry Andric   return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
318b1c73532SDimitry Andric }
319b1c73532SDimitry Andric 
320b1c73532SDimitry Andric static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)321b1c73532SDimitry Andric DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
322b1c73532SDimitry Andric                                  const MCDisassembler *Decoder) {
323b1c73532SDimitry Andric   assert(isUInt<8>(Imm) && "8-bit encoding expected");
324b1c73532SDimitry Andric 
325b1c73532SDimitry Andric   bool IsHi = Imm & (1 << 7);
326b1c73532SDimitry Andric   unsigned RegIdx = Imm & 0x7f;
327b1c73532SDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
328b1c73532SDimitry Andric   return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
329b1c73532SDimitry Andric }
330b1c73532SDimitry Andric 
decodeOperand_VSrcT16_Lo128(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)331b1c73532SDimitry Andric static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
332b1c73532SDimitry Andric                                                 uint64_t /*Addr*/,
333b1c73532SDimitry Andric                                                 const MCDisassembler *Decoder) {
334b1c73532SDimitry Andric   assert(isUInt<9>(Imm) && "9-bit encoding expected");
335b1c73532SDimitry Andric 
336b1c73532SDimitry Andric   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
337b1c73532SDimitry Andric   bool IsVGPR = Imm & (1 << 8);
338b1c73532SDimitry Andric   if (IsVGPR) {
339b1c73532SDimitry Andric     bool IsHi = Imm & (1 << 7);
340b1c73532SDimitry Andric     unsigned RegIdx = Imm & 0x7f;
341b1c73532SDimitry Andric     return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
342b1c73532SDimitry Andric   }
343b1c73532SDimitry Andric   return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
344b1c73532SDimitry Andric                                                    Imm & 0xFF, false, 16));
345b1c73532SDimitry Andric }
346b1c73532SDimitry Andric 
decodeOperand_VSrcT16(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)347b1c73532SDimitry Andric static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
348b1c73532SDimitry Andric                                           uint64_t /*Addr*/,
349b1c73532SDimitry Andric                                           const MCDisassembler *Decoder) {
350b1c73532SDimitry Andric   assert(isUInt<10>(Imm) && "10-bit encoding expected");
351b1c73532SDimitry Andric 
352b1c73532SDimitry Andric   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
353b1c73532SDimitry Andric   bool IsVGPR = Imm & (1 << 8);
354b1c73532SDimitry Andric   if (IsVGPR) {
355b1c73532SDimitry Andric     bool IsHi = Imm & (1 << 9);
356b1c73532SDimitry Andric     unsigned RegIdx = Imm & 0xff;
357b1c73532SDimitry Andric     return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
358b1c73532SDimitry Andric   }
359b1c73532SDimitry Andric   return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
360b1c73532SDimitry Andric                                                    Imm & 0xFF, false, 16));
361b1c73532SDimitry Andric }
3627fa27ce4SDimitry Andric 
decodeOperand_KImmFP(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)3637fa27ce4SDimitry Andric static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
364145449b1SDimitry Andric                                          uint64_t Addr,
365145449b1SDimitry Andric                                          const MCDisassembler *Decoder) {
366c0981da4SDimitry Andric   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
367c0981da4SDimitry Andric   return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
368c0981da4SDimitry Andric }
369c0981da4SDimitry Andric 
decodeOperandVOPDDstY(MCInst & Inst,unsigned Val,uint64_t Addr,const void * Decoder)370145449b1SDimitry Andric static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
371145449b1SDimitry Andric                                           uint64_t Addr, const void *Decoder) {
372145449b1SDimitry Andric   const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
373145449b1SDimitry Andric   return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
374145449b1SDimitry Andric }
375145449b1SDimitry Andric 
IsAGPROperand(const MCInst & Inst,int OpIdx,const MCRegisterInfo * MRI)376344a3780SDimitry Andric static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
377344a3780SDimitry Andric                           const MCRegisterInfo *MRI) {
378344a3780SDimitry Andric   if (OpIdx < 0)
379344a3780SDimitry Andric     return false;
380344a3780SDimitry Andric 
381344a3780SDimitry Andric   const MCOperand &Op = Inst.getOperand(OpIdx);
382344a3780SDimitry Andric   if (!Op.isReg())
383344a3780SDimitry Andric     return false;
384344a3780SDimitry Andric 
385344a3780SDimitry Andric   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
386344a3780SDimitry Andric   auto Reg = Sub ? Sub : Op.getReg();
387344a3780SDimitry Andric   return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
388344a3780SDimitry Andric }
389344a3780SDimitry Andric 
decodeAVLdSt(MCInst & Inst,unsigned Imm,AMDGPUDisassembler::OpWidthTy Opw,const MCDisassembler * Decoder)390ac9a064cSDimitry Andric static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
391344a3780SDimitry Andric                                  AMDGPUDisassembler::OpWidthTy Opw,
392145449b1SDimitry Andric                                  const MCDisassembler *Decoder) {
393344a3780SDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
394344a3780SDimitry Andric   if (!DAsm->isGFX90A()) {
395344a3780SDimitry Andric     Imm &= 511;
396344a3780SDimitry Andric   } else {
397344a3780SDimitry Andric     // If atomic has both vdata and vdst their register classes are tied.
398344a3780SDimitry Andric     // The bit is decoded along with the vdst, first operand. We need to
399344a3780SDimitry Andric     // change register class to AGPR if vdst was AGPR.
400344a3780SDimitry Andric     // If a DS instruction has both data0 and data1 their register classes
401344a3780SDimitry Andric     // are also tied.
402344a3780SDimitry Andric     unsigned Opc = Inst.getOpcode();
403344a3780SDimitry Andric     uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
404344a3780SDimitry Andric     uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
405344a3780SDimitry Andric                                                         : AMDGPU::OpName::vdata;
406344a3780SDimitry Andric     const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
407344a3780SDimitry Andric     int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
408344a3780SDimitry Andric     if ((int)Inst.getNumOperands() == DataIdx) {
409344a3780SDimitry Andric       int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
410344a3780SDimitry Andric       if (IsAGPROperand(Inst, DstIdx, MRI))
411344a3780SDimitry Andric         Imm |= 512;
412344a3780SDimitry Andric     }
413344a3780SDimitry Andric 
414344a3780SDimitry Andric     if (TSFlags & SIInstrFlags::DS) {
415344a3780SDimitry Andric       int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
416344a3780SDimitry Andric       if ((int)Inst.getNumOperands() == Data2Idx &&
417344a3780SDimitry Andric           IsAGPROperand(Inst, DataIdx, MRI))
418344a3780SDimitry Andric         Imm |= 512;
419344a3780SDimitry Andric     }
420344a3780SDimitry Andric   }
421344a3780SDimitry Andric   return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
422344a3780SDimitry Andric }
423344a3780SDimitry Andric 
424ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy Opw>
decodeAVLdSt(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)425ac9a064cSDimitry Andric static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
426ac9a064cSDimitry Andric                                  uint64_t /* Addr */,
427ac9a064cSDimitry Andric                                  const MCDisassembler *Decoder) {
428ac9a064cSDimitry Andric   return decodeAVLdSt(Inst, Imm, Opw, Decoder);
429ac9a064cSDimitry Andric }
430ac9a064cSDimitry Andric 
decodeOperand_VSrc_f64(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)431b1c73532SDimitry Andric static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
432b1c73532SDimitry Andric                                            uint64_t Addr,
433b1c73532SDimitry Andric                                            const MCDisassembler *Decoder) {
434b1c73532SDimitry Andric   assert(Imm < (1 << 9) && "9-bit encoding");
435b1c73532SDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
436ac9a064cSDimitry Andric   return addOperand(Inst,
437ac9a064cSDimitry Andric                     DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
438ac9a064cSDimitry Andric                                       AMDGPU::OperandSemantics::FP64));
439e3b55780SDimitry Andric }
440e3b55780SDimitry Andric 
44108bbd35aSDimitry Andric #define DECODE_SDWA(DecName) \
44208bbd35aSDimitry Andric DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
443ab44ce3dSDimitry Andric 
44408bbd35aSDimitry Andric DECODE_SDWA(Src32)
DECODE_SDWA(Src16)44508bbd35aSDimitry Andric DECODE_SDWA(Src16)
44608bbd35aSDimitry Andric DECODE_SDWA(VopcDst)
447ab44ce3dSDimitry Andric 
448ac9a064cSDimitry Andric static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
449ac9a064cSDimitry Andric                                      uint64_t /* Addr */,
450ac9a064cSDimitry Andric                                      const MCDisassembler *Decoder) {
451ac9a064cSDimitry Andric   auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
452ac9a064cSDimitry Andric   return addOperand(Inst, DAsm->decodeVersionImm(Imm));
453ac9a064cSDimitry Andric }
454ac9a064cSDimitry Andric 
45501095a5dSDimitry Andric #include "AMDGPUGenDisassemblerTables.inc"
45601095a5dSDimitry Andric 
45701095a5dSDimitry Andric //===----------------------------------------------------------------------===//
45801095a5dSDimitry Andric //
45901095a5dSDimitry Andric //===----------------------------------------------------------------------===//
46001095a5dSDimitry Andric 
eatBytes(ArrayRef<uint8_t> & Bytes)46101095a5dSDimitry Andric template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
46201095a5dSDimitry Andric   assert(Bytes.size() >= sizeof(T));
463b1c73532SDimitry Andric   const auto Res =
464b1c73532SDimitry Andric       support::endian::read<T, llvm::endianness::little>(Bytes.data());
46501095a5dSDimitry Andric   Bytes = Bytes.slice(sizeof(T));
46601095a5dSDimitry Andric   return Res;
46701095a5dSDimitry Andric }
46801095a5dSDimitry Andric 
eat12Bytes(ArrayRef<uint8_t> & Bytes)469145449b1SDimitry Andric static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
470145449b1SDimitry Andric   assert(Bytes.size() >= 12);
471b1c73532SDimitry Andric   uint64_t Lo =
472b1c73532SDimitry Andric       support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
473145449b1SDimitry Andric   Bytes = Bytes.slice(8);
474b1c73532SDimitry Andric   uint64_t Hi =
475b1c73532SDimitry Andric       support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
476145449b1SDimitry Andric   Bytes = Bytes.slice(4);
477145449b1SDimitry Andric   return DecoderUInt128(Lo, Hi);
47801095a5dSDimitry Andric }
47901095a5dSDimitry Andric 
getInstruction(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes_,uint64_t Address,raw_ostream & CS) const48001095a5dSDimitry Andric DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
48101095a5dSDimitry Andric                                                 ArrayRef<uint8_t> Bytes_,
48201095a5dSDimitry Andric                                                 uint64_t Address,
48301095a5dSDimitry Andric                                                 raw_ostream &CS) const {
484e6d15924SDimitry Andric   unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
48501095a5dSDimitry Andric   Bytes = Bytes_.slice(0, MaxInstBytesNum);
48601095a5dSDimitry Andric 
487ac9a064cSDimitry Andric   // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
488ac9a064cSDimitry Andric   // there are fewer bytes left). This will be overridden on success.
489ac9a064cSDimitry Andric   Size = std::min((size_t)4, Bytes_.size());
490ac9a064cSDimitry Andric 
49101095a5dSDimitry Andric   do {
49201095a5dSDimitry Andric     // ToDo: better to switch encoding length using some bit predicate
49301095a5dSDimitry Andric     // but it is unknown yet, so try all we can
49401095a5dSDimitry Andric 
49501095a5dSDimitry Andric     // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
49601095a5dSDimitry Andric     // encodings
497145449b1SDimitry Andric     if (isGFX11Plus() && Bytes.size() >= 12 ) {
498145449b1SDimitry Andric       DecoderUInt128 DecW = eat12Bytes(Bytes);
499312c0ed1SDimitry Andric 
500ac9a064cSDimitry Andric       if (isGFX11() &&
501ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
502ac9a064cSDimitry Andric                         DecW, Address, CS))
503145449b1SDimitry Andric         break;
504b1c73532SDimitry Andric 
505ac9a064cSDimitry Andric       if (isGFX12() &&
506ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
507ac9a064cSDimitry Andric                         DecW, Address, CS))
508b1c73532SDimitry Andric         break;
5094df029ccSDimitry Andric 
510ac9a064cSDimitry Andric       if (isGFX12() &&
511ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
5124df029ccSDimitry Andric         break;
513ac9a064cSDimitry Andric 
514145449b1SDimitry Andric       // Reinitialize Bytes
515145449b1SDimitry Andric       Bytes = Bytes_.slice(0, MaxInstBytesNum);
516ac9a064cSDimitry Andric     }
517145449b1SDimitry Andric 
51801095a5dSDimitry Andric     if (Bytes.size() >= 8) {
51901095a5dSDimitry Andric       const uint64_t QW = eatBytes<uint64_t>(Bytes);
520e6d15924SDimitry Andric 
521ac9a064cSDimitry Andric       if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
522ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
523cfca06d7SDimitry Andric         break;
524ac9a064cSDimitry Andric 
525ac9a064cSDimitry Andric       if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
526ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
527cfca06d7SDimitry Andric         break;
528eb11fae6SDimitry Andric 
529eb11fae6SDimitry Andric       // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
530eb11fae6SDimitry Andric       // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
531eb11fae6SDimitry Andric       // table first so we print the correct name.
532ac9a064cSDimitry Andric       if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
533ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
534eb11fae6SDimitry Andric         break;
53501095a5dSDimitry Andric 
536ac9a064cSDimitry Andric       if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
537ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
538ac9a064cSDimitry Andric         break;
539ac9a064cSDimitry Andric 
540ac9a064cSDimitry Andric       if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
541ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
542ac9a064cSDimitry Andric         break;
543ac9a064cSDimitry Andric 
544ac9a064cSDimitry Andric       if ((isVI() || isGFX9()) &&
545ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
546ac9a064cSDimitry Andric         break;
547ac9a064cSDimitry Andric 
548ac9a064cSDimitry Andric       if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
549ac9a064cSDimitry Andric         break;
550ac9a064cSDimitry Andric 
551ac9a064cSDimitry Andric       if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
552ac9a064cSDimitry Andric         break;
553ac9a064cSDimitry Andric 
554ac9a064cSDimitry Andric       if (isGFX12() &&
555ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
556ac9a064cSDimitry Andric                         Address, CS))
557ac9a064cSDimitry Andric         break;
558ac9a064cSDimitry Andric 
559ac9a064cSDimitry Andric       if (isGFX11() &&
560ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
561ac9a064cSDimitry Andric                         Address, CS))
562ac9a064cSDimitry Andric         break;
563ac9a064cSDimitry Andric 
564ac9a064cSDimitry Andric       if (isGFX11() &&
565ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
566ac9a064cSDimitry Andric         break;
567ac9a064cSDimitry Andric 
568ac9a064cSDimitry Andric       if (isGFX12() &&
569ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
570ac9a064cSDimitry Andric         break;
571ac9a064cSDimitry Andric 
572ac9a064cSDimitry Andric       // Reinitialize Bytes
57301095a5dSDimitry Andric       Bytes = Bytes_.slice(0, MaxInstBytesNum);
574ac9a064cSDimitry Andric     }
57501095a5dSDimitry Andric 
57601095a5dSDimitry Andric     // Try decode 32-bit instruction
577ac9a064cSDimitry Andric     if (Bytes.size() >= 4) {
57801095a5dSDimitry Andric       const uint32_t DW = eatBytes<uint32_t>(Bytes);
57901095a5dSDimitry Andric 
580ac9a064cSDimitry Andric       if ((isVI() || isGFX9()) &&
581ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
582ac9a064cSDimitry Andric         break;
58301095a5dSDimitry Andric 
584ac9a064cSDimitry Andric       if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
585ac9a064cSDimitry Andric         break;
586044eb2f6SDimitry Andric 
587ac9a064cSDimitry Andric       if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
588ac9a064cSDimitry Andric         break;
589ac9a064cSDimitry Andric 
590ac9a064cSDimitry Andric       if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
591ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
592ac9a064cSDimitry Andric         break;
593ac9a064cSDimitry Andric 
594ac9a064cSDimitry Andric       if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
595ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
596ac9a064cSDimitry Andric         break;
597ac9a064cSDimitry Andric 
598ac9a064cSDimitry Andric       if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
599ac9a064cSDimitry Andric         break;
600ac9a064cSDimitry Andric 
601ac9a064cSDimitry Andric       if (isGFX11() &&
602ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
603ac9a064cSDimitry Andric                         Address, CS))
604ac9a064cSDimitry Andric         break;
605ac9a064cSDimitry Andric 
606ac9a064cSDimitry Andric       if (isGFX12() &&
607ac9a064cSDimitry Andric           tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
608ac9a064cSDimitry Andric                         Address, CS))
609344a3780SDimitry Andric         break;
610344a3780SDimitry Andric     }
611344a3780SDimitry Andric 
612ac9a064cSDimitry Andric     return MCDisassembler::Fail;
61301095a5dSDimitry Andric   } while (false);
61401095a5dSDimitry Andric 
615ac9a064cSDimitry Andric   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
616ac9a064cSDimitry Andric     if (isMacDPP(MI))
617ac9a064cSDimitry Andric       convertMacDPPInst(MI);
618ac9a064cSDimitry Andric 
619ac9a064cSDimitry Andric     if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
620ac9a064cSDimitry Andric       convertVOP3PDPPInst(MI);
621ac9a064cSDimitry Andric     else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
622ac9a064cSDimitry Andric              AMDGPU::isVOPC64DPP(MI.getOpcode()))
623ac9a064cSDimitry Andric       convertVOPCDPPInst(MI); // Special VOP3 case
624ac9a064cSDimitry Andric     else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
625ac9a064cSDimitry Andric              -1)
626ac9a064cSDimitry Andric       convertDPP8Inst(MI);
627ac9a064cSDimitry Andric     else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
628ac9a064cSDimitry Andric       convertVOP3DPPInst(MI); // Regular VOP3 case
629ac9a064cSDimitry Andric   }
630ac9a064cSDimitry Andric 
631ac9a064cSDimitry Andric   if (AMDGPU::isMAC(MI.getOpcode())) {
63271d5a254SDimitry Andric     // Insert dummy unused src2_modifiers.
63308bbd35aSDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(0),
63471d5a254SDimitry Andric                          AMDGPU::OpName::src2_modifiers);
63571d5a254SDimitry Andric   }
63671d5a254SDimitry Andric 
637ac9a064cSDimitry Andric   if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
638ac9a064cSDimitry Andric       MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
639ac9a064cSDimitry Andric     // Insert dummy unused src2_modifiers.
640ac9a064cSDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(0),
641ac9a064cSDimitry Andric                          AMDGPU::OpName::src2_modifiers);
642ac9a064cSDimitry Andric   }
643ac9a064cSDimitry Andric 
644ac9a064cSDimitry Andric   if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
64599aabd70SDimitry Andric       !AMDGPU::hasGDS(STI)) {
64699aabd70SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
64799aabd70SDimitry Andric   }
64899aabd70SDimitry Andric 
649ac9a064cSDimitry Andric   if (MCII->get(MI.getOpcode()).TSFlags &
650ac9a064cSDimitry Andric       (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
651344a3780SDimitry Andric     int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
652344a3780SDimitry Andric                                              AMDGPU::OpName::cpol);
653344a3780SDimitry Andric     if (CPolPos != -1) {
654344a3780SDimitry Andric       unsigned CPol =
655344a3780SDimitry Andric           (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
656344a3780SDimitry Andric               AMDGPU::CPol::GLC : 0;
657344a3780SDimitry Andric       if (MI.getNumOperands() <= (unsigned)CPolPos) {
658344a3780SDimitry Andric         insertNamedMCOperand(MI, MCOperand::createImm(CPol),
659344a3780SDimitry Andric                              AMDGPU::OpName::cpol);
660344a3780SDimitry Andric       } else if (CPol) {
661344a3780SDimitry Andric         MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
662344a3780SDimitry Andric       }
663344a3780SDimitry Andric     }
664344a3780SDimitry Andric   }
665344a3780SDimitry Andric 
666ac9a064cSDimitry Andric   if ((MCII->get(MI.getOpcode()).TSFlags &
667344a3780SDimitry Andric        (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
6687fa27ce4SDimitry Andric       (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
669344a3780SDimitry Andric     // GFX90A lost TFE, its place is occupied by ACC.
670344a3780SDimitry Andric     int TFEOpIdx =
671344a3780SDimitry Andric         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
672344a3780SDimitry Andric     if (TFEOpIdx != -1) {
673344a3780SDimitry Andric       auto TFEIter = MI.begin();
674344a3780SDimitry Andric       std::advance(TFEIter, TFEOpIdx);
675344a3780SDimitry Andric       MI.insert(TFEIter, MCOperand::createImm(0));
676344a3780SDimitry Andric     }
677344a3780SDimitry Andric   }
678344a3780SDimitry Andric 
679ac9a064cSDimitry Andric   if (MCII->get(MI.getOpcode()).TSFlags &
680ac9a064cSDimitry Andric       (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
681344a3780SDimitry Andric     int SWZOpIdx =
682344a3780SDimitry Andric         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
683344a3780SDimitry Andric     if (SWZOpIdx != -1) {
684344a3780SDimitry Andric       auto SWZIter = MI.begin();
685344a3780SDimitry Andric       std::advance(SWZIter, SWZOpIdx);
686344a3780SDimitry Andric       MI.insert(SWZIter, MCOperand::createImm(0));
687344a3780SDimitry Andric     }
688b60736ecSDimitry Andric   }
689b60736ecSDimitry Andric 
690ac9a064cSDimitry Andric   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
691e6d15924SDimitry Andric     int VAddr0Idx =
692e6d15924SDimitry Andric         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
693e6d15924SDimitry Andric     int RsrcIdx =
694e6d15924SDimitry Andric         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
695e6d15924SDimitry Andric     unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
696e6d15924SDimitry Andric     if (VAddr0Idx >= 0 && NSAArgs > 0) {
697e6d15924SDimitry Andric       unsigned NSAWords = (NSAArgs + 3) / 4;
698ac9a064cSDimitry Andric       if (Bytes.size() < 4 * NSAWords)
699ac9a064cSDimitry Andric         return MCDisassembler::Fail;
700e6d15924SDimitry Andric       for (unsigned i = 0; i < NSAArgs; ++i) {
701145449b1SDimitry Andric         const unsigned VAddrIdx = VAddr0Idx + 1 + i;
702e3b55780SDimitry Andric         auto VAddrRCID =
703e3b55780SDimitry Andric             MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
704ac9a064cSDimitry Andric         MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
705e6d15924SDimitry Andric       }
706e6d15924SDimitry Andric       Bytes = Bytes.slice(4 * NSAWords);
707e6d15924SDimitry Andric     }
708ac9a064cSDimitry Andric 
709ac9a064cSDimitry Andric     convertMIMGInst(MI);
710e6d15924SDimitry Andric   }
711e6d15924SDimitry Andric 
712ac9a064cSDimitry Andric   if (MCII->get(MI.getOpcode()).TSFlags &
713ac9a064cSDimitry Andric       (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
714ac9a064cSDimitry Andric     convertMIMGInst(MI);
715044eb2f6SDimitry Andric 
716ac9a064cSDimitry Andric   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
717ac9a064cSDimitry Andric     convertEXPInst(MI);
718b1c73532SDimitry Andric 
719ac9a064cSDimitry Andric   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
720ac9a064cSDimitry Andric     convertVINTERPInst(MI);
721145449b1SDimitry Andric 
722ac9a064cSDimitry Andric   if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
723ac9a064cSDimitry Andric     convertSDWAInst(MI);
72408bbd35aSDimitry Andric 
725e6d15924SDimitry Andric   int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
726e6d15924SDimitry Andric                                               AMDGPU::OpName::vdst_in);
727e6d15924SDimitry Andric   if (VDstIn_Idx != -1) {
728e6d15924SDimitry Andric     int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
729e6d15924SDimitry Andric                            MCOI::OperandConstraint::TIED_TO);
730e6d15924SDimitry Andric     if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
731e6d15924SDimitry Andric          !MI.getOperand(VDstIn_Idx).isReg() ||
732e6d15924SDimitry Andric          MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
733e6d15924SDimitry Andric       if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
734e6d15924SDimitry Andric         MI.erase(&MI.getOperand(VDstIn_Idx));
735e6d15924SDimitry Andric       insertNamedMCOperand(MI,
736e6d15924SDimitry Andric         MCOperand::createReg(MI.getOperand(Tied).getReg()),
737e6d15924SDimitry Andric         AMDGPU::OpName::vdst_in);
738e6d15924SDimitry Andric     }
739e6d15924SDimitry Andric   }
740e6d15924SDimitry Andric 
741c0981da4SDimitry Andric   int ImmLitIdx =
742c0981da4SDimitry Andric       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
743e3b55780SDimitry Andric   bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
744ac9a064cSDimitry Andric   if (ImmLitIdx != -1 && !IsSOPK)
745ac9a064cSDimitry Andric     convertFMAanyK(MI, ImmLitIdx);
746c0981da4SDimitry Andric 
747ac9a064cSDimitry Andric   Size = MaxInstBytesNum - Bytes.size();
748ac9a064cSDimitry Andric   return MCDisassembler::Success;
74901095a5dSDimitry Andric }
75001095a5dSDimitry Andric 
convertEXPInst(MCInst & MI) const751ac9a064cSDimitry Andric void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
752b1c73532SDimitry Andric   if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
753145449b1SDimitry Andric     // The MCInst still has these fields even though they are no longer encoded
754145449b1SDimitry Andric     // in the GFX11 instruction.
755145449b1SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
756145449b1SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
757145449b1SDimitry Andric   }
758145449b1SDimitry Andric }
759145449b1SDimitry Andric 
convertVINTERPInst(MCInst & MI) const760ac9a064cSDimitry Andric void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
761145449b1SDimitry Andric   if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
762b1c73532SDimitry Andric       MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
763145449b1SDimitry Andric       MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
764b1c73532SDimitry Andric       MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
765145449b1SDimitry Andric       MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
766b1c73532SDimitry Andric       MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
767b1c73532SDimitry Andric       MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
768b1c73532SDimitry Andric       MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
769145449b1SDimitry Andric     // The MCInst has this field that is not directly encoded in the
770145449b1SDimitry Andric     // instruction.
771145449b1SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
772145449b1SDimitry Andric   }
773145449b1SDimitry Andric }
774145449b1SDimitry Andric 
convertSDWAInst(MCInst & MI) const775ac9a064cSDimitry Andric void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
7767fa27ce4SDimitry Andric   if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
7777fa27ce4SDimitry Andric       STI.hasFeature(AMDGPU::FeatureGFX10)) {
778e3b55780SDimitry Andric     if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
77908bbd35aSDimitry Andric       // VOPC - insert clamp
78008bbd35aSDimitry Andric       insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
7817fa27ce4SDimitry Andric   } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
78208bbd35aSDimitry Andric     int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
78308bbd35aSDimitry Andric     if (SDst != -1) {
78408bbd35aSDimitry Andric       // VOPC - insert VCC register as sdst
785044eb2f6SDimitry Andric       insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
78608bbd35aSDimitry Andric                            AMDGPU::OpName::sdst);
78708bbd35aSDimitry Andric     } else {
78808bbd35aSDimitry Andric       // VOP1/2 - insert omod if present in instruction
78908bbd35aSDimitry Andric       insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
79008bbd35aSDimitry Andric     }
79108bbd35aSDimitry Andric   }
79208bbd35aSDimitry Andric }
79308bbd35aSDimitry Andric 
/// Bit masks reconstructed from the per-source modifier operands of a VOP
/// instruction. Every mask starts out empty.
struct VOPModifiers {
  unsigned OpSel = 0;   // op_sel mask
  unsigned OpSelHi = 0; // op_sel_hi mask
  unsigned NegLo = 0;   // neg_lo mask
  unsigned NegHi = 0;   // neg_hi mask
};
8004b4fe385SDimitry Andric 
8014b4fe385SDimitry Andric // Reconstruct values of VOP3/VOP3P operands such as op_sel.
8024b4fe385SDimitry Andric // Note that these values do not affect disassembler output,
8034b4fe385SDimitry Andric // so this is only necessary for consistency with src_modifiers.
collectVOPModifiers(const MCInst & MI,bool IsVOP3P=false)8044b4fe385SDimitry Andric static VOPModifiers collectVOPModifiers(const MCInst &MI,
8054b4fe385SDimitry Andric                                         bool IsVOP3P = false) {
8064b4fe385SDimitry Andric   VOPModifiers Modifiers;
8074b4fe385SDimitry Andric   unsigned Opc = MI.getOpcode();
8084b4fe385SDimitry Andric   const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
8094b4fe385SDimitry Andric                         AMDGPU::OpName::src1_modifiers,
8104b4fe385SDimitry Andric                         AMDGPU::OpName::src2_modifiers};
8114b4fe385SDimitry Andric   for (int J = 0; J < 3; ++J) {
8124b4fe385SDimitry Andric     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8134b4fe385SDimitry Andric     if (OpIdx == -1)
8144b4fe385SDimitry Andric       continue;
8154b4fe385SDimitry Andric 
8164b4fe385SDimitry Andric     unsigned Val = MI.getOperand(OpIdx).getImm();
8174b4fe385SDimitry Andric 
8184b4fe385SDimitry Andric     Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
8194b4fe385SDimitry Andric     if (IsVOP3P) {
8204b4fe385SDimitry Andric       Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
8214b4fe385SDimitry Andric       Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
8224b4fe385SDimitry Andric       Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
8234b4fe385SDimitry Andric     } else if (J == 0) {
8244b4fe385SDimitry Andric       Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
8254b4fe385SDimitry Andric     }
8264b4fe385SDimitry Andric   }
8274b4fe385SDimitry Andric 
8284b4fe385SDimitry Andric   return Modifiers;
8294b4fe385SDimitry Andric }
8304b4fe385SDimitry Andric 
831ac9a064cSDimitry Andric // Instructions decode the op_sel/suffix bits into the src_modifier
832ac9a064cSDimitry Andric // operands. Copy those bits into the src operands for true16 VGPRs.
convertTrue16OpSel(MCInst & MI) const833ac9a064cSDimitry Andric void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
834ac9a064cSDimitry Andric   const unsigned Opc = MI.getOpcode();
835ac9a064cSDimitry Andric   const MCRegisterClass &ConversionRC =
836ac9a064cSDimitry Andric       MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
837ac9a064cSDimitry Andric   constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
838ac9a064cSDimitry Andric       {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
839ac9a064cSDimitry Andric         SISrcMods::OP_SEL_0},
840ac9a064cSDimitry Andric        {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
841ac9a064cSDimitry Andric         SISrcMods::OP_SEL_0},
842ac9a064cSDimitry Andric        {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
843ac9a064cSDimitry Andric         SISrcMods::OP_SEL_0},
844ac9a064cSDimitry Andric        {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
845ac9a064cSDimitry Andric         SISrcMods::DST_OP_SEL}}};
846ac9a064cSDimitry Andric   for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
847ac9a064cSDimitry Andric     int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
848ac9a064cSDimitry Andric     int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
849ac9a064cSDimitry Andric     if (OpIdx == -1 || OpModsIdx == -1)
850ac9a064cSDimitry Andric       continue;
851ac9a064cSDimitry Andric     MCOperand &Op = MI.getOperand(OpIdx);
852ac9a064cSDimitry Andric     if (!Op.isReg())
853ac9a064cSDimitry Andric       continue;
854ac9a064cSDimitry Andric     if (!ConversionRC.contains(Op.getReg()))
855ac9a064cSDimitry Andric       continue;
856ac9a064cSDimitry Andric     unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
857ac9a064cSDimitry Andric     const MCOperand &OpMods = MI.getOperand(OpModsIdx);
858ac9a064cSDimitry Andric     unsigned ModVal = OpMods.getImm();
859ac9a064cSDimitry Andric     if (ModVal & OpSelMask) { // isHi
860ac9a064cSDimitry Andric       unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
861ac9a064cSDimitry Andric       Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
862ac9a064cSDimitry Andric     }
863ac9a064cSDimitry Andric   }
864ac9a064cSDimitry Andric }
865ac9a064cSDimitry Andric 
866e3b55780SDimitry Andric // MAC opcodes have special old and src2 operands.
867e3b55780SDimitry Andric // src2 is tied to dst, while old is not tied (but assumed to be).
isMacDPP(MCInst & MI) const868e3b55780SDimitry Andric bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
869e3b55780SDimitry Andric   constexpr int DST_IDX = 0;
870e3b55780SDimitry Andric   auto Opcode = MI.getOpcode();
871e3b55780SDimitry Andric   const auto &Desc = MCII->get(Opcode);
872e3b55780SDimitry Andric   auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
873e3b55780SDimitry Andric 
874e3b55780SDimitry Andric   if (OldIdx != -1 && Desc.getOperandConstraint(
875e3b55780SDimitry Andric                           OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
876e3b55780SDimitry Andric     assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
877e3b55780SDimitry Andric     assert(Desc.getOperandConstraint(
878e3b55780SDimitry Andric                AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
879e3b55780SDimitry Andric                MCOI::OperandConstraint::TIED_TO) == DST_IDX);
880e3b55780SDimitry Andric     (void)DST_IDX;
881e3b55780SDimitry Andric     return true;
882e3b55780SDimitry Andric   }
883e3b55780SDimitry Andric 
884e3b55780SDimitry Andric   return false;
885e3b55780SDimitry Andric }
886e3b55780SDimitry Andric 
887e3b55780SDimitry Andric // Create dummy old operand and insert dummy unused src2_modifiers
convertMacDPPInst(MCInst & MI) const888e3b55780SDimitry Andric void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
889e3b55780SDimitry Andric   assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
890e3b55780SDimitry Andric   insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
891e3b55780SDimitry Andric   insertNamedMCOperand(MI, MCOperand::createImm(0),
892e3b55780SDimitry Andric                        AMDGPU::OpName::src2_modifiers);
893e3b55780SDimitry Andric }
894e3b55780SDimitry Andric 
convertDPP8Inst(MCInst & MI) const895ac9a064cSDimitry Andric void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
896e6d15924SDimitry Andric   unsigned Opc = MI.getOpcode();
897ac9a064cSDimitry Andric 
898ac9a064cSDimitry Andric   int VDstInIdx =
899ac9a064cSDimitry Andric       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
900ac9a064cSDimitry Andric   if (VDstInIdx != -1)
901ac9a064cSDimitry Andric     insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
902e3b55780SDimitry Andric 
903e3b55780SDimitry Andric   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
904e3b55780SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
905e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
906ac9a064cSDimitry Andric     convertTrue16OpSel(MI);
9074b4fe385SDimitry Andric     auto Mods = collectVOPModifiers(MI);
9084b4fe385SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
9094b4fe385SDimitry Andric                          AMDGPU::OpName::op_sel);
910145449b1SDimitry Andric   } else {
911e6d15924SDimitry Andric     // Insert dummy unused src modifiers.
912e6d15924SDimitry Andric     if (MI.getNumOperands() < DescNumOps &&
913e3b55780SDimitry Andric         AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
914e6d15924SDimitry Andric       insertNamedMCOperand(MI, MCOperand::createImm(0),
915e6d15924SDimitry Andric                            AMDGPU::OpName::src0_modifiers);
916e6d15924SDimitry Andric 
917e6d15924SDimitry Andric     if (MI.getNumOperands() < DescNumOps &&
918e3b55780SDimitry Andric         AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
919e6d15924SDimitry Andric       insertNamedMCOperand(MI, MCOperand::createImm(0),
920e6d15924SDimitry Andric                            AMDGPU::OpName::src1_modifiers);
921145449b1SDimitry Andric   }
922e3b55780SDimitry Andric }
923e6d15924SDimitry Andric 
convertVOP3DPPInst(MCInst & MI) const924ac9a064cSDimitry Andric void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
925ac9a064cSDimitry Andric   convertTrue16OpSel(MI);
926ac9a064cSDimitry Andric 
927ac9a064cSDimitry Andric   int VDstInIdx =
928ac9a064cSDimitry Andric       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
929ac9a064cSDimitry Andric   if (VDstInIdx != -1)
930ac9a064cSDimitry Andric     insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
931e3b55780SDimitry Andric 
9324b4fe385SDimitry Andric   unsigned Opc = MI.getOpcode();
9334b4fe385SDimitry Andric   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
9344b4fe385SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
935e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9364b4fe385SDimitry Andric     auto Mods = collectVOPModifiers(MI);
9374b4fe385SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
9384b4fe385SDimitry Andric                          AMDGPU::OpName::op_sel);
9394b4fe385SDimitry Andric   }
9404b4fe385SDimitry Andric }
9414b4fe385SDimitry Andric 
942e6d15924SDimitry Andric // Note that before gfx10, the MIMG encoding provided no information about
943e6d15924SDimitry Andric // VADDR size. Consequently, decoded instructions always show address as if it
944e6d15924SDimitry Andric // has 1 dword, which could be not really so.
convertMIMGInst(MCInst & MI) const945ac9a064cSDimitry Andric void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
946b1c73532SDimitry Andric   auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
947eb11fae6SDimitry Andric 
948eb11fae6SDimitry Andric   int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
949eb11fae6SDimitry Andric                                            AMDGPU::OpName::vdst);
950eb11fae6SDimitry Andric 
951044eb2f6SDimitry Andric   int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
952044eb2f6SDimitry Andric                                             AMDGPU::OpName::vdata);
953e6d15924SDimitry Andric   int VAddr0Idx =
954e6d15924SDimitry Andric       AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
955ac9a064cSDimitry Andric   int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
956b1c73532SDimitry Andric                                                   : AMDGPU::OpName::rsrc;
957b1c73532SDimitry Andric   int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
958044eb2f6SDimitry Andric   int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
959044eb2f6SDimitry Andric                                             AMDGPU::OpName::dmask);
960eb11fae6SDimitry Andric 
961eb11fae6SDimitry Andric   int TFEIdx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
962eb11fae6SDimitry Andric                                             AMDGPU::OpName::tfe);
963eb11fae6SDimitry Andric   int D16Idx   = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
964eb11fae6SDimitry Andric                                             AMDGPU::OpName::d16);
965eb11fae6SDimitry Andric 
966c0981da4SDimitry Andric   const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
967c0981da4SDimitry Andric   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
968c0981da4SDimitry Andric       AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
969c0981da4SDimitry Andric 
970eb11fae6SDimitry Andric   assert(VDataIdx != -1);
971c0981da4SDimitry Andric   if (BaseOpcode->BVH) {
972c0981da4SDimitry Andric     // Add A16 operand for intersect_ray instructions
9737fa27ce4SDimitry Andric     addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
974ac9a064cSDimitry Andric     return;
975b60736ecSDimitry Andric   }
976eb11fae6SDimitry Andric 
977eb11fae6SDimitry Andric   bool IsAtomic = (VDstIdx != -1);
978b1c73532SDimitry Andric   bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
979b1c73532SDimitry Andric   bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
980e6d15924SDimitry Andric   bool IsNSA = false;
9817fa27ce4SDimitry Andric   bool IsPartialNSA = false;
982e6d15924SDimitry Andric   unsigned AddrSize = Info->VAddrDwords;
983044eb2f6SDimitry Andric 
984145449b1SDimitry Andric   if (isGFX10Plus()) {
985e6d15924SDimitry Andric     unsigned DimIdx =
986e6d15924SDimitry Andric         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
987344a3780SDimitry Andric     int A16Idx =
988344a3780SDimitry Andric         AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
989e6d15924SDimitry Andric     const AMDGPU::MIMGDimInfo *Dim =
990e6d15924SDimitry Andric         AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
991344a3780SDimitry Andric     const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
992e6d15924SDimitry Andric 
993344a3780SDimitry Andric     AddrSize =
994344a3780SDimitry Andric         AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
995344a3780SDimitry Andric 
996b1c73532SDimitry Andric     // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
997b1c73532SDimitry Andric     // VIMAGE insts other than BVH never use vaddr4.
998145449b1SDimitry Andric     IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
999b1c73532SDimitry Andric             Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1000b1c73532SDimitry Andric             Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1001e6d15924SDimitry Andric     if (!IsNSA) {
1002b1c73532SDimitry Andric       if (!IsVSample && AddrSize > 12)
1003e6d15924SDimitry Andric         AddrSize = 16;
1004e6d15924SDimitry Andric     } else {
1005e6d15924SDimitry Andric       if (AddrSize > Info->VAddrDwords) {
10067fa27ce4SDimitry Andric         if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
10077fa27ce4SDimitry Andric           // The NSA encoding does not contain enough operands for the
10087fa27ce4SDimitry Andric           // combination of base opcode / dimension. Should this be an error?
1009ac9a064cSDimitry Andric           return;
1010e6d15924SDimitry Andric         }
10117fa27ce4SDimitry Andric         IsPartialNSA = true;
10127fa27ce4SDimitry Andric       }
1013e6d15924SDimitry Andric     }
1014e6d15924SDimitry Andric   }
1015e6d15924SDimitry Andric 
1016e6d15924SDimitry Andric   unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1017e3b55780SDimitry Andric   unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1018044eb2f6SDimitry Andric 
1019eb11fae6SDimitry Andric   bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1020eb11fae6SDimitry Andric   if (D16 && AMDGPU::hasPackedD16(STI)) {
1021eb11fae6SDimitry Andric     DstSize = (DstSize + 1) / 2;
1022eb11fae6SDimitry Andric   }
1023eb11fae6SDimitry Andric 
1024344a3780SDimitry Andric   if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1025b60736ecSDimitry Andric     DstSize += 1;
1026eb11fae6SDimitry Andric 
1027e6d15924SDimitry Andric   if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1028ac9a064cSDimitry Andric     return;
1029e6d15924SDimitry Andric 
1030e6d15924SDimitry Andric   int NewOpcode =
1031e6d15924SDimitry Andric       AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1032eb11fae6SDimitry Andric   if (NewOpcode == -1)
1033ac9a064cSDimitry Andric     return;
1034eb11fae6SDimitry Andric 
1035e6d15924SDimitry Andric   // Widen the register to the correct number of enabled channels.
1036e6d15924SDimitry Andric   unsigned NewVdata = AMDGPU::NoRegister;
1037e6d15924SDimitry Andric   if (DstSize != Info->VDataDwords) {
1038e3b55780SDimitry Andric     auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1039044eb2f6SDimitry Andric 
1040eb11fae6SDimitry Andric     // Get first subregister of VData
1041044eb2f6SDimitry Andric     unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
1042eb11fae6SDimitry Andric     unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1043eb11fae6SDimitry Andric     Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1044eb11fae6SDimitry Andric 
1045e6d15924SDimitry Andric     NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1046e6d15924SDimitry Andric                                        &MRI.getRegClass(DataRCID));
1047044eb2f6SDimitry Andric     if (NewVdata == AMDGPU::NoRegister) {
1048044eb2f6SDimitry Andric       // It's possible to encode this such that the low register + enabled
1049044eb2f6SDimitry Andric       // components exceeds the register count.
1050ac9a064cSDimitry Andric       return;
1051044eb2f6SDimitry Andric     }
1052e6d15924SDimitry Andric   }
1053e6d15924SDimitry Andric 
10547fa27ce4SDimitry Andric   // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
10557fa27ce4SDimitry Andric   // If using partial NSA on GFX11+ widen last address register.
10567fa27ce4SDimitry Andric   int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
10577fa27ce4SDimitry Andric   unsigned NewVAddrSA = AMDGPU::NoRegister;
10587fa27ce4SDimitry Andric   if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
10597fa27ce4SDimitry Andric       AddrSize != Info->VAddrDwords) {
10607fa27ce4SDimitry Andric     unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
10617fa27ce4SDimitry Andric     unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
10627fa27ce4SDimitry Andric     VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1063e6d15924SDimitry Andric 
10647fa27ce4SDimitry Andric     auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
10657fa27ce4SDimitry Andric     NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1066e6d15924SDimitry Andric                                         &MRI.getRegClass(AddrRCID));
10677fa27ce4SDimitry Andric     if (!NewVAddrSA)
1068ac9a064cSDimitry Andric       return;
1069e6d15924SDimitry Andric   }
1070044eb2f6SDimitry Andric 
1071044eb2f6SDimitry Andric   MI.setOpcode(NewOpcode);
1072e6d15924SDimitry Andric 
1073e6d15924SDimitry Andric   if (NewVdata != AMDGPU::NoRegister) {
1074044eb2f6SDimitry Andric     MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1075eb11fae6SDimitry Andric 
1076eb11fae6SDimitry Andric     if (IsAtomic) {
1077eb11fae6SDimitry Andric       // Atomic operations have an additional operand (a copy of data)
1078eb11fae6SDimitry Andric       MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1079eb11fae6SDimitry Andric     }
1080e6d15924SDimitry Andric   }
1081e6d15924SDimitry Andric 
10827fa27ce4SDimitry Andric   if (NewVAddrSA) {
10837fa27ce4SDimitry Andric     MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1084e6d15924SDimitry Andric   } else if (IsNSA) {
1085e6d15924SDimitry Andric     assert(AddrSize <= Info->VAddrDwords);
1086e6d15924SDimitry Andric     MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1087e6d15924SDimitry Andric              MI.begin() + VAddr0Idx + Info->VAddrDwords);
1088e6d15924SDimitry Andric   }
1089044eb2f6SDimitry Andric }
1090044eb2f6SDimitry Andric 
1091145449b1SDimitry Andric // Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1092145449b1SDimitry Andric // decoder only adds to src_modifiers, so manually add the bits to the other
1093145449b1SDimitry Andric // operands.
convertVOP3PDPPInst(MCInst & MI) const1094ac9a064cSDimitry Andric void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1095145449b1SDimitry Andric   unsigned Opc = MI.getOpcode();
1096145449b1SDimitry Andric   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
10974b4fe385SDimitry Andric   auto Mods = collectVOPModifiers(MI, true);
1098145449b1SDimitry Andric 
1099145449b1SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
1100e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1101145449b1SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1102145449b1SDimitry Andric 
1103145449b1SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
1104e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
11054b4fe385SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1106145449b1SDimitry Andric                          AMDGPU::OpName::op_sel);
1107145449b1SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
1108e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
11094b4fe385SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
1110145449b1SDimitry Andric                          AMDGPU::OpName::op_sel_hi);
1111145449b1SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
1112e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
11134b4fe385SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
1114145449b1SDimitry Andric                          AMDGPU::OpName::neg_lo);
1115145449b1SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
1116e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
11174b4fe385SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
1118145449b1SDimitry Andric                          AMDGPU::OpName::neg_hi);
1119145449b1SDimitry Andric }
1120145449b1SDimitry Andric 
1121145449b1SDimitry Andric // Create dummy old operand and insert optional operands
convertVOPCDPPInst(MCInst & MI) const1122ac9a064cSDimitry Andric void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1123145449b1SDimitry Andric   unsigned Opc = MI.getOpcode();
1124145449b1SDimitry Andric   unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1125145449b1SDimitry Andric 
1126145449b1SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
1127e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1128145449b1SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1129145449b1SDimitry Andric 
1130145449b1SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
1131e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1132145449b1SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(0),
1133145449b1SDimitry Andric                          AMDGPU::OpName::src0_modifiers);
1134145449b1SDimitry Andric 
1135145449b1SDimitry Andric   if (MI.getNumOperands() < DescNumOps &&
1136e3b55780SDimitry Andric       AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1137145449b1SDimitry Andric     insertNamedMCOperand(MI, MCOperand::createImm(0),
1138145449b1SDimitry Andric                          AMDGPU::OpName::src1_modifiers);
1139145449b1SDimitry Andric }
1140145449b1SDimitry Andric 
convertFMAanyK(MCInst & MI,int ImmLitIdx) const1141ac9a064cSDimitry Andric void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
1142c0981da4SDimitry Andric   assert(HasLiteral && "Should have decoded a literal");
1143c0981da4SDimitry Andric   const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
1144c0981da4SDimitry Andric   unsigned DescNumOps = Desc.getNumOperands();
1145145449b1SDimitry Andric   insertNamedMCOperand(MI, MCOperand::createImm(Literal),
1146145449b1SDimitry Andric                        AMDGPU::OpName::immDeferred);
1147c0981da4SDimitry Andric   assert(DescNumOps == MI.getNumOperands());
1148c0981da4SDimitry Andric   for (unsigned I = 0; I < DescNumOps; ++I) {
1149c0981da4SDimitry Andric     auto &Op = MI.getOperand(I);
1150e3b55780SDimitry Andric     auto OpType = Desc.operands()[I].OperandType;
1151c0981da4SDimitry Andric     bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
1152c0981da4SDimitry Andric                          OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
1153c0981da4SDimitry Andric     if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
1154c0981da4SDimitry Andric         IsDeferredOp)
1155c0981da4SDimitry Andric       Op.setImm(Literal);
1156c0981da4SDimitry Andric   }
1157c0981da4SDimitry Andric }
1158c0981da4SDimitry Andric 
getRegClassName(unsigned RegClassID) const115901095a5dSDimitry Andric const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
116001095a5dSDimitry Andric   return getContext().getRegisterInfo()->
116101095a5dSDimitry Andric     getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
116201095a5dSDimitry Andric }
116301095a5dSDimitry Andric 
116401095a5dSDimitry Andric inline
errOperand(unsigned V,const Twine & ErrMsg) const116501095a5dSDimitry Andric MCOperand AMDGPUDisassembler::errOperand(unsigned V,
116601095a5dSDimitry Andric                                          const Twine& ErrMsg) const {
116701095a5dSDimitry Andric   *CommentStream << "Error: " + ErrMsg;
116801095a5dSDimitry Andric 
116901095a5dSDimitry Andric   // ToDo: add support for error operands to MCInst.h
117001095a5dSDimitry Andric   // return MCOperand::createError(V);
117101095a5dSDimitry Andric   return MCOperand();
117201095a5dSDimitry Andric }
117301095a5dSDimitry Andric 
117401095a5dSDimitry Andric inline
createRegOperand(unsigned int RegId) const117501095a5dSDimitry Andric MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
1176044eb2f6SDimitry Andric   return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
117701095a5dSDimitry Andric }
117801095a5dSDimitry Andric 
117901095a5dSDimitry Andric inline
createRegOperand(unsigned RegClassID,unsigned Val) const118001095a5dSDimitry Andric MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
118101095a5dSDimitry Andric                                                unsigned Val) const {
118201095a5dSDimitry Andric   const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
118301095a5dSDimitry Andric   if (Val >= RegCl.getNumRegs())
118401095a5dSDimitry Andric     return errOperand(Val, Twine(getRegClassName(RegClassID)) +
118501095a5dSDimitry Andric                            ": unknown register " + Twine(Val));
118601095a5dSDimitry Andric   return createRegOperand(RegCl.getRegister(Val));
118701095a5dSDimitry Andric }
118801095a5dSDimitry Andric 
118901095a5dSDimitry Andric inline
createSRegOperand(unsigned SRegClassID,unsigned Val) const119001095a5dSDimitry Andric MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
119101095a5dSDimitry Andric                                                 unsigned Val) const {
119201095a5dSDimitry Andric   // ToDo: SI/CI have 104 SGPRs, VI - 102
119301095a5dSDimitry Andric   // Valery: here we accepting as much as we can, let assembler sort it out
119401095a5dSDimitry Andric   int shift = 0;
119501095a5dSDimitry Andric   switch (SRegClassID) {
119601095a5dSDimitry Andric   case AMDGPU::SGPR_32RegClassID:
119701095a5dSDimitry Andric   case AMDGPU::TTMP_32RegClassID:
119801095a5dSDimitry Andric     break;
119901095a5dSDimitry Andric   case AMDGPU::SGPR_64RegClassID:
120001095a5dSDimitry Andric   case AMDGPU::TTMP_64RegClassID:
120101095a5dSDimitry Andric     shift = 1;
120201095a5dSDimitry Andric     break;
1203312c0ed1SDimitry Andric   case AMDGPU::SGPR_96RegClassID:
1204312c0ed1SDimitry Andric   case AMDGPU::TTMP_96RegClassID:
120501095a5dSDimitry Andric   case AMDGPU::SGPR_128RegClassID:
120601095a5dSDimitry Andric   case AMDGPU::TTMP_128RegClassID:
120701095a5dSDimitry Andric   // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
120801095a5dSDimitry Andric   // this bundle?
1209c7dac04cSDimitry Andric   case AMDGPU::SGPR_256RegClassID:
1210c7dac04cSDimitry Andric   case AMDGPU::TTMP_256RegClassID:
121101095a5dSDimitry Andric     // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
121201095a5dSDimitry Andric   // this bundle?
1213e3b55780SDimitry Andric   case AMDGPU::SGPR_288RegClassID:
1214e3b55780SDimitry Andric   case AMDGPU::TTMP_288RegClassID:
1215e3b55780SDimitry Andric   case AMDGPU::SGPR_320RegClassID:
1216e3b55780SDimitry Andric   case AMDGPU::TTMP_320RegClassID:
1217e3b55780SDimitry Andric   case AMDGPU::SGPR_352RegClassID:
1218e3b55780SDimitry Andric   case AMDGPU::TTMP_352RegClassID:
1219e3b55780SDimitry Andric   case AMDGPU::SGPR_384RegClassID:
1220e3b55780SDimitry Andric   case AMDGPU::TTMP_384RegClassID:
1221c7dac04cSDimitry Andric   case AMDGPU::SGPR_512RegClassID:
1222c7dac04cSDimitry Andric   case AMDGPU::TTMP_512RegClassID:
122301095a5dSDimitry Andric     shift = 2;
122401095a5dSDimitry Andric     break;
122501095a5dSDimitry Andric   // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
122601095a5dSDimitry Andric   // this bundle?
122701095a5dSDimitry Andric   default:
1228b915e9e0SDimitry Andric     llvm_unreachable("unhandled register class");
122901095a5dSDimitry Andric   }
1230b915e9e0SDimitry Andric 
1231b915e9e0SDimitry Andric   if (Val % (1 << shift)) {
123201095a5dSDimitry Andric     *CommentStream << "Warning: " << getRegClassName(SRegClassID)
123301095a5dSDimitry Andric                    << ": scalar reg isn't aligned " << Val;
1234b915e9e0SDimitry Andric   }
1235b915e9e0SDimitry Andric 
123601095a5dSDimitry Andric   return createRegOperand(SRegClassID, Val >> shift);
123701095a5dSDimitry Andric }
123801095a5dSDimitry Andric 
createVGPR16Operand(unsigned RegIdx,bool IsHi) const1239b1c73532SDimitry Andric MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1240b1c73532SDimitry Andric                                                   bool IsHi) const {
124177dbea07SDimitry Andric   unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
124277dbea07SDimitry Andric   return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1243b1c73532SDimitry Andric }
1244b1c73532SDimitry Andric 
1245c0981da4SDimitry Andric // Decode Literals for insts which always have a literal in the encoding
1246c0981da4SDimitry Andric MCOperand
decodeMandatoryLiteralConstant(unsigned Val) const1247c0981da4SDimitry Andric AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1248c0981da4SDimitry Andric   if (HasLiteral) {
1249145449b1SDimitry Andric     assert(
1250145449b1SDimitry Andric         AMDGPU::hasVOPD(STI) &&
1251145449b1SDimitry Andric         "Should only decode multiple kimm with VOPD, check VSrc operand types");
1252c0981da4SDimitry Andric     if (Literal != Val)
1253c0981da4SDimitry Andric       return errOperand(Val, "More than one unique literal is illegal");
1254c0981da4SDimitry Andric   }
1255c0981da4SDimitry Andric   HasLiteral = true;
1256c0981da4SDimitry Andric   Literal = Val;
1257c0981da4SDimitry Andric   return MCOperand::createImm(Literal);
1258c0981da4SDimitry Andric }
1259c0981da4SDimitry Andric 
decodeLiteralConstant(bool ExtendFP64) const1260b1c73532SDimitry Andric MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
126101095a5dSDimitry Andric   // For now all literal constants are supposed to be unsigned integer
126201095a5dSDimitry Andric   // ToDo: deal with signed/unsigned 64-bit integer constants
126301095a5dSDimitry Andric   // ToDo: deal with float/double constants
1264b5630dbaSDimitry Andric   if (!HasLiteral) {
1265b5630dbaSDimitry Andric     if (Bytes.size() < 4) {
126601095a5dSDimitry Andric       return errOperand(0, "cannot read literal, inst bytes left " +
126701095a5dSDimitry Andric                         Twine(Bytes.size()));
1268b5630dbaSDimitry Andric     }
1269b5630dbaSDimitry Andric     HasLiteral = true;
1270b1c73532SDimitry Andric     Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1271b1c73532SDimitry Andric     if (ExtendFP64)
1272b1c73532SDimitry Andric       Literal64 <<= 32;
1273b5630dbaSDimitry Andric   }
1274b1c73532SDimitry Andric   return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
127501095a5dSDimitry Andric }
127601095a5dSDimitry Andric 
decodeIntImmed(unsigned Imm)127701095a5dSDimitry Andric MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
127801095a5dSDimitry Andric   using namespace AMDGPU::EncValues;
1279044eb2f6SDimitry Andric 
128001095a5dSDimitry Andric   assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
128101095a5dSDimitry Andric   return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
128201095a5dSDimitry Andric     (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
128301095a5dSDimitry Andric     (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
128401095a5dSDimitry Andric       // Cast prevents negative overflow.
128501095a5dSDimitry Andric }
128601095a5dSDimitry Andric 
getInlineImmVal32(unsigned Imm)1287b915e9e0SDimitry Andric static int64_t getInlineImmVal32(unsigned Imm) {
1288b915e9e0SDimitry Andric   switch (Imm) {
1289b915e9e0SDimitry Andric   case 240:
12907fa27ce4SDimitry Andric     return llvm::bit_cast<uint32_t>(0.5f);
1291b915e9e0SDimitry Andric   case 241:
12927fa27ce4SDimitry Andric     return llvm::bit_cast<uint32_t>(-0.5f);
1293b915e9e0SDimitry Andric   case 242:
12947fa27ce4SDimitry Andric     return llvm::bit_cast<uint32_t>(1.0f);
1295b915e9e0SDimitry Andric   case 243:
12967fa27ce4SDimitry Andric     return llvm::bit_cast<uint32_t>(-1.0f);
1297b915e9e0SDimitry Andric   case 244:
12987fa27ce4SDimitry Andric     return llvm::bit_cast<uint32_t>(2.0f);
1299b915e9e0SDimitry Andric   case 245:
13007fa27ce4SDimitry Andric     return llvm::bit_cast<uint32_t>(-2.0f);
1301b915e9e0SDimitry Andric   case 246:
13027fa27ce4SDimitry Andric     return llvm::bit_cast<uint32_t>(4.0f);
1303b915e9e0SDimitry Andric   case 247:
13047fa27ce4SDimitry Andric     return llvm::bit_cast<uint32_t>(-4.0f);
1305b915e9e0SDimitry Andric   case 248: // 1 / (2 * PI)
1306b915e9e0SDimitry Andric     return 0x3e22f983;
1307b915e9e0SDimitry Andric   default:
1308b915e9e0SDimitry Andric     llvm_unreachable("invalid fp inline imm");
1309b915e9e0SDimitry Andric   }
1310b915e9e0SDimitry Andric }
1311b915e9e0SDimitry Andric 
getInlineImmVal64(unsigned Imm)1312b915e9e0SDimitry Andric static int64_t getInlineImmVal64(unsigned Imm) {
1313b915e9e0SDimitry Andric   switch (Imm) {
1314b915e9e0SDimitry Andric   case 240:
13157fa27ce4SDimitry Andric     return llvm::bit_cast<uint64_t>(0.5);
1316b915e9e0SDimitry Andric   case 241:
13177fa27ce4SDimitry Andric     return llvm::bit_cast<uint64_t>(-0.5);
1318b915e9e0SDimitry Andric   case 242:
13197fa27ce4SDimitry Andric     return llvm::bit_cast<uint64_t>(1.0);
1320b915e9e0SDimitry Andric   case 243:
13217fa27ce4SDimitry Andric     return llvm::bit_cast<uint64_t>(-1.0);
1322b915e9e0SDimitry Andric   case 244:
13237fa27ce4SDimitry Andric     return llvm::bit_cast<uint64_t>(2.0);
1324b915e9e0SDimitry Andric   case 245:
13257fa27ce4SDimitry Andric     return llvm::bit_cast<uint64_t>(-2.0);
1326b915e9e0SDimitry Andric   case 246:
13277fa27ce4SDimitry Andric     return llvm::bit_cast<uint64_t>(4.0);
1328b915e9e0SDimitry Andric   case 247:
13297fa27ce4SDimitry Andric     return llvm::bit_cast<uint64_t>(-4.0);
1330b915e9e0SDimitry Andric   case 248: // 1 / (2 * PI)
1331b915e9e0SDimitry Andric     return 0x3fc45f306dc9c882;
1332b915e9e0SDimitry Andric   default:
1333b915e9e0SDimitry Andric     llvm_unreachable("invalid fp inline imm");
1334b915e9e0SDimitry Andric   }
1335b915e9e0SDimitry Andric }
1336b915e9e0SDimitry Andric 
getInlineImmValF16(unsigned Imm)1337ac9a064cSDimitry Andric static int64_t getInlineImmValF16(unsigned Imm) {
1338b915e9e0SDimitry Andric   switch (Imm) {
1339b915e9e0SDimitry Andric   case 240:
1340b915e9e0SDimitry Andric     return 0x3800;
1341b915e9e0SDimitry Andric   case 241:
1342b915e9e0SDimitry Andric     return 0xB800;
1343b915e9e0SDimitry Andric   case 242:
1344b915e9e0SDimitry Andric     return 0x3C00;
1345b915e9e0SDimitry Andric   case 243:
1346b915e9e0SDimitry Andric     return 0xBC00;
1347b915e9e0SDimitry Andric   case 244:
1348b915e9e0SDimitry Andric     return 0x4000;
1349b915e9e0SDimitry Andric   case 245:
1350b915e9e0SDimitry Andric     return 0xC000;
1351b915e9e0SDimitry Andric   case 246:
1352b915e9e0SDimitry Andric     return 0x4400;
1353b915e9e0SDimitry Andric   case 247:
1354b915e9e0SDimitry Andric     return 0xC400;
1355b915e9e0SDimitry Andric   case 248: // 1 / (2 * PI)
1356b915e9e0SDimitry Andric     return 0x3118;
1357b915e9e0SDimitry Andric   default:
1358b915e9e0SDimitry Andric     llvm_unreachable("invalid fp inline imm");
1359b915e9e0SDimitry Andric   }
1360b915e9e0SDimitry Andric }
1361b915e9e0SDimitry Andric 
getInlineImmValBF16(unsigned Imm)1362ac9a064cSDimitry Andric static int64_t getInlineImmValBF16(unsigned Imm) {
1363ac9a064cSDimitry Andric   switch (Imm) {
1364ac9a064cSDimitry Andric   case 240:
1365ac9a064cSDimitry Andric     return 0x3F00;
1366ac9a064cSDimitry Andric   case 241:
1367ac9a064cSDimitry Andric     return 0xBF00;
1368ac9a064cSDimitry Andric   case 242:
1369ac9a064cSDimitry Andric     return 0x3F80;
1370ac9a064cSDimitry Andric   case 243:
1371ac9a064cSDimitry Andric     return 0xBF80;
1372ac9a064cSDimitry Andric   case 244:
1373ac9a064cSDimitry Andric     return 0x4000;
1374ac9a064cSDimitry Andric   case 245:
1375ac9a064cSDimitry Andric     return 0xC000;
1376ac9a064cSDimitry Andric   case 246:
1377ac9a064cSDimitry Andric     return 0x4080;
1378ac9a064cSDimitry Andric   case 247:
1379ac9a064cSDimitry Andric     return 0xC080;
1380ac9a064cSDimitry Andric   case 248: // 1 / (2 * PI)
1381ac9a064cSDimitry Andric     return 0x3E22;
1382ac9a064cSDimitry Andric   default:
1383ac9a064cSDimitry Andric     llvm_unreachable("invalid fp inline imm");
1384ac9a064cSDimitry Andric   }
1385ac9a064cSDimitry Andric }
1386ac9a064cSDimitry Andric 
getInlineImmVal16(unsigned Imm,AMDGPU::OperandSemantics Sema)1387ac9a064cSDimitry Andric static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
1388ac9a064cSDimitry Andric   return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
1389ac9a064cSDimitry Andric                                                   : getInlineImmValF16(Imm);
1390ac9a064cSDimitry Andric }
1391ac9a064cSDimitry Andric 
decodeFPImmed(unsigned ImmWidth,unsigned Imm,AMDGPU::OperandSemantics Sema)1392ac9a064cSDimitry Andric MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
1393ac9a064cSDimitry Andric                                             AMDGPU::OperandSemantics Sema) {
1394ac9a064cSDimitry Andric   assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
1395ac9a064cSDimitry Andric          Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1396b915e9e0SDimitry Andric 
139701095a5dSDimitry Andric   // ToDo: case 248: 1/(2*PI) - is allowed only on VI
13987fa27ce4SDimitry Andric   // ImmWidth 0 is a default case where operand should not allow immediates.
13997fa27ce4SDimitry Andric   // Imm value is still decoded into 32 bit immediate operand, inst printer will
14007fa27ce4SDimitry Andric   // use it to print verbose error message.
14017fa27ce4SDimitry Andric   switch (ImmWidth) {
14027fa27ce4SDimitry Andric   case 0:
14037fa27ce4SDimitry Andric   case 32:
1404b915e9e0SDimitry Andric     return MCOperand::createImm(getInlineImmVal32(Imm));
14057fa27ce4SDimitry Andric   case 64:
1406b915e9e0SDimitry Andric     return MCOperand::createImm(getInlineImmVal64(Imm));
14077fa27ce4SDimitry Andric   case 16:
1408ac9a064cSDimitry Andric     return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
1409b915e9e0SDimitry Andric   default:
1410b915e9e0SDimitry Andric     llvm_unreachable("implement me");
141101095a5dSDimitry Andric   }
141201095a5dSDimitry Andric }
141301095a5dSDimitry Andric 
getVgprClassId(const OpWidthTy Width) const141401095a5dSDimitry Andric unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
141501095a5dSDimitry Andric   using namespace AMDGPU;
1416044eb2f6SDimitry Andric 
141701095a5dSDimitry Andric   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
141801095a5dSDimitry Andric   switch (Width) {
141901095a5dSDimitry Andric   default: // fall
1420b915e9e0SDimitry Andric   case OPW32:
1421b915e9e0SDimitry Andric   case OPW16:
142271d5a254SDimitry Andric   case OPWV216:
1423b915e9e0SDimitry Andric     return VGPR_32RegClassID;
1424344a3780SDimitry Andric   case OPW64:
1425344a3780SDimitry Andric   case OPWV232: return VReg_64RegClassID;
1426344a3780SDimitry Andric   case OPW96: return VReg_96RegClassID;
142701095a5dSDimitry Andric   case OPW128: return VReg_128RegClassID;
1428344a3780SDimitry Andric   case OPW160: return VReg_160RegClassID;
1429344a3780SDimitry Andric   case OPW256: return VReg_256RegClassID;
1430e3b55780SDimitry Andric   case OPW288: return VReg_288RegClassID;
1431e3b55780SDimitry Andric   case OPW320: return VReg_320RegClassID;
1432e3b55780SDimitry Andric   case OPW352: return VReg_352RegClassID;
1433e3b55780SDimitry Andric   case OPW384: return VReg_384RegClassID;
1434344a3780SDimitry Andric   case OPW512: return VReg_512RegClassID;
1435344a3780SDimitry Andric   case OPW1024: return VReg_1024RegClassID;
143601095a5dSDimitry Andric   }
143701095a5dSDimitry Andric }
143801095a5dSDimitry Andric 
getAgprClassId(const OpWidthTy Width) const1439e6d15924SDimitry Andric unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
1440e6d15924SDimitry Andric   using namespace AMDGPU;
1441e6d15924SDimitry Andric 
1442e6d15924SDimitry Andric   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1443e6d15924SDimitry Andric   switch (Width) {
1444e6d15924SDimitry Andric   default: // fall
1445e6d15924SDimitry Andric   case OPW32:
1446e6d15924SDimitry Andric   case OPW16:
1447e6d15924SDimitry Andric   case OPWV216:
1448e6d15924SDimitry Andric     return AGPR_32RegClassID;
1449344a3780SDimitry Andric   case OPW64:
1450344a3780SDimitry Andric   case OPWV232: return AReg_64RegClassID;
1451344a3780SDimitry Andric   case OPW96: return AReg_96RegClassID;
1452e6d15924SDimitry Andric   case OPW128: return AReg_128RegClassID;
1453344a3780SDimitry Andric   case OPW160: return AReg_160RegClassID;
1454cfca06d7SDimitry Andric   case OPW256: return AReg_256RegClassID;
1455e3b55780SDimitry Andric   case OPW288: return AReg_288RegClassID;
1456e3b55780SDimitry Andric   case OPW320: return AReg_320RegClassID;
1457e3b55780SDimitry Andric   case OPW352: return AReg_352RegClassID;
1458e3b55780SDimitry Andric   case OPW384: return AReg_384RegClassID;
1459e6d15924SDimitry Andric   case OPW512: return AReg_512RegClassID;
1460e6d15924SDimitry Andric   case OPW1024: return AReg_1024RegClassID;
1461e6d15924SDimitry Andric   }
1462e6d15924SDimitry Andric }
1463e6d15924SDimitry Andric 
1464e6d15924SDimitry Andric 
getSgprClassId(const OpWidthTy Width) const146501095a5dSDimitry Andric unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
146601095a5dSDimitry Andric   using namespace AMDGPU;
1467044eb2f6SDimitry Andric 
146801095a5dSDimitry Andric   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
146901095a5dSDimitry Andric   switch (Width) {
147001095a5dSDimitry Andric   default: // fall
1471b915e9e0SDimitry Andric   case OPW32:
1472b915e9e0SDimitry Andric   case OPW16:
147371d5a254SDimitry Andric   case OPWV216:
1474b915e9e0SDimitry Andric     return SGPR_32RegClassID;
1475344a3780SDimitry Andric   case OPW64:
1476344a3780SDimitry Andric   case OPWV232: return SGPR_64RegClassID;
1477344a3780SDimitry Andric   case OPW96: return SGPR_96RegClassID;
147801095a5dSDimitry Andric   case OPW128: return SGPR_128RegClassID;
1479344a3780SDimitry Andric   case OPW160: return SGPR_160RegClassID;
1480c7dac04cSDimitry Andric   case OPW256: return SGPR_256RegClassID;
1481e3b55780SDimitry Andric   case OPW288: return SGPR_288RegClassID;
1482e3b55780SDimitry Andric   case OPW320: return SGPR_320RegClassID;
1483e3b55780SDimitry Andric   case OPW352: return SGPR_352RegClassID;
1484e3b55780SDimitry Andric   case OPW384: return SGPR_384RegClassID;
1485c7dac04cSDimitry Andric   case OPW512: return SGPR_512RegClassID;
148601095a5dSDimitry Andric   }
148701095a5dSDimitry Andric }
148801095a5dSDimitry Andric 
getTtmpClassId(const OpWidthTy Width) const148901095a5dSDimitry Andric unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
149001095a5dSDimitry Andric   using namespace AMDGPU;
1491044eb2f6SDimitry Andric 
149201095a5dSDimitry Andric   assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
149301095a5dSDimitry Andric   switch (Width) {
149401095a5dSDimitry Andric   default: // fall
1495b915e9e0SDimitry Andric   case OPW32:
1496b915e9e0SDimitry Andric   case OPW16:
149771d5a254SDimitry Andric   case OPWV216:
1498b915e9e0SDimitry Andric     return TTMP_32RegClassID;
1499344a3780SDimitry Andric   case OPW64:
1500344a3780SDimitry Andric   case OPWV232: return TTMP_64RegClassID;
150101095a5dSDimitry Andric   case OPW128: return TTMP_128RegClassID;
1502c7dac04cSDimitry Andric   case OPW256: return TTMP_256RegClassID;
1503e3b55780SDimitry Andric   case OPW288: return TTMP_288RegClassID;
1504e3b55780SDimitry Andric   case OPW320: return TTMP_320RegClassID;
1505e3b55780SDimitry Andric   case OPW352: return TTMP_352RegClassID;
1506e3b55780SDimitry Andric   case OPW384: return TTMP_384RegClassID;
1507c7dac04cSDimitry Andric   case OPW512: return TTMP_512RegClassID;
150801095a5dSDimitry Andric   }
150901095a5dSDimitry Andric }
151001095a5dSDimitry Andric 
getTTmpIdx(unsigned Val) const1511044eb2f6SDimitry Andric int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1512044eb2f6SDimitry Andric   using namespace AMDGPU::EncValues;
1513044eb2f6SDimitry Andric 
1514b60736ecSDimitry Andric   unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1515b60736ecSDimitry Andric   unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1516044eb2f6SDimitry Andric 
1517044eb2f6SDimitry Andric   return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1518044eb2f6SDimitry Andric }
1519044eb2f6SDimitry Andric 
decodeSrcOp(const OpWidthTy Width,unsigned Val,bool MandatoryLiteral,unsigned ImmWidth,AMDGPU::OperandSemantics Sema) const1520c0981da4SDimitry Andric MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
15217fa27ce4SDimitry Andric                                           bool MandatoryLiteral,
1522ac9a064cSDimitry Andric                                           unsigned ImmWidth,
1523ac9a064cSDimitry Andric                                           AMDGPU::OperandSemantics Sema) const {
152401095a5dSDimitry Andric   using namespace AMDGPU::EncValues;
1525044eb2f6SDimitry Andric 
1526e6d15924SDimitry Andric   assert(Val < 1024); // enum10
1527e6d15924SDimitry Andric 
1528e6d15924SDimitry Andric   bool IsAGPR = Val & 512;
1529e6d15924SDimitry Andric   Val &= 511;
153001095a5dSDimitry Andric 
153101095a5dSDimitry Andric   if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1532e6d15924SDimitry Andric     return createRegOperand(IsAGPR ? getAgprClassId(Width)
1533e6d15924SDimitry Andric                                    : getVgprClassId(Width), Val - VGPR_MIN);
153401095a5dSDimitry Andric   }
1535b1c73532SDimitry Andric   return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1536ac9a064cSDimitry Andric                             Sema);
1537b1c73532SDimitry Andric }
1538b1c73532SDimitry Andric 
1539ac9a064cSDimitry Andric MCOperand
decodeNonVGPRSrcOp(const OpWidthTy Width,unsigned Val,bool MandatoryLiteral,unsigned ImmWidth,AMDGPU::OperandSemantics Sema) const1540ac9a064cSDimitry Andric AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
1541ac9a064cSDimitry Andric                                        bool MandatoryLiteral, unsigned ImmWidth,
1542ac9a064cSDimitry Andric                                        AMDGPU::OperandSemantics Sema) const {
1543b1c73532SDimitry Andric   // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1544b1c73532SDimitry Andric   // decoded earlier.
1545b1c73532SDimitry Andric   assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1546b1c73532SDimitry Andric   using namespace AMDGPU::EncValues;
1547b1c73532SDimitry Andric 
154801095a5dSDimitry Andric   if (Val <= SGPR_MAX) {
1549b60736ecSDimitry Andric     // "SGPR_MIN <= Val" is always true and causes compilation warning.
1550e3b55780SDimitry Andric     static_assert(SGPR_MIN == 0);
155101095a5dSDimitry Andric     return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
155201095a5dSDimitry Andric   }
1553044eb2f6SDimitry Andric 
1554044eb2f6SDimitry Andric   int TTmpIdx = getTTmpIdx(Val);
1555044eb2f6SDimitry Andric   if (TTmpIdx >= 0) {
1556044eb2f6SDimitry Andric     return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
155701095a5dSDimitry Andric   }
155801095a5dSDimitry Andric 
155901095a5dSDimitry Andric   if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
156001095a5dSDimitry Andric     return decodeIntImmed(Val);
156101095a5dSDimitry Andric 
156201095a5dSDimitry Andric   if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1563ac9a064cSDimitry Andric     return decodeFPImmed(ImmWidth, Val, Sema);
156401095a5dSDimitry Andric 
1565c0981da4SDimitry Andric   if (Val == LITERAL_CONST) {
1566c0981da4SDimitry Andric     if (MandatoryLiteral)
1567c0981da4SDimitry Andric       // Keep a sentinel value for deferred setting
1568c0981da4SDimitry Andric       return MCOperand::createImm(LITERAL_CONST);
1569ac9a064cSDimitry Andric     return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
1570c0981da4SDimitry Andric   }
157101095a5dSDimitry Andric 
1572b915e9e0SDimitry Andric   switch (Width) {
1573b915e9e0SDimitry Andric   case OPW32:
1574b915e9e0SDimitry Andric   case OPW16:
157571d5a254SDimitry Andric   case OPWV216:
1576b915e9e0SDimitry Andric     return decodeSpecialReg32(Val);
1577b915e9e0SDimitry Andric   case OPW64:
1578344a3780SDimitry Andric   case OPWV232:
1579b915e9e0SDimitry Andric     return decodeSpecialReg64(Val);
1580b915e9e0SDimitry Andric   default:
1581b915e9e0SDimitry Andric     llvm_unreachable("unexpected immediate type");
1582b915e9e0SDimitry Andric   }
158301095a5dSDimitry Andric }
158401095a5dSDimitry Andric 
1585145449b1SDimitry Andric // Bit 0 of DstY isn't stored in the instruction, because it's always the
1586145449b1SDimitry Andric // opposite of bit 0 of DstX.
decodeVOPDDstYOp(MCInst & Inst,unsigned Val) const1587145449b1SDimitry Andric MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1588145449b1SDimitry Andric                                                unsigned Val) const {
1589145449b1SDimitry Andric   int VDstXInd =
1590145449b1SDimitry Andric       AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1591145449b1SDimitry Andric   assert(VDstXInd != -1);
1592145449b1SDimitry Andric   assert(Inst.getOperand(VDstXInd).isReg());
1593145449b1SDimitry Andric   unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1594145449b1SDimitry Andric   Val |= ~XDstReg & 1;
1595145449b1SDimitry Andric   auto Width = llvm::AMDGPUDisassembler::OPW32;
1596145449b1SDimitry Andric   return createRegOperand(getVgprClassId(Width), Val);
1597145449b1SDimitry Andric }
1598145449b1SDimitry Andric 
decodeSpecialReg32(unsigned Val) const159901095a5dSDimitry Andric MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
160001095a5dSDimitry Andric   using namespace AMDGPU;
1601044eb2f6SDimitry Andric 
160201095a5dSDimitry Andric   switch (Val) {
1603e3b55780SDimitry Andric   // clang-format off
1604044eb2f6SDimitry Andric   case 102: return createRegOperand(FLAT_SCR_LO);
1605044eb2f6SDimitry Andric   case 103: return createRegOperand(FLAT_SCR_HI);
1606eb11fae6SDimitry Andric   case 104: return createRegOperand(XNACK_MASK_LO);
1607eb11fae6SDimitry Andric   case 105: return createRegOperand(XNACK_MASK_HI);
160801095a5dSDimitry Andric   case 106: return createRegOperand(VCC_LO);
160901095a5dSDimitry Andric   case 107: return createRegOperand(VCC_HI);
1610e6d15924SDimitry Andric   case 108: return createRegOperand(TBA_LO);
1611e6d15924SDimitry Andric   case 109: return createRegOperand(TBA_HI);
1612e6d15924SDimitry Andric   case 110: return createRegOperand(TMA_LO);
1613e6d15924SDimitry Andric   case 111: return createRegOperand(TMA_HI);
1614145449b1SDimitry Andric   case 124:
1615145449b1SDimitry Andric     return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1616145449b1SDimitry Andric   case 125:
1617145449b1SDimitry Andric     return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
161801095a5dSDimitry Andric   case 126: return createRegOperand(EXEC_LO);
161901095a5dSDimitry Andric   case 127: return createRegOperand(EXEC_HI);
1620e3b55780SDimitry Andric   case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1621e3b55780SDimitry Andric   case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1622e3b55780SDimitry Andric   case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1623e3b55780SDimitry Andric   case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1624e6d15924SDimitry Andric   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1625e6d15924SDimitry Andric   case 251: return createRegOperand(SRC_VCCZ);
1626e6d15924SDimitry Andric   case 252: return createRegOperand(SRC_EXECZ);
1627e6d15924SDimitry Andric   case 253: return createRegOperand(SRC_SCC);
1628e6d15924SDimitry Andric   case 254: return createRegOperand(LDS_DIRECT);
162901095a5dSDimitry Andric   default: break;
1630e3b55780SDimitry Andric     // clang-format on
163101095a5dSDimitry Andric   }
163201095a5dSDimitry Andric   return errOperand(Val, "unknown operand encoding " + Twine(Val));
163301095a5dSDimitry Andric }
163401095a5dSDimitry Andric 
decodeSpecialReg64(unsigned Val) const163501095a5dSDimitry Andric MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
163601095a5dSDimitry Andric   using namespace AMDGPU;
1637044eb2f6SDimitry Andric 
163801095a5dSDimitry Andric   switch (Val) {
1639044eb2f6SDimitry Andric   case 102: return createRegOperand(FLAT_SCR);
1640eb11fae6SDimitry Andric   case 104: return createRegOperand(XNACK_MASK);
164101095a5dSDimitry Andric   case 106: return createRegOperand(VCC);
1642e6d15924SDimitry Andric   case 108: return createRegOperand(TBA);
1643e6d15924SDimitry Andric   case 110: return createRegOperand(TMA);
1644145449b1SDimitry Andric   case 124:
1645145449b1SDimitry Andric     if (isGFX11Plus())
1646145449b1SDimitry Andric       return createRegOperand(SGPR_NULL);
1647145449b1SDimitry Andric     break;
1648145449b1SDimitry Andric   case 125:
1649145449b1SDimitry Andric     if (!isGFX11Plus())
1650145449b1SDimitry Andric       return createRegOperand(SGPR_NULL);
1651145449b1SDimitry Andric     break;
165201095a5dSDimitry Andric   case 126: return createRegOperand(EXEC);
1653e6d15924SDimitry Andric   case 235: return createRegOperand(SRC_SHARED_BASE);
1654e6d15924SDimitry Andric   case 236: return createRegOperand(SRC_SHARED_LIMIT);
1655e6d15924SDimitry Andric   case 237: return createRegOperand(SRC_PRIVATE_BASE);
1656e6d15924SDimitry Andric   case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1657e6d15924SDimitry Andric   case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1658e6d15924SDimitry Andric   case 251: return createRegOperand(SRC_VCCZ);
1659e6d15924SDimitry Andric   case 252: return createRegOperand(SRC_EXECZ);
1660e6d15924SDimitry Andric   case 253: return createRegOperand(SRC_SCC);
166101095a5dSDimitry Andric   default: break;
166201095a5dSDimitry Andric   }
166301095a5dSDimitry Andric   return errOperand(Val, "unknown operand encoding " + Twine(Val));
166401095a5dSDimitry Andric }
166501095a5dSDimitry Andric 
1666ac9a064cSDimitry Andric MCOperand
decodeSDWASrc(const OpWidthTy Width,const unsigned Val,unsigned ImmWidth,AMDGPU::OperandSemantics Sema) const1667ac9a064cSDimitry Andric AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
1668ac9a064cSDimitry Andric                                   unsigned ImmWidth,
1669ac9a064cSDimitry Andric                                   AMDGPU::OperandSemantics Sema) const {
1670ab44ce3dSDimitry Andric   using namespace AMDGPU::SDWA;
1671eb11fae6SDimitry Andric   using namespace AMDGPU::EncValues;
1672ab44ce3dSDimitry Andric 
16737fa27ce4SDimitry Andric   if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
16747fa27ce4SDimitry Andric       STI.hasFeature(AMDGPU::FeatureGFX10)) {
1675e6d15924SDimitry Andric     // XXX: cast to int is needed to avoid stupid warning:
16769df3605dSDimitry Andric     // compare with unsigned is always true
1677e6d15924SDimitry Andric     if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1678ab44ce3dSDimitry Andric         Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1679ab44ce3dSDimitry Andric       return createRegOperand(getVgprClassId(Width),
1680ab44ce3dSDimitry Andric                               Val - SDWA9EncValues::SRC_VGPR_MIN);
1681ab44ce3dSDimitry Andric     }
1682ab44ce3dSDimitry Andric     if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1683b60736ecSDimitry Andric         Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1684e6d15924SDimitry Andric                               : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1685ab44ce3dSDimitry Andric       return createSRegOperand(getSgprClassId(Width),
1686ab44ce3dSDimitry Andric                                Val - SDWA9EncValues::SRC_SGPR_MIN);
1687ab44ce3dSDimitry Andric     }
1688044eb2f6SDimitry Andric     if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1689044eb2f6SDimitry Andric         Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1690044eb2f6SDimitry Andric       return createSRegOperand(getTtmpClassId(Width),
1691044eb2f6SDimitry Andric                                Val - SDWA9EncValues::SRC_TTMP_MIN);
1692044eb2f6SDimitry Andric     }
1693ab44ce3dSDimitry Andric 
1694eb11fae6SDimitry Andric     const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1695eb11fae6SDimitry Andric 
1696eb11fae6SDimitry Andric     if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1697eb11fae6SDimitry Andric       return decodeIntImmed(SVal);
1698eb11fae6SDimitry Andric 
1699eb11fae6SDimitry Andric     if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1700ac9a064cSDimitry Andric       return decodeFPImmed(ImmWidth, SVal, Sema);
1701eb11fae6SDimitry Andric 
1702eb11fae6SDimitry Andric     return decodeSpecialReg32(SVal);
170308bbd35aSDimitry Andric   }
1704ac9a064cSDimitry Andric   if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
1705ac9a064cSDimitry Andric     return createRegOperand(getVgprClassId(Width), Val);
170608bbd35aSDimitry Andric   llvm_unreachable("unsupported target");
1707ab44ce3dSDimitry Andric }
1708ab44ce3dSDimitry Andric 
decodeSDWASrc16(unsigned Val) const170908bbd35aSDimitry Andric MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1710ac9a064cSDimitry Andric   return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
1711ab44ce3dSDimitry Andric }
1712ab44ce3dSDimitry Andric 
decodeSDWASrc32(unsigned Val) const171308bbd35aSDimitry Andric MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1714ac9a064cSDimitry Andric   return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
1715ab44ce3dSDimitry Andric }
1716ab44ce3dSDimitry Andric 
decodeSDWAVopcDst(unsigned Val) const171708bbd35aSDimitry Andric MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1718ab44ce3dSDimitry Andric   using namespace AMDGPU::SDWA;
1719ab44ce3dSDimitry Andric 
17207fa27ce4SDimitry Andric   assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
17217fa27ce4SDimitry Andric           STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1722e6d15924SDimitry Andric          "SDWAVopcDst should be present only on GFX9+");
1723e6d15924SDimitry Andric 
17247fa27ce4SDimitry Andric   bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
1725e6d15924SDimitry Andric 
1726ab44ce3dSDimitry Andric   if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1727ab44ce3dSDimitry Andric     Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1728044eb2f6SDimitry Andric 
1729044eb2f6SDimitry Andric     int TTmpIdx = getTTmpIdx(Val);
1730044eb2f6SDimitry Andric     if (TTmpIdx >= 0) {
17311d5ae102SDimitry Andric       auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
17321d5ae102SDimitry Andric       return createSRegOperand(TTmpClsId, TTmpIdx);
1733ac9a064cSDimitry Andric     }
1734ac9a064cSDimitry Andric     if (Val > SGPR_MAX) {
1735ac9a064cSDimitry Andric       return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val);
1736ac9a064cSDimitry Andric     }
1737e6d15924SDimitry Andric     return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1738ab44ce3dSDimitry Andric   }
1739e6d15924SDimitry Andric   return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1740ab44ce3dSDimitry Andric }
1741ab44ce3dSDimitry Andric 
decodeBoolReg(unsigned Val) const1742e6d15924SDimitry Andric MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
17437fa27ce4SDimitry Andric   return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
17447fa27ce4SDimitry Andric              ? decodeSrcOp(OPW64, Val)
17457fa27ce4SDimitry Andric              : decodeSrcOp(OPW32, Val);
1746e6d15924SDimitry Andric }
1747e6d15924SDimitry Andric 
decodeSplitBarrier(unsigned Val) const1748312c0ed1SDimitry Andric MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
1749312c0ed1SDimitry Andric   return decodeSrcOp(OPW32, Val);
1750312c0ed1SDimitry Andric }
1751312c0ed1SDimitry Andric 
decodeDpp8FI(unsigned Val) const1752ac9a064cSDimitry Andric MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
1753ac9a064cSDimitry Andric   if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
1754ac9a064cSDimitry Andric     return MCOperand();
1755ac9a064cSDimitry Andric   return MCOperand::createImm(Val);
1756ac9a064cSDimitry Andric }
1757ac9a064cSDimitry Andric 
decodeVersionImm(unsigned Imm) const1758ac9a064cSDimitry Andric MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
1759ac9a064cSDimitry Andric   using VersionField = AMDGPU::EncodingField<7, 0>;
1760ac9a064cSDimitry Andric   using W64Bit = AMDGPU::EncodingBit<13>;
1761ac9a064cSDimitry Andric   using W32Bit = AMDGPU::EncodingBit<14>;
1762ac9a064cSDimitry Andric   using MDPBit = AMDGPU::EncodingBit<15>;
1763ac9a064cSDimitry Andric   using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
1764ac9a064cSDimitry Andric 
1765ac9a064cSDimitry Andric   auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
1766ac9a064cSDimitry Andric 
1767ac9a064cSDimitry Andric   // Decode into a plain immediate if any unused bits are raised.
1768ac9a064cSDimitry Andric   if (Encoding::encode(Version, W64, W32, MDP) != Imm)
1769ac9a064cSDimitry Andric     return MCOperand::createImm(Imm);
1770ac9a064cSDimitry Andric 
1771ac9a064cSDimitry Andric   const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
1772ac9a064cSDimitry Andric   auto I = find_if(Versions,
1773ac9a064cSDimitry Andric                    [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
1774ac9a064cSDimitry Andric                      return V.Code == Version;
1775ac9a064cSDimitry Andric                    });
1776ac9a064cSDimitry Andric   MCContext &Ctx = getContext();
1777ac9a064cSDimitry Andric   const MCExpr *E;
1778ac9a064cSDimitry Andric   if (I == Versions.end())
1779ac9a064cSDimitry Andric     E = MCConstantExpr::create(Version, Ctx);
1780ac9a064cSDimitry Andric   else
1781ac9a064cSDimitry Andric     E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
1782ac9a064cSDimitry Andric 
1783ac9a064cSDimitry Andric   if (W64)
1784ac9a064cSDimitry Andric     E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
1785ac9a064cSDimitry Andric   if (W32)
1786ac9a064cSDimitry Andric     E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
1787ac9a064cSDimitry Andric   if (MDP)
1788ac9a064cSDimitry Andric     E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
1789ac9a064cSDimitry Andric 
1790ac9a064cSDimitry Andric   return MCOperand::createExpr(E);
1791ac9a064cSDimitry Andric }
1792ac9a064cSDimitry Andric 
isVI() const1793044eb2f6SDimitry Andric bool AMDGPUDisassembler::isVI() const {
17947fa27ce4SDimitry Andric   return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1795044eb2f6SDimitry Andric }
1796044eb2f6SDimitry Andric 
isGFX9() const1797b60736ecSDimitry Andric bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1798b60736ecSDimitry Andric 
isGFX90A() const1799344a3780SDimitry Andric bool AMDGPUDisassembler::isGFX90A() const {
18007fa27ce4SDimitry Andric   return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1801344a3780SDimitry Andric }
1802344a3780SDimitry Andric 
isGFX9Plus() const1803b60736ecSDimitry Andric bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1804b60736ecSDimitry Andric 
isGFX10() const1805b60736ecSDimitry Andric bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1806b60736ecSDimitry Andric 
isGFX10Plus() const1807b60736ecSDimitry Andric bool AMDGPUDisassembler::isGFX10Plus() const {
1808b60736ecSDimitry Andric   return AMDGPU::isGFX10Plus(STI);
1809044eb2f6SDimitry Andric }
1810044eb2f6SDimitry Andric 
isGFX11() const1811145449b1SDimitry Andric bool AMDGPUDisassembler::isGFX11() const {
18127fa27ce4SDimitry Andric   return STI.hasFeature(AMDGPU::FeatureGFX11);
1813145449b1SDimitry Andric }
1814145449b1SDimitry Andric 
isGFX11Plus() const1815145449b1SDimitry Andric bool AMDGPUDisassembler::isGFX11Plus() const {
1816145449b1SDimitry Andric   return AMDGPU::isGFX11Plus(STI);
1817145449b1SDimitry Andric }
1818145449b1SDimitry Andric 
isGFX12() const1819ac9a064cSDimitry Andric bool AMDGPUDisassembler::isGFX12() const {
1820ac9a064cSDimitry Andric   return STI.hasFeature(AMDGPU::FeatureGFX12);
1821ac9a064cSDimitry Andric }
1822ac9a064cSDimitry Andric 
isGFX12Plus() const1823b1c73532SDimitry Andric bool AMDGPUDisassembler::isGFX12Plus() const {
1824b1c73532SDimitry Andric   return AMDGPU::isGFX12Plus(STI);
1825b1c73532SDimitry Andric }
1826145449b1SDimitry Andric 
hasArchitectedFlatScratch() const1827344a3780SDimitry Andric bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
18287fa27ce4SDimitry Andric   return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1829344a3780SDimitry Andric }
1830344a3780SDimitry Andric 
hasKernargPreload() const1831b1c73532SDimitry Andric bool AMDGPUDisassembler::hasKernargPreload() const {
1832b1c73532SDimitry Andric   return AMDGPU::hasKernargPreload(STI);
1833b1c73532SDimitry Andric }
1834b1c73532SDimitry Andric 
1835b60736ecSDimitry Andric //===----------------------------------------------------------------------===//
1836b60736ecSDimitry Andric // AMDGPU specific symbol handling
1837b60736ecSDimitry Andric //===----------------------------------------------------------------------===//
1838ac9a064cSDimitry Andric 
1839ac9a064cSDimitry Andric /// Print a string describing the reserved bit range specified by Mask with
1840ac9a064cSDimitry Andric /// offset BaseBytes for use in error comments. Mask is a single continuous
1841ac9a064cSDimitry Andric /// range of 1s surrounded by zeros. The format here is meant to align with the
1842ac9a064cSDimitry Andric /// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
getBitRangeFromMask(uint32_t Mask,unsigned BaseBytes)1843ac9a064cSDimitry Andric static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1844ac9a064cSDimitry Andric   SmallString<32> Result;
1845ac9a064cSDimitry Andric   raw_svector_ostream S(Result);
1846ac9a064cSDimitry Andric 
1847ac9a064cSDimitry Andric   int TrailingZeros = llvm::countr_zero(Mask);
1848ac9a064cSDimitry Andric   int PopCount = llvm::popcount(Mask);
1849ac9a064cSDimitry Andric 
1850ac9a064cSDimitry Andric   if (PopCount == 1) {
1851ac9a064cSDimitry Andric     S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1852ac9a064cSDimitry Andric   } else {
1853ac9a064cSDimitry Andric     S << "bits in range ("
1854ac9a064cSDimitry Andric       << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1855ac9a064cSDimitry Andric       << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1856ac9a064cSDimitry Andric   }
1857ac9a064cSDimitry Andric 
1858ac9a064cSDimitry Andric   return Result;
1859ac9a064cSDimitry Andric }
1860ac9a064cSDimitry Andric 
// Helper macros for the kernel-descriptor decoders below. They expand inside
// functions that provide FourByteBuffer, KdStream and Indent (and MAI for the
// comment variant).

// Extracts the field selected by MASK from FourByteBuffer.
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))

// Writes "<Indent><DIRECTIVE> <field value>\n" to KdStream.
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (false)

// Same as PRINT_DIRECTIVE, but the line is prefixed with the assembler's
// comment string.
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (false)

// Returns a string error from the enclosing function when any bit selected by
// MASK is set in FourByteBuffer. DESC names the field in the message and MSG
// is appended to it verbatim.
#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if ((FourByteBuffer & (MASK)) != 0) {                                      \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (false)

// Convenience wrappers: the description defaults to the stringified mask name
// and the extra message, when given, is separated by ", ".
#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
1889ac9a064cSDimitry Andric 
1890b60736ecSDimitry Andric // NOLINTNEXTLINE(readability-identifier-naming)
decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer,raw_string_ostream & KdStream) const1891ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1892b60736ecSDimitry Andric     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1893b60736ecSDimitry Andric   using namespace amdhsa;
1894b60736ecSDimitry Andric   StringRef Indent = "\t";
1895b60736ecSDimitry Andric 
1896b60736ecSDimitry Andric   // We cannot accurately backward compute #VGPRs used from
1897b60736ecSDimitry Andric   // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1898b60736ecSDimitry Andric   // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1899b60736ecSDimitry Andric   // simply calculate the inverse of what the assembler does.
1900b60736ecSDimitry Andric 
1901b60736ecSDimitry Andric   uint32_t GranulatedWorkitemVGPRCount =
19027fa27ce4SDimitry Andric       GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1903b60736ecSDimitry Andric 
19047fa27ce4SDimitry Andric   uint32_t NextFreeVGPR =
19057fa27ce4SDimitry Andric       (GranulatedWorkitemVGPRCount + 1) *
19067fa27ce4SDimitry Andric       AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1907b60736ecSDimitry Andric 
1908b60736ecSDimitry Andric   KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
1909b60736ecSDimitry Andric 
1910b60736ecSDimitry Andric   // We cannot backward compute values used to calculate
1911b60736ecSDimitry Andric   // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
1912b60736ecSDimitry Andric   // directives can't be computed:
1913b60736ecSDimitry Andric   // .amdhsa_reserve_vcc
1914b60736ecSDimitry Andric   // .amdhsa_reserve_flat_scratch
1915b60736ecSDimitry Andric   // .amdhsa_reserve_xnack_mask
1916b60736ecSDimitry Andric   // They take their respective default values if not specified in the assembly.
1917b60736ecSDimitry Andric   //
1918b60736ecSDimitry Andric   // GRANULATED_WAVEFRONT_SGPR_COUNT
1919b60736ecSDimitry Andric   //    = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1920b60736ecSDimitry Andric   //
1921b60736ecSDimitry Andric   // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1922b60736ecSDimitry Andric   // are set to 0. So while disassembling we consider that:
1923b60736ecSDimitry Andric   //
1924b60736ecSDimitry Andric   // GRANULATED_WAVEFRONT_SGPR_COUNT
1925b60736ecSDimitry Andric   //    = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1926b60736ecSDimitry Andric   //
1927b60736ecSDimitry Andric   // The disassembler cannot recover the original values of those 3 directives.
1928b60736ecSDimitry Andric 
1929b60736ecSDimitry Andric   uint32_t GranulatedWavefrontSGPRCount =
19307fa27ce4SDimitry Andric       GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1931b60736ecSDimitry Andric 
1932ac9a064cSDimitry Andric   if (isGFX10Plus())
1933ac9a064cSDimitry Andric     CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1934ac9a064cSDimitry Andric                             "must be zero on gfx10+");
1935b60736ecSDimitry Andric 
1936b60736ecSDimitry Andric   uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1937b60736ecSDimitry Andric                           AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1938b60736ecSDimitry Andric 
1939b60736ecSDimitry Andric   KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1940344a3780SDimitry Andric   if (!hasArchitectedFlatScratch())
1941b60736ecSDimitry Andric     KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1942b60736ecSDimitry Andric   KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1943b60736ecSDimitry Andric   KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
1944b60736ecSDimitry Andric 
1945ac9a064cSDimitry Andric   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1946b60736ecSDimitry Andric 
1947b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1948b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1949b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1950b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1951b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1952b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1953b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1954b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1955b60736ecSDimitry Andric 
1956ac9a064cSDimitry Andric   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1957b60736ecSDimitry Andric 
1958312c0ed1SDimitry Andric   if (!isGFX12Plus())
1959312c0ed1SDimitry Andric     PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1960312c0ed1SDimitry Andric                     COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1961b60736ecSDimitry Andric 
1962ac9a064cSDimitry Andric   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1963b60736ecSDimitry Andric 
1964312c0ed1SDimitry Andric   if (!isGFX12Plus())
1965312c0ed1SDimitry Andric     PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1966312c0ed1SDimitry Andric                     COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1967b60736ecSDimitry Andric 
1968ac9a064cSDimitry Andric   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1969ac9a064cSDimitry Andric   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1970b60736ecSDimitry Andric 
1971b1c73532SDimitry Andric   if (isGFX9Plus())
1972b1c73532SDimitry Andric     PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1973b60736ecSDimitry Andric 
1974b1c73532SDimitry Andric   if (!isGFX9Plus())
1975ac9a064cSDimitry Andric     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1976ac9a064cSDimitry Andric                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1977ac9a064cSDimitry Andric 
1978ac9a064cSDimitry Andric   CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1979ac9a064cSDimitry Andric 
1980b1c73532SDimitry Andric   if (!isGFX10Plus())
1981ac9a064cSDimitry Andric     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1982ac9a064cSDimitry Andric                                  "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1983b60736ecSDimitry Andric 
1984b60736ecSDimitry Andric   if (isGFX10Plus()) {
1985b60736ecSDimitry Andric     PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1986b1c73532SDimitry Andric                     COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1987b1c73532SDimitry Andric     PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1988b1c73532SDimitry Andric     PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1989b60736ecSDimitry Andric   }
1990312c0ed1SDimitry Andric 
1991312c0ed1SDimitry Andric   if (isGFX12Plus())
1992312c0ed1SDimitry Andric     PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1993312c0ed1SDimitry Andric                     COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1994312c0ed1SDimitry Andric 
1995ac9a064cSDimitry Andric   return true;
1996b60736ecSDimitry Andric }
1997b60736ecSDimitry Andric 
1998b60736ecSDimitry Andric // NOLINTNEXTLINE(readability-identifier-naming)
decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,raw_string_ostream & KdStream) const1999ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2000b60736ecSDimitry Andric     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2001b60736ecSDimitry Andric   using namespace amdhsa;
2002b60736ecSDimitry Andric   StringRef Indent = "\t";
2003344a3780SDimitry Andric   if (hasArchitectedFlatScratch())
2004344a3780SDimitry Andric     PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2005344a3780SDimitry Andric                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2006344a3780SDimitry Andric   else
2007344a3780SDimitry Andric     PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2008b60736ecSDimitry Andric                     COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2009b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2010b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2011b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2012b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2013b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2014b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2015b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2016b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2017b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2018b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2019b60736ecSDimitry Andric 
2020ac9a064cSDimitry Andric   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2021ac9a064cSDimitry Andric   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2022ac9a064cSDimitry Andric   CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2023b60736ecSDimitry Andric 
2024b60736ecSDimitry Andric   PRINT_DIRECTIVE(
2025b60736ecSDimitry Andric       ".amdhsa_exception_fp_ieee_invalid_op",
2026b60736ecSDimitry Andric       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2027b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2028b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2029b60736ecSDimitry Andric   PRINT_DIRECTIVE(
2030b60736ecSDimitry Andric       ".amdhsa_exception_fp_ieee_div_zero",
2031b60736ecSDimitry Andric       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2032b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2033b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2034b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2035b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2036b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2037b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2038b60736ecSDimitry Andric   PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2039b60736ecSDimitry Andric                   COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2040b60736ecSDimitry Andric 
2041ac9a064cSDimitry Andric   CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2042b60736ecSDimitry Andric 
2043ac9a064cSDimitry Andric   return true;
2044b60736ecSDimitry Andric }
2045b60736ecSDimitry Andric 
20467fa27ce4SDimitry Andric // NOLINTNEXTLINE(readability-identifier-naming)
decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer,raw_string_ostream & KdStream) const2047ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
20487fa27ce4SDimitry Andric     uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
20497fa27ce4SDimitry Andric   using namespace amdhsa;
20507fa27ce4SDimitry Andric   StringRef Indent = "\t";
20517fa27ce4SDimitry Andric   if (isGFX90A()) {
20527fa27ce4SDimitry Andric     KdStream << Indent << ".amdhsa_accum_offset "
20537fa27ce4SDimitry Andric              << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
20547fa27ce4SDimitry Andric              << '\n';
2055ac9a064cSDimitry Andric 
20567fa27ce4SDimitry Andric     PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2057ac9a064cSDimitry Andric 
2058ac9a064cSDimitry Andric     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2059ac9a064cSDimitry Andric                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2060ac9a064cSDimitry Andric     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2061ac9a064cSDimitry Andric                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
20627fa27ce4SDimitry Andric   } else if (isGFX10Plus()) {
206377dbea07SDimitry Andric     // Bits [0-3].
206477dbea07SDimitry Andric     if (!isGFX12Plus()) {
20657fa27ce4SDimitry Andric       if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
20667fa27ce4SDimitry Andric         PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
206777dbea07SDimitry Andric                         COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
20687fa27ce4SDimitry Andric       } else {
20697fa27ce4SDimitry Andric         PRINT_PSEUDO_DIRECTIVE_COMMENT(
207077dbea07SDimitry Andric             "SHARED_VGPR_COUNT",
207177dbea07SDimitry Andric             COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
20727fa27ce4SDimitry Andric       }
2073b1c73532SDimitry Andric     } else {
2074ac9a064cSDimitry Andric       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2075ac9a064cSDimitry Andric                                    "COMPUTE_PGM_RSRC3",
2076ac9a064cSDimitry Andric                                    "must be zero on gfx12+");
2077b1c73532SDimitry Andric     }
2078b1c73532SDimitry Andric 
207977dbea07SDimitry Andric     // Bits [4-11].
208077dbea07SDimitry Andric     if (isGFX11()) {
208177dbea07SDimitry Andric       PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
208277dbea07SDimitry Andric                                      COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
208377dbea07SDimitry Andric       PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
208477dbea07SDimitry Andric                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
208577dbea07SDimitry Andric       PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
208677dbea07SDimitry Andric                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
208777dbea07SDimitry Andric     } else if (isGFX12Plus()) {
208877dbea07SDimitry Andric       PRINT_PSEUDO_DIRECTIVE_COMMENT(
208977dbea07SDimitry Andric           "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
209077dbea07SDimitry Andric     } else {
2091ac9a064cSDimitry Andric       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2092ac9a064cSDimitry Andric                                    "COMPUTE_PGM_RSRC3",
2093ac9a064cSDimitry Andric                                    "must be zero on gfx10");
209477dbea07SDimitry Andric     }
209577dbea07SDimitry Andric 
209677dbea07SDimitry Andric     // Bits [12].
2097ac9a064cSDimitry Andric     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2098ac9a064cSDimitry Andric                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2099b1c73532SDimitry Andric 
210077dbea07SDimitry Andric     // Bits [13].
210177dbea07SDimitry Andric     if (isGFX12Plus()) {
210277dbea07SDimitry Andric       PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
210377dbea07SDimitry Andric                                      COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
210477dbea07SDimitry Andric     } else {
2105ac9a064cSDimitry Andric       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2106ac9a064cSDimitry Andric                                    "COMPUTE_PGM_RSRC3",
2107ac9a064cSDimitry Andric                                    "must be zero on gfx10 or gfx11");
210877dbea07SDimitry Andric     }
210977dbea07SDimitry Andric 
211077dbea07SDimitry Andric     // Bits [14-30].
2111ac9a064cSDimitry Andric     CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2112ac9a064cSDimitry Andric                                  "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
211377dbea07SDimitry Andric 
211477dbea07SDimitry Andric     // Bits [31].
2115b1c73532SDimitry Andric     if (isGFX11Plus()) {
21167fa27ce4SDimitry Andric       PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
211777dbea07SDimitry Andric                                      COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2118b1c73532SDimitry Andric     } else {
2119ac9a064cSDimitry Andric       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2120ac9a064cSDimitry Andric                                    "COMPUTE_PGM_RSRC3",
2121ac9a064cSDimitry Andric                                    "must be zero on gfx10");
2122b1c73532SDimitry Andric     }
21237fa27ce4SDimitry Andric   } else if (FourByteBuffer) {
2124ac9a064cSDimitry Andric     return createStringError(
2125ac9a064cSDimitry Andric         std::errc::invalid_argument,
2126ac9a064cSDimitry Andric         "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
21277fa27ce4SDimitry Andric   }
2128ac9a064cSDimitry Andric   return true;
21297fa27ce4SDimitry Andric }
// The helper macros above are only meant for the COMPUTE_PGM_RSRC* decoders;
// drop them so they do not leak into the rest of the file.
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
#undef GET_FIELD
#undef CHECK_RESERVED_BITS_IMPL
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_DESC_MSG

2138b60736ecSDimitry Andric 
2139ac9a064cSDimitry Andric /// Create an error object to return from onSymbolStart for reserved kernel
2140ac9a064cSDimitry Andric /// descriptor bits being set.
createReservedKDBitsError(uint32_t Mask,unsigned BaseBytes,const char * Msg="")2141ac9a064cSDimitry Andric static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2142ac9a064cSDimitry Andric                                        const char *Msg = "") {
2143ac9a064cSDimitry Andric   return createStringError(
2144ac9a064cSDimitry Andric       std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2145ac9a064cSDimitry Andric       getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2146ac9a064cSDimitry Andric }
2147ac9a064cSDimitry Andric 
2148ac9a064cSDimitry Andric /// Create an error object to return from onSymbolStart for reserved kernel
2149ac9a064cSDimitry Andric /// descriptor bytes being set.
createReservedKDBytesError(unsigned BaseInBytes,unsigned WidthInBytes)2150ac9a064cSDimitry Andric static Error createReservedKDBytesError(unsigned BaseInBytes,
2151ac9a064cSDimitry Andric                                         unsigned WidthInBytes) {
2152ac9a064cSDimitry Andric   // Create an error comment in the same format as the "Kernel Descriptor"
2153ac9a064cSDimitry Andric   // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2154ac9a064cSDimitry Andric   return createStringError(
2155ac9a064cSDimitry Andric       std::errc::invalid_argument,
2156ac9a064cSDimitry Andric       "kernel descriptor reserved bits in range (%u:%u) set",
2157ac9a064cSDimitry Andric       (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2158ac9a064cSDimitry Andric }
2159ac9a064cSDimitry Andric 
decodeKernelDescriptorDirective(DataExtractor::Cursor & Cursor,ArrayRef<uint8_t> Bytes,raw_string_ostream & KdStream) const2160ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2161b60736ecSDimitry Andric     DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2162b60736ecSDimitry Andric     raw_string_ostream &KdStream) const {
2163b60736ecSDimitry Andric #define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
2164b60736ecSDimitry Andric   do {                                                                         \
2165b60736ecSDimitry Andric     KdStream << Indent << DIRECTIVE " "                                        \
2166b60736ecSDimitry Andric              << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n';            \
2167b60736ecSDimitry Andric   } while (0)
2168b60736ecSDimitry Andric 
2169b60736ecSDimitry Andric   uint16_t TwoByteBuffer = 0;
2170b60736ecSDimitry Andric   uint32_t FourByteBuffer = 0;
2171b60736ecSDimitry Andric 
2172b60736ecSDimitry Andric   StringRef ReservedBytes;
2173b60736ecSDimitry Andric   StringRef Indent = "\t";
2174b60736ecSDimitry Andric 
2175b60736ecSDimitry Andric   assert(Bytes.size() == 64);
2176b60736ecSDimitry Andric   DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2177b60736ecSDimitry Andric 
2178b60736ecSDimitry Andric   switch (Cursor.tell()) {
2179b60736ecSDimitry Andric   case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2180b60736ecSDimitry Andric     FourByteBuffer = DE.getU32(Cursor);
2181b60736ecSDimitry Andric     KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2182b60736ecSDimitry Andric              << '\n';
2183ac9a064cSDimitry Andric     return true;
2184b60736ecSDimitry Andric 
2185b60736ecSDimitry Andric   case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2186b60736ecSDimitry Andric     FourByteBuffer = DE.getU32(Cursor);
2187b60736ecSDimitry Andric     KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2188b60736ecSDimitry Andric              << FourByteBuffer << '\n';
2189ac9a064cSDimitry Andric     return true;
2190b60736ecSDimitry Andric 
2191344a3780SDimitry Andric   case amdhsa::KERNARG_SIZE_OFFSET:
2192344a3780SDimitry Andric     FourByteBuffer = DE.getU32(Cursor);
2193344a3780SDimitry Andric     KdStream << Indent << ".amdhsa_kernarg_size "
2194344a3780SDimitry Andric              << FourByteBuffer << '\n';
2195ac9a064cSDimitry Andric     return true;
2196344a3780SDimitry Andric 
2197b60736ecSDimitry Andric   case amdhsa::RESERVED0_OFFSET:
2198344a3780SDimitry Andric     // 4 reserved bytes, must be 0.
2199344a3780SDimitry Andric     ReservedBytes = DE.getBytes(Cursor, 4);
2200344a3780SDimitry Andric     for (int I = 0; I < 4; ++I) {
2201ac9a064cSDimitry Andric       if (ReservedBytes[I] != 0)
2202ac9a064cSDimitry Andric         return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2203b60736ecSDimitry Andric     }
2204ac9a064cSDimitry Andric     return true;
2205b60736ecSDimitry Andric 
2206b60736ecSDimitry Andric   case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2207b60736ecSDimitry Andric     // KERNEL_CODE_ENTRY_BYTE_OFFSET
2208b60736ecSDimitry Andric     // So far no directive controls this for Code Object V3, so simply skip for
2209b60736ecSDimitry Andric     // disassembly.
2210b60736ecSDimitry Andric     DE.skip(Cursor, 8);
2211ac9a064cSDimitry Andric     return true;
2212b60736ecSDimitry Andric 
2213b60736ecSDimitry Andric   case amdhsa::RESERVED1_OFFSET:
2214b60736ecSDimitry Andric     // 20 reserved bytes, must be 0.
2215b60736ecSDimitry Andric     ReservedBytes = DE.getBytes(Cursor, 20);
2216b60736ecSDimitry Andric     for (int I = 0; I < 20; ++I) {
2217ac9a064cSDimitry Andric       if (ReservedBytes[I] != 0)
2218ac9a064cSDimitry Andric         return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2219b60736ecSDimitry Andric     }
2220ac9a064cSDimitry Andric     return true;
2221b60736ecSDimitry Andric 
2222b60736ecSDimitry Andric   case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2223b60736ecSDimitry Andric     FourByteBuffer = DE.getU32(Cursor);
22247fa27ce4SDimitry Andric     return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2225b60736ecSDimitry Andric 
2226b60736ecSDimitry Andric   case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2227b60736ecSDimitry Andric     FourByteBuffer = DE.getU32(Cursor);
22287fa27ce4SDimitry Andric     return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2229b60736ecSDimitry Andric 
2230b60736ecSDimitry Andric   case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2231b60736ecSDimitry Andric     FourByteBuffer = DE.getU32(Cursor);
22327fa27ce4SDimitry Andric     return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2233b60736ecSDimitry Andric 
2234b60736ecSDimitry Andric   case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2235b60736ecSDimitry Andric     using namespace amdhsa;
2236b60736ecSDimitry Andric     TwoByteBuffer = DE.getU16(Cursor);
2237b60736ecSDimitry Andric 
2238344a3780SDimitry Andric     if (!hasArchitectedFlatScratch())
2239b60736ecSDimitry Andric       PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2240b60736ecSDimitry Andric                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2241b60736ecSDimitry Andric     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2242b60736ecSDimitry Andric                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2243b60736ecSDimitry Andric     PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2244b60736ecSDimitry Andric                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2245b60736ecSDimitry Andric     PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2246b60736ecSDimitry Andric                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2247b60736ecSDimitry Andric     PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2248b60736ecSDimitry Andric                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2249344a3780SDimitry Andric     if (!hasArchitectedFlatScratch())
2250b60736ecSDimitry Andric       PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2251b60736ecSDimitry Andric                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2252b60736ecSDimitry Andric     PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2253b60736ecSDimitry Andric                     KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2254b60736ecSDimitry Andric 
2255b60736ecSDimitry Andric     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2256ac9a064cSDimitry Andric       return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2257ac9a064cSDimitry Andric                                        amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2258b60736ecSDimitry Andric 
2259b60736ecSDimitry Andric     // Reserved for GFX9
2260b60736ecSDimitry Andric     if (isGFX9() &&
2261b60736ecSDimitry Andric         (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2262ac9a064cSDimitry Andric       return createReservedKDBitsError(
2263ac9a064cSDimitry Andric           KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2264ac9a064cSDimitry Andric           amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2265ac9a064cSDimitry Andric     }
2266ac9a064cSDimitry Andric     if (isGFX10Plus()) {
2267b60736ecSDimitry Andric       PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2268b60736ecSDimitry Andric                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2269b60736ecSDimitry Andric     }
2270b60736ecSDimitry Andric 
2271ac9a064cSDimitry Andric     if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
22724b4fe385SDimitry Andric       PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
22734b4fe385SDimitry Andric                       KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
22744b4fe385SDimitry Andric 
2275ac9a064cSDimitry Andric     if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2276ac9a064cSDimitry Andric       return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2277ac9a064cSDimitry Andric                                        amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2278ac9a064cSDimitry Andric     }
2279b60736ecSDimitry Andric 
2280ac9a064cSDimitry Andric     return true;
2281b60736ecSDimitry Andric 
2282b1c73532SDimitry Andric   case amdhsa::KERNARG_PRELOAD_OFFSET:
2283b1c73532SDimitry Andric     using namespace amdhsa;
2284b1c73532SDimitry Andric     TwoByteBuffer = DE.getU16(Cursor);
2285b1c73532SDimitry Andric     if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2286b1c73532SDimitry Andric       PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2287b1c73532SDimitry Andric                       KERNARG_PRELOAD_SPEC_LENGTH);
2288b1c73532SDimitry Andric     }
2289b1c73532SDimitry Andric 
2290b1c73532SDimitry Andric     if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2291b1c73532SDimitry Andric       PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2292b1c73532SDimitry Andric                       KERNARG_PRELOAD_SPEC_OFFSET);
2293b1c73532SDimitry Andric     }
2294ac9a064cSDimitry Andric     return true;
2295b1c73532SDimitry Andric 
2296b1c73532SDimitry Andric   case amdhsa::RESERVED3_OFFSET:
2297b1c73532SDimitry Andric     // 4 bytes from here are reserved, must be 0.
2298b1c73532SDimitry Andric     ReservedBytes = DE.getBytes(Cursor, 4);
2299b1c73532SDimitry Andric     for (int I = 0; I < 4; ++I) {
2300b60736ecSDimitry Andric       if (ReservedBytes[I] != 0)
2301ac9a064cSDimitry Andric         return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2302b60736ecSDimitry Andric     }
2303ac9a064cSDimitry Andric     return true;
2304b60736ecSDimitry Andric 
2305b60736ecSDimitry Andric   default:
2306b60736ecSDimitry Andric     llvm_unreachable("Unhandled index. Case statements cover everything.");
2307ac9a064cSDimitry Andric     return true;
2308b60736ecSDimitry Andric   }
2309b60736ecSDimitry Andric #undef PRINT_DIRECTIVE
2310b60736ecSDimitry Andric }
2311b60736ecSDimitry Andric 
decodeKernelDescriptor(StringRef KdName,ArrayRef<uint8_t> Bytes,uint64_t KdAddress) const2312ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2313b60736ecSDimitry Andric     StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2314ac9a064cSDimitry Andric 
2315b60736ecSDimitry Andric   // CP microcode requires the kernel descriptor to be 64 aligned.
2316b60736ecSDimitry Andric   if (Bytes.size() != 64 || KdAddress % 64 != 0)
2317ac9a064cSDimitry Andric     return createStringError(std::errc::invalid_argument,
2318ac9a064cSDimitry Andric                              "kernel descriptor must be 64-byte aligned");
2319b60736ecSDimitry Andric 
23207fa27ce4SDimitry Andric   // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
23217fa27ce4SDimitry Andric   // requires us to know the setting of .amdhsa_wavefront_size32 in order to
23227fa27ce4SDimitry Andric   // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
23237fa27ce4SDimitry Andric   // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
23247fa27ce4SDimitry Andric   // when required.
23257fa27ce4SDimitry Andric   if (isGFX10Plus()) {
23267fa27ce4SDimitry Andric     uint16_t KernelCodeProperties =
23277fa27ce4SDimitry Andric         support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2328b1c73532SDimitry Andric                                 llvm::endianness::little);
23297fa27ce4SDimitry Andric     EnableWavefrontSize32 =
23307fa27ce4SDimitry Andric         AMDHSA_BITS_GET(KernelCodeProperties,
23317fa27ce4SDimitry Andric                         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
23327fa27ce4SDimitry Andric   }
23337fa27ce4SDimitry Andric 
2334b60736ecSDimitry Andric   std::string Kd;
2335b60736ecSDimitry Andric   raw_string_ostream KdStream(Kd);
2336b60736ecSDimitry Andric   KdStream << ".amdhsa_kernel " << KdName << '\n';
2337b60736ecSDimitry Andric 
2338b60736ecSDimitry Andric   DataExtractor::Cursor C(0);
2339b60736ecSDimitry Andric   while (C && C.tell() < Bytes.size()) {
2340ac9a064cSDimitry Andric     Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2341b60736ecSDimitry Andric 
2342b60736ecSDimitry Andric     cantFail(C.takeError());
2343b60736ecSDimitry Andric 
2344ac9a064cSDimitry Andric     if (!Res)
2345ac9a064cSDimitry Andric       return Res;
2346b60736ecSDimitry Andric   }
2347b60736ecSDimitry Andric   KdStream << ".end_amdhsa_kernel\n";
2348b60736ecSDimitry Andric   outs() << KdStream.str();
2349ac9a064cSDimitry Andric   return true;
2350b60736ecSDimitry Andric }
2351b60736ecSDimitry Andric 
onSymbolStart(SymbolInfoTy & Symbol,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t Address) const2352ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2353ac9a064cSDimitry Andric                                                  uint64_t &Size,
2354ac9a064cSDimitry Andric                                                  ArrayRef<uint8_t> Bytes,
2355ac9a064cSDimitry Andric                                                  uint64_t Address) const {
2356b60736ecSDimitry Andric   // Right now only kernel descriptor needs to be handled.
2357b60736ecSDimitry Andric   // We ignore all other symbols for target specific handling.
2358b60736ecSDimitry Andric   // TODO:
2359b60736ecSDimitry Andric   // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2360b60736ecSDimitry Andric   // Object V2 and V3 when symbols are marked protected.
2361b60736ecSDimitry Andric 
2362b60736ecSDimitry Andric   // amd_kernel_code_t for Code Object V2.
2363b60736ecSDimitry Andric   if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2364b60736ecSDimitry Andric     Size = 256;
2365ac9a064cSDimitry Andric     return createStringError(std::errc::invalid_argument,
2366ac9a064cSDimitry Andric                              "code object v2 is not supported");
2367b60736ecSDimitry Andric   }
2368b60736ecSDimitry Andric 
2369b60736ecSDimitry Andric   // Code Object V3 kernel descriptors.
2370b60736ecSDimitry Andric   StringRef Name = Symbol.Name;
2371312c0ed1SDimitry Andric   if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2372b60736ecSDimitry Andric     Size = 64; // Size = 64 regardless of success or failure.
2373b60736ecSDimitry Andric     return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2374b60736ecSDimitry Andric   }
2375ac9a064cSDimitry Andric 
2376ac9a064cSDimitry Andric   return false;
2377ac9a064cSDimitry Andric }
2378ac9a064cSDimitry Andric 
createConstantSymbolExpr(StringRef Id,int64_t Val)2379ac9a064cSDimitry Andric const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2380ac9a064cSDimitry Andric                                                            int64_t Val) {
2381ac9a064cSDimitry Andric   MCContext &Ctx = getContext();
2382ac9a064cSDimitry Andric   MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2383ac9a064cSDimitry Andric   // Note: only set value to Val on a new symbol in case an dissassembler
2384ac9a064cSDimitry Andric   // has already been initialized in this context.
2385ac9a064cSDimitry Andric   if (!Sym->isVariable()) {
2386ac9a064cSDimitry Andric     Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2387ac9a064cSDimitry Andric   } else {
2388ac9a064cSDimitry Andric     int64_t Res = ~Val;
2389ac9a064cSDimitry Andric     bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2390ac9a064cSDimitry Andric     if (!Valid || Res != Val)
2391ac9a064cSDimitry Andric       Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2392ac9a064cSDimitry Andric   }
2393ac9a064cSDimitry Andric   return MCSymbolRefExpr::create(Sym, Ctx);
2394e6d15924SDimitry Andric }
2395e6d15924SDimitry Andric 
2396b915e9e0SDimitry Andric //===----------------------------------------------------------------------===//
2397b915e9e0SDimitry Andric // AMDGPUSymbolizer
2398b915e9e0SDimitry Andric //===----------------------------------------------------------------------===//
2399b915e9e0SDimitry Andric 
2400b915e9e0SDimitry Andric // Try to find symbol name for specified label
tryAddingSymbolicOperand(MCInst & Inst,raw_ostream &,int64_t Value,uint64_t,bool IsBranch,uint64_t,uint64_t,uint64_t)2401145449b1SDimitry Andric bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2402145449b1SDimitry Andric     MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2403145449b1SDimitry Andric     uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2404145449b1SDimitry Andric     uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2405b915e9e0SDimitry Andric 
2406b915e9e0SDimitry Andric   if (!IsBranch) {
2407b915e9e0SDimitry Andric     return false;
2408b915e9e0SDimitry Andric   }
2409b915e9e0SDimitry Andric 
2410b915e9e0SDimitry Andric   auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2411eb11fae6SDimitry Andric   if (!Symbols)
2412eb11fae6SDimitry Andric     return false;
2413eb11fae6SDimitry Andric 
2414b60736ecSDimitry Andric   auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2415b60736ecSDimitry Andric     return Val.Addr == static_cast<uint64_t>(Value) &&
2416b60736ecSDimitry Andric            Val.Type == ELF::STT_NOTYPE;
2417b915e9e0SDimitry Andric   });
2418b915e9e0SDimitry Andric   if (Result != Symbols->end()) {
2419cfca06d7SDimitry Andric     auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2420b915e9e0SDimitry Andric     const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2421b915e9e0SDimitry Andric     Inst.addOperand(MCOperand::createExpr(Add));
2422b915e9e0SDimitry Andric     return true;
2423b915e9e0SDimitry Andric   }
2424344a3780SDimitry Andric   // Add to list of referenced addresses, so caller can synthesize a label.
2425344a3780SDimitry Andric   ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2426b915e9e0SDimitry Andric   return false;
2427b915e9e0SDimitry Andric }
2428b915e9e0SDimitry Andric 
tryAddingPcLoadReferenceComment(raw_ostream & cStream,int64_t Value,uint64_t Address)2429b915e9e0SDimitry Andric void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2430b915e9e0SDimitry Andric                                                        int64_t Value,
2431b915e9e0SDimitry Andric                                                        uint64_t Address) {
2432b915e9e0SDimitry Andric   llvm_unreachable("unimplemented");
2433b915e9e0SDimitry Andric }
2434b915e9e0SDimitry Andric 
2435b915e9e0SDimitry Andric //===----------------------------------------------------------------------===//
2436b915e9e0SDimitry Andric // Initialization
2437b915e9e0SDimitry Andric //===----------------------------------------------------------------------===//
2438b915e9e0SDimitry Andric 
createAMDGPUSymbolizer(const Triple &,LLVMOpInfoCallback,LLVMSymbolLookupCallback,void * DisInfo,MCContext * Ctx,std::unique_ptr<MCRelocationInfo> && RelInfo)2439b915e9e0SDimitry Andric static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2440b915e9e0SDimitry Andric                               LLVMOpInfoCallback /*GetOpInfo*/,
2441b915e9e0SDimitry Andric                               LLVMSymbolLookupCallback /*SymbolLookUp*/,
2442b915e9e0SDimitry Andric                               void *DisInfo,
2443b915e9e0SDimitry Andric                               MCContext *Ctx,
2444b915e9e0SDimitry Andric                               std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2445b915e9e0SDimitry Andric   return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2446b915e9e0SDimitry Andric }
2447b915e9e0SDimitry Andric 
createAMDGPUDisassembler(const Target & T,const MCSubtargetInfo & STI,MCContext & Ctx)244801095a5dSDimitry Andric static MCDisassembler *createAMDGPUDisassembler(const Target &T,
244901095a5dSDimitry Andric                                                 const MCSubtargetInfo &STI,
245001095a5dSDimitry Andric                                                 MCContext &Ctx) {
2451044eb2f6SDimitry Andric   return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
245201095a5dSDimitry Andric }
245301095a5dSDimitry Andric 
LLVMInitializeAMDGPUDisassembler()2454706b4fc4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2455b915e9e0SDimitry Andric   TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2456b915e9e0SDimitry Andric                                          createAMDGPUDisassembler);
2457b915e9e0SDimitry Andric   TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2458b915e9e0SDimitry Andric                                        createAMDGPUSymbolizer);
245901095a5dSDimitry Andric }
2460