1044eb2f6SDimitry Andric //===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
201095a5dSDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
601095a5dSDimitry Andric //
701095a5dSDimitry Andric //===----------------------------------------------------------------------===//
801095a5dSDimitry Andric //
901095a5dSDimitry Andric //===----------------------------------------------------------------------===//
1001095a5dSDimitry Andric //
1101095a5dSDimitry Andric /// \file
1201095a5dSDimitry Andric ///
1301095a5dSDimitry Andric /// This file contains definition for AMDGPU ISA disassembler
1401095a5dSDimitry Andric //
1501095a5dSDimitry Andric //===----------------------------------------------------------------------===//
1601095a5dSDimitry Andric
1701095a5dSDimitry Andric // ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
1801095a5dSDimitry Andric
19044eb2f6SDimitry Andric #include "Disassembler/AMDGPUDisassembler.h"
20eb11fae6SDimitry Andric #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
21145449b1SDimitry Andric #include "SIDefines.h"
22145449b1SDimitry Andric #include "SIRegisterInfo.h"
23e6d15924SDimitry Andric #include "TargetInfo/AMDGPUTargetInfo.h"
24ac9a064cSDimitry Andric #include "Utils/AMDGPUAsmUtils.h"
2501095a5dSDimitry Andric #include "Utils/AMDGPUBaseInfo.h"
26b60736ecSDimitry Andric #include "llvm-c/DisassemblerTypes.h"
27145449b1SDimitry Andric #include "llvm/BinaryFormat/ELF.h"
28e6d15924SDimitry Andric #include "llvm/MC/MCAsmInfo.h"
2901095a5dSDimitry Andric #include "llvm/MC/MCContext.h"
30145449b1SDimitry Andric #include "llvm/MC/MCDecoderOps.h"
31044eb2f6SDimitry Andric #include "llvm/MC/MCExpr.h"
32c0981da4SDimitry Andric #include "llvm/MC/MCInstrDesc.h"
33145449b1SDimitry Andric #include "llvm/MC/MCRegisterInfo.h"
34145449b1SDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
35145449b1SDimitry Andric #include "llvm/MC/TargetRegistry.h"
36b60736ecSDimitry Andric #include "llvm/Support/AMDHSAKernelDescriptor.h"
3701095a5dSDimitry Andric
3801095a5dSDimitry Andric using namespace llvm;
3901095a5dSDimitry Andric
4001095a5dSDimitry Andric #define DEBUG_TYPE "amdgpu-disassembler"
4101095a5dSDimitry Andric
42b60736ecSDimitry Andric #define SGPR_MAX \
43b60736ecSDimitry Andric (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10 \
44e6d15924SDimitry Andric : AMDGPU::EncValues::SGPR_MAX_SI)
45e6d15924SDimitry Andric
46044eb2f6SDimitry Andric using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
4701095a5dSDimitry Andric
// Returns a subtarget description that carries a wavefront-size feature.
//
// If \p STI already enables FeatureWavefrontSize64 or FeatureWavefrontSize32
// it is returned unchanged. Otherwise a copy obtained from \p Ctx is returned
// with FeatureWavefrontSize32 toggled on.
static const MCSubtargetInfo &addDefaultWaveSize(const MCSubtargetInfo &STI,
                                                 MCContext &Ctx) {
  const bool HasWaveSize = STI.hasFeature(AMDGPU::FeatureWavefrontSize64) ||
                           STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
  if (HasWaveSize)
    return STI;

  // Generations before gfx10 already list FeatureWavefrontSize64 in their
  // definition, so a subtarget without any wave size must be gfx10+; those
  // default to wave32.
  MCSubtargetInfo &WithWave32 = Ctx.getSubtargetCopy(STI);
  WithWave32.ToggleFeature(AMDGPU::FeatureWavefrontSize32);
  return WithWave32;
}
62ac9a064cSDimitry Andric
AMDGPUDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx,MCInstrInfo const * MCII)63e6d15924SDimitry Andric AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
647fa27ce4SDimitry Andric MCContext &Ctx, MCInstrInfo const *MCII)
65ac9a064cSDimitry Andric : MCDisassembler(addDefaultWaveSize(STI, Ctx), Ctx), MCII(MCII),
66ac9a064cSDimitry Andric MRI(*Ctx.getRegisterInfo()), MAI(*Ctx.getAsmInfo()),
67ac9a064cSDimitry Andric TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
68ac9a064cSDimitry Andric CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
69e6d15924SDimitry Andric // ToDo: AMDGPUDisassembler supports only VI ISA.
707fa27ce4SDimitry Andric if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
71e6d15924SDimitry Andric report_fatal_error("Disassembly not yet supported for subtarget");
72ac9a064cSDimitry Andric
73ac9a064cSDimitry Andric for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
74ac9a064cSDimitry Andric createConstantSymbolExpr(Symbol, Code);
75ac9a064cSDimitry Andric
76ac9a064cSDimitry Andric UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
77ac9a064cSDimitry Andric UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
78ac9a064cSDimitry Andric UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
79ac9a064cSDimitry Andric }
80ac9a064cSDimitry Andric
setABIVersion(unsigned Version)81ac9a064cSDimitry Andric void AMDGPUDisassembler::setABIVersion(unsigned Version) {
82ac9a064cSDimitry Andric CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(Version);
83e6d15924SDimitry Andric }
84e6d15924SDimitry Andric
8501095a5dSDimitry Andric inline static MCDisassembler::DecodeStatus
addOperand(MCInst & Inst,const MCOperand & Opnd)8601095a5dSDimitry Andric addOperand(MCInst &Inst, const MCOperand& Opnd) {
8701095a5dSDimitry Andric Inst.addOperand(Opnd);
8801095a5dSDimitry Andric return Opnd.isValid() ?
8901095a5dSDimitry Andric MCDisassembler::Success :
90706b4fc4SDimitry Andric MCDisassembler::Fail;
9101095a5dSDimitry Andric }
9201095a5dSDimitry Andric
insertNamedMCOperand(MCInst & MI,const MCOperand & Op,uint16_t NameIdx)9308bbd35aSDimitry Andric static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
9408bbd35aSDimitry Andric uint16_t NameIdx) {
9508bbd35aSDimitry Andric int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NameIdx);
9608bbd35aSDimitry Andric if (OpIdx != -1) {
9708bbd35aSDimitry Andric auto I = MI.begin();
9808bbd35aSDimitry Andric std::advance(I, OpIdx);
9908bbd35aSDimitry Andric MI.insert(I, Op);
10008bbd35aSDimitry Andric }
10108bbd35aSDimitry Andric return OpIdx;
10208bbd35aSDimitry Andric }
10308bbd35aSDimitry Andric
decodeSOPPBrTarget(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)1047fa27ce4SDimitry Andric static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
105145449b1SDimitry Andric uint64_t Addr,
106145449b1SDimitry Andric const MCDisassembler *Decoder) {
107b915e9e0SDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
108b915e9e0SDimitry Andric
109e6d15924SDimitry Andric // Our branches take a simm16, but we need two extra bits to account for the
110e6d15924SDimitry Andric // factor of 4.
111b915e9e0SDimitry Andric APInt SignedOffset(18, Imm * 4, true);
112b915e9e0SDimitry Andric int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
113b915e9e0SDimitry Andric
114145449b1SDimitry Andric if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
115b915e9e0SDimitry Andric return MCDisassembler::Success;
116b915e9e0SDimitry Andric return addOperand(Inst, MCOperand::createImm(Imm));
117b915e9e0SDimitry Andric }
118b915e9e0SDimitry Andric
decodeSMEMOffset(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)119145449b1SDimitry Andric static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
120145449b1SDimitry Andric const MCDisassembler *Decoder) {
121cfca06d7SDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
122cfca06d7SDimitry Andric int64_t Offset;
123b1c73532SDimitry Andric if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
124b1c73532SDimitry Andric Offset = SignExtend64<24>(Imm);
125b1c73532SDimitry Andric } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
126cfca06d7SDimitry Andric Offset = Imm & 0xFFFFF;
127cfca06d7SDimitry Andric } else { // GFX9+ supports 21-bit signed offsets.
128cfca06d7SDimitry Andric Offset = SignExtend64<21>(Imm);
129cfca06d7SDimitry Andric }
130cfca06d7SDimitry Andric return addOperand(Inst, MCOperand::createImm(Offset));
131cfca06d7SDimitry Andric }
132cfca06d7SDimitry Andric
decodeBoolReg(MCInst & Inst,unsigned Val,uint64_t Addr,const MCDisassembler * Decoder)133145449b1SDimitry Andric static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
134145449b1SDimitry Andric const MCDisassembler *Decoder) {
135e6d15924SDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
136e6d15924SDimitry Andric return addOperand(Inst, DAsm->decodeBoolReg(Val));
137e6d15924SDimitry Andric }
138e6d15924SDimitry Andric
decodeSplitBarrier(MCInst & Inst,unsigned Val,uint64_t Addr,const MCDisassembler * Decoder)139312c0ed1SDimitry Andric static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
140312c0ed1SDimitry Andric uint64_t Addr,
141312c0ed1SDimitry Andric const MCDisassembler *Decoder) {
142312c0ed1SDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
143312c0ed1SDimitry Andric return addOperand(Inst, DAsm->decodeSplitBarrier(Val));
144312c0ed1SDimitry Andric }
145312c0ed1SDimitry Andric
decodeDpp8FI(MCInst & Inst,unsigned Val,uint64_t Addr,const MCDisassembler * Decoder)146ac9a064cSDimitry Andric static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
147ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
148ac9a064cSDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
149ac9a064cSDimitry Andric return addOperand(Inst, DAsm->decodeDpp8FI(Val));
150ac9a064cSDimitry Andric }
151ac9a064cSDimitry Andric
// Defines a static decoder callback named StaticDecoderName that passes the
// encoded value to the AMDGPUDisassembler member DecoderName and appends the
// operand it returns to the instruction.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(Inst, DAsm->DecoderName(Imm));                           \
  }
15901095a5dSDimitry Andric
// Defines Decode<RegClass>RegisterClass for operands whose 8-bit Imm is the
// register number itself; the register is created directly from the
// RegClassID. Used for VGPR-only and AGPR-only operands.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(                                                         \
        Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm));      \
  }
171b915e9e0SDimitry Andric
// Defines a static decoder callback called Name. It asserts that Imm fits in
// EncSize bits and then decodes EncImm (an expression that may refer to Imm)
// through AMDGPUDisassembler::decodeSrcOp with the given operand width,
// mandatory-literal flag and immediate width.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral,         \
                     ImmWidth)                                                 \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);       \
    return addOperand(Inst,                                                    \
                      DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm,   \
                                        MandatoryLiteral, ImmWidth));          \
  }
18201095a5dSDimitry Andric
decodeSrcOp(MCInst & Inst,unsigned EncSize,AMDGPUDisassembler::OpWidthTy OpWidth,unsigned Imm,unsigned EncImm,bool MandatoryLiteral,unsigned ImmWidth,AMDGPU::OperandSemantics Sema,const MCDisassembler * Decoder)183ac9a064cSDimitry Andric static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
184ac9a064cSDimitry Andric AMDGPUDisassembler::OpWidthTy OpWidth,
185ac9a064cSDimitry Andric unsigned Imm, unsigned EncImm,
186ac9a064cSDimitry Andric bool MandatoryLiteral, unsigned ImmWidth,
187ac9a064cSDimitry Andric AMDGPU::OperandSemantics Sema,
188ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
189ac9a064cSDimitry Andric assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
190ac9a064cSDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
191ac9a064cSDimitry Andric return addOperand(Inst, DAsm->decodeSrcOp(OpWidth, EncImm, MandatoryLiteral,
192ac9a064cSDimitry Andric ImmWidth, Sema));
193ac9a064cSDimitry Andric }
194ac9a064cSDimitry Andric
// Defines Decode<RegClass>RegisterClass for operands whose 7-bit Imm is the
// register number; the register class is resolved by decodeSrcOp from OpWidth.
// Used for SGPR-only operands.
#define DECODE_OPERAND_REG_7(RegClass, OpWidth)                                \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
199344a3780SDimitry Andric
2007fa27ce4SDimitry Andric // Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
2017fa27ce4SDimitry Andric // Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
2027fa27ce4SDimitry Andric // Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
2037fa27ce4SDimitry Andric // Used by AV_ register classes (AGPR or VGPR only register operands).
204ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth>
decodeAV10(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)205ac9a064cSDimitry Andric static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
206ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
207ac9a064cSDimitry Andric return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
208ac9a064cSDimitry Andric false, 0, AMDGPU::OperandSemantics::INT, Decoder);
209ac9a064cSDimitry Andric }
210e6d15924SDimitry Andric
2117fa27ce4SDimitry Andric // Decoder for Src(9-bit encoding) registers only.
212ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth>
decodeSrcReg9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)213ac9a064cSDimitry Andric static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
214ac9a064cSDimitry Andric uint64_t /* Addr */,
215ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
216ac9a064cSDimitry Andric return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, 0,
217ac9a064cSDimitry Andric AMDGPU::OperandSemantics::INT, Decoder);
218ac9a064cSDimitry Andric }
219e6d15924SDimitry Andric
2207fa27ce4SDimitry Andric // Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
2217fa27ce4SDimitry Andric // Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
2227fa27ce4SDimitry Andric // only.
223ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth>
decodeSrcA9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)224ac9a064cSDimitry Andric static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
225ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
226ac9a064cSDimitry Andric return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, 0,
227ac9a064cSDimitry Andric AMDGPU::OperandSemantics::INT, Decoder);
228ac9a064cSDimitry Andric }
229344a3780SDimitry Andric
2307fa27ce4SDimitry Andric // Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
2317fa27ce4SDimitry Andric // Imm{9} is acc, registers only.
232ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth>
decodeSrcAV10(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)233ac9a064cSDimitry Andric static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
234ac9a064cSDimitry Andric uint64_t /* Addr */,
235ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
236ac9a064cSDimitry Andric return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, false, 0,
237ac9a064cSDimitry Andric AMDGPU::OperandSemantics::INT, Decoder);
238ac9a064cSDimitry Andric }
239e6d15924SDimitry Andric
2407fa27ce4SDimitry Andric // Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
2417fa27ce4SDimitry Andric // register from RegClass or immediate. Registers that don't belong to RegClass
2427fa27ce4SDimitry Andric // will be decoded and InstPrinter will report warning. Immediate will be
2437fa27ce4SDimitry Andric // decoded into constant of size ImmWidth, should match width of immediate used
2447fa27ce4SDimitry Andric // by OperandType (important for floating point types).
245ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
246ac9a064cSDimitry Andric unsigned OperandSemantics>
decodeSrcRegOrImm9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)247ac9a064cSDimitry Andric static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
248ac9a064cSDimitry Andric uint64_t /* Addr */,
249ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
250ac9a064cSDimitry Andric return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, false, ImmWidth,
251ac9a064cSDimitry Andric (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
252ac9a064cSDimitry Andric }
253aca2e42cSDimitry Andric
2547fa27ce4SDimitry Andric // Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
2557fa27ce4SDimitry Andric // and decode using 'enum10' from decodeSrcOp.
256ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
257ac9a064cSDimitry Andric unsigned OperandSemantics>
decodeSrcRegOrImmA9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)258ac9a064cSDimitry Andric static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
259ac9a064cSDimitry Andric uint64_t /* Addr */,
260ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
261ac9a064cSDimitry Andric return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, false, ImmWidth,
262ac9a064cSDimitry Andric (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
263ac9a064cSDimitry Andric }
264e6d15924SDimitry Andric
265ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy OpWidth, unsigned ImmWidth,
266ac9a064cSDimitry Andric unsigned OperandSemantics>
decodeSrcRegOrImmDeferred9(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)267ac9a064cSDimitry Andric static DecodeStatus decodeSrcRegOrImmDeferred9(MCInst &Inst, unsigned Imm,
268ac9a064cSDimitry Andric uint64_t /* Addr */,
269ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
270ac9a064cSDimitry Andric return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, true, ImmWidth,
271ac9a064cSDimitry Andric (AMDGPU::OperandSemantics)OperandSemantics, Decoder);
272ac9a064cSDimitry Andric }
273e6d15924SDimitry Andric
2747fa27ce4SDimitry Andric // Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
2757fa27ce4SDimitry Andric // when RegisterClass is used as an operand. Most often used for destination
2767fa27ce4SDimitry Andric // operands.
277344a3780SDimitry Andric
2787fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VGPR_32)
DECODE_OPERAND_REG_8(VGPR_32_Lo128)2797fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VGPR_32_Lo128)
2807fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_64)
2817fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_96)
2827fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_128)
2837fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_256)
2847fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_288)
2857fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_352)
2867fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_384)
2877fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_512)
2887fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(VReg_1024)
289344a3780SDimitry Andric
2907fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_32, OPW32)
291312c0ed1SDimitry Andric DECODE_OPERAND_REG_7(SReg_32_XEXEC, OPW32)
2927fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
2937fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
2947fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_64, OPW64)
2957fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
296312c0ed1SDimitry Andric DECODE_OPERAND_REG_7(SReg_96, OPW96)
2977fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_128, OPW128)
2987fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_256, OPW256)
2997fa27ce4SDimitry Andric DECODE_OPERAND_REG_7(SReg_512, OPW512)
300344a3780SDimitry Andric
3017fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AGPR_32)
3027fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_64)
3037fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_128)
3047fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_256)
3057fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_512)
3067fa27ce4SDimitry Andric DECODE_OPERAND_REG_8(AReg_1024)
307344a3780SDimitry Andric
308b1c73532SDimitry Andric static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm,
309b1c73532SDimitry Andric uint64_t /*Addr*/,
310b1c73532SDimitry Andric const MCDisassembler *Decoder) {
311b1c73532SDimitry Andric assert(isUInt<10>(Imm) && "10-bit encoding expected");
312b1c73532SDimitry Andric assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
313b1c73532SDimitry Andric
314b1c73532SDimitry Andric bool IsHi = Imm & (1 << 9);
315b1c73532SDimitry Andric unsigned RegIdx = Imm & 0xff;
316b1c73532SDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
317b1c73532SDimitry Andric return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
318b1c73532SDimitry Andric }
319b1c73532SDimitry Andric
320b1c73532SDimitry Andric static DecodeStatus
DecodeVGPR_16_Lo128RegisterClass(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)321b1c73532SDimitry Andric DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,
322b1c73532SDimitry Andric const MCDisassembler *Decoder) {
323b1c73532SDimitry Andric assert(isUInt<8>(Imm) && "8-bit encoding expected");
324b1c73532SDimitry Andric
325b1c73532SDimitry Andric bool IsHi = Imm & (1 << 7);
326b1c73532SDimitry Andric unsigned RegIdx = Imm & 0x7f;
327b1c73532SDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
328b1c73532SDimitry Andric return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
329b1c73532SDimitry Andric }
330b1c73532SDimitry Andric
decodeOperand_VSrcT16_Lo128(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)331b1c73532SDimitry Andric static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm,
332b1c73532SDimitry Andric uint64_t /*Addr*/,
333b1c73532SDimitry Andric const MCDisassembler *Decoder) {
334b1c73532SDimitry Andric assert(isUInt<9>(Imm) && "9-bit encoding expected");
335b1c73532SDimitry Andric
336b1c73532SDimitry Andric const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
337b1c73532SDimitry Andric bool IsVGPR = Imm & (1 << 8);
338b1c73532SDimitry Andric if (IsVGPR) {
339b1c73532SDimitry Andric bool IsHi = Imm & (1 << 7);
340b1c73532SDimitry Andric unsigned RegIdx = Imm & 0x7f;
341b1c73532SDimitry Andric return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
342b1c73532SDimitry Andric }
343b1c73532SDimitry Andric return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
344b1c73532SDimitry Andric Imm & 0xFF, false, 16));
345b1c73532SDimitry Andric }
346b1c73532SDimitry Andric
decodeOperand_VSrcT16(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)347b1c73532SDimitry Andric static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
348b1c73532SDimitry Andric uint64_t /*Addr*/,
349b1c73532SDimitry Andric const MCDisassembler *Decoder) {
350b1c73532SDimitry Andric assert(isUInt<10>(Imm) && "10-bit encoding expected");
351b1c73532SDimitry Andric
352b1c73532SDimitry Andric const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
353b1c73532SDimitry Andric bool IsVGPR = Imm & (1 << 8);
354b1c73532SDimitry Andric if (IsVGPR) {
355b1c73532SDimitry Andric bool IsHi = Imm & (1 << 9);
356b1c73532SDimitry Andric unsigned RegIdx = Imm & 0xff;
357b1c73532SDimitry Andric return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
358b1c73532SDimitry Andric }
359b1c73532SDimitry Andric return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
360b1c73532SDimitry Andric Imm & 0xFF, false, 16));
361b1c73532SDimitry Andric }
3627fa27ce4SDimitry Andric
decodeOperand_KImmFP(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)3637fa27ce4SDimitry Andric static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
364145449b1SDimitry Andric uint64_t Addr,
365145449b1SDimitry Andric const MCDisassembler *Decoder) {
366c0981da4SDimitry Andric const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
367c0981da4SDimitry Andric return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
368c0981da4SDimitry Andric }
369c0981da4SDimitry Andric
decodeOperandVOPDDstY(MCInst & Inst,unsigned Val,uint64_t Addr,const void * Decoder)370145449b1SDimitry Andric static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
371145449b1SDimitry Andric uint64_t Addr, const void *Decoder) {
372145449b1SDimitry Andric const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
373145449b1SDimitry Andric return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
374145449b1SDimitry Andric }
375145449b1SDimitry Andric
IsAGPROperand(const MCInst & Inst,int OpIdx,const MCRegisterInfo * MRI)376344a3780SDimitry Andric static bool IsAGPROperand(const MCInst &Inst, int OpIdx,
377344a3780SDimitry Andric const MCRegisterInfo *MRI) {
378344a3780SDimitry Andric if (OpIdx < 0)
379344a3780SDimitry Andric return false;
380344a3780SDimitry Andric
381344a3780SDimitry Andric const MCOperand &Op = Inst.getOperand(OpIdx);
382344a3780SDimitry Andric if (!Op.isReg())
383344a3780SDimitry Andric return false;
384344a3780SDimitry Andric
385344a3780SDimitry Andric unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
386344a3780SDimitry Andric auto Reg = Sub ? Sub : Op.getReg();
387344a3780SDimitry Andric return Reg >= AMDGPU::AGPR0 && Reg <= AMDGPU::AGPR255;
388344a3780SDimitry Andric }
389344a3780SDimitry Andric
decodeAVLdSt(MCInst & Inst,unsigned Imm,AMDGPUDisassembler::OpWidthTy Opw,const MCDisassembler * Decoder)390ac9a064cSDimitry Andric static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
391344a3780SDimitry Andric AMDGPUDisassembler::OpWidthTy Opw,
392145449b1SDimitry Andric const MCDisassembler *Decoder) {
393344a3780SDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
394344a3780SDimitry Andric if (!DAsm->isGFX90A()) {
395344a3780SDimitry Andric Imm &= 511;
396344a3780SDimitry Andric } else {
397344a3780SDimitry Andric // If atomic has both vdata and vdst their register classes are tied.
398344a3780SDimitry Andric // The bit is decoded along with the vdst, first operand. We need to
399344a3780SDimitry Andric // change register class to AGPR if vdst was AGPR.
400344a3780SDimitry Andric // If a DS instruction has both data0 and data1 their register classes
401344a3780SDimitry Andric // are also tied.
402344a3780SDimitry Andric unsigned Opc = Inst.getOpcode();
403344a3780SDimitry Andric uint64_t TSFlags = DAsm->getMCII()->get(Opc).TSFlags;
404344a3780SDimitry Andric uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
405344a3780SDimitry Andric : AMDGPU::OpName::vdata;
406344a3780SDimitry Andric const MCRegisterInfo *MRI = DAsm->getContext().getRegisterInfo();
407344a3780SDimitry Andric int DataIdx = AMDGPU::getNamedOperandIdx(Opc, DataNameIdx);
408344a3780SDimitry Andric if ((int)Inst.getNumOperands() == DataIdx) {
409344a3780SDimitry Andric int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
410344a3780SDimitry Andric if (IsAGPROperand(Inst, DstIdx, MRI))
411344a3780SDimitry Andric Imm |= 512;
412344a3780SDimitry Andric }
413344a3780SDimitry Andric
414344a3780SDimitry Andric if (TSFlags & SIInstrFlags::DS) {
415344a3780SDimitry Andric int Data2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
416344a3780SDimitry Andric if ((int)Inst.getNumOperands() == Data2Idx &&
417344a3780SDimitry Andric IsAGPROperand(Inst, DataIdx, MRI))
418344a3780SDimitry Andric Imm |= 512;
419344a3780SDimitry Andric }
420344a3780SDimitry Andric }
421344a3780SDimitry Andric return addOperand(Inst, DAsm->decodeSrcOp(Opw, Imm | 256));
422344a3780SDimitry Andric }
423344a3780SDimitry Andric
424ac9a064cSDimitry Andric template <AMDGPUDisassembler::OpWidthTy Opw>
decodeAVLdSt(MCInst & Inst,unsigned Imm,uint64_t,const MCDisassembler * Decoder)425ac9a064cSDimitry Andric static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
426ac9a064cSDimitry Andric uint64_t /* Addr */,
427ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
428ac9a064cSDimitry Andric return decodeAVLdSt(Inst, Imm, Opw, Decoder);
429ac9a064cSDimitry Andric }
430ac9a064cSDimitry Andric
decodeOperand_VSrc_f64(MCInst & Inst,unsigned Imm,uint64_t Addr,const MCDisassembler * Decoder)431b1c73532SDimitry Andric static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
432b1c73532SDimitry Andric uint64_t Addr,
433b1c73532SDimitry Andric const MCDisassembler *Decoder) {
434b1c73532SDimitry Andric assert(Imm < (1 << 9) && "9-bit encoding");
435b1c73532SDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
436ac9a064cSDimitry Andric return addOperand(Inst,
437ac9a064cSDimitry Andric DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm, false, 64,
438ac9a064cSDimitry Andric AMDGPU::OperandSemantics::FP64));
439e3b55780SDimitry Andric }
440e3b55780SDimitry Andric
44108bbd35aSDimitry Andric #define DECODE_SDWA(DecName) \
44208bbd35aSDimitry Andric DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
443ab44ce3dSDimitry Andric
44408bbd35aSDimitry Andric DECODE_SDWA(Src32)
DECODE_SDWA(Src16)44508bbd35aSDimitry Andric DECODE_SDWA(Src16)
44608bbd35aSDimitry Andric DECODE_SDWA(VopcDst)
447ab44ce3dSDimitry Andric
448ac9a064cSDimitry Andric static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
449ac9a064cSDimitry Andric uint64_t /* Addr */,
450ac9a064cSDimitry Andric const MCDisassembler *Decoder) {
451ac9a064cSDimitry Andric auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
452ac9a064cSDimitry Andric return addOperand(Inst, DAsm->decodeVersionImm(Imm));
453ac9a064cSDimitry Andric }
454ac9a064cSDimitry Andric
45501095a5dSDimitry Andric #include "AMDGPUGenDisassemblerTables.inc"
45601095a5dSDimitry Andric
45701095a5dSDimitry Andric //===----------------------------------------------------------------------===//
45801095a5dSDimitry Andric //
45901095a5dSDimitry Andric //===----------------------------------------------------------------------===//
46001095a5dSDimitry Andric
eatBytes(ArrayRef<uint8_t> & Bytes)46101095a5dSDimitry Andric template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
46201095a5dSDimitry Andric assert(Bytes.size() >= sizeof(T));
463b1c73532SDimitry Andric const auto Res =
464b1c73532SDimitry Andric support::endian::read<T, llvm::endianness::little>(Bytes.data());
46501095a5dSDimitry Andric Bytes = Bytes.slice(sizeof(T));
46601095a5dSDimitry Andric return Res;
46701095a5dSDimitry Andric }
46801095a5dSDimitry Andric
eat12Bytes(ArrayRef<uint8_t> & Bytes)469145449b1SDimitry Andric static inline DecoderUInt128 eat12Bytes(ArrayRef<uint8_t> &Bytes) {
470145449b1SDimitry Andric assert(Bytes.size() >= 12);
471b1c73532SDimitry Andric uint64_t Lo =
472b1c73532SDimitry Andric support::endian::read<uint64_t, llvm::endianness::little>(Bytes.data());
473145449b1SDimitry Andric Bytes = Bytes.slice(8);
474b1c73532SDimitry Andric uint64_t Hi =
475b1c73532SDimitry Andric support::endian::read<uint32_t, llvm::endianness::little>(Bytes.data());
476145449b1SDimitry Andric Bytes = Bytes.slice(4);
477145449b1SDimitry Andric return DecoderUInt128(Lo, Hi);
47801095a5dSDimitry Andric }
47901095a5dSDimitry Andric
// Decode a single instruction from the start of Bytes_ into MI.
//
// At most TargetMaxInstBytes bytes of Bytes_ are considered. The function
// first tries the 96-bit decoder tables (GFX11+ only), then the 64-bit tables,
// then the 32-bit tables, each guarded by a subtarget predicate; the first
// table that matches wins. After a successful decode a series of fixups adds
// or rewrites operands of the MCInst based on the opcode's TSFlags and named
// operands.
//
// Size:    preset to min(4, Bytes_.size()) so that a failed decode reports a
//          4-byte (or shorter) instruction; overwritten on success with the
//          number of bytes actually consumed.
// Address, CS: forwarded unchanged to tryDecodeInst.
// Returns: MCDisassembler::Success, or MCDisassembler::Fail when no table
//          matches or when a MIMG instruction needs more trailing address
//          bytes than are available (Size keeps its preset value then).
//
// NOTE(review): `Bytes` is assigned here without a local declaration inside a
// const member function -- presumably a mutable member of the class; confirm
// in the header.
getInstruction(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes_,uint64_t Address,raw_ostream & CS) const48001095a5dSDimitry Andric DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
48101095a5dSDimitry Andric ArrayRef<uint8_t> Bytes_,
48201095a5dSDimitry Andric uint64_t Address,
48301095a5dSDimitry Andric raw_ostream &CS) const {
484e6d15924SDimitry Andric unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
48501095a5dSDimitry Andric Bytes = Bytes_.slice(0, MaxInstBytesNum);
48601095a5dSDimitry Andric
487ac9a064cSDimitry Andric // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
488ac9a064cSDimitry Andric // there are fewer bytes left). This will be overridden on success.
489ac9a064cSDimitry Andric Size = std::min((size_t)4, Bytes_.size());
490ac9a064cSDimitry Andric
// Single-pass do/while(false): `break` leaves the block on the first
// successful decode and continues with the fixups below; reaching the end of
// the block without a match returns Fail.
49101095a5dSDimitry Andric do {
49201095a5dSDimitry Andric // ToDo: better to switch encoding length using some bit predicate
49301095a5dSDimitry Andric // but it is unknown yet, so try all we can
49401095a5dSDimitry Andric
49501095a5dSDimitry Andric // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
49601095a5dSDimitry Andric // encodings
// 96-bit attempt: eat12Bytes advances Bytes by 12.
497145449b1SDimitry Andric if (isGFX11Plus() && Bytes.size() >= 12 ) {
498145449b1SDimitry Andric DecoderUInt128 DecW = eat12Bytes(Bytes);
499312c0ed1SDimitry Andric
500ac9a064cSDimitry Andric if (isGFX11() &&
501ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
502ac9a064cSDimitry Andric DecW, Address, CS))
503145449b1SDimitry Andric break;
504b1c73532SDimitry Andric
505ac9a064cSDimitry Andric if (isGFX12() &&
506ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
507ac9a064cSDimitry Andric DecW, Address, CS))
508b1c73532SDimitry Andric break;
5094df029ccSDimitry Andric
510ac9a064cSDimitry Andric if (isGFX12() &&
511ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
5124df029ccSDimitry Andric break;
513ac9a064cSDimitry Andric
// No 96-bit table matched: rewind so the 64-bit attempt starts at byte 0.
514145449b1SDimitry Andric // Reinitialize Bytes
515145449b1SDimitry Andric Bytes = Bytes_.slice(0, MaxInstBytesNum);
516ac9a064cSDimitry Andric }
517145449b1SDimitry Andric
// 64-bit attempt: eatBytes advances Bytes by 8. Tables are tried in the order
// written; that order decides which table wins when several could match.
51801095a5dSDimitry Andric if (Bytes.size() >= 8) {
51901095a5dSDimitry Andric const uint64_t QW = eatBytes<uint64_t>(Bytes);
520e6d15924SDimitry Andric
521ac9a064cSDimitry Andric if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
522ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
523cfca06d7SDimitry Andric break;
524ac9a064cSDimitry Andric
525ac9a064cSDimitry Andric if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
526ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
527cfca06d7SDimitry Andric break;
528eb11fae6SDimitry Andric
529eb11fae6SDimitry Andric // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
530eb11fae6SDimitry Andric // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
531eb11fae6SDimitry Andric // table first so we print the correct name.
532ac9a064cSDimitry Andric if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
533ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
534eb11fae6SDimitry Andric break;
53501095a5dSDimitry Andric
536ac9a064cSDimitry Andric if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
537ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
538ac9a064cSDimitry Andric break;
539ac9a064cSDimitry Andric
540ac9a064cSDimitry Andric if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
541ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
542ac9a064cSDimitry Andric break;
543ac9a064cSDimitry Andric
544ac9a064cSDimitry Andric if ((isVI() || isGFX9()) &&
545ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
546ac9a064cSDimitry Andric break;
547ac9a064cSDimitry Andric
548ac9a064cSDimitry Andric if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
549ac9a064cSDimitry Andric break;
550ac9a064cSDimitry Andric
551ac9a064cSDimitry Andric if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
552ac9a064cSDimitry Andric break;
553ac9a064cSDimitry Andric
554ac9a064cSDimitry Andric if (isGFX12() &&
555ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
556ac9a064cSDimitry Andric Address, CS))
557ac9a064cSDimitry Andric break;
558ac9a064cSDimitry Andric
559ac9a064cSDimitry Andric if (isGFX11() &&
560ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
561ac9a064cSDimitry Andric Address, CS))
562ac9a064cSDimitry Andric break;
563ac9a064cSDimitry Andric
564ac9a064cSDimitry Andric if (isGFX11() &&
565ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
566ac9a064cSDimitry Andric break;
567ac9a064cSDimitry Andric
568ac9a064cSDimitry Andric if (isGFX12() &&
569ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
570ac9a064cSDimitry Andric break;
571ac9a064cSDimitry Andric
// No 64-bit table matched: rewind so the 32-bit attempt starts at byte 0.
572ac9a064cSDimitry Andric // Reinitialize Bytes
57301095a5dSDimitry Andric Bytes = Bytes_.slice(0, MaxInstBytesNum);
574ac9a064cSDimitry Andric }
57501095a5dSDimitry Andric
57601095a5dSDimitry Andric // Try decode 32-bit instruction
577ac9a064cSDimitry Andric if (Bytes.size() >= 4) {
57801095a5dSDimitry Andric const uint32_t DW = eatBytes<uint32_t>(Bytes);
57901095a5dSDimitry Andric
580ac9a064cSDimitry Andric if ((isVI() || isGFX9()) &&
581ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
582ac9a064cSDimitry Andric break;
58301095a5dSDimitry Andric
// The generic AMDGPU32 table is the only one tried without a subtarget guard.
584ac9a064cSDimitry Andric if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
585ac9a064cSDimitry Andric break;
586044eb2f6SDimitry Andric
587ac9a064cSDimitry Andric if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
588ac9a064cSDimitry Andric break;
589ac9a064cSDimitry Andric
590ac9a064cSDimitry Andric if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
591ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
592ac9a064cSDimitry Andric break;
593ac9a064cSDimitry Andric
594ac9a064cSDimitry Andric if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
595ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
596ac9a064cSDimitry Andric break;
597ac9a064cSDimitry Andric
598ac9a064cSDimitry Andric if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
599ac9a064cSDimitry Andric break;
600ac9a064cSDimitry Andric
601ac9a064cSDimitry Andric if (isGFX11() &&
602ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
603ac9a064cSDimitry Andric Address, CS))
604ac9a064cSDimitry Andric break;
605ac9a064cSDimitry Andric
606ac9a064cSDimitry Andric if (isGFX12() &&
607ac9a064cSDimitry Andric tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
608ac9a064cSDimitry Andric Address, CS))
609344a3780SDimitry Andric break;
610344a3780SDimitry Andric }
611344a3780SDimitry Andric
// Nothing matched at any width; Size still holds the preset fallback.
612ac9a064cSDimitry Andric return MCDisassembler::Fail;
61301095a5dSDimitry Andric } while (false);
61401095a5dSDimitry Andric
// ---- Post-decode fixups. TSFlags are looked up again from MI.getOpcode() in
// every check below rather than cached once.
// DPP: at most one of the VOP3P / VOPC / DPP8 / VOP3 converters runs, chosen
// in that priority order; the MAC conversion may run in addition, before it.
615ac9a064cSDimitry Andric if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
616ac9a064cSDimitry Andric if (isMacDPP(MI))
617ac9a064cSDimitry Andric convertMacDPPInst(MI);
618ac9a064cSDimitry Andric
619ac9a064cSDimitry Andric if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
620ac9a064cSDimitry Andric convertVOP3PDPPInst(MI);
621ac9a064cSDimitry Andric else if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC) ||
622ac9a064cSDimitry Andric AMDGPU::isVOPC64DPP(MI.getOpcode()))
623ac9a064cSDimitry Andric convertVOPCDPPInst(MI); // Special VOP3 case
624ac9a064cSDimitry Andric else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
625ac9a064cSDimitry Andric -1)
626ac9a064cSDimitry Andric convertDPP8Inst(MI);
627ac9a064cSDimitry Andric else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
628ac9a064cSDimitry Andric convertVOP3DPPInst(MI); // Regular VOP3 case
629ac9a064cSDimitry Andric }
630ac9a064cSDimitry Andric
631ac9a064cSDimitry Andric if (AMDGPU::isMAC(MI.getOpcode())) {
63271d5a254SDimitry Andric // Insert dummy unused src2_modifiers.
63308bbd35aSDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0),
63471d5a254SDimitry Andric AMDGPU::OpName::src2_modifiers);
63571d5a254SDimitry Andric }
63671d5a254SDimitry Andric
637ac9a064cSDimitry Andric if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
638ac9a064cSDimitry Andric MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
639ac9a064cSDimitry Andric // Insert dummy unused src2_modifiers.
640ac9a064cSDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0),
641ac9a064cSDimitry Andric AMDGPU::OpName::src2_modifiers);
642ac9a064cSDimitry Andric }
643ac9a064cSDimitry Andric
// DS instructions on subtargets for which hasGDS(STI) is false: insert a
// zero `gds` operand.
644ac9a064cSDimitry Andric if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
64599aabd70SDimitry Andric !AMDGPU::hasGDS(STI)) {
64699aabd70SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
64799aabd70SDimitry Andric }
64899aabd70SDimitry Andric
// cpol: opcodes flagged IsAtomicRet get the GLC bit forced on. If the decoded
// MI has no operand at the cpol position yet, one is inserted (GLC or 0);
// otherwise GLC is OR-ed into the existing immediate.
649ac9a064cSDimitry Andric if (MCII->get(MI.getOpcode()).TSFlags &
650ac9a064cSDimitry Andric (SIInstrFlags::MUBUF | SIInstrFlags::FLAT | SIInstrFlags::SMRD)) {
651344a3780SDimitry Andric int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
652344a3780SDimitry Andric AMDGPU::OpName::cpol);
653344a3780SDimitry Andric if (CPolPos != -1) {
654344a3780SDimitry Andric unsigned CPol =
655344a3780SDimitry Andric (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
656344a3780SDimitry Andric AMDGPU::CPol::GLC : 0;
657344a3780SDimitry Andric if (MI.getNumOperands() <= (unsigned)CPolPos) {
658344a3780SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(CPol),
659344a3780SDimitry Andric AMDGPU::OpName::cpol);
660344a3780SDimitry Andric } else if (CPol) {
661344a3780SDimitry Andric MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
662344a3780SDimitry Andric }
663344a3780SDimitry Andric }
664344a3780SDimitry Andric }
665344a3780SDimitry Andric
666ac9a064cSDimitry Andric if ((MCII->get(MI.getOpcode()).TSFlags &
667344a3780SDimitry Andric (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
6687fa27ce4SDimitry Andric (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
669344a3780SDimitry Andric // GFX90A lost TFE, its place is occupied by ACC.
670344a3780SDimitry Andric int TFEOpIdx =
671344a3780SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
672344a3780SDimitry Andric if (TFEOpIdx != -1) {
673344a3780SDimitry Andric auto TFEIter = MI.begin();
674344a3780SDimitry Andric std::advance(TFEIter, TFEOpIdx);
675344a3780SDimitry Andric MI.insert(TFEIter, MCOperand::createImm(0));
676344a3780SDimitry Andric }
677344a3780SDimitry Andric }
678344a3780SDimitry Andric
// MTBUF/MUBUF opcodes that name a `swz` operand: a zero immediate is inserted
// at that operand's position unconditionally.
679ac9a064cSDimitry Andric if (MCII->get(MI.getOpcode()).TSFlags &
680ac9a064cSDimitry Andric (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) {
681344a3780SDimitry Andric int SWZOpIdx =
682344a3780SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
683344a3780SDimitry Andric if (SWZOpIdx != -1) {
684344a3780SDimitry Andric auto SWZIter = MI.begin();
685344a3780SDimitry Andric std::advance(SWZIter, SWZOpIdx);
686344a3780SDimitry Andric MI.insert(SWZIter, MCOperand::createImm(0));
687344a3780SDimitry Andric }
688b60736ecSDimitry Andric }
689b60736ecSDimitry Andric
// MIMG: NSAArgs is the number of operand slots strictly between vaddr0 and
// srsrc. It is computed in unsigned arithmetic before VAddr0Idx is validated,
// so it is only meaningful under the VAddr0Idx >= 0 guard. Each extra address
// register is taken from one byte following the already-consumed encoding,
// and the bytes consumed are rounded up to whole dwords (NSAWords).
690ac9a064cSDimitry Andric if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::MIMG) {
691e6d15924SDimitry Andric int VAddr0Idx =
692e6d15924SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
693e6d15924SDimitry Andric int RsrcIdx =
694e6d15924SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
695e6d15924SDimitry Andric unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
696e6d15924SDimitry Andric if (VAddr0Idx >= 0 && NSAArgs > 0) {
697e6d15924SDimitry Andric unsigned NSAWords = (NSAArgs + 3) / 4;
698ac9a064cSDimitry Andric if (Bytes.size() < 4 * NSAWords)
699ac9a064cSDimitry Andric return MCDisassembler::Fail;
700e6d15924SDimitry Andric for (unsigned i = 0; i < NSAArgs; ++i) {
701145449b1SDimitry Andric const unsigned VAddrIdx = VAddr0Idx + 1 + i;
702e3b55780SDimitry Andric auto VAddrRCID =
703e3b55780SDimitry Andric MCII->get(MI.getOpcode()).operands()[VAddrIdx].RegClass;
704ac9a064cSDimitry Andric MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
705e6d15924SDimitry Andric }
706e6d15924SDimitry Andric Bytes = Bytes.slice(4 * NSAWords);
707e6d15924SDimitry Andric }
708ac9a064cSDimitry Andric
709ac9a064cSDimitry Andric convertMIMGInst(MI);
710e6d15924SDimitry Andric }
711e6d15924SDimitry Andric
712ac9a064cSDimitry Andric if (MCII->get(MI.getOpcode()).TSFlags &
713ac9a064cSDimitry Andric (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE))
714ac9a064cSDimitry Andric convertMIMGInst(MI);
715044eb2f6SDimitry Andric
716ac9a064cSDimitry Andric if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
717ac9a064cSDimitry Andric convertEXPInst(MI);
718b1c73532SDimitry Andric
719ac9a064cSDimitry Andric if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
720ac9a064cSDimitry Andric convertVINTERPInst(MI);
721145449b1SDimitry Andric
722ac9a064cSDimitry Andric if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
723ac9a064cSDimitry Andric convertSDWAInst(MI);
72408bbd35aSDimitry Andric
// vdst_in: when the descriptor ties vdst_in to another operand and MI's
// operand at that position is missing, not a register, or a different
// register, it is replaced by (or created as) a copy of the tied register.
725e6d15924SDimitry Andric int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
726e6d15924SDimitry Andric AMDGPU::OpName::vdst_in);
727e6d15924SDimitry Andric if (VDstIn_Idx != -1) {
728e6d15924SDimitry Andric int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
729e6d15924SDimitry Andric MCOI::OperandConstraint::TIED_TO);
730e6d15924SDimitry Andric if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
731e6d15924SDimitry Andric !MI.getOperand(VDstIn_Idx).isReg() ||
732e6d15924SDimitry Andric MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
733e6d15924SDimitry Andric if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
734e6d15924SDimitry Andric MI.erase(&MI.getOperand(VDstIn_Idx));
735e6d15924SDimitry Andric insertNamedMCOperand(MI,
736e6d15924SDimitry Andric MCOperand::createReg(MI.getOperand(Tied).getReg()),
737e6d15924SDimitry Andric AMDGPU::OpName::vdst_in);
738e6d15924SDimitry Andric }
739e6d15924SDimitry Andric }
740e6d15924SDimitry Andric
// Opcodes with a named `imm` operand, except those flagged SOPK, go through
// convertFMAanyK.
741c0981da4SDimitry Andric int ImmLitIdx =
742c0981da4SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::imm);
743e3b55780SDimitry Andric bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
744ac9a064cSDimitry Andric if (ImmLitIdx != -1 && !IsSOPK)
745ac9a064cSDimitry Andric convertFMAanyK(MI, ImmLitIdx);
746c0981da4SDimitry Andric
// Bytes consumed = size of the decode window minus what is left in Bytes.
747ac9a064cSDimitry Andric Size = MaxInstBytesNum - Bytes.size();
748ac9a064cSDimitry Andric return MCDisassembler::Success;
74901095a5dSDimitry Andric }
75001095a5dSDimitry Andric
convertEXPInst(MCInst & MI) const751ac9a064cSDimitry Andric void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
752b1c73532SDimitry Andric if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
753145449b1SDimitry Andric // The MCInst still has these fields even though they are no longer encoded
754145449b1SDimitry Andric // in the GFX11 instruction.
755145449b1SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
756145449b1SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
757145449b1SDimitry Andric }
758145449b1SDimitry Andric }
759145449b1SDimitry Andric
convertVINTERPInst(MCInst & MI) const760ac9a064cSDimitry Andric void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
761145449b1SDimitry Andric if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
762b1c73532SDimitry Andric MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
763145449b1SDimitry Andric MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
764b1c73532SDimitry Andric MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
765145449b1SDimitry Andric MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
766b1c73532SDimitry Andric MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
767b1c73532SDimitry Andric MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
768b1c73532SDimitry Andric MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
769145449b1SDimitry Andric // The MCInst has this field that is not directly encoded in the
770145449b1SDimitry Andric // instruction.
771145449b1SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
772145449b1SDimitry Andric }
773145449b1SDimitry Andric }
774145449b1SDimitry Andric
convertSDWAInst(MCInst & MI) const775ac9a064cSDimitry Andric void AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
7767fa27ce4SDimitry Andric if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
7777fa27ce4SDimitry Andric STI.hasFeature(AMDGPU::FeatureGFX10)) {
778e3b55780SDimitry Andric if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
77908bbd35aSDimitry Andric // VOPC - insert clamp
78008bbd35aSDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
7817fa27ce4SDimitry Andric } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
78208bbd35aSDimitry Andric int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
78308bbd35aSDimitry Andric if (SDst != -1) {
78408bbd35aSDimitry Andric // VOPC - insert VCC register as sdst
785044eb2f6SDimitry Andric insertNamedMCOperand(MI, createRegOperand(AMDGPU::VCC),
78608bbd35aSDimitry Andric AMDGPU::OpName::sdst);
78708bbd35aSDimitry Andric } else {
78808bbd35aSDimitry Andric // VOP1/2 - insert omod if present in instruction
78908bbd35aSDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
79008bbd35aSDimitry Andric }
79108bbd35aSDimitry Andric }
79208bbd35aSDimitry Andric }
79308bbd35aSDimitry Andric
/// Bit masks reconstructed from the per-source modifier operands of a VOP
/// instruction; all fields start out as 0.
struct VOPModifiers {
  /// Bit J is set when source J's modifiers have OP_SEL_0; for non-VOP3P
  /// instructions bit 3 reflects DST_OP_SEL in the src0 modifiers.
  unsigned OpSel{0};
  /// Bit J is set when source J's modifiers have OP_SEL_1 (VOP3P only).
  unsigned OpSelHi{0};
  /// Bit J is set when source J's modifiers have NEG (VOP3P only).
  unsigned NegLo{0};
  /// Bit J is set when source J's modifiers have NEG_HI (VOP3P only).
  unsigned NegHi{0};
};
8004b4fe385SDimitry Andric
8014b4fe385SDimitry Andric // Reconstruct values of VOP3/VOP3P operands such as op_sel.
8024b4fe385SDimitry Andric // Note that these values do not affect disassembler output,
8034b4fe385SDimitry Andric // so this is only necessary for consistency with src_modifiers.
collectVOPModifiers(const MCInst & MI,bool IsVOP3P=false)8044b4fe385SDimitry Andric static VOPModifiers collectVOPModifiers(const MCInst &MI,
8054b4fe385SDimitry Andric bool IsVOP3P = false) {
8064b4fe385SDimitry Andric VOPModifiers Modifiers;
8074b4fe385SDimitry Andric unsigned Opc = MI.getOpcode();
8084b4fe385SDimitry Andric const int ModOps[] = {AMDGPU::OpName::src0_modifiers,
8094b4fe385SDimitry Andric AMDGPU::OpName::src1_modifiers,
8104b4fe385SDimitry Andric AMDGPU::OpName::src2_modifiers};
8114b4fe385SDimitry Andric for (int J = 0; J < 3; ++J) {
8124b4fe385SDimitry Andric int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
8134b4fe385SDimitry Andric if (OpIdx == -1)
8144b4fe385SDimitry Andric continue;
8154b4fe385SDimitry Andric
8164b4fe385SDimitry Andric unsigned Val = MI.getOperand(OpIdx).getImm();
8174b4fe385SDimitry Andric
8184b4fe385SDimitry Andric Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
8194b4fe385SDimitry Andric if (IsVOP3P) {
8204b4fe385SDimitry Andric Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
8214b4fe385SDimitry Andric Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
8224b4fe385SDimitry Andric Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
8234b4fe385SDimitry Andric } else if (J == 0) {
8244b4fe385SDimitry Andric Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
8254b4fe385SDimitry Andric }
8264b4fe385SDimitry Andric }
8274b4fe385SDimitry Andric
8284b4fe385SDimitry Andric return Modifiers;
8294b4fe385SDimitry Andric }
8304b4fe385SDimitry Andric
831ac9a064cSDimitry Andric // Instructions decode the op_sel/suffix bits into the src_modifier
832ac9a064cSDimitry Andric // operands. Copy those bits into the src operands for true16 VGPRs.
convertTrue16OpSel(MCInst & MI) const833ac9a064cSDimitry Andric void AMDGPUDisassembler::convertTrue16OpSel(MCInst &MI) const {
834ac9a064cSDimitry Andric const unsigned Opc = MI.getOpcode();
835ac9a064cSDimitry Andric const MCRegisterClass &ConversionRC =
836ac9a064cSDimitry Andric MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
837ac9a064cSDimitry Andric constexpr std::array<std::tuple<int, int, unsigned>, 4> OpAndOpMods = {
838ac9a064cSDimitry Andric {{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
839ac9a064cSDimitry Andric SISrcMods::OP_SEL_0},
840ac9a064cSDimitry Andric {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
841ac9a064cSDimitry Andric SISrcMods::OP_SEL_0},
842ac9a064cSDimitry Andric {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
843ac9a064cSDimitry Andric SISrcMods::OP_SEL_0},
844ac9a064cSDimitry Andric {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
845ac9a064cSDimitry Andric SISrcMods::DST_OP_SEL}}};
846ac9a064cSDimitry Andric for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
847ac9a064cSDimitry Andric int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
848ac9a064cSDimitry Andric int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
849ac9a064cSDimitry Andric if (OpIdx == -1 || OpModsIdx == -1)
850ac9a064cSDimitry Andric continue;
851ac9a064cSDimitry Andric MCOperand &Op = MI.getOperand(OpIdx);
852ac9a064cSDimitry Andric if (!Op.isReg())
853ac9a064cSDimitry Andric continue;
854ac9a064cSDimitry Andric if (!ConversionRC.contains(Op.getReg()))
855ac9a064cSDimitry Andric continue;
856ac9a064cSDimitry Andric unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
857ac9a064cSDimitry Andric const MCOperand &OpMods = MI.getOperand(OpModsIdx);
858ac9a064cSDimitry Andric unsigned ModVal = OpMods.getImm();
859ac9a064cSDimitry Andric if (ModVal & OpSelMask) { // isHi
860ac9a064cSDimitry Andric unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
861ac9a064cSDimitry Andric Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
862ac9a064cSDimitry Andric }
863ac9a064cSDimitry Andric }
864ac9a064cSDimitry Andric }
865ac9a064cSDimitry Andric
866e3b55780SDimitry Andric // MAC opcodes have special old and src2 operands.
867e3b55780SDimitry Andric // src2 is tied to dst, while old is not tied (but assumed to be).
isMacDPP(MCInst & MI) const868e3b55780SDimitry Andric bool AMDGPUDisassembler::isMacDPP(MCInst &MI) const {
869e3b55780SDimitry Andric constexpr int DST_IDX = 0;
870e3b55780SDimitry Andric auto Opcode = MI.getOpcode();
871e3b55780SDimitry Andric const auto &Desc = MCII->get(Opcode);
872e3b55780SDimitry Andric auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
873e3b55780SDimitry Andric
874e3b55780SDimitry Andric if (OldIdx != -1 && Desc.getOperandConstraint(
875e3b55780SDimitry Andric OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
876e3b55780SDimitry Andric assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
877e3b55780SDimitry Andric assert(Desc.getOperandConstraint(
878e3b55780SDimitry Andric AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
879e3b55780SDimitry Andric MCOI::OperandConstraint::TIED_TO) == DST_IDX);
880e3b55780SDimitry Andric (void)DST_IDX;
881e3b55780SDimitry Andric return true;
882e3b55780SDimitry Andric }
883e3b55780SDimitry Andric
884e3b55780SDimitry Andric return false;
885e3b55780SDimitry Andric }
886e3b55780SDimitry Andric
887e3b55780SDimitry Andric // Create dummy old operand and insert dummy unused src2_modifiers
convertMacDPPInst(MCInst & MI) const888e3b55780SDimitry Andric void AMDGPUDisassembler::convertMacDPPInst(MCInst &MI) const {
889e3b55780SDimitry Andric assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
890e3b55780SDimitry Andric insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
891e3b55780SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0),
892e3b55780SDimitry Andric AMDGPU::OpName::src2_modifiers);
893e3b55780SDimitry Andric }
894e3b55780SDimitry Andric
convertDPP8Inst(MCInst & MI) const895ac9a064cSDimitry Andric void AMDGPUDisassembler::convertDPP8Inst(MCInst &MI) const {
896e6d15924SDimitry Andric unsigned Opc = MI.getOpcode();
897ac9a064cSDimitry Andric
898ac9a064cSDimitry Andric int VDstInIdx =
899ac9a064cSDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
900ac9a064cSDimitry Andric if (VDstInIdx != -1)
901ac9a064cSDimitry Andric insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
902e3b55780SDimitry Andric
903e3b55780SDimitry Andric unsigned DescNumOps = MCII->get(Opc).getNumOperands();
904e3b55780SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
905e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
906ac9a064cSDimitry Andric convertTrue16OpSel(MI);
9074b4fe385SDimitry Andric auto Mods = collectVOPModifiers(MI);
9084b4fe385SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
9094b4fe385SDimitry Andric AMDGPU::OpName::op_sel);
910145449b1SDimitry Andric } else {
911e6d15924SDimitry Andric // Insert dummy unused src modifiers.
912e6d15924SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
913e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
914e6d15924SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0),
915e6d15924SDimitry Andric AMDGPU::OpName::src0_modifiers);
916e6d15924SDimitry Andric
917e6d15924SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
918e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
919e6d15924SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0),
920e6d15924SDimitry Andric AMDGPU::OpName::src1_modifiers);
921145449b1SDimitry Andric }
922e3b55780SDimitry Andric }
923e6d15924SDimitry Andric
convertVOP3DPPInst(MCInst & MI) const924ac9a064cSDimitry Andric void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
925ac9a064cSDimitry Andric convertTrue16OpSel(MI);
926ac9a064cSDimitry Andric
927ac9a064cSDimitry Andric int VDstInIdx =
928ac9a064cSDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
929ac9a064cSDimitry Andric if (VDstInIdx != -1)
930ac9a064cSDimitry Andric insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
931e3b55780SDimitry Andric
9324b4fe385SDimitry Andric unsigned Opc = MI.getOpcode();
9334b4fe385SDimitry Andric unsigned DescNumOps = MCII->get(Opc).getNumOperands();
9344b4fe385SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
935e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9364b4fe385SDimitry Andric auto Mods = collectVOPModifiers(MI);
9374b4fe385SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
9384b4fe385SDimitry Andric AMDGPU::OpName::op_sel);
9394b4fe385SDimitry Andric }
9404b4fe385SDimitry Andric }
9414b4fe385SDimitry Andric
// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it has 1 dword, which may not actually be the case.
convertMIMGInst(MCInst & MI) const945ac9a064cSDimitry Andric void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
946b1c73532SDimitry Andric auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
947eb11fae6SDimitry Andric
948eb11fae6SDimitry Andric int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
949eb11fae6SDimitry Andric AMDGPU::OpName::vdst);
950eb11fae6SDimitry Andric
951044eb2f6SDimitry Andric int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
952044eb2f6SDimitry Andric AMDGPU::OpName::vdata);
953e6d15924SDimitry Andric int VAddr0Idx =
954e6d15924SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
955ac9a064cSDimitry Andric int RsrcOpName = (TSFlags & SIInstrFlags::MIMG) ? AMDGPU::OpName::srsrc
956b1c73532SDimitry Andric : AMDGPU::OpName::rsrc;
957b1c73532SDimitry Andric int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
958044eb2f6SDimitry Andric int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
959044eb2f6SDimitry Andric AMDGPU::OpName::dmask);
960eb11fae6SDimitry Andric
961eb11fae6SDimitry Andric int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
962eb11fae6SDimitry Andric AMDGPU::OpName::tfe);
963eb11fae6SDimitry Andric int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
964eb11fae6SDimitry Andric AMDGPU::OpName::d16);
965eb11fae6SDimitry Andric
966c0981da4SDimitry Andric const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
967c0981da4SDimitry Andric const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
968c0981da4SDimitry Andric AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
969c0981da4SDimitry Andric
970eb11fae6SDimitry Andric assert(VDataIdx != -1);
971c0981da4SDimitry Andric if (BaseOpcode->BVH) {
972c0981da4SDimitry Andric // Add A16 operand for intersect_ray instructions
9737fa27ce4SDimitry Andric addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
974ac9a064cSDimitry Andric return;
975b60736ecSDimitry Andric }
976eb11fae6SDimitry Andric
977eb11fae6SDimitry Andric bool IsAtomic = (VDstIdx != -1);
978b1c73532SDimitry Andric bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
979b1c73532SDimitry Andric bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
980e6d15924SDimitry Andric bool IsNSA = false;
9817fa27ce4SDimitry Andric bool IsPartialNSA = false;
982e6d15924SDimitry Andric unsigned AddrSize = Info->VAddrDwords;
983044eb2f6SDimitry Andric
984145449b1SDimitry Andric if (isGFX10Plus()) {
985e6d15924SDimitry Andric unsigned DimIdx =
986e6d15924SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
987344a3780SDimitry Andric int A16Idx =
988344a3780SDimitry Andric AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
989e6d15924SDimitry Andric const AMDGPU::MIMGDimInfo *Dim =
990e6d15924SDimitry Andric AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
991344a3780SDimitry Andric const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
992e6d15924SDimitry Andric
993344a3780SDimitry Andric AddrSize =
994344a3780SDimitry Andric AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
995344a3780SDimitry Andric
996b1c73532SDimitry Andric // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
997b1c73532SDimitry Andric // VIMAGE insts other than BVH never use vaddr4.
998145449b1SDimitry Andric IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
999b1c73532SDimitry Andric Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1000b1c73532SDimitry Andric Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
1001e6d15924SDimitry Andric if (!IsNSA) {
1002b1c73532SDimitry Andric if (!IsVSample && AddrSize > 12)
1003e6d15924SDimitry Andric AddrSize = 16;
1004e6d15924SDimitry Andric } else {
1005e6d15924SDimitry Andric if (AddrSize > Info->VAddrDwords) {
10067fa27ce4SDimitry Andric if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
10077fa27ce4SDimitry Andric // The NSA encoding does not contain enough operands for the
10087fa27ce4SDimitry Andric // combination of base opcode / dimension. Should this be an error?
1009ac9a064cSDimitry Andric return;
1010e6d15924SDimitry Andric }
10117fa27ce4SDimitry Andric IsPartialNSA = true;
10127fa27ce4SDimitry Andric }
1013e6d15924SDimitry Andric }
1014e6d15924SDimitry Andric }
1015e6d15924SDimitry Andric
1016e6d15924SDimitry Andric unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1017e3b55780SDimitry Andric unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1018044eb2f6SDimitry Andric
1019eb11fae6SDimitry Andric bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1020eb11fae6SDimitry Andric if (D16 && AMDGPU::hasPackedD16(STI)) {
1021eb11fae6SDimitry Andric DstSize = (DstSize + 1) / 2;
1022eb11fae6SDimitry Andric }
1023eb11fae6SDimitry Andric
1024344a3780SDimitry Andric if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1025b60736ecSDimitry Andric DstSize += 1;
1026eb11fae6SDimitry Andric
1027e6d15924SDimitry Andric if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1028ac9a064cSDimitry Andric return;
1029e6d15924SDimitry Andric
1030e6d15924SDimitry Andric int NewOpcode =
1031e6d15924SDimitry Andric AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1032eb11fae6SDimitry Andric if (NewOpcode == -1)
1033ac9a064cSDimitry Andric return;
1034eb11fae6SDimitry Andric
1035e6d15924SDimitry Andric // Widen the register to the correct number of enabled channels.
1036e6d15924SDimitry Andric unsigned NewVdata = AMDGPU::NoRegister;
1037e6d15924SDimitry Andric if (DstSize != Info->VDataDwords) {
1038e3b55780SDimitry Andric auto DataRCID = MCII->get(NewOpcode).operands()[VDataIdx].RegClass;
1039044eb2f6SDimitry Andric
1040eb11fae6SDimitry Andric // Get first subregister of VData
1041044eb2f6SDimitry Andric unsigned Vdata0 = MI.getOperand(VDataIdx).getReg();
1042eb11fae6SDimitry Andric unsigned VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1043eb11fae6SDimitry Andric Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1044eb11fae6SDimitry Andric
1045e6d15924SDimitry Andric NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
1046e6d15924SDimitry Andric &MRI.getRegClass(DataRCID));
1047044eb2f6SDimitry Andric if (NewVdata == AMDGPU::NoRegister) {
1048044eb2f6SDimitry Andric // It's possible to encode this such that the low register + enabled
1049044eb2f6SDimitry Andric // components exceeds the register count.
1050ac9a064cSDimitry Andric return;
1051044eb2f6SDimitry Andric }
1052e6d15924SDimitry Andric }
1053e6d15924SDimitry Andric
10547fa27ce4SDimitry Andric // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
10557fa27ce4SDimitry Andric // If using partial NSA on GFX11+ widen last address register.
10567fa27ce4SDimitry Andric int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
10577fa27ce4SDimitry Andric unsigned NewVAddrSA = AMDGPU::NoRegister;
10587fa27ce4SDimitry Andric if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
10597fa27ce4SDimitry Andric AddrSize != Info->VAddrDwords) {
10607fa27ce4SDimitry Andric unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
10617fa27ce4SDimitry Andric unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
10627fa27ce4SDimitry Andric VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1063e6d15924SDimitry Andric
10647fa27ce4SDimitry Andric auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
10657fa27ce4SDimitry Andric NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
1066e6d15924SDimitry Andric &MRI.getRegClass(AddrRCID));
10677fa27ce4SDimitry Andric if (!NewVAddrSA)
1068ac9a064cSDimitry Andric return;
1069e6d15924SDimitry Andric }
1070044eb2f6SDimitry Andric
1071044eb2f6SDimitry Andric MI.setOpcode(NewOpcode);
1072e6d15924SDimitry Andric
1073e6d15924SDimitry Andric if (NewVdata != AMDGPU::NoRegister) {
1074044eb2f6SDimitry Andric MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1075eb11fae6SDimitry Andric
1076eb11fae6SDimitry Andric if (IsAtomic) {
1077eb11fae6SDimitry Andric // Atomic operations have an additional operand (a copy of data)
1078eb11fae6SDimitry Andric MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1079eb11fae6SDimitry Andric }
1080e6d15924SDimitry Andric }
1081e6d15924SDimitry Andric
10827fa27ce4SDimitry Andric if (NewVAddrSA) {
10837fa27ce4SDimitry Andric MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1084e6d15924SDimitry Andric } else if (IsNSA) {
1085e6d15924SDimitry Andric assert(AddrSize <= Info->VAddrDwords);
1086e6d15924SDimitry Andric MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1087e6d15924SDimitry Andric MI.begin() + VAddr0Idx + Info->VAddrDwords);
1088e6d15924SDimitry Andric }
1089044eb2f6SDimitry Andric }
1090044eb2f6SDimitry Andric
1091145449b1SDimitry Andric // Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1092145449b1SDimitry Andric // decoder only adds to src_modifiers, so manually add the bits to the other
1093145449b1SDimitry Andric // operands.
convertVOP3PDPPInst(MCInst & MI) const1094ac9a064cSDimitry Andric void AMDGPUDisassembler::convertVOP3PDPPInst(MCInst &MI) const {
1095145449b1SDimitry Andric unsigned Opc = MI.getOpcode();
1096145449b1SDimitry Andric unsigned DescNumOps = MCII->get(Opc).getNumOperands();
10974b4fe385SDimitry Andric auto Mods = collectVOPModifiers(MI, true);
1098145449b1SDimitry Andric
1099145449b1SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
1100e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1101145449b1SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1102145449b1SDimitry Andric
1103145449b1SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
1104e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
11054b4fe385SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSel),
1106145449b1SDimitry Andric AMDGPU::OpName::op_sel);
1107145449b1SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
1108e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
11094b4fe385SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(Mods.OpSelHi),
1110145449b1SDimitry Andric AMDGPU::OpName::op_sel_hi);
1111145449b1SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
1112e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
11134b4fe385SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegLo),
1114145449b1SDimitry Andric AMDGPU::OpName::neg_lo);
1115145449b1SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
1116e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
11174b4fe385SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(Mods.NegHi),
1118145449b1SDimitry Andric AMDGPU::OpName::neg_hi);
1119145449b1SDimitry Andric }
1120145449b1SDimitry Andric
1121145449b1SDimitry Andric // Create dummy old operand and insert optional operands
convertVOPCDPPInst(MCInst & MI) const1122ac9a064cSDimitry Andric void AMDGPUDisassembler::convertVOPCDPPInst(MCInst &MI) const {
1123145449b1SDimitry Andric unsigned Opc = MI.getOpcode();
1124145449b1SDimitry Andric unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1125145449b1SDimitry Andric
1126145449b1SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
1127e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1128145449b1SDimitry Andric insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1129145449b1SDimitry Andric
1130145449b1SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
1131e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1132145449b1SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0),
1133145449b1SDimitry Andric AMDGPU::OpName::src0_modifiers);
1134145449b1SDimitry Andric
1135145449b1SDimitry Andric if (MI.getNumOperands() < DescNumOps &&
1136e3b55780SDimitry Andric AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1137145449b1SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(0),
1138145449b1SDimitry Andric AMDGPU::OpName::src1_modifiers);
1139145449b1SDimitry Andric }
1140145449b1SDimitry Andric
convertFMAanyK(MCInst & MI,int ImmLitIdx) const1141ac9a064cSDimitry Andric void AMDGPUDisassembler::convertFMAanyK(MCInst &MI, int ImmLitIdx) const {
1142c0981da4SDimitry Andric assert(HasLiteral && "Should have decoded a literal");
1143c0981da4SDimitry Andric const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
1144c0981da4SDimitry Andric unsigned DescNumOps = Desc.getNumOperands();
1145145449b1SDimitry Andric insertNamedMCOperand(MI, MCOperand::createImm(Literal),
1146145449b1SDimitry Andric AMDGPU::OpName::immDeferred);
1147c0981da4SDimitry Andric assert(DescNumOps == MI.getNumOperands());
1148c0981da4SDimitry Andric for (unsigned I = 0; I < DescNumOps; ++I) {
1149c0981da4SDimitry Andric auto &Op = MI.getOperand(I);
1150e3b55780SDimitry Andric auto OpType = Desc.operands()[I].OperandType;
1151c0981da4SDimitry Andric bool IsDeferredOp = (OpType == AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED ||
1152c0981da4SDimitry Andric OpType == AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED);
1153c0981da4SDimitry Andric if (Op.isImm() && Op.getImm() == AMDGPU::EncValues::LITERAL_CONST &&
1154c0981da4SDimitry Andric IsDeferredOp)
1155c0981da4SDimitry Andric Op.setImm(Literal);
1156c0981da4SDimitry Andric }
1157c0981da4SDimitry Andric }
1158c0981da4SDimitry Andric
getRegClassName(unsigned RegClassID) const115901095a5dSDimitry Andric const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
116001095a5dSDimitry Andric return getContext().getRegisterInfo()->
116101095a5dSDimitry Andric getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
116201095a5dSDimitry Andric }
116301095a5dSDimitry Andric
116401095a5dSDimitry Andric inline
errOperand(unsigned V,const Twine & ErrMsg) const116501095a5dSDimitry Andric MCOperand AMDGPUDisassembler::errOperand(unsigned V,
116601095a5dSDimitry Andric const Twine& ErrMsg) const {
116701095a5dSDimitry Andric *CommentStream << "Error: " + ErrMsg;
116801095a5dSDimitry Andric
116901095a5dSDimitry Andric // ToDo: add support for error operands to MCInst.h
117001095a5dSDimitry Andric // return MCOperand::createError(V);
117101095a5dSDimitry Andric return MCOperand();
117201095a5dSDimitry Andric }
117301095a5dSDimitry Andric
117401095a5dSDimitry Andric inline
createRegOperand(unsigned int RegId) const117501095a5dSDimitry Andric MCOperand AMDGPUDisassembler::createRegOperand(unsigned int RegId) const {
1176044eb2f6SDimitry Andric return MCOperand::createReg(AMDGPU::getMCReg(RegId, STI));
117701095a5dSDimitry Andric }
117801095a5dSDimitry Andric
117901095a5dSDimitry Andric inline
createRegOperand(unsigned RegClassID,unsigned Val) const118001095a5dSDimitry Andric MCOperand AMDGPUDisassembler::createRegOperand(unsigned RegClassID,
118101095a5dSDimitry Andric unsigned Val) const {
118201095a5dSDimitry Andric const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
118301095a5dSDimitry Andric if (Val >= RegCl.getNumRegs())
118401095a5dSDimitry Andric return errOperand(Val, Twine(getRegClassName(RegClassID)) +
118501095a5dSDimitry Andric ": unknown register " + Twine(Val));
118601095a5dSDimitry Andric return createRegOperand(RegCl.getRegister(Val));
118701095a5dSDimitry Andric }
118801095a5dSDimitry Andric
118901095a5dSDimitry Andric inline
createSRegOperand(unsigned SRegClassID,unsigned Val) const119001095a5dSDimitry Andric MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
119101095a5dSDimitry Andric unsigned Val) const {
119201095a5dSDimitry Andric // ToDo: SI/CI have 104 SGPRs, VI - 102
119301095a5dSDimitry Andric // Valery: here we accepting as much as we can, let assembler sort it out
119401095a5dSDimitry Andric int shift = 0;
119501095a5dSDimitry Andric switch (SRegClassID) {
119601095a5dSDimitry Andric case AMDGPU::SGPR_32RegClassID:
119701095a5dSDimitry Andric case AMDGPU::TTMP_32RegClassID:
119801095a5dSDimitry Andric break;
119901095a5dSDimitry Andric case AMDGPU::SGPR_64RegClassID:
120001095a5dSDimitry Andric case AMDGPU::TTMP_64RegClassID:
120101095a5dSDimitry Andric shift = 1;
120201095a5dSDimitry Andric break;
1203312c0ed1SDimitry Andric case AMDGPU::SGPR_96RegClassID:
1204312c0ed1SDimitry Andric case AMDGPU::TTMP_96RegClassID:
120501095a5dSDimitry Andric case AMDGPU::SGPR_128RegClassID:
120601095a5dSDimitry Andric case AMDGPU::TTMP_128RegClassID:
120701095a5dSDimitry Andric // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
120801095a5dSDimitry Andric // this bundle?
1209c7dac04cSDimitry Andric case AMDGPU::SGPR_256RegClassID:
1210c7dac04cSDimitry Andric case AMDGPU::TTMP_256RegClassID:
121101095a5dSDimitry Andric // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
121201095a5dSDimitry Andric // this bundle?
1213e3b55780SDimitry Andric case AMDGPU::SGPR_288RegClassID:
1214e3b55780SDimitry Andric case AMDGPU::TTMP_288RegClassID:
1215e3b55780SDimitry Andric case AMDGPU::SGPR_320RegClassID:
1216e3b55780SDimitry Andric case AMDGPU::TTMP_320RegClassID:
1217e3b55780SDimitry Andric case AMDGPU::SGPR_352RegClassID:
1218e3b55780SDimitry Andric case AMDGPU::TTMP_352RegClassID:
1219e3b55780SDimitry Andric case AMDGPU::SGPR_384RegClassID:
1220e3b55780SDimitry Andric case AMDGPU::TTMP_384RegClassID:
1221c7dac04cSDimitry Andric case AMDGPU::SGPR_512RegClassID:
1222c7dac04cSDimitry Andric case AMDGPU::TTMP_512RegClassID:
122301095a5dSDimitry Andric shift = 2;
122401095a5dSDimitry Andric break;
122501095a5dSDimitry Andric // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
122601095a5dSDimitry Andric // this bundle?
122701095a5dSDimitry Andric default:
1228b915e9e0SDimitry Andric llvm_unreachable("unhandled register class");
122901095a5dSDimitry Andric }
1230b915e9e0SDimitry Andric
1231b915e9e0SDimitry Andric if (Val % (1 << shift)) {
123201095a5dSDimitry Andric *CommentStream << "Warning: " << getRegClassName(SRegClassID)
123301095a5dSDimitry Andric << ": scalar reg isn't aligned " << Val;
1234b915e9e0SDimitry Andric }
1235b915e9e0SDimitry Andric
123601095a5dSDimitry Andric return createRegOperand(SRegClassID, Val >> shift);
123701095a5dSDimitry Andric }
123801095a5dSDimitry Andric
createVGPR16Operand(unsigned RegIdx,bool IsHi) const1239b1c73532SDimitry Andric MCOperand AMDGPUDisassembler::createVGPR16Operand(unsigned RegIdx,
1240b1c73532SDimitry Andric bool IsHi) const {
124177dbea07SDimitry Andric unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
124277dbea07SDimitry Andric return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1243b1c73532SDimitry Andric }
1244b1c73532SDimitry Andric
1245c0981da4SDimitry Andric // Decode Literals for insts which always have a literal in the encoding
1246c0981da4SDimitry Andric MCOperand
decodeMandatoryLiteralConstant(unsigned Val) const1247c0981da4SDimitry Andric AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
1248c0981da4SDimitry Andric if (HasLiteral) {
1249145449b1SDimitry Andric assert(
1250145449b1SDimitry Andric AMDGPU::hasVOPD(STI) &&
1251145449b1SDimitry Andric "Should only decode multiple kimm with VOPD, check VSrc operand types");
1252c0981da4SDimitry Andric if (Literal != Val)
1253c0981da4SDimitry Andric return errOperand(Val, "More than one unique literal is illegal");
1254c0981da4SDimitry Andric }
1255c0981da4SDimitry Andric HasLiteral = true;
1256c0981da4SDimitry Andric Literal = Val;
1257c0981da4SDimitry Andric return MCOperand::createImm(Literal);
1258c0981da4SDimitry Andric }
1259c0981da4SDimitry Andric
decodeLiteralConstant(bool ExtendFP64) const1260b1c73532SDimitry Andric MCOperand AMDGPUDisassembler::decodeLiteralConstant(bool ExtendFP64) const {
126101095a5dSDimitry Andric // For now all literal constants are supposed to be unsigned integer
126201095a5dSDimitry Andric // ToDo: deal with signed/unsigned 64-bit integer constants
126301095a5dSDimitry Andric // ToDo: deal with float/double constants
1264b5630dbaSDimitry Andric if (!HasLiteral) {
1265b5630dbaSDimitry Andric if (Bytes.size() < 4) {
126601095a5dSDimitry Andric return errOperand(0, "cannot read literal, inst bytes left " +
126701095a5dSDimitry Andric Twine(Bytes.size()));
1268b5630dbaSDimitry Andric }
1269b5630dbaSDimitry Andric HasLiteral = true;
1270b1c73532SDimitry Andric Literal = Literal64 = eatBytes<uint32_t>(Bytes);
1271b1c73532SDimitry Andric if (ExtendFP64)
1272b1c73532SDimitry Andric Literal64 <<= 32;
1273b5630dbaSDimitry Andric }
1274b1c73532SDimitry Andric return MCOperand::createImm(ExtendFP64 ? Literal64 : Literal);
127501095a5dSDimitry Andric }
127601095a5dSDimitry Andric
decodeIntImmed(unsigned Imm)127701095a5dSDimitry Andric MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
127801095a5dSDimitry Andric using namespace AMDGPU::EncValues;
1279044eb2f6SDimitry Andric
128001095a5dSDimitry Andric assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
128101095a5dSDimitry Andric return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
128201095a5dSDimitry Andric (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
128301095a5dSDimitry Andric (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
128401095a5dSDimitry Andric // Cast prevents negative overflow.
128501095a5dSDimitry Andric }
128601095a5dSDimitry Andric
getInlineImmVal32(unsigned Imm)1287b915e9e0SDimitry Andric static int64_t getInlineImmVal32(unsigned Imm) {
1288b915e9e0SDimitry Andric switch (Imm) {
1289b915e9e0SDimitry Andric case 240:
12907fa27ce4SDimitry Andric return llvm::bit_cast<uint32_t>(0.5f);
1291b915e9e0SDimitry Andric case 241:
12927fa27ce4SDimitry Andric return llvm::bit_cast<uint32_t>(-0.5f);
1293b915e9e0SDimitry Andric case 242:
12947fa27ce4SDimitry Andric return llvm::bit_cast<uint32_t>(1.0f);
1295b915e9e0SDimitry Andric case 243:
12967fa27ce4SDimitry Andric return llvm::bit_cast<uint32_t>(-1.0f);
1297b915e9e0SDimitry Andric case 244:
12987fa27ce4SDimitry Andric return llvm::bit_cast<uint32_t>(2.0f);
1299b915e9e0SDimitry Andric case 245:
13007fa27ce4SDimitry Andric return llvm::bit_cast<uint32_t>(-2.0f);
1301b915e9e0SDimitry Andric case 246:
13027fa27ce4SDimitry Andric return llvm::bit_cast<uint32_t>(4.0f);
1303b915e9e0SDimitry Andric case 247:
13047fa27ce4SDimitry Andric return llvm::bit_cast<uint32_t>(-4.0f);
1305b915e9e0SDimitry Andric case 248: // 1 / (2 * PI)
1306b915e9e0SDimitry Andric return 0x3e22f983;
1307b915e9e0SDimitry Andric default:
1308b915e9e0SDimitry Andric llvm_unreachable("invalid fp inline imm");
1309b915e9e0SDimitry Andric }
1310b915e9e0SDimitry Andric }
1311b915e9e0SDimitry Andric
getInlineImmVal64(unsigned Imm)1312b915e9e0SDimitry Andric static int64_t getInlineImmVal64(unsigned Imm) {
1313b915e9e0SDimitry Andric switch (Imm) {
1314b915e9e0SDimitry Andric case 240:
13157fa27ce4SDimitry Andric return llvm::bit_cast<uint64_t>(0.5);
1316b915e9e0SDimitry Andric case 241:
13177fa27ce4SDimitry Andric return llvm::bit_cast<uint64_t>(-0.5);
1318b915e9e0SDimitry Andric case 242:
13197fa27ce4SDimitry Andric return llvm::bit_cast<uint64_t>(1.0);
1320b915e9e0SDimitry Andric case 243:
13217fa27ce4SDimitry Andric return llvm::bit_cast<uint64_t>(-1.0);
1322b915e9e0SDimitry Andric case 244:
13237fa27ce4SDimitry Andric return llvm::bit_cast<uint64_t>(2.0);
1324b915e9e0SDimitry Andric case 245:
13257fa27ce4SDimitry Andric return llvm::bit_cast<uint64_t>(-2.0);
1326b915e9e0SDimitry Andric case 246:
13277fa27ce4SDimitry Andric return llvm::bit_cast<uint64_t>(4.0);
1328b915e9e0SDimitry Andric case 247:
13297fa27ce4SDimitry Andric return llvm::bit_cast<uint64_t>(-4.0);
1330b915e9e0SDimitry Andric case 248: // 1 / (2 * PI)
1331b915e9e0SDimitry Andric return 0x3fc45f306dc9c882;
1332b915e9e0SDimitry Andric default:
1333b915e9e0SDimitry Andric llvm_unreachable("invalid fp inline imm");
1334b915e9e0SDimitry Andric }
1335b915e9e0SDimitry Andric }
1336b915e9e0SDimitry Andric
getInlineImmValF16(unsigned Imm)1337ac9a064cSDimitry Andric static int64_t getInlineImmValF16(unsigned Imm) {
1338b915e9e0SDimitry Andric switch (Imm) {
1339b915e9e0SDimitry Andric case 240:
1340b915e9e0SDimitry Andric return 0x3800;
1341b915e9e0SDimitry Andric case 241:
1342b915e9e0SDimitry Andric return 0xB800;
1343b915e9e0SDimitry Andric case 242:
1344b915e9e0SDimitry Andric return 0x3C00;
1345b915e9e0SDimitry Andric case 243:
1346b915e9e0SDimitry Andric return 0xBC00;
1347b915e9e0SDimitry Andric case 244:
1348b915e9e0SDimitry Andric return 0x4000;
1349b915e9e0SDimitry Andric case 245:
1350b915e9e0SDimitry Andric return 0xC000;
1351b915e9e0SDimitry Andric case 246:
1352b915e9e0SDimitry Andric return 0x4400;
1353b915e9e0SDimitry Andric case 247:
1354b915e9e0SDimitry Andric return 0xC400;
1355b915e9e0SDimitry Andric case 248: // 1 / (2 * PI)
1356b915e9e0SDimitry Andric return 0x3118;
1357b915e9e0SDimitry Andric default:
1358b915e9e0SDimitry Andric llvm_unreachable("invalid fp inline imm");
1359b915e9e0SDimitry Andric }
1360b915e9e0SDimitry Andric }
1361b915e9e0SDimitry Andric
getInlineImmValBF16(unsigned Imm)1362ac9a064cSDimitry Andric static int64_t getInlineImmValBF16(unsigned Imm) {
1363ac9a064cSDimitry Andric switch (Imm) {
1364ac9a064cSDimitry Andric case 240:
1365ac9a064cSDimitry Andric return 0x3F00;
1366ac9a064cSDimitry Andric case 241:
1367ac9a064cSDimitry Andric return 0xBF00;
1368ac9a064cSDimitry Andric case 242:
1369ac9a064cSDimitry Andric return 0x3F80;
1370ac9a064cSDimitry Andric case 243:
1371ac9a064cSDimitry Andric return 0xBF80;
1372ac9a064cSDimitry Andric case 244:
1373ac9a064cSDimitry Andric return 0x4000;
1374ac9a064cSDimitry Andric case 245:
1375ac9a064cSDimitry Andric return 0xC000;
1376ac9a064cSDimitry Andric case 246:
1377ac9a064cSDimitry Andric return 0x4080;
1378ac9a064cSDimitry Andric case 247:
1379ac9a064cSDimitry Andric return 0xC080;
1380ac9a064cSDimitry Andric case 248: // 1 / (2 * PI)
1381ac9a064cSDimitry Andric return 0x3E22;
1382ac9a064cSDimitry Andric default:
1383ac9a064cSDimitry Andric llvm_unreachable("invalid fp inline imm");
1384ac9a064cSDimitry Andric }
1385ac9a064cSDimitry Andric }
1386ac9a064cSDimitry Andric
getInlineImmVal16(unsigned Imm,AMDGPU::OperandSemantics Sema)1387ac9a064cSDimitry Andric static int64_t getInlineImmVal16(unsigned Imm, AMDGPU::OperandSemantics Sema) {
1388ac9a064cSDimitry Andric return (Sema == AMDGPU::OperandSemantics::BF16) ? getInlineImmValBF16(Imm)
1389ac9a064cSDimitry Andric : getInlineImmValF16(Imm);
1390ac9a064cSDimitry Andric }
1391ac9a064cSDimitry Andric
decodeFPImmed(unsigned ImmWidth,unsigned Imm,AMDGPU::OperandSemantics Sema)1392ac9a064cSDimitry Andric MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm,
1393ac9a064cSDimitry Andric AMDGPU::OperandSemantics Sema) {
1394ac9a064cSDimitry Andric assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN &&
1395ac9a064cSDimitry Andric Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
1396b915e9e0SDimitry Andric
139701095a5dSDimitry Andric // ToDo: case 248: 1/(2*PI) - is allowed only on VI
13987fa27ce4SDimitry Andric // ImmWidth 0 is a default case where operand should not allow immediates.
13997fa27ce4SDimitry Andric // Imm value is still decoded into 32 bit immediate operand, inst printer will
14007fa27ce4SDimitry Andric // use it to print verbose error message.
14017fa27ce4SDimitry Andric switch (ImmWidth) {
14027fa27ce4SDimitry Andric case 0:
14037fa27ce4SDimitry Andric case 32:
1404b915e9e0SDimitry Andric return MCOperand::createImm(getInlineImmVal32(Imm));
14057fa27ce4SDimitry Andric case 64:
1406b915e9e0SDimitry Andric return MCOperand::createImm(getInlineImmVal64(Imm));
14077fa27ce4SDimitry Andric case 16:
1408ac9a064cSDimitry Andric return MCOperand::createImm(getInlineImmVal16(Imm, Sema));
1409b915e9e0SDimitry Andric default:
1410b915e9e0SDimitry Andric llvm_unreachable("implement me");
141101095a5dSDimitry Andric }
141201095a5dSDimitry Andric }
141301095a5dSDimitry Andric
getVgprClassId(const OpWidthTy Width) const141401095a5dSDimitry Andric unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
141501095a5dSDimitry Andric using namespace AMDGPU;
1416044eb2f6SDimitry Andric
141701095a5dSDimitry Andric assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
141801095a5dSDimitry Andric switch (Width) {
141901095a5dSDimitry Andric default: // fall
1420b915e9e0SDimitry Andric case OPW32:
1421b915e9e0SDimitry Andric case OPW16:
142271d5a254SDimitry Andric case OPWV216:
1423b915e9e0SDimitry Andric return VGPR_32RegClassID;
1424344a3780SDimitry Andric case OPW64:
1425344a3780SDimitry Andric case OPWV232: return VReg_64RegClassID;
1426344a3780SDimitry Andric case OPW96: return VReg_96RegClassID;
142701095a5dSDimitry Andric case OPW128: return VReg_128RegClassID;
1428344a3780SDimitry Andric case OPW160: return VReg_160RegClassID;
1429344a3780SDimitry Andric case OPW256: return VReg_256RegClassID;
1430e3b55780SDimitry Andric case OPW288: return VReg_288RegClassID;
1431e3b55780SDimitry Andric case OPW320: return VReg_320RegClassID;
1432e3b55780SDimitry Andric case OPW352: return VReg_352RegClassID;
1433e3b55780SDimitry Andric case OPW384: return VReg_384RegClassID;
1434344a3780SDimitry Andric case OPW512: return VReg_512RegClassID;
1435344a3780SDimitry Andric case OPW1024: return VReg_1024RegClassID;
143601095a5dSDimitry Andric }
143701095a5dSDimitry Andric }
143801095a5dSDimitry Andric
getAgprClassId(const OpWidthTy Width) const1439e6d15924SDimitry Andric unsigned AMDGPUDisassembler::getAgprClassId(const OpWidthTy Width) const {
1440e6d15924SDimitry Andric using namespace AMDGPU;
1441e6d15924SDimitry Andric
1442e6d15924SDimitry Andric assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
1443e6d15924SDimitry Andric switch (Width) {
1444e6d15924SDimitry Andric default: // fall
1445e6d15924SDimitry Andric case OPW32:
1446e6d15924SDimitry Andric case OPW16:
1447e6d15924SDimitry Andric case OPWV216:
1448e6d15924SDimitry Andric return AGPR_32RegClassID;
1449344a3780SDimitry Andric case OPW64:
1450344a3780SDimitry Andric case OPWV232: return AReg_64RegClassID;
1451344a3780SDimitry Andric case OPW96: return AReg_96RegClassID;
1452e6d15924SDimitry Andric case OPW128: return AReg_128RegClassID;
1453344a3780SDimitry Andric case OPW160: return AReg_160RegClassID;
1454cfca06d7SDimitry Andric case OPW256: return AReg_256RegClassID;
1455e3b55780SDimitry Andric case OPW288: return AReg_288RegClassID;
1456e3b55780SDimitry Andric case OPW320: return AReg_320RegClassID;
1457e3b55780SDimitry Andric case OPW352: return AReg_352RegClassID;
1458e3b55780SDimitry Andric case OPW384: return AReg_384RegClassID;
1459e6d15924SDimitry Andric case OPW512: return AReg_512RegClassID;
1460e6d15924SDimitry Andric case OPW1024: return AReg_1024RegClassID;
1461e6d15924SDimitry Andric }
1462e6d15924SDimitry Andric }
1463e6d15924SDimitry Andric
1464e6d15924SDimitry Andric
getSgprClassId(const OpWidthTy Width) const146501095a5dSDimitry Andric unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
146601095a5dSDimitry Andric using namespace AMDGPU;
1467044eb2f6SDimitry Andric
146801095a5dSDimitry Andric assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
146901095a5dSDimitry Andric switch (Width) {
147001095a5dSDimitry Andric default: // fall
1471b915e9e0SDimitry Andric case OPW32:
1472b915e9e0SDimitry Andric case OPW16:
147371d5a254SDimitry Andric case OPWV216:
1474b915e9e0SDimitry Andric return SGPR_32RegClassID;
1475344a3780SDimitry Andric case OPW64:
1476344a3780SDimitry Andric case OPWV232: return SGPR_64RegClassID;
1477344a3780SDimitry Andric case OPW96: return SGPR_96RegClassID;
147801095a5dSDimitry Andric case OPW128: return SGPR_128RegClassID;
1479344a3780SDimitry Andric case OPW160: return SGPR_160RegClassID;
1480c7dac04cSDimitry Andric case OPW256: return SGPR_256RegClassID;
1481e3b55780SDimitry Andric case OPW288: return SGPR_288RegClassID;
1482e3b55780SDimitry Andric case OPW320: return SGPR_320RegClassID;
1483e3b55780SDimitry Andric case OPW352: return SGPR_352RegClassID;
1484e3b55780SDimitry Andric case OPW384: return SGPR_384RegClassID;
1485c7dac04cSDimitry Andric case OPW512: return SGPR_512RegClassID;
148601095a5dSDimitry Andric }
148701095a5dSDimitry Andric }
148801095a5dSDimitry Andric
getTtmpClassId(const OpWidthTy Width) const148901095a5dSDimitry Andric unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
149001095a5dSDimitry Andric using namespace AMDGPU;
1491044eb2f6SDimitry Andric
149201095a5dSDimitry Andric assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
149301095a5dSDimitry Andric switch (Width) {
149401095a5dSDimitry Andric default: // fall
1495b915e9e0SDimitry Andric case OPW32:
1496b915e9e0SDimitry Andric case OPW16:
149771d5a254SDimitry Andric case OPWV216:
1498b915e9e0SDimitry Andric return TTMP_32RegClassID;
1499344a3780SDimitry Andric case OPW64:
1500344a3780SDimitry Andric case OPWV232: return TTMP_64RegClassID;
150101095a5dSDimitry Andric case OPW128: return TTMP_128RegClassID;
1502c7dac04cSDimitry Andric case OPW256: return TTMP_256RegClassID;
1503e3b55780SDimitry Andric case OPW288: return TTMP_288RegClassID;
1504e3b55780SDimitry Andric case OPW320: return TTMP_320RegClassID;
1505e3b55780SDimitry Andric case OPW352: return TTMP_352RegClassID;
1506e3b55780SDimitry Andric case OPW384: return TTMP_384RegClassID;
1507c7dac04cSDimitry Andric case OPW512: return TTMP_512RegClassID;
150801095a5dSDimitry Andric }
150901095a5dSDimitry Andric }
151001095a5dSDimitry Andric
getTTmpIdx(unsigned Val) const1511044eb2f6SDimitry Andric int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1512044eb2f6SDimitry Andric using namespace AMDGPU::EncValues;
1513044eb2f6SDimitry Andric
1514b60736ecSDimitry Andric unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1515b60736ecSDimitry Andric unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1516044eb2f6SDimitry Andric
1517044eb2f6SDimitry Andric return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1518044eb2f6SDimitry Andric }
1519044eb2f6SDimitry Andric
decodeSrcOp(const OpWidthTy Width,unsigned Val,bool MandatoryLiteral,unsigned ImmWidth,AMDGPU::OperandSemantics Sema) const1520c0981da4SDimitry Andric MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
15217fa27ce4SDimitry Andric bool MandatoryLiteral,
1522ac9a064cSDimitry Andric unsigned ImmWidth,
1523ac9a064cSDimitry Andric AMDGPU::OperandSemantics Sema) const {
152401095a5dSDimitry Andric using namespace AMDGPU::EncValues;
1525044eb2f6SDimitry Andric
1526e6d15924SDimitry Andric assert(Val < 1024); // enum10
1527e6d15924SDimitry Andric
1528e6d15924SDimitry Andric bool IsAGPR = Val & 512;
1529e6d15924SDimitry Andric Val &= 511;
153001095a5dSDimitry Andric
153101095a5dSDimitry Andric if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1532e6d15924SDimitry Andric return createRegOperand(IsAGPR ? getAgprClassId(Width)
1533e6d15924SDimitry Andric : getVgprClassId(Width), Val - VGPR_MIN);
153401095a5dSDimitry Andric }
1535b1c73532SDimitry Andric return decodeNonVGPRSrcOp(Width, Val & 0xFF, MandatoryLiteral, ImmWidth,
1536ac9a064cSDimitry Andric Sema);
1537b1c73532SDimitry Andric }
1538b1c73532SDimitry Andric
1539ac9a064cSDimitry Andric MCOperand
decodeNonVGPRSrcOp(const OpWidthTy Width,unsigned Val,bool MandatoryLiteral,unsigned ImmWidth,AMDGPU::OperandSemantics Sema) const1540ac9a064cSDimitry Andric AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
1541ac9a064cSDimitry Andric bool MandatoryLiteral, unsigned ImmWidth,
1542ac9a064cSDimitry Andric AMDGPU::OperandSemantics Sema) const {
1543b1c73532SDimitry Andric // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1544b1c73532SDimitry Andric // decoded earlier.
1545b1c73532SDimitry Andric assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1546b1c73532SDimitry Andric using namespace AMDGPU::EncValues;
1547b1c73532SDimitry Andric
154801095a5dSDimitry Andric if (Val <= SGPR_MAX) {
1549b60736ecSDimitry Andric // "SGPR_MIN <= Val" is always true and causes compilation warning.
1550e3b55780SDimitry Andric static_assert(SGPR_MIN == 0);
155101095a5dSDimitry Andric return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
155201095a5dSDimitry Andric }
1553044eb2f6SDimitry Andric
1554044eb2f6SDimitry Andric int TTmpIdx = getTTmpIdx(Val);
1555044eb2f6SDimitry Andric if (TTmpIdx >= 0) {
1556044eb2f6SDimitry Andric return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
155701095a5dSDimitry Andric }
155801095a5dSDimitry Andric
155901095a5dSDimitry Andric if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
156001095a5dSDimitry Andric return decodeIntImmed(Val);
156101095a5dSDimitry Andric
156201095a5dSDimitry Andric if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
1563ac9a064cSDimitry Andric return decodeFPImmed(ImmWidth, Val, Sema);
156401095a5dSDimitry Andric
1565c0981da4SDimitry Andric if (Val == LITERAL_CONST) {
1566c0981da4SDimitry Andric if (MandatoryLiteral)
1567c0981da4SDimitry Andric // Keep a sentinel value for deferred setting
1568c0981da4SDimitry Andric return MCOperand::createImm(LITERAL_CONST);
1569ac9a064cSDimitry Andric return decodeLiteralConstant(Sema == AMDGPU::OperandSemantics::FP64);
1570c0981da4SDimitry Andric }
157101095a5dSDimitry Andric
1572b915e9e0SDimitry Andric switch (Width) {
1573b915e9e0SDimitry Andric case OPW32:
1574b915e9e0SDimitry Andric case OPW16:
157571d5a254SDimitry Andric case OPWV216:
1576b915e9e0SDimitry Andric return decodeSpecialReg32(Val);
1577b915e9e0SDimitry Andric case OPW64:
1578344a3780SDimitry Andric case OPWV232:
1579b915e9e0SDimitry Andric return decodeSpecialReg64(Val);
1580b915e9e0SDimitry Andric default:
1581b915e9e0SDimitry Andric llvm_unreachable("unexpected immediate type");
1582b915e9e0SDimitry Andric }
158301095a5dSDimitry Andric }
158401095a5dSDimitry Andric
1585145449b1SDimitry Andric // Bit 0 of DstY isn't stored in the instruction, because it's always the
1586145449b1SDimitry Andric // opposite of bit 0 of DstX.
decodeVOPDDstYOp(MCInst & Inst,unsigned Val) const1587145449b1SDimitry Andric MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
1588145449b1SDimitry Andric unsigned Val) const {
1589145449b1SDimitry Andric int VDstXInd =
1590145449b1SDimitry Andric AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
1591145449b1SDimitry Andric assert(VDstXInd != -1);
1592145449b1SDimitry Andric assert(Inst.getOperand(VDstXInd).isReg());
1593145449b1SDimitry Andric unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
1594145449b1SDimitry Andric Val |= ~XDstReg & 1;
1595145449b1SDimitry Andric auto Width = llvm::AMDGPUDisassembler::OPW32;
1596145449b1SDimitry Andric return createRegOperand(getVgprClassId(Width), Val);
1597145449b1SDimitry Andric }
1598145449b1SDimitry Andric
decodeSpecialReg32(unsigned Val) const159901095a5dSDimitry Andric MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
160001095a5dSDimitry Andric using namespace AMDGPU;
1601044eb2f6SDimitry Andric
160201095a5dSDimitry Andric switch (Val) {
1603e3b55780SDimitry Andric // clang-format off
1604044eb2f6SDimitry Andric case 102: return createRegOperand(FLAT_SCR_LO);
1605044eb2f6SDimitry Andric case 103: return createRegOperand(FLAT_SCR_HI);
1606eb11fae6SDimitry Andric case 104: return createRegOperand(XNACK_MASK_LO);
1607eb11fae6SDimitry Andric case 105: return createRegOperand(XNACK_MASK_HI);
160801095a5dSDimitry Andric case 106: return createRegOperand(VCC_LO);
160901095a5dSDimitry Andric case 107: return createRegOperand(VCC_HI);
1610e6d15924SDimitry Andric case 108: return createRegOperand(TBA_LO);
1611e6d15924SDimitry Andric case 109: return createRegOperand(TBA_HI);
1612e6d15924SDimitry Andric case 110: return createRegOperand(TMA_LO);
1613e6d15924SDimitry Andric case 111: return createRegOperand(TMA_HI);
1614145449b1SDimitry Andric case 124:
1615145449b1SDimitry Andric return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
1616145449b1SDimitry Andric case 125:
1617145449b1SDimitry Andric return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
161801095a5dSDimitry Andric case 126: return createRegOperand(EXEC_LO);
161901095a5dSDimitry Andric case 127: return createRegOperand(EXEC_HI);
1620e3b55780SDimitry Andric case 235: return createRegOperand(SRC_SHARED_BASE_LO);
1621e3b55780SDimitry Andric case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
1622e3b55780SDimitry Andric case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
1623e3b55780SDimitry Andric case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
1624e6d15924SDimitry Andric case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1625e6d15924SDimitry Andric case 251: return createRegOperand(SRC_VCCZ);
1626e6d15924SDimitry Andric case 252: return createRegOperand(SRC_EXECZ);
1627e6d15924SDimitry Andric case 253: return createRegOperand(SRC_SCC);
1628e6d15924SDimitry Andric case 254: return createRegOperand(LDS_DIRECT);
162901095a5dSDimitry Andric default: break;
1630e3b55780SDimitry Andric // clang-format on
163101095a5dSDimitry Andric }
163201095a5dSDimitry Andric return errOperand(Val, "unknown operand encoding " + Twine(Val));
163301095a5dSDimitry Andric }
163401095a5dSDimitry Andric
decodeSpecialReg64(unsigned Val) const163501095a5dSDimitry Andric MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
163601095a5dSDimitry Andric using namespace AMDGPU;
1637044eb2f6SDimitry Andric
163801095a5dSDimitry Andric switch (Val) {
1639044eb2f6SDimitry Andric case 102: return createRegOperand(FLAT_SCR);
1640eb11fae6SDimitry Andric case 104: return createRegOperand(XNACK_MASK);
164101095a5dSDimitry Andric case 106: return createRegOperand(VCC);
1642e6d15924SDimitry Andric case 108: return createRegOperand(TBA);
1643e6d15924SDimitry Andric case 110: return createRegOperand(TMA);
1644145449b1SDimitry Andric case 124:
1645145449b1SDimitry Andric if (isGFX11Plus())
1646145449b1SDimitry Andric return createRegOperand(SGPR_NULL);
1647145449b1SDimitry Andric break;
1648145449b1SDimitry Andric case 125:
1649145449b1SDimitry Andric if (!isGFX11Plus())
1650145449b1SDimitry Andric return createRegOperand(SGPR_NULL);
1651145449b1SDimitry Andric break;
165201095a5dSDimitry Andric case 126: return createRegOperand(EXEC);
1653e6d15924SDimitry Andric case 235: return createRegOperand(SRC_SHARED_BASE);
1654e6d15924SDimitry Andric case 236: return createRegOperand(SRC_SHARED_LIMIT);
1655e6d15924SDimitry Andric case 237: return createRegOperand(SRC_PRIVATE_BASE);
1656e6d15924SDimitry Andric case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
1657e6d15924SDimitry Andric case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
1658e6d15924SDimitry Andric case 251: return createRegOperand(SRC_VCCZ);
1659e6d15924SDimitry Andric case 252: return createRegOperand(SRC_EXECZ);
1660e6d15924SDimitry Andric case 253: return createRegOperand(SRC_SCC);
166101095a5dSDimitry Andric default: break;
166201095a5dSDimitry Andric }
166301095a5dSDimitry Andric return errOperand(Val, "unknown operand encoding " + Twine(Val));
166401095a5dSDimitry Andric }
166501095a5dSDimitry Andric
1666ac9a064cSDimitry Andric MCOperand
decodeSDWASrc(const OpWidthTy Width,const unsigned Val,unsigned ImmWidth,AMDGPU::OperandSemantics Sema) const1667ac9a064cSDimitry Andric AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
1668ac9a064cSDimitry Andric unsigned ImmWidth,
1669ac9a064cSDimitry Andric AMDGPU::OperandSemantics Sema) const {
1670ab44ce3dSDimitry Andric using namespace AMDGPU::SDWA;
1671eb11fae6SDimitry Andric using namespace AMDGPU::EncValues;
1672ab44ce3dSDimitry Andric
16737fa27ce4SDimitry Andric if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
16747fa27ce4SDimitry Andric STI.hasFeature(AMDGPU::FeatureGFX10)) {
1675e6d15924SDimitry Andric // XXX: cast to int is needed to avoid stupid warning:
16769df3605dSDimitry Andric // compare with unsigned is always true
1677e6d15924SDimitry Andric if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
1678ab44ce3dSDimitry Andric Val <= SDWA9EncValues::SRC_VGPR_MAX) {
1679ab44ce3dSDimitry Andric return createRegOperand(getVgprClassId(Width),
1680ab44ce3dSDimitry Andric Val - SDWA9EncValues::SRC_VGPR_MIN);
1681ab44ce3dSDimitry Andric }
1682ab44ce3dSDimitry Andric if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
1683b60736ecSDimitry Andric Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
1684e6d15924SDimitry Andric : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
1685ab44ce3dSDimitry Andric return createSRegOperand(getSgprClassId(Width),
1686ab44ce3dSDimitry Andric Val - SDWA9EncValues::SRC_SGPR_MIN);
1687ab44ce3dSDimitry Andric }
1688044eb2f6SDimitry Andric if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
1689044eb2f6SDimitry Andric Val <= SDWA9EncValues::SRC_TTMP_MAX) {
1690044eb2f6SDimitry Andric return createSRegOperand(getTtmpClassId(Width),
1691044eb2f6SDimitry Andric Val - SDWA9EncValues::SRC_TTMP_MIN);
1692044eb2f6SDimitry Andric }
1693ab44ce3dSDimitry Andric
1694eb11fae6SDimitry Andric const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
1695eb11fae6SDimitry Andric
1696eb11fae6SDimitry Andric if (INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX)
1697eb11fae6SDimitry Andric return decodeIntImmed(SVal);
1698eb11fae6SDimitry Andric
1699eb11fae6SDimitry Andric if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
1700ac9a064cSDimitry Andric return decodeFPImmed(ImmWidth, SVal, Sema);
1701eb11fae6SDimitry Andric
1702eb11fae6SDimitry Andric return decodeSpecialReg32(SVal);
170308bbd35aSDimitry Andric }
1704ac9a064cSDimitry Andric if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
1705ac9a064cSDimitry Andric return createRegOperand(getVgprClassId(Width), Val);
170608bbd35aSDimitry Andric llvm_unreachable("unsupported target");
1707ab44ce3dSDimitry Andric }
1708ab44ce3dSDimitry Andric
decodeSDWASrc16(unsigned Val) const170908bbd35aSDimitry Andric MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
1710ac9a064cSDimitry Andric return decodeSDWASrc(OPW16, Val, 16, AMDGPU::OperandSemantics::FP16);
1711ab44ce3dSDimitry Andric }
1712ab44ce3dSDimitry Andric
decodeSDWASrc32(unsigned Val) const171308bbd35aSDimitry Andric MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
1714ac9a064cSDimitry Andric return decodeSDWASrc(OPW32, Val, 32, AMDGPU::OperandSemantics::FP32);
1715ab44ce3dSDimitry Andric }
1716ab44ce3dSDimitry Andric
decodeSDWAVopcDst(unsigned Val) const171708bbd35aSDimitry Andric MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
1718ab44ce3dSDimitry Andric using namespace AMDGPU::SDWA;
1719ab44ce3dSDimitry Andric
17207fa27ce4SDimitry Andric assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
17217fa27ce4SDimitry Andric STI.hasFeature(AMDGPU::FeatureGFX10)) &&
1722e6d15924SDimitry Andric "SDWAVopcDst should be present only on GFX9+");
1723e6d15924SDimitry Andric
17247fa27ce4SDimitry Andric bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
1725e6d15924SDimitry Andric
1726ab44ce3dSDimitry Andric if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
1727ab44ce3dSDimitry Andric Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
1728044eb2f6SDimitry Andric
1729044eb2f6SDimitry Andric int TTmpIdx = getTTmpIdx(Val);
1730044eb2f6SDimitry Andric if (TTmpIdx >= 0) {
17311d5ae102SDimitry Andric auto TTmpClsId = getTtmpClassId(IsWave64 ? OPW64 : OPW32);
17321d5ae102SDimitry Andric return createSRegOperand(TTmpClsId, TTmpIdx);
1733ac9a064cSDimitry Andric }
1734ac9a064cSDimitry Andric if (Val > SGPR_MAX) {
1735ac9a064cSDimitry Andric return IsWave64 ? decodeSpecialReg64(Val) : decodeSpecialReg32(Val);
1736ac9a064cSDimitry Andric }
1737e6d15924SDimitry Andric return createSRegOperand(getSgprClassId(IsWave64 ? OPW64 : OPW32), Val);
1738ab44ce3dSDimitry Andric }
1739e6d15924SDimitry Andric return createRegOperand(IsWave64 ? AMDGPU::VCC : AMDGPU::VCC_LO);
1740ab44ce3dSDimitry Andric }
1741ab44ce3dSDimitry Andric
decodeBoolReg(unsigned Val) const1742e6d15924SDimitry Andric MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
17437fa27ce4SDimitry Andric return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
17447fa27ce4SDimitry Andric ? decodeSrcOp(OPW64, Val)
17457fa27ce4SDimitry Andric : decodeSrcOp(OPW32, Val);
1746e6d15924SDimitry Andric }
1747e6d15924SDimitry Andric
decodeSplitBarrier(unsigned Val) const1748312c0ed1SDimitry Andric MCOperand AMDGPUDisassembler::decodeSplitBarrier(unsigned Val) const {
1749312c0ed1SDimitry Andric return decodeSrcOp(OPW32, Val);
1750312c0ed1SDimitry Andric }
1751312c0ed1SDimitry Andric
decodeDpp8FI(unsigned Val) const1752ac9a064cSDimitry Andric MCOperand AMDGPUDisassembler::decodeDpp8FI(unsigned Val) const {
1753ac9a064cSDimitry Andric if (Val != AMDGPU::DPP::DPP8_FI_0 && Val != AMDGPU::DPP::DPP8_FI_1)
1754ac9a064cSDimitry Andric return MCOperand();
1755ac9a064cSDimitry Andric return MCOperand::createImm(Val);
1756ac9a064cSDimitry Andric }
1757ac9a064cSDimitry Andric
decodeVersionImm(unsigned Imm) const1758ac9a064cSDimitry Andric MCOperand AMDGPUDisassembler::decodeVersionImm(unsigned Imm) const {
1759ac9a064cSDimitry Andric using VersionField = AMDGPU::EncodingField<7, 0>;
1760ac9a064cSDimitry Andric using W64Bit = AMDGPU::EncodingBit<13>;
1761ac9a064cSDimitry Andric using W32Bit = AMDGPU::EncodingBit<14>;
1762ac9a064cSDimitry Andric using MDPBit = AMDGPU::EncodingBit<15>;
1763ac9a064cSDimitry Andric using Encoding = AMDGPU::EncodingFields<VersionField, W64Bit, W32Bit, MDPBit>;
1764ac9a064cSDimitry Andric
1765ac9a064cSDimitry Andric auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
1766ac9a064cSDimitry Andric
1767ac9a064cSDimitry Andric // Decode into a plain immediate if any unused bits are raised.
1768ac9a064cSDimitry Andric if (Encoding::encode(Version, W64, W32, MDP) != Imm)
1769ac9a064cSDimitry Andric return MCOperand::createImm(Imm);
1770ac9a064cSDimitry Andric
1771ac9a064cSDimitry Andric const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
1772ac9a064cSDimitry Andric auto I = find_if(Versions,
1773ac9a064cSDimitry Andric [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
1774ac9a064cSDimitry Andric return V.Code == Version;
1775ac9a064cSDimitry Andric });
1776ac9a064cSDimitry Andric MCContext &Ctx = getContext();
1777ac9a064cSDimitry Andric const MCExpr *E;
1778ac9a064cSDimitry Andric if (I == Versions.end())
1779ac9a064cSDimitry Andric E = MCConstantExpr::create(Version, Ctx);
1780ac9a064cSDimitry Andric else
1781ac9a064cSDimitry Andric E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
1782ac9a064cSDimitry Andric
1783ac9a064cSDimitry Andric if (W64)
1784ac9a064cSDimitry Andric E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
1785ac9a064cSDimitry Andric if (W32)
1786ac9a064cSDimitry Andric E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
1787ac9a064cSDimitry Andric if (MDP)
1788ac9a064cSDimitry Andric E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
1789ac9a064cSDimitry Andric
1790ac9a064cSDimitry Andric return MCOperand::createExpr(E);
1791ac9a064cSDimitry Andric }
1792ac9a064cSDimitry Andric
isVI() const1793044eb2f6SDimitry Andric bool AMDGPUDisassembler::isVI() const {
17947fa27ce4SDimitry Andric return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
1795044eb2f6SDimitry Andric }
1796044eb2f6SDimitry Andric
isGFX9() const1797b60736ecSDimitry Andric bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
1798b60736ecSDimitry Andric
isGFX90A() const1799344a3780SDimitry Andric bool AMDGPUDisassembler::isGFX90A() const {
18007fa27ce4SDimitry Andric return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
1801344a3780SDimitry Andric }
1802344a3780SDimitry Andric
isGFX9Plus() const1803b60736ecSDimitry Andric bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
1804b60736ecSDimitry Andric
isGFX10() const1805b60736ecSDimitry Andric bool AMDGPUDisassembler::isGFX10() const { return AMDGPU::isGFX10(STI); }
1806b60736ecSDimitry Andric
isGFX10Plus() const1807b60736ecSDimitry Andric bool AMDGPUDisassembler::isGFX10Plus() const {
1808b60736ecSDimitry Andric return AMDGPU::isGFX10Plus(STI);
1809044eb2f6SDimitry Andric }
1810044eb2f6SDimitry Andric
isGFX11() const1811145449b1SDimitry Andric bool AMDGPUDisassembler::isGFX11() const {
18127fa27ce4SDimitry Andric return STI.hasFeature(AMDGPU::FeatureGFX11);
1813145449b1SDimitry Andric }
1814145449b1SDimitry Andric
isGFX11Plus() const1815145449b1SDimitry Andric bool AMDGPUDisassembler::isGFX11Plus() const {
1816145449b1SDimitry Andric return AMDGPU::isGFX11Plus(STI);
1817145449b1SDimitry Andric }
1818145449b1SDimitry Andric
isGFX12() const1819ac9a064cSDimitry Andric bool AMDGPUDisassembler::isGFX12() const {
1820ac9a064cSDimitry Andric return STI.hasFeature(AMDGPU::FeatureGFX12);
1821ac9a064cSDimitry Andric }
1822ac9a064cSDimitry Andric
isGFX12Plus() const1823b1c73532SDimitry Andric bool AMDGPUDisassembler::isGFX12Plus() const {
1824b1c73532SDimitry Andric return AMDGPU::isGFX12Plus(STI);
1825b1c73532SDimitry Andric }
1826145449b1SDimitry Andric
hasArchitectedFlatScratch() const1827344a3780SDimitry Andric bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
18287fa27ce4SDimitry Andric return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
1829344a3780SDimitry Andric }
1830344a3780SDimitry Andric
hasKernargPreload() const1831b1c73532SDimitry Andric bool AMDGPUDisassembler::hasKernargPreload() const {
1832b1c73532SDimitry Andric return AMDGPU::hasKernargPreload(STI);
1833b1c73532SDimitry Andric }
1834b1c73532SDimitry Andric
1835b60736ecSDimitry Andric //===----------------------------------------------------------------------===//
1836b60736ecSDimitry Andric // AMDGPU specific symbol handling
1837b60736ecSDimitry Andric //===----------------------------------------------------------------------===//
1838ac9a064cSDimitry Andric
1839ac9a064cSDimitry Andric /// Print a string describing the reserved bit range specified by Mask with
1840ac9a064cSDimitry Andric /// offset BaseBytes for use in error comments. Mask is a single continuous
1841ac9a064cSDimitry Andric /// range of 1s surrounded by zeros. The format here is meant to align with the
1842ac9a064cSDimitry Andric /// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
getBitRangeFromMask(uint32_t Mask,unsigned BaseBytes)1843ac9a064cSDimitry Andric static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
1844ac9a064cSDimitry Andric SmallString<32> Result;
1845ac9a064cSDimitry Andric raw_svector_ostream S(Result);
1846ac9a064cSDimitry Andric
1847ac9a064cSDimitry Andric int TrailingZeros = llvm::countr_zero(Mask);
1848ac9a064cSDimitry Andric int PopCount = llvm::popcount(Mask);
1849ac9a064cSDimitry Andric
1850ac9a064cSDimitry Andric if (PopCount == 1) {
1851ac9a064cSDimitry Andric S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1852ac9a064cSDimitry Andric } else {
1853ac9a064cSDimitry Andric S << "bits in range ("
1854ac9a064cSDimitry Andric << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
1855ac9a064cSDimitry Andric << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
1856ac9a064cSDimitry Andric }
1857ac9a064cSDimitry Andric
1858ac9a064cSDimitry Andric return Result;
1859ac9a064cSDimitry Andric }
1860ac9a064cSDimitry Andric
// Helper macros for the kernel-descriptor decoders below. They are not
// hygienic: each one refers to names that must exist at the expansion site
// (FourByteBuffer, and for the printing macros also KdStream and Indent;
// PRINT_PSEUDO_DIRECTIVE_COMMENT additionally uses MAI).

// Extract the field selected by MASK from FourByteBuffer.
#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
// Emit "<Indent><DIRECTIVE> <field value>\n" to KdStream. DIRECTIVE must be a
// string literal because it is concatenated with " ".
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)                                       \
  do {                                                                         \
    KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n';            \
  } while (0)
// Same as PRINT_DIRECTIVE, but the line is prefixed with the assembler comment
// string so it is emitted as a comment.
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)                        \
  do {                                                                         \
    KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " "       \
             << GET_FIELD(MASK) << '\n';                                       \
  } while (0)

// Return a string error from the enclosing function if any bit of MASK is set
// in FourByteBuffer. DESC and MSG must be string literals; they are pasted
// into the message around the bit range produced by getBitRangeFromMask().
#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG)                              \
  do {                                                                         \
    if (FourByteBuffer & (MASK)) {                                             \
      return createStringError(std::errc::invalid_argument,                    \
                               "kernel descriptor " DESC                       \
                               " reserved %s set" MSG,                         \
                               getBitRangeFromMask((MASK), 0).c_str());        \
    }                                                                          \
  } while (0)

// Convenience wrappers: the plain and _MSG forms use the stringified MASK as
// the description, the _DESC forms take an explicit one; the _MSG forms append
// ", " MSG to the error text.
#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)                                     \
  CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)                                   \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)                          \
  CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
1889ac9a064cSDimitry Andric
1890b60736ecSDimitry Andric // NOLINTNEXTLINE(readability-identifier-naming)
decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer,raw_string_ostream & KdStream) const1891ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
1892b60736ecSDimitry Andric uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
1893b60736ecSDimitry Andric using namespace amdhsa;
1894b60736ecSDimitry Andric StringRef Indent = "\t";
1895b60736ecSDimitry Andric
1896b60736ecSDimitry Andric // We cannot accurately backward compute #VGPRs used from
1897b60736ecSDimitry Andric // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
1898b60736ecSDimitry Andric // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
1899b60736ecSDimitry Andric // simply calculate the inverse of what the assembler does.
1900b60736ecSDimitry Andric
1901b60736ecSDimitry Andric uint32_t GranulatedWorkitemVGPRCount =
19027fa27ce4SDimitry Andric GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
1903b60736ecSDimitry Andric
19047fa27ce4SDimitry Andric uint32_t NextFreeVGPR =
19057fa27ce4SDimitry Andric (GranulatedWorkitemVGPRCount + 1) *
19067fa27ce4SDimitry Andric AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
1907b60736ecSDimitry Andric
1908b60736ecSDimitry Andric KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
1909b60736ecSDimitry Andric
1910b60736ecSDimitry Andric // We cannot backward compute values used to calculate
1911b60736ecSDimitry Andric // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
1912b60736ecSDimitry Andric // directives can't be computed:
1913b60736ecSDimitry Andric // .amdhsa_reserve_vcc
1914b60736ecSDimitry Andric // .amdhsa_reserve_flat_scratch
1915b60736ecSDimitry Andric // .amdhsa_reserve_xnack_mask
1916b60736ecSDimitry Andric // They take their respective default values if not specified in the assembly.
1917b60736ecSDimitry Andric //
1918b60736ecSDimitry Andric // GRANULATED_WAVEFRONT_SGPR_COUNT
1919b60736ecSDimitry Andric // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
1920b60736ecSDimitry Andric //
1921b60736ecSDimitry Andric // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
1922b60736ecSDimitry Andric // are set to 0. So while disassembling we consider that:
1923b60736ecSDimitry Andric //
1924b60736ecSDimitry Andric // GRANULATED_WAVEFRONT_SGPR_COUNT
1925b60736ecSDimitry Andric // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
1926b60736ecSDimitry Andric //
1927b60736ecSDimitry Andric // The disassembler cannot recover the original values of those 3 directives.
1928b60736ecSDimitry Andric
1929b60736ecSDimitry Andric uint32_t GranulatedWavefrontSGPRCount =
19307fa27ce4SDimitry Andric GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
1931b60736ecSDimitry Andric
1932ac9a064cSDimitry Andric if (isGFX10Plus())
1933ac9a064cSDimitry Andric CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
1934ac9a064cSDimitry Andric "must be zero on gfx10+");
1935b60736ecSDimitry Andric
1936b60736ecSDimitry Andric uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
1937b60736ecSDimitry Andric AMDGPU::IsaInfo::getSGPREncodingGranule(&STI);
1938b60736ecSDimitry Andric
1939b60736ecSDimitry Andric KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
1940344a3780SDimitry Andric if (!hasArchitectedFlatScratch())
1941b60736ecSDimitry Andric KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
1942b60736ecSDimitry Andric KdStream << Indent << ".amdhsa_reserve_xnack_mask " << 0 << '\n';
1943b60736ecSDimitry Andric KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
1944b60736ecSDimitry Andric
1945ac9a064cSDimitry Andric CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
1946b60736ecSDimitry Andric
1947b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
1948b60736ecSDimitry Andric COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
1949b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
1950b60736ecSDimitry Andric COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
1951b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
1952b60736ecSDimitry Andric COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
1953b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
1954b60736ecSDimitry Andric COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
1955b60736ecSDimitry Andric
1956ac9a064cSDimitry Andric CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
1957b60736ecSDimitry Andric
1958312c0ed1SDimitry Andric if (!isGFX12Plus())
1959312c0ed1SDimitry Andric PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
1960312c0ed1SDimitry Andric COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
1961b60736ecSDimitry Andric
1962ac9a064cSDimitry Andric CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
1963b60736ecSDimitry Andric
1964312c0ed1SDimitry Andric if (!isGFX12Plus())
1965312c0ed1SDimitry Andric PRINT_DIRECTIVE(".amdhsa_ieee_mode",
1966312c0ed1SDimitry Andric COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
1967b60736ecSDimitry Andric
1968ac9a064cSDimitry Andric CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
1969ac9a064cSDimitry Andric CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
1970b60736ecSDimitry Andric
1971b1c73532SDimitry Andric if (isGFX9Plus())
1972b1c73532SDimitry Andric PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
1973b60736ecSDimitry Andric
1974b1c73532SDimitry Andric if (!isGFX9Plus())
1975ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
1976ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
1977ac9a064cSDimitry Andric
1978ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED1, "COMPUTE_PGM_RSRC1");
1979ac9a064cSDimitry Andric
1980b1c73532SDimitry Andric if (!isGFX10Plus())
1981ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2,
1982ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC1", "must be zero pre-gfx10");
1983b60736ecSDimitry Andric
1984b60736ecSDimitry Andric if (isGFX10Plus()) {
1985b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
1986b1c73532SDimitry Andric COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
1987b1c73532SDimitry Andric PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
1988b1c73532SDimitry Andric PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
1989b60736ecSDimitry Andric }
1990312c0ed1SDimitry Andric
1991312c0ed1SDimitry Andric if (isGFX12Plus())
1992312c0ed1SDimitry Andric PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
1993312c0ed1SDimitry Andric COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
1994312c0ed1SDimitry Andric
1995ac9a064cSDimitry Andric return true;
1996b60736ecSDimitry Andric }
1997b60736ecSDimitry Andric
1998b60736ecSDimitry Andric // NOLINTNEXTLINE(readability-identifier-naming)
decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,raw_string_ostream & KdStream) const1999ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
2000b60736ecSDimitry Andric uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2001b60736ecSDimitry Andric using namespace amdhsa;
2002b60736ecSDimitry Andric StringRef Indent = "\t";
2003344a3780SDimitry Andric if (hasArchitectedFlatScratch())
2004344a3780SDimitry Andric PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2005344a3780SDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2006344a3780SDimitry Andric else
2007344a3780SDimitry Andric PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2008b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2009b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2010b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2011b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2012b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2013b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2014b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2015b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2016b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2017b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2018b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2019b60736ecSDimitry Andric
2020ac9a064cSDimitry Andric CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2021ac9a064cSDimitry Andric CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2022ac9a064cSDimitry Andric CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2023b60736ecSDimitry Andric
2024b60736ecSDimitry Andric PRINT_DIRECTIVE(
2025b60736ecSDimitry Andric ".amdhsa_exception_fp_ieee_invalid_op",
2026b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2027b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2028b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2029b60736ecSDimitry Andric PRINT_DIRECTIVE(
2030b60736ecSDimitry Andric ".amdhsa_exception_fp_ieee_div_zero",
2031b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2032b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2033b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2034b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2035b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2036b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2037b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2038b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2039b60736ecSDimitry Andric COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2040b60736ecSDimitry Andric
2041ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2042b60736ecSDimitry Andric
2043ac9a064cSDimitry Andric return true;
2044b60736ecSDimitry Andric }
2045b60736ecSDimitry Andric
20467fa27ce4SDimitry Andric // NOLINTNEXTLINE(readability-identifier-naming)
decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer,raw_string_ostream & KdStream) const2047ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
20487fa27ce4SDimitry Andric uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
20497fa27ce4SDimitry Andric using namespace amdhsa;
20507fa27ce4SDimitry Andric StringRef Indent = "\t";
20517fa27ce4SDimitry Andric if (isGFX90A()) {
20527fa27ce4SDimitry Andric KdStream << Indent << ".amdhsa_accum_offset "
20537fa27ce4SDimitry Andric << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
20547fa27ce4SDimitry Andric << '\n';
2055ac9a064cSDimitry Andric
20567fa27ce4SDimitry Andric PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2057ac9a064cSDimitry Andric
2058ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2059ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2060ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2061ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
20627fa27ce4SDimitry Andric } else if (isGFX10Plus()) {
206377dbea07SDimitry Andric // Bits [0-3].
206477dbea07SDimitry Andric if (!isGFX12Plus()) {
20657fa27ce4SDimitry Andric if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
20667fa27ce4SDimitry Andric PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
206777dbea07SDimitry Andric COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
20687fa27ce4SDimitry Andric } else {
20697fa27ce4SDimitry Andric PRINT_PSEUDO_DIRECTIVE_COMMENT(
207077dbea07SDimitry Andric "SHARED_VGPR_COUNT",
207177dbea07SDimitry Andric COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
20727fa27ce4SDimitry Andric }
2073b1c73532SDimitry Andric } else {
2074ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2075ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC3",
2076ac9a064cSDimitry Andric "must be zero on gfx12+");
2077b1c73532SDimitry Andric }
2078b1c73532SDimitry Andric
207977dbea07SDimitry Andric // Bits [4-11].
208077dbea07SDimitry Andric if (isGFX11()) {
208177dbea07SDimitry Andric PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
208277dbea07SDimitry Andric COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
208377dbea07SDimitry Andric PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
208477dbea07SDimitry Andric COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
208577dbea07SDimitry Andric PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
208677dbea07SDimitry Andric COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
208777dbea07SDimitry Andric } else if (isGFX12Plus()) {
208877dbea07SDimitry Andric PRINT_PSEUDO_DIRECTIVE_COMMENT(
208977dbea07SDimitry Andric "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
209077dbea07SDimitry Andric } else {
2091ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2092ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC3",
2093ac9a064cSDimitry Andric "must be zero on gfx10");
209477dbea07SDimitry Andric }
209577dbea07SDimitry Andric
209677dbea07SDimitry Andric // Bits [12].
2097ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2098ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2099b1c73532SDimitry Andric
210077dbea07SDimitry Andric // Bits [13].
210177dbea07SDimitry Andric if (isGFX12Plus()) {
210277dbea07SDimitry Andric PRINT_PSEUDO_DIRECTIVE_COMMENT("GLG_EN",
210377dbea07SDimitry Andric COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
210477dbea07SDimitry Andric } else {
2105ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2106ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC3",
2107ac9a064cSDimitry Andric "must be zero on gfx10 or gfx11");
210877dbea07SDimitry Andric }
210977dbea07SDimitry Andric
211077dbea07SDimitry Andric // Bits [14-30].
2111ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED4,
2112ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
211377dbea07SDimitry Andric
211477dbea07SDimitry Andric // Bits [31].
2115b1c73532SDimitry Andric if (isGFX11Plus()) {
21167fa27ce4SDimitry Andric PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
211777dbea07SDimitry Andric COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2118b1c73532SDimitry Andric } else {
2119ac9a064cSDimitry Andric CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED5,
2120ac9a064cSDimitry Andric "COMPUTE_PGM_RSRC3",
2121ac9a064cSDimitry Andric "must be zero on gfx10");
2122b1c73532SDimitry Andric }
21237fa27ce4SDimitry Andric } else if (FourByteBuffer) {
2124ac9a064cSDimitry Andric return createStringError(
2125ac9a064cSDimitry Andric std::errc::invalid_argument,
2126ac9a064cSDimitry Andric "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
21277fa27ce4SDimitry Andric }
2128ac9a064cSDimitry Andric return true;
21297fa27ce4SDimitry Andric }
// The COMPUTE_PGM_RSRC* decoders above are finished; retire their helper
// macros so the names are free again for the code that follows.

// Field printing helpers.
#undef GET_FIELD
#undef PRINT_DIRECTIVE
#undef PRINT_PSEUDO_DIRECTIVE_COMMENT

// Reserved-bit checking helpers.
#undef CHECK_RESERVED_BITS_DESC_MSG
#undef CHECK_RESERVED_BITS_DESC
#undef CHECK_RESERVED_BITS_MSG
#undef CHECK_RESERVED_BITS
#undef CHECK_RESERVED_BITS_IMPL
2138b60736ecSDimitry Andric
2139ac9a064cSDimitry Andric /// Create an error object to return from onSymbolStart for reserved kernel
2140ac9a064cSDimitry Andric /// descriptor bits being set.
createReservedKDBitsError(uint32_t Mask,unsigned BaseBytes,const char * Msg="")2141ac9a064cSDimitry Andric static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2142ac9a064cSDimitry Andric const char *Msg = "") {
2143ac9a064cSDimitry Andric return createStringError(
2144ac9a064cSDimitry Andric std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2145ac9a064cSDimitry Andric getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg);
2146ac9a064cSDimitry Andric }
2147ac9a064cSDimitry Andric
2148ac9a064cSDimitry Andric /// Create an error object to return from onSymbolStart for reserved kernel
2149ac9a064cSDimitry Andric /// descriptor bytes being set.
createReservedKDBytesError(unsigned BaseInBytes,unsigned WidthInBytes)2150ac9a064cSDimitry Andric static Error createReservedKDBytesError(unsigned BaseInBytes,
2151ac9a064cSDimitry Andric unsigned WidthInBytes) {
2152ac9a064cSDimitry Andric // Create an error comment in the same format as the "Kernel Descriptor"
2153ac9a064cSDimitry Andric // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2154ac9a064cSDimitry Andric return createStringError(
2155ac9a064cSDimitry Andric std::errc::invalid_argument,
2156ac9a064cSDimitry Andric "kernel descriptor reserved bits in range (%u:%u) set",
2157ac9a064cSDimitry Andric (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT);
2158ac9a064cSDimitry Andric }
2159ac9a064cSDimitry Andric
decodeKernelDescriptorDirective(DataExtractor::Cursor & Cursor,ArrayRef<uint8_t> Bytes,raw_string_ostream & KdStream) const2160ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeKernelDescriptorDirective(
2161b60736ecSDimitry Andric DataExtractor::Cursor &Cursor, ArrayRef<uint8_t> Bytes,
2162b60736ecSDimitry Andric raw_string_ostream &KdStream) const {
2163b60736ecSDimitry Andric #define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2164b60736ecSDimitry Andric do { \
2165b60736ecSDimitry Andric KdStream << Indent << DIRECTIVE " " \
2166b60736ecSDimitry Andric << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2167b60736ecSDimitry Andric } while (0)
2168b60736ecSDimitry Andric
2169b60736ecSDimitry Andric uint16_t TwoByteBuffer = 0;
2170b60736ecSDimitry Andric uint32_t FourByteBuffer = 0;
2171b60736ecSDimitry Andric
2172b60736ecSDimitry Andric StringRef ReservedBytes;
2173b60736ecSDimitry Andric StringRef Indent = "\t";
2174b60736ecSDimitry Andric
2175b60736ecSDimitry Andric assert(Bytes.size() == 64);
2176b60736ecSDimitry Andric DataExtractor DE(Bytes, /*IsLittleEndian=*/true, /*AddressSize=*/8);
2177b60736ecSDimitry Andric
2178b60736ecSDimitry Andric switch (Cursor.tell()) {
2179b60736ecSDimitry Andric case amdhsa::GROUP_SEGMENT_FIXED_SIZE_OFFSET:
2180b60736ecSDimitry Andric FourByteBuffer = DE.getU32(Cursor);
2181b60736ecSDimitry Andric KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2182b60736ecSDimitry Andric << '\n';
2183ac9a064cSDimitry Andric return true;
2184b60736ecSDimitry Andric
2185b60736ecSDimitry Andric case amdhsa::PRIVATE_SEGMENT_FIXED_SIZE_OFFSET:
2186b60736ecSDimitry Andric FourByteBuffer = DE.getU32(Cursor);
2187b60736ecSDimitry Andric KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2188b60736ecSDimitry Andric << FourByteBuffer << '\n';
2189ac9a064cSDimitry Andric return true;
2190b60736ecSDimitry Andric
2191344a3780SDimitry Andric case amdhsa::KERNARG_SIZE_OFFSET:
2192344a3780SDimitry Andric FourByteBuffer = DE.getU32(Cursor);
2193344a3780SDimitry Andric KdStream << Indent << ".amdhsa_kernarg_size "
2194344a3780SDimitry Andric << FourByteBuffer << '\n';
2195ac9a064cSDimitry Andric return true;
2196344a3780SDimitry Andric
2197b60736ecSDimitry Andric case amdhsa::RESERVED0_OFFSET:
2198344a3780SDimitry Andric // 4 reserved bytes, must be 0.
2199344a3780SDimitry Andric ReservedBytes = DE.getBytes(Cursor, 4);
2200344a3780SDimitry Andric for (int I = 0; I < 4; ++I) {
2201ac9a064cSDimitry Andric if (ReservedBytes[I] != 0)
2202ac9a064cSDimitry Andric return createReservedKDBytesError(amdhsa::RESERVED0_OFFSET, 4);
2203b60736ecSDimitry Andric }
2204ac9a064cSDimitry Andric return true;
2205b60736ecSDimitry Andric
2206b60736ecSDimitry Andric case amdhsa::KERNEL_CODE_ENTRY_BYTE_OFFSET_OFFSET:
2207b60736ecSDimitry Andric // KERNEL_CODE_ENTRY_BYTE_OFFSET
2208b60736ecSDimitry Andric // So far no directive controls this for Code Object V3, so simply skip for
2209b60736ecSDimitry Andric // disassembly.
2210b60736ecSDimitry Andric DE.skip(Cursor, 8);
2211ac9a064cSDimitry Andric return true;
2212b60736ecSDimitry Andric
2213b60736ecSDimitry Andric case amdhsa::RESERVED1_OFFSET:
2214b60736ecSDimitry Andric // 20 reserved bytes, must be 0.
2215b60736ecSDimitry Andric ReservedBytes = DE.getBytes(Cursor, 20);
2216b60736ecSDimitry Andric for (int I = 0; I < 20; ++I) {
2217ac9a064cSDimitry Andric if (ReservedBytes[I] != 0)
2218ac9a064cSDimitry Andric return createReservedKDBytesError(amdhsa::RESERVED1_OFFSET, 20);
2219b60736ecSDimitry Andric }
2220ac9a064cSDimitry Andric return true;
2221b60736ecSDimitry Andric
2222b60736ecSDimitry Andric case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
2223b60736ecSDimitry Andric FourByteBuffer = DE.getU32(Cursor);
22247fa27ce4SDimitry Andric return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2225b60736ecSDimitry Andric
2226b60736ecSDimitry Andric case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
2227b60736ecSDimitry Andric FourByteBuffer = DE.getU32(Cursor);
22287fa27ce4SDimitry Andric return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2229b60736ecSDimitry Andric
2230b60736ecSDimitry Andric case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
2231b60736ecSDimitry Andric FourByteBuffer = DE.getU32(Cursor);
22327fa27ce4SDimitry Andric return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2233b60736ecSDimitry Andric
2234b60736ecSDimitry Andric case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
2235b60736ecSDimitry Andric using namespace amdhsa;
2236b60736ecSDimitry Andric TwoByteBuffer = DE.getU16(Cursor);
2237b60736ecSDimitry Andric
2238344a3780SDimitry Andric if (!hasArchitectedFlatScratch())
2239b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2240b60736ecSDimitry Andric KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2241b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2242b60736ecSDimitry Andric KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2243b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2244b60736ecSDimitry Andric KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2245b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2246b60736ecSDimitry Andric KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2247b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2248b60736ecSDimitry Andric KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2249344a3780SDimitry Andric if (!hasArchitectedFlatScratch())
2250b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2251b60736ecSDimitry Andric KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2252b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2253b60736ecSDimitry Andric KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2254b60736ecSDimitry Andric
2255b60736ecSDimitry Andric if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2256ac9a064cSDimitry Andric return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2257ac9a064cSDimitry Andric amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2258b60736ecSDimitry Andric
2259b60736ecSDimitry Andric // Reserved for GFX9
2260b60736ecSDimitry Andric if (isGFX9() &&
2261b60736ecSDimitry Andric (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2262ac9a064cSDimitry Andric return createReservedKDBitsError(
2263ac9a064cSDimitry Andric KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2264ac9a064cSDimitry Andric amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2265ac9a064cSDimitry Andric }
2266ac9a064cSDimitry Andric if (isGFX10Plus()) {
2267b60736ecSDimitry Andric PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2268b60736ecSDimitry Andric KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2269b60736ecSDimitry Andric }
2270b60736ecSDimitry Andric
2271ac9a064cSDimitry Andric if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
22724b4fe385SDimitry Andric PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
22734b4fe385SDimitry Andric KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
22744b4fe385SDimitry Andric
2275ac9a064cSDimitry Andric if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2276ac9a064cSDimitry Andric return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2277ac9a064cSDimitry Andric amdhsa::KERNEL_CODE_PROPERTIES_OFFSET);
2278ac9a064cSDimitry Andric }
2279b60736ecSDimitry Andric
2280ac9a064cSDimitry Andric return true;
2281b60736ecSDimitry Andric
2282b1c73532SDimitry Andric case amdhsa::KERNARG_PRELOAD_OFFSET:
2283b1c73532SDimitry Andric using namespace amdhsa;
2284b1c73532SDimitry Andric TwoByteBuffer = DE.getU16(Cursor);
2285b1c73532SDimitry Andric if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2286b1c73532SDimitry Andric PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2287b1c73532SDimitry Andric KERNARG_PRELOAD_SPEC_LENGTH);
2288b1c73532SDimitry Andric }
2289b1c73532SDimitry Andric
2290b1c73532SDimitry Andric if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2291b1c73532SDimitry Andric PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2292b1c73532SDimitry Andric KERNARG_PRELOAD_SPEC_OFFSET);
2293b1c73532SDimitry Andric }
2294ac9a064cSDimitry Andric return true;
2295b1c73532SDimitry Andric
2296b1c73532SDimitry Andric case amdhsa::RESERVED3_OFFSET:
2297b1c73532SDimitry Andric // 4 bytes from here are reserved, must be 0.
2298b1c73532SDimitry Andric ReservedBytes = DE.getBytes(Cursor, 4);
2299b1c73532SDimitry Andric for (int I = 0; I < 4; ++I) {
2300b60736ecSDimitry Andric if (ReservedBytes[I] != 0)
2301ac9a064cSDimitry Andric return createReservedKDBytesError(amdhsa::RESERVED3_OFFSET, 4);
2302b60736ecSDimitry Andric }
2303ac9a064cSDimitry Andric return true;
2304b60736ecSDimitry Andric
2305b60736ecSDimitry Andric default:
2306b60736ecSDimitry Andric llvm_unreachable("Unhandled index. Case statements cover everything.");
2307ac9a064cSDimitry Andric return true;
2308b60736ecSDimitry Andric }
2309b60736ecSDimitry Andric #undef PRINT_DIRECTIVE
2310b60736ecSDimitry Andric }
2311b60736ecSDimitry Andric
decodeKernelDescriptor(StringRef KdName,ArrayRef<uint8_t> Bytes,uint64_t KdAddress) const2312ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::decodeKernelDescriptor(
2313b60736ecSDimitry Andric StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2314ac9a064cSDimitry Andric
2315b60736ecSDimitry Andric // CP microcode requires the kernel descriptor to be 64 aligned.
2316b60736ecSDimitry Andric if (Bytes.size() != 64 || KdAddress % 64 != 0)
2317ac9a064cSDimitry Andric return createStringError(std::errc::invalid_argument,
2318ac9a064cSDimitry Andric "kernel descriptor must be 64-byte aligned");
2319b60736ecSDimitry Andric
23207fa27ce4SDimitry Andric // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
23217fa27ce4SDimitry Andric // requires us to know the setting of .amdhsa_wavefront_size32 in order to
23227fa27ce4SDimitry Andric // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
23237fa27ce4SDimitry Andric // order. Workaround this by first looking up .amdhsa_wavefront_size32 here
23247fa27ce4SDimitry Andric // when required.
23257fa27ce4SDimitry Andric if (isGFX10Plus()) {
23267fa27ce4SDimitry Andric uint16_t KernelCodeProperties =
23277fa27ce4SDimitry Andric support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
2328b1c73532SDimitry Andric llvm::endianness::little);
23297fa27ce4SDimitry Andric EnableWavefrontSize32 =
23307fa27ce4SDimitry Andric AMDHSA_BITS_GET(KernelCodeProperties,
23317fa27ce4SDimitry Andric amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
23327fa27ce4SDimitry Andric }
23337fa27ce4SDimitry Andric
2334b60736ecSDimitry Andric std::string Kd;
2335b60736ecSDimitry Andric raw_string_ostream KdStream(Kd);
2336b60736ecSDimitry Andric KdStream << ".amdhsa_kernel " << KdName << '\n';
2337b60736ecSDimitry Andric
2338b60736ecSDimitry Andric DataExtractor::Cursor C(0);
2339b60736ecSDimitry Andric while (C && C.tell() < Bytes.size()) {
2340ac9a064cSDimitry Andric Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2341b60736ecSDimitry Andric
2342b60736ecSDimitry Andric cantFail(C.takeError());
2343b60736ecSDimitry Andric
2344ac9a064cSDimitry Andric if (!Res)
2345ac9a064cSDimitry Andric return Res;
2346b60736ecSDimitry Andric }
2347b60736ecSDimitry Andric KdStream << ".end_amdhsa_kernel\n";
2348b60736ecSDimitry Andric outs() << KdStream.str();
2349ac9a064cSDimitry Andric return true;
2350b60736ecSDimitry Andric }
2351b60736ecSDimitry Andric
onSymbolStart(SymbolInfoTy & Symbol,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t Address) const2352ac9a064cSDimitry Andric Expected<bool> AMDGPUDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
2353ac9a064cSDimitry Andric uint64_t &Size,
2354ac9a064cSDimitry Andric ArrayRef<uint8_t> Bytes,
2355ac9a064cSDimitry Andric uint64_t Address) const {
2356b60736ecSDimitry Andric // Right now only kernel descriptor needs to be handled.
2357b60736ecSDimitry Andric // We ignore all other symbols for target specific handling.
2358b60736ecSDimitry Andric // TODO:
2359b60736ecSDimitry Andric // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2360b60736ecSDimitry Andric // Object V2 and V3 when symbols are marked protected.
2361b60736ecSDimitry Andric
2362b60736ecSDimitry Andric // amd_kernel_code_t for Code Object V2.
2363b60736ecSDimitry Andric if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2364b60736ecSDimitry Andric Size = 256;
2365ac9a064cSDimitry Andric return createStringError(std::errc::invalid_argument,
2366ac9a064cSDimitry Andric "code object v2 is not supported");
2367b60736ecSDimitry Andric }
2368b60736ecSDimitry Andric
2369b60736ecSDimitry Andric // Code Object V3 kernel descriptors.
2370b60736ecSDimitry Andric StringRef Name = Symbol.Name;
2371312c0ed1SDimitry Andric if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2372b60736ecSDimitry Andric Size = 64; // Size = 64 regardless of success or failure.
2373b60736ecSDimitry Andric return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2374b60736ecSDimitry Andric }
2375ac9a064cSDimitry Andric
2376ac9a064cSDimitry Andric return false;
2377ac9a064cSDimitry Andric }
2378ac9a064cSDimitry Andric
createConstantSymbolExpr(StringRef Id,int64_t Val)2379ac9a064cSDimitry Andric const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2380ac9a064cSDimitry Andric int64_t Val) {
2381ac9a064cSDimitry Andric MCContext &Ctx = getContext();
2382ac9a064cSDimitry Andric MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2383ac9a064cSDimitry Andric // Note: only set value to Val on a new symbol in case an dissassembler
2384ac9a064cSDimitry Andric // has already been initialized in this context.
2385ac9a064cSDimitry Andric if (!Sym->isVariable()) {
2386ac9a064cSDimitry Andric Sym->setVariableValue(MCConstantExpr::create(Val, Ctx));
2387ac9a064cSDimitry Andric } else {
2388ac9a064cSDimitry Andric int64_t Res = ~Val;
2389ac9a064cSDimitry Andric bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2390ac9a064cSDimitry Andric if (!Valid || Res != Val)
2391ac9a064cSDimitry Andric Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2392ac9a064cSDimitry Andric }
2393ac9a064cSDimitry Andric return MCSymbolRefExpr::create(Sym, Ctx);
2394e6d15924SDimitry Andric }
2395e6d15924SDimitry Andric
2396b915e9e0SDimitry Andric //===----------------------------------------------------------------------===//
2397b915e9e0SDimitry Andric // AMDGPUSymbolizer
2398b915e9e0SDimitry Andric //===----------------------------------------------------------------------===//
2399b915e9e0SDimitry Andric
2400b915e9e0SDimitry Andric // Try to find symbol name for specified label
tryAddingSymbolicOperand(MCInst & Inst,raw_ostream &,int64_t Value,uint64_t,bool IsBranch,uint64_t,uint64_t,uint64_t)2401145449b1SDimitry Andric bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
2402145449b1SDimitry Andric MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2403145449b1SDimitry Andric uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2404145449b1SDimitry Andric uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2405b915e9e0SDimitry Andric
2406b915e9e0SDimitry Andric if (!IsBranch) {
2407b915e9e0SDimitry Andric return false;
2408b915e9e0SDimitry Andric }
2409b915e9e0SDimitry Andric
2410b915e9e0SDimitry Andric auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2411eb11fae6SDimitry Andric if (!Symbols)
2412eb11fae6SDimitry Andric return false;
2413eb11fae6SDimitry Andric
2414b60736ecSDimitry Andric auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2415b60736ecSDimitry Andric return Val.Addr == static_cast<uint64_t>(Value) &&
2416b60736ecSDimitry Andric Val.Type == ELF::STT_NOTYPE;
2417b915e9e0SDimitry Andric });
2418b915e9e0SDimitry Andric if (Result != Symbols->end()) {
2419cfca06d7SDimitry Andric auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2420b915e9e0SDimitry Andric const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2421b915e9e0SDimitry Andric Inst.addOperand(MCOperand::createExpr(Add));
2422b915e9e0SDimitry Andric return true;
2423b915e9e0SDimitry Andric }
2424344a3780SDimitry Andric // Add to list of referenced addresses, so caller can synthesize a label.
2425344a3780SDimitry Andric ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2426b915e9e0SDimitry Andric return false;
2427b915e9e0SDimitry Andric }
2428b915e9e0SDimitry Andric
tryAddingPcLoadReferenceComment(raw_ostream & cStream,int64_t Value,uint64_t Address)2429b915e9e0SDimitry Andric void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
2430b915e9e0SDimitry Andric int64_t Value,
2431b915e9e0SDimitry Andric uint64_t Address) {
2432b915e9e0SDimitry Andric llvm_unreachable("unimplemented");
2433b915e9e0SDimitry Andric }
2434b915e9e0SDimitry Andric
2435b915e9e0SDimitry Andric //===----------------------------------------------------------------------===//
2436b915e9e0SDimitry Andric // Initialization
2437b915e9e0SDimitry Andric //===----------------------------------------------------------------------===//
2438b915e9e0SDimitry Andric
createAMDGPUSymbolizer(const Triple &,LLVMOpInfoCallback,LLVMSymbolLookupCallback,void * DisInfo,MCContext * Ctx,std::unique_ptr<MCRelocationInfo> && RelInfo)2439b915e9e0SDimitry Andric static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
2440b915e9e0SDimitry Andric LLVMOpInfoCallback /*GetOpInfo*/,
2441b915e9e0SDimitry Andric LLVMSymbolLookupCallback /*SymbolLookUp*/,
2442b915e9e0SDimitry Andric void *DisInfo,
2443b915e9e0SDimitry Andric MCContext *Ctx,
2444b915e9e0SDimitry Andric std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2445b915e9e0SDimitry Andric return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2446b915e9e0SDimitry Andric }
2447b915e9e0SDimitry Andric
createAMDGPUDisassembler(const Target & T,const MCSubtargetInfo & STI,MCContext & Ctx)244801095a5dSDimitry Andric static MCDisassembler *createAMDGPUDisassembler(const Target &T,
244901095a5dSDimitry Andric const MCSubtargetInfo &STI,
245001095a5dSDimitry Andric MCContext &Ctx) {
2451044eb2f6SDimitry Andric return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
245201095a5dSDimitry Andric }
245301095a5dSDimitry Andric
LLVMInitializeAMDGPUDisassembler()2454706b4fc4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler() {
2455b915e9e0SDimitry Andric TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
2456b915e9e0SDimitry Andric createAMDGPUDisassembler);
2457b915e9e0SDimitry Andric TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
2458b915e9e0SDimitry Andric createAMDGPUSymbolizer);
245901095a5dSDimitry Andric }
2460