xref: /src/contrib/llvm-project/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file is part of the WebAssembly Disassembler.
///
/// It contains code to translate the data produced by the decoder into
/// MCInsts.
///
//===----------------------------------------------------------------------===//
16050e163aSDimitry Andric 
177fa27ce4SDimitry Andric #include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
18e6d15924SDimitry Andric #include "TargetInfo/WebAssemblyTargetInfo.h"
19ac9a064cSDimitry Andric #include "llvm/BinaryFormat/Wasm.h"
20050e163aSDimitry Andric #include "llvm/MC/MCContext.h"
21145449b1SDimitry Andric #include "llvm/MC/MCDecoderOps.h"
2201095a5dSDimitry Andric #include "llvm/MC/MCDisassembler/MCDisassembler.h"
23050e163aSDimitry Andric #include "llvm/MC/MCInst.h"
24050e163aSDimitry Andric #include "llvm/MC/MCInstrInfo.h"
25050e163aSDimitry Andric #include "llvm/MC/MCSubtargetInfo.h"
26050e163aSDimitry Andric #include "llvm/MC/MCSymbol.h"
271d5ae102SDimitry Andric #include "llvm/MC/MCSymbolWasm.h"
28c0981da4SDimitry Andric #include "llvm/MC/TargetRegistry.h"
29b1c73532SDimitry Andric #include "llvm/Support/Casting.h"
30050e163aSDimitry Andric #include "llvm/Support/Endian.h"
31eb11fae6SDimitry Andric #include "llvm/Support/LEB128.h"
32eb11fae6SDimitry Andric 
33050e163aSDimitry Andric using namespace llvm;
34050e163aSDimitry Andric 
35050e163aSDimitry Andric #define DEBUG_TYPE "wasm-disassembler"
36050e163aSDimitry Andric 
37eb11fae6SDimitry Andric using DecodeStatus = MCDisassembler::DecodeStatus;
38eb11fae6SDimitry Andric 
39eb11fae6SDimitry Andric #include "WebAssemblyGenDisassemblerTables.inc"
40eb11fae6SDimitry Andric 
41050e163aSDimitry Andric namespace {
42d8e91e46SDimitry Andric static constexpr int WebAssemblyInstructionTableSize = 256;
43d8e91e46SDimitry Andric 
44050e163aSDimitry Andric class WebAssemblyDisassembler final : public MCDisassembler {
45050e163aSDimitry Andric   std::unique_ptr<const MCInstrInfo> MCII;
46050e163aSDimitry Andric 
47050e163aSDimitry Andric   DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
48050e163aSDimitry Andric                               ArrayRef<uint8_t> Bytes, uint64_t Address,
49050e163aSDimitry Andric                               raw_ostream &CStream) const override;
50ac9a064cSDimitry Andric 
51ac9a064cSDimitry Andric   Expected<bool> onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size,
52ac9a064cSDimitry Andric                                ArrayRef<uint8_t> Bytes,
53ac9a064cSDimitry Andric                                uint64_t Address) const override;
54050e163aSDimitry Andric 
55050e163aSDimitry Andric public:
WebAssemblyDisassembler(const MCSubtargetInfo & STI,MCContext & Ctx,std::unique_ptr<const MCInstrInfo> MCII)56050e163aSDimitry Andric   WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
57050e163aSDimitry Andric                           std::unique_ptr<const MCInstrInfo> MCII)
58050e163aSDimitry Andric       : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
59050e163aSDimitry Andric };
60050e163aSDimitry Andric } // end anonymous namespace
61050e163aSDimitry Andric 
createWebAssemblyDisassembler(const Target & T,const MCSubtargetInfo & STI,MCContext & Ctx)62050e163aSDimitry Andric static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
63050e163aSDimitry Andric                                                      const MCSubtargetInfo &STI,
64050e163aSDimitry Andric                                                      MCContext &Ctx) {
65050e163aSDimitry Andric   std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
66050e163aSDimitry Andric   return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
67050e163aSDimitry Andric }
68050e163aSDimitry Andric 
69706b4fc4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeWebAssemblyDisassembler()70706b4fc4SDimitry Andric LLVMInitializeWebAssemblyDisassembler() {
71050e163aSDimitry Andric   // Register the disassembler for each target.
72b915e9e0SDimitry Andric   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
73050e163aSDimitry Andric                                          createWebAssemblyDisassembler);
74b915e9e0SDimitry Andric   TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
75050e163aSDimitry Andric                                          createWebAssemblyDisassembler);
76050e163aSDimitry Andric }
77050e163aSDimitry Andric 
nextByte(ArrayRef<uint8_t> Bytes,uint64_t & Size)78eb11fae6SDimitry Andric static int nextByte(ArrayRef<uint8_t> Bytes, uint64_t &Size) {
79eb11fae6SDimitry Andric   if (Size >= Bytes.size())
80eb11fae6SDimitry Andric     return -1;
81eb11fae6SDimitry Andric   auto V = Bytes[Size];
82eb11fae6SDimitry Andric   Size++;
83eb11fae6SDimitry Andric   return V;
84eb11fae6SDimitry Andric }
85eb11fae6SDimitry Andric 
nextLEB(int64_t & Val,ArrayRef<uint8_t> Bytes,uint64_t & Size,bool Signed)86d8e91e46SDimitry Andric static bool nextLEB(int64_t &Val, ArrayRef<uint8_t> Bytes, uint64_t &Size,
87e6d15924SDimitry Andric                     bool Signed) {
88eb11fae6SDimitry Andric   unsigned N = 0;
89eb11fae6SDimitry Andric   const char *Error = nullptr;
90d8e91e46SDimitry Andric   Val = Signed ? decodeSLEB128(Bytes.data() + Size, &N,
91eb11fae6SDimitry Andric                                Bytes.data() + Bytes.size(), &Error)
92d8e91e46SDimitry Andric                : static_cast<int64_t>(decodeULEB128(Bytes.data() + Size, &N,
93d8e91e46SDimitry Andric                                                     Bytes.data() + Bytes.size(),
94d8e91e46SDimitry Andric                                                     &Error));
95eb11fae6SDimitry Andric   if (Error)
96eb11fae6SDimitry Andric     return false;
97eb11fae6SDimitry Andric   Size += N;
98d8e91e46SDimitry Andric   return true;
99d8e91e46SDimitry Andric }
100d8e91e46SDimitry Andric 
parseLEBImmediate(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes,bool Signed)101d8e91e46SDimitry Andric static bool parseLEBImmediate(MCInst &MI, uint64_t &Size,
102d8e91e46SDimitry Andric                               ArrayRef<uint8_t> Bytes, bool Signed) {
103d8e91e46SDimitry Andric   int64_t Val;
104d8e91e46SDimitry Andric   if (!nextLEB(Val, Bytes, Size, Signed))
105d8e91e46SDimitry Andric     return false;
106eb11fae6SDimitry Andric   MI.addOperand(MCOperand::createImm(Val));
107eb11fae6SDimitry Andric   return true;
108eb11fae6SDimitry Andric }
109eb11fae6SDimitry Andric 
110eb11fae6SDimitry Andric template <typename T>
parseImmediate(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes)111d8e91e46SDimitry Andric bool parseImmediate(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes) {
112eb11fae6SDimitry Andric   if (Size + sizeof(T) > Bytes.size())
113eb11fae6SDimitry Andric     return false;
114b1c73532SDimitry Andric   T Val =
115b1c73532SDimitry Andric       support::endian::read<T, llvm::endianness::little>(Bytes.data() + Size);
116eb11fae6SDimitry Andric   Size += sizeof(T);
117d8e91e46SDimitry Andric   if (std::is_floating_point<T>::value) {
118344a3780SDimitry Andric     MI.addOperand(
119344a3780SDimitry Andric         MCOperand::createDFPImm(bit_cast<uint64_t>(static_cast<double>(Val))));
120d8e91e46SDimitry Andric   } else {
121d8e91e46SDimitry Andric     MI.addOperand(MCOperand::createImm(static_cast<int64_t>(Val)));
122d8e91e46SDimitry Andric   }
123eb11fae6SDimitry Andric   return true;
124eb11fae6SDimitry Andric }
125eb11fae6SDimitry Andric 
onSymbolStart(SymbolInfoTy & Symbol,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t Address) const126ac9a064cSDimitry Andric Expected<bool> WebAssemblyDisassembler::onSymbolStart(SymbolInfoTy &Symbol,
127ac9a064cSDimitry Andric                                                       uint64_t &Size,
128e3b55780SDimitry Andric                                                       ArrayRef<uint8_t> Bytes,
129ac9a064cSDimitry Andric                                                       uint64_t Address) const {
130e6d15924SDimitry Andric   Size = 0;
131ac9a064cSDimitry Andric   if (Symbol.Type == wasm::WASM_SYMBOL_TYPE_SECTION) {
132e6d15924SDimitry Andric     // Start of a code section: we're parsing only the function count.
133e6d15924SDimitry Andric     int64_t FunctionCount;
134e6d15924SDimitry Andric     if (!nextLEB(FunctionCount, Bytes, Size, false))
135ac9a064cSDimitry Andric       return false;
136e6d15924SDimitry Andric     outs() << "        # " << FunctionCount << " functions in section.";
137e6d15924SDimitry Andric   } else {
138e6d15924SDimitry Andric     // Parse the start of a single function.
139e6d15924SDimitry Andric     int64_t BodySize, LocalEntryCount;
140e6d15924SDimitry Andric     if (!nextLEB(BodySize, Bytes, Size, false) ||
141e6d15924SDimitry Andric         !nextLEB(LocalEntryCount, Bytes, Size, false))
142ac9a064cSDimitry Andric       return false;
143e6d15924SDimitry Andric     if (LocalEntryCount) {
144e6d15924SDimitry Andric       outs() << "        .local ";
145e6d15924SDimitry Andric       for (int64_t I = 0; I < LocalEntryCount; I++) {
146e6d15924SDimitry Andric         int64_t Count, Type;
147e6d15924SDimitry Andric         if (!nextLEB(Count, Bytes, Size, false) ||
148e6d15924SDimitry Andric             !nextLEB(Type, Bytes, Size, false))
149ac9a064cSDimitry Andric           return false;
150e6d15924SDimitry Andric         for (int64_t J = 0; J < Count; J++) {
151e6d15924SDimitry Andric           if (I || J)
152e6d15924SDimitry Andric             outs() << ", ";
153e6d15924SDimitry Andric           outs() << WebAssembly::anyTypeToString(Type);
154e6d15924SDimitry Andric         }
155e6d15924SDimitry Andric       }
156e6d15924SDimitry Andric     }
157e6d15924SDimitry Andric   }
158e6d15924SDimitry Andric   outs() << "\n";
159ac9a064cSDimitry Andric   return true;
160e6d15924SDimitry Andric }
161e6d15924SDimitry Andric 
getInstruction(MCInst & MI,uint64_t & Size,ArrayRef<uint8_t> Bytes,uint64_t,raw_ostream & CS) const162050e163aSDimitry Andric MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
163050e163aSDimitry Andric     MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
164706b4fc4SDimitry Andric     raw_ostream &CS) const {
165eb11fae6SDimitry Andric   CommentStream = &CS;
166eb11fae6SDimitry Andric   Size = 0;
167d8e91e46SDimitry Andric   int Opc = nextByte(Bytes, Size);
168eb11fae6SDimitry Andric   if (Opc < 0)
169050e163aSDimitry Andric     return MCDisassembler::Fail;
170eb11fae6SDimitry Andric   const auto *WasmInst = &InstructionTable0[Opc];
171eb11fae6SDimitry Andric   // If this is a prefix byte, indirect to another table.
172eb11fae6SDimitry Andric   if (WasmInst->ET == ET_Prefix) {
173eb11fae6SDimitry Andric     WasmInst = nullptr;
174eb11fae6SDimitry Andric     // Linear search, so far only 2 entries.
175eb11fae6SDimitry Andric     for (auto PT = PrefixTable; PT->Table; PT++) {
176eb11fae6SDimitry Andric       if (PT->Prefix == Opc) {
177eb11fae6SDimitry Andric         WasmInst = PT->Table;
178eb11fae6SDimitry Andric         break;
179eb11fae6SDimitry Andric       }
180eb11fae6SDimitry Andric     }
181eb11fae6SDimitry Andric     if (!WasmInst)
182eb11fae6SDimitry Andric       return MCDisassembler::Fail;
183d8e91e46SDimitry Andric     int64_t PrefixedOpc;
184e6d15924SDimitry Andric     if (!nextLEB(PrefixedOpc, Bytes, Size, false))
185eb11fae6SDimitry Andric       return MCDisassembler::Fail;
186d8e91e46SDimitry Andric     if (PrefixedOpc < 0 || PrefixedOpc >= WebAssemblyInstructionTableSize)
187d8e91e46SDimitry Andric       return MCDisassembler::Fail;
188d8e91e46SDimitry Andric     WasmInst += PrefixedOpc;
189eb11fae6SDimitry Andric   }
190eb11fae6SDimitry Andric   if (WasmInst->ET == ET_Unused)
191eb11fae6SDimitry Andric     return MCDisassembler::Fail;
192eb11fae6SDimitry Andric   // At this point we must have a valid instruction to decode.
193eb11fae6SDimitry Andric   assert(WasmInst->ET == ET_Instruction);
194eb11fae6SDimitry Andric   MI.setOpcode(WasmInst->Opcode);
195eb11fae6SDimitry Andric   // Parse any operands.
196eb11fae6SDimitry Andric   for (uint8_t OPI = 0; OPI < WasmInst->NumOperands; OPI++) {
197d8e91e46SDimitry Andric     auto OT = OperandTable[WasmInst->OperandStart + OPI];
198d8e91e46SDimitry Andric     switch (OT) {
199eb11fae6SDimitry Andric     // ULEB operands:
200eb11fae6SDimitry Andric     case WebAssembly::OPERAND_BASIC_BLOCK:
201eb11fae6SDimitry Andric     case WebAssembly::OPERAND_LOCAL:
202eb11fae6SDimitry Andric     case WebAssembly::OPERAND_GLOBAL:
203eb11fae6SDimitry Andric     case WebAssembly::OPERAND_FUNCTION32:
204b60736ecSDimitry Andric     case WebAssembly::OPERAND_TABLE:
205eb11fae6SDimitry Andric     case WebAssembly::OPERAND_OFFSET32:
206cfca06d7SDimitry Andric     case WebAssembly::OPERAND_OFFSET64:
207eb11fae6SDimitry Andric     case WebAssembly::OPERAND_P2ALIGN:
208eb11fae6SDimitry Andric     case WebAssembly::OPERAND_TYPEINDEX:
209344a3780SDimitry Andric     case WebAssembly::OPERAND_TAG:
210eb11fae6SDimitry Andric     case MCOI::OPERAND_IMMEDIATE: {
211eb11fae6SDimitry Andric       if (!parseLEBImmediate(MI, Size, Bytes, false))
212eb11fae6SDimitry Andric         return MCDisassembler::Fail;
213eb11fae6SDimitry Andric       break;
214eb11fae6SDimitry Andric     }
215eb11fae6SDimitry Andric     // SLEB operands:
216eb11fae6SDimitry Andric     case WebAssembly::OPERAND_I32IMM:
217d8e91e46SDimitry Andric     case WebAssembly::OPERAND_I64IMM: {
218eb11fae6SDimitry Andric       if (!parseLEBImmediate(MI, Size, Bytes, true))
219eb11fae6SDimitry Andric         return MCDisassembler::Fail;
220eb11fae6SDimitry Andric       break;
221eb11fae6SDimitry Andric     }
2221d5ae102SDimitry Andric     // block_type operands:
223d8e91e46SDimitry Andric     case WebAssembly::OPERAND_SIGNATURE: {
2241d5ae102SDimitry Andric       int64_t Val;
2251d5ae102SDimitry Andric       uint64_t PrevSize = Size;
2261d5ae102SDimitry Andric       if (!nextLEB(Val, Bytes, Size, true))
227d8e91e46SDimitry Andric         return MCDisassembler::Fail;
2281d5ae102SDimitry Andric       if (Val < 0) {
2291d5ae102SDimitry Andric         // Negative values are single septet value types or empty types
2301d5ae102SDimitry Andric         if (Size != PrevSize + 1) {
2311d5ae102SDimitry Andric           MI.addOperand(
2321d5ae102SDimitry Andric               MCOperand::createImm(int64_t(WebAssembly::BlockType::Invalid)));
2331d5ae102SDimitry Andric         } else {
2341d5ae102SDimitry Andric           MI.addOperand(MCOperand::createImm(Val & 0x7f));
2351d5ae102SDimitry Andric         }
2361d5ae102SDimitry Andric       } else {
2371d5ae102SDimitry Andric         // We don't have access to the signature, so create a symbol without one
2381d5ae102SDimitry Andric         MCSymbol *Sym = getContext().createTempSymbol("typeindex", true);
2391d5ae102SDimitry Andric         auto *WasmSym = cast<MCSymbolWasm>(Sym);
2401d5ae102SDimitry Andric         WasmSym->setType(wasm::WASM_SYMBOL_TYPE_FUNCTION);
2411d5ae102SDimitry Andric         const MCExpr *Expr = MCSymbolRefExpr::create(
2421d5ae102SDimitry Andric             WasmSym, MCSymbolRefExpr::VK_WASM_TYPEINDEX, getContext());
2431d5ae102SDimitry Andric         MI.addOperand(MCOperand::createExpr(Expr));
2441d5ae102SDimitry Andric       }
245d8e91e46SDimitry Andric       break;
246d8e91e46SDimitry Andric     }
247eb11fae6SDimitry Andric     // FP operands.
248eb11fae6SDimitry Andric     case WebAssembly::OPERAND_F32IMM: {
249d8e91e46SDimitry Andric       if (!parseImmediate<float>(MI, Size, Bytes))
250eb11fae6SDimitry Andric         return MCDisassembler::Fail;
251eb11fae6SDimitry Andric       break;
252eb11fae6SDimitry Andric     }
253eb11fae6SDimitry Andric     case WebAssembly::OPERAND_F64IMM: {
254d8e91e46SDimitry Andric       if (!parseImmediate<double>(MI, Size, Bytes))
255eb11fae6SDimitry Andric         return MCDisassembler::Fail;
256eb11fae6SDimitry Andric       break;
257eb11fae6SDimitry Andric     }
258d8e91e46SDimitry Andric     // Vector lane operands (not LEB encoded).
259d8e91e46SDimitry Andric     case WebAssembly::OPERAND_VEC_I8IMM: {
260d8e91e46SDimitry Andric       if (!parseImmediate<uint8_t>(MI, Size, Bytes))
261d8e91e46SDimitry Andric         return MCDisassembler::Fail;
262eb11fae6SDimitry Andric       break;
263eb11fae6SDimitry Andric     }
264d8e91e46SDimitry Andric     case WebAssembly::OPERAND_VEC_I16IMM: {
265d8e91e46SDimitry Andric       if (!parseImmediate<uint16_t>(MI, Size, Bytes))
266d8e91e46SDimitry Andric         return MCDisassembler::Fail;
267d8e91e46SDimitry Andric       break;
268d8e91e46SDimitry Andric     }
269d8e91e46SDimitry Andric     case WebAssembly::OPERAND_VEC_I32IMM: {
270d8e91e46SDimitry Andric       if (!parseImmediate<uint32_t>(MI, Size, Bytes))
271d8e91e46SDimitry Andric         return MCDisassembler::Fail;
272d8e91e46SDimitry Andric       break;
273d8e91e46SDimitry Andric     }
274d8e91e46SDimitry Andric     case WebAssembly::OPERAND_VEC_I64IMM: {
275d8e91e46SDimitry Andric       if (!parseImmediate<uint64_t>(MI, Size, Bytes))
276d8e91e46SDimitry Andric         return MCDisassembler::Fail;
277d8e91e46SDimitry Andric       break;
278d8e91e46SDimitry Andric     }
279d8e91e46SDimitry Andric     case WebAssembly::OPERAND_BRLIST: {
280d8e91e46SDimitry Andric       int64_t TargetTableLen;
281d8e91e46SDimitry Andric       if (!nextLEB(TargetTableLen, Bytes, Size, false))
282d8e91e46SDimitry Andric         return MCDisassembler::Fail;
283d8e91e46SDimitry Andric       for (int64_t I = 0; I < TargetTableLen; I++) {
284d8e91e46SDimitry Andric         if (!parseLEBImmediate(MI, Size, Bytes, false))
285d8e91e46SDimitry Andric           return MCDisassembler::Fail;
286d8e91e46SDimitry Andric       }
287d8e91e46SDimitry Andric       // Default case.
288d8e91e46SDimitry Andric       if (!parseLEBImmediate(MI, Size, Bytes, false))
289d8e91e46SDimitry Andric         return MCDisassembler::Fail;
290d8e91e46SDimitry Andric       break;
291d8e91e46SDimitry Andric     }
292d8e91e46SDimitry Andric     case MCOI::OPERAND_REGISTER:
293d8e91e46SDimitry Andric       // The tablegen header currently does not have any register operands since
294d8e91e46SDimitry Andric       // we use only the stack (_S) instructions.
295d8e91e46SDimitry Andric       // If you hit this that probably means a bad instruction definition in
296d8e91e46SDimitry Andric       // tablegen.
297d8e91e46SDimitry Andric       llvm_unreachable("Register operand in WebAssemblyDisassembler");
298eb11fae6SDimitry Andric     default:
299eb11fae6SDimitry Andric       llvm_unreachable("Unknown operand type in WebAssemblyDisassembler");
300eb11fae6SDimitry Andric     }
301eb11fae6SDimitry Andric   }
302eb11fae6SDimitry Andric   return MCDisassembler::Success;
303050e163aSDimitry Andric }
304