//===-- DataExtractor.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
830815c53SDimitry Andric
930815c53SDimitry Andric #include "llvm/Support/DataExtractor.h"
107fa27ce4SDimitry Andric #include "llvm/ADT/StringExtras.h"
111d5ae102SDimitry Andric #include "llvm/Support/Errc.h"
1230815c53SDimitry Andric #include "llvm/Support/ErrorHandling.h"
13e6d15924SDimitry Andric #include "llvm/Support/LEB128.h"
141d5ae102SDimitry Andric #include "llvm/Support/SwapByteOrder.h"
151d5ae102SDimitry Andric
1630815c53SDimitry Andric using namespace llvm;
1730815c53SDimitry Andric
prepareRead(uint64_t Offset,uint64_t Size,Error * E) const18cfca06d7SDimitry Andric bool DataExtractor::prepareRead(uint64_t Offset, uint64_t Size,
19cfca06d7SDimitry Andric Error *E) const {
20cfca06d7SDimitry Andric if (isValidOffsetForDataOfSize(Offset, Size))
21cfca06d7SDimitry Andric return true;
22cfca06d7SDimitry Andric if (E) {
23cfca06d7SDimitry Andric if (Offset <= Data.size())
24cfca06d7SDimitry Andric *E = createStringError(
25cfca06d7SDimitry Andric errc::illegal_byte_sequence,
26cfca06d7SDimitry Andric "unexpected end of data at offset 0x%zx while reading [0x%" PRIx64
27cfca06d7SDimitry Andric ", 0x%" PRIx64 ")",
28cfca06d7SDimitry Andric Data.size(), Offset, Offset + Size);
29cfca06d7SDimitry Andric else
30cfca06d7SDimitry Andric *E = createStringError(errc::invalid_argument,
31cfca06d7SDimitry Andric "offset 0x%" PRIx64
32cfca06d7SDimitry Andric " is beyond the end of data at 0x%zx",
33cfca06d7SDimitry Andric Offset, Data.size());
34cfca06d7SDimitry Andric }
35cfca06d7SDimitry Andric return false;
361d5ae102SDimitry Andric }
371d5ae102SDimitry Andric
isError(Error * E)381d5ae102SDimitry Andric static bool isError(Error *E) { return E && *E; }
391d5ae102SDimitry Andric
4030815c53SDimitry Andric template <typename T>
getU(uint64_t * offset_ptr,Error * Err) const41cfca06d7SDimitry Andric T DataExtractor::getU(uint64_t *offset_ptr, Error *Err) const {
421d5ae102SDimitry Andric ErrorAsOutParameter ErrAsOut(Err);
4330815c53SDimitry Andric T val = 0;
441d5ae102SDimitry Andric if (isError(Err))
451d5ae102SDimitry Andric return val;
461d5ae102SDimitry Andric
471d5ae102SDimitry Andric uint64_t offset = *offset_ptr;
48cfca06d7SDimitry Andric if (!prepareRead(offset, sizeof(T), Err))
491d5ae102SDimitry Andric return val;
50cfca06d7SDimitry Andric std::memcpy(&val, &Data.data()[offset], sizeof(val));
51cfca06d7SDimitry Andric if (sys::IsLittleEndianHost != IsLittleEndian)
525ca98fd9SDimitry Andric sys::swapByteOrder(val);
5330815c53SDimitry Andric
5430815c53SDimitry Andric // Advance the offset
5530815c53SDimitry Andric *offset_ptr += sizeof(val);
5630815c53SDimitry Andric return val;
5730815c53SDimitry Andric }
5830815c53SDimitry Andric
5930815c53SDimitry Andric template <typename T>
getUs(uint64_t * offset_ptr,T * dst,uint32_t count,Error * Err) const60cfca06d7SDimitry Andric T *DataExtractor::getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
61cfca06d7SDimitry Andric Error *Err) const {
621d5ae102SDimitry Andric ErrorAsOutParameter ErrAsOut(Err);
631d5ae102SDimitry Andric if (isError(Err))
641d5ae102SDimitry Andric return nullptr;
6530815c53SDimitry Andric
661d5ae102SDimitry Andric uint64_t offset = *offset_ptr;
671d5ae102SDimitry Andric
68cfca06d7SDimitry Andric if (!prepareRead(offset, sizeof(*dst) * count, Err))
691d5ae102SDimitry Andric return nullptr;
7030815c53SDimitry Andric for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
7130815c53SDimitry Andric ++value_ptr, offset += sizeof(*dst))
72cfca06d7SDimitry Andric *value_ptr = getU<T>(offset_ptr, Err);
7330815c53SDimitry Andric // Advance the offset
7430815c53SDimitry Andric *offset_ptr = offset;
7530815c53SDimitry Andric // Return a non-NULL pointer to the converted data as an indicator of
7630815c53SDimitry Andric // success
7730815c53SDimitry Andric return dst;
7830815c53SDimitry Andric }
7930815c53SDimitry Andric
getU8(uint64_t * offset_ptr,llvm::Error * Err) const801d5ae102SDimitry Andric uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const {
81cfca06d7SDimitry Andric return getU<uint8_t>(offset_ptr, Err);
8230815c53SDimitry Andric }
8330815c53SDimitry Andric
getU8(uint64_t * offset_ptr,uint8_t * dst,uint32_t count) const84cfca06d7SDimitry Andric uint8_t *DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst,
85cfca06d7SDimitry Andric uint32_t count) const {
86cfca06d7SDimitry Andric return getUs<uint8_t>(offset_ptr, dst, count, nullptr);
8730815c53SDimitry Andric }
8830815c53SDimitry Andric
getU8(Cursor & C,uint8_t * Dst,uint32_t Count) const891d5ae102SDimitry Andric uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const {
90cfca06d7SDimitry Andric return getUs<uint8_t>(&C.Offset, Dst, Count, &C.Err);
9130815c53SDimitry Andric }
9230815c53SDimitry Andric
getU16(uint64_t * offset_ptr,llvm::Error * Err) const931d5ae102SDimitry Andric uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const {
94cfca06d7SDimitry Andric return getU<uint16_t>(offset_ptr, Err);
951d5ae102SDimitry Andric }
961d5ae102SDimitry Andric
getU16(uint64_t * offset_ptr,uint16_t * dst,uint32_t count) const971d5ae102SDimitry Andric uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst,
9830815c53SDimitry Andric uint32_t count) const {
99cfca06d7SDimitry Andric return getUs<uint16_t>(offset_ptr, dst, count, nullptr);
10030815c53SDimitry Andric }
10130815c53SDimitry Andric
getU24(uint64_t * OffsetPtr,Error * Err) const102cfca06d7SDimitry Andric uint32_t DataExtractor::getU24(uint64_t *OffsetPtr, Error *Err) const {
103cfca06d7SDimitry Andric uint24_t ExtractedVal = getU<uint24_t>(OffsetPtr, Err);
10408bbd35aSDimitry Andric // The 3 bytes are in the correct byte order for the host.
10508bbd35aSDimitry Andric return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
10608bbd35aSDimitry Andric }
10708bbd35aSDimitry Andric
getU32(uint64_t * offset_ptr,llvm::Error * Err) const1081d5ae102SDimitry Andric uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const {
109cfca06d7SDimitry Andric return getU<uint32_t>(offset_ptr, Err);
11030815c53SDimitry Andric }
11130815c53SDimitry Andric
getU32(uint64_t * offset_ptr,uint32_t * dst,uint32_t count) const1121d5ae102SDimitry Andric uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst,
11330815c53SDimitry Andric uint32_t count) const {
114cfca06d7SDimitry Andric return getUs<uint32_t>(offset_ptr, dst, count, nullptr);
11530815c53SDimitry Andric }
11630815c53SDimitry Andric
getU64(uint64_t * offset_ptr,llvm::Error * Err) const1171d5ae102SDimitry Andric uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const {
118cfca06d7SDimitry Andric return getU<uint64_t>(offset_ptr, Err);
11930815c53SDimitry Andric }
12030815c53SDimitry Andric
getU64(uint64_t * offset_ptr,uint64_t * dst,uint32_t count) const1211d5ae102SDimitry Andric uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst,
12230815c53SDimitry Andric uint32_t count) const {
123cfca06d7SDimitry Andric return getUs<uint64_t>(offset_ptr, dst, count, nullptr);
12430815c53SDimitry Andric }
12530815c53SDimitry Andric
getUnsigned(uint64_t * offset_ptr,uint32_t byte_size,llvm::Error * Err) const1261d5ae102SDimitry Andric uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
1271d5ae102SDimitry Andric llvm::Error *Err) const {
12830815c53SDimitry Andric switch (byte_size) {
12930815c53SDimitry Andric case 1:
1301d5ae102SDimitry Andric return getU8(offset_ptr, Err);
13130815c53SDimitry Andric case 2:
1321d5ae102SDimitry Andric return getU16(offset_ptr, Err);
13330815c53SDimitry Andric case 4:
1341d5ae102SDimitry Andric return getU32(offset_ptr, Err);
13530815c53SDimitry Andric case 8:
1361d5ae102SDimitry Andric return getU64(offset_ptr, Err);
13730815c53SDimitry Andric }
13830815c53SDimitry Andric llvm_unreachable("getUnsigned unhandled case!");
13930815c53SDimitry Andric }
14030815c53SDimitry Andric
14130815c53SDimitry Andric int64_t
getSigned(uint64_t * offset_ptr,uint32_t byte_size) const1421d5ae102SDimitry Andric DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
14330815c53SDimitry Andric switch (byte_size) {
14430815c53SDimitry Andric case 1:
14530815c53SDimitry Andric return (int8_t)getU8(offset_ptr);
14630815c53SDimitry Andric case 2:
14730815c53SDimitry Andric return (int16_t)getU16(offset_ptr);
14830815c53SDimitry Andric case 4:
14930815c53SDimitry Andric return (int32_t)getU32(offset_ptr);
15030815c53SDimitry Andric case 8:
15130815c53SDimitry Andric return (int64_t)getU64(offset_ptr);
15230815c53SDimitry Andric }
15330815c53SDimitry Andric llvm_unreachable("getSigned unhandled case!");
15430815c53SDimitry Andric }
15530815c53SDimitry Andric
getCStrRef(uint64_t * OffsetPtr,Error * Err) const156cfca06d7SDimitry Andric StringRef DataExtractor::getCStrRef(uint64_t *OffsetPtr, Error *Err) const {
157cfca06d7SDimitry Andric ErrorAsOutParameter ErrAsOut(Err);
158cfca06d7SDimitry Andric if (isError(Err))
159cfca06d7SDimitry Andric return StringRef();
16030815c53SDimitry Andric
161cfca06d7SDimitry Andric uint64_t Start = *OffsetPtr;
162148779dfSDimitry Andric StringRef::size_type Pos = Data.find('\0', Start);
163148779dfSDimitry Andric if (Pos != StringRef::npos) {
164cfca06d7SDimitry Andric *OffsetPtr = Pos + 1;
165148779dfSDimitry Andric return StringRef(Data.data() + Start, Pos - Start);
166148779dfSDimitry Andric }
167cfca06d7SDimitry Andric if (Err)
168cfca06d7SDimitry Andric *Err = createStringError(errc::illegal_byte_sequence,
169cfca06d7SDimitry Andric "no null terminated string at offset 0x%" PRIx64,
170cfca06d7SDimitry Andric Start);
171148779dfSDimitry Andric return StringRef();
172148779dfSDimitry Andric }
173148779dfSDimitry Andric
getFixedLengthString(uint64_t * OffsetPtr,uint64_t Length,StringRef TrimChars) const174cfca06d7SDimitry Andric StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
175cfca06d7SDimitry Andric uint64_t Length,
176cfca06d7SDimitry Andric StringRef TrimChars) const {
177cfca06d7SDimitry Andric StringRef Bytes(getBytes(OffsetPtr, Length));
178cfca06d7SDimitry Andric return Bytes.trim(TrimChars);
179cfca06d7SDimitry Andric }
180cfca06d7SDimitry Andric
getBytes(uint64_t * OffsetPtr,uint64_t Length,Error * Err) const181cfca06d7SDimitry Andric StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length,
182cfca06d7SDimitry Andric Error *Err) const {
1831d5ae102SDimitry Andric ErrorAsOutParameter ErrAsOut(Err);
1841d5ae102SDimitry Andric if (isError(Err))
185cfca06d7SDimitry Andric return StringRef();
186cfca06d7SDimitry Andric
187cfca06d7SDimitry Andric if (!prepareRead(*OffsetPtr, Length, Err))
188cfca06d7SDimitry Andric return StringRef();
189cfca06d7SDimitry Andric
190cfca06d7SDimitry Andric StringRef Result = Data.substr(*OffsetPtr, Length);
191cfca06d7SDimitry Andric *OffsetPtr += Length;
192cfca06d7SDimitry Andric return Result;
193cfca06d7SDimitry Andric }
194cfca06d7SDimitry Andric
195cfca06d7SDimitry Andric template <typename T>
getLEB128(StringRef Data,uint64_t * OffsetPtr,Error * Err,T (& Decoder)(const uint8_t * p,unsigned * n,const uint8_t * end,const char ** error))196cfca06d7SDimitry Andric static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
197cfca06d7SDimitry Andric T (&Decoder)(const uint8_t *p, unsigned *n,
198cfca06d7SDimitry Andric const uint8_t *end, const char **error)) {
199cfca06d7SDimitry Andric ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Data);
200cfca06d7SDimitry Andric assert(*OffsetPtr <= Bytes.size());
201cfca06d7SDimitry Andric ErrorAsOutParameter ErrAsOut(Err);
202cfca06d7SDimitry Andric if (isError(Err))
203cfca06d7SDimitry Andric return T();
204e6d15924SDimitry Andric
205b1c73532SDimitry Andric const char *error = nullptr;
206e6d15924SDimitry Andric unsigned bytes_read;
207cfca06d7SDimitry Andric T result =
208cfca06d7SDimitry Andric Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
2091d5ae102SDimitry Andric if (error) {
2101d5ae102SDimitry Andric if (Err)
211cfca06d7SDimitry Andric *Err = createStringError(errc::illegal_byte_sequence,
212cfca06d7SDimitry Andric "unable to decode LEB128 at offset 0x%8.8" PRIx64
213cfca06d7SDimitry Andric ": %s",
214cfca06d7SDimitry Andric *OffsetPtr, error);
215cfca06d7SDimitry Andric return T();
2161d5ae102SDimitry Andric }
217cfca06d7SDimitry Andric *OffsetPtr += bytes_read;
21830815c53SDimitry Andric return result;
21930815c53SDimitry Andric }
22030815c53SDimitry Andric
getULEB128(uint64_t * offset_ptr,Error * Err) const221cfca06d7SDimitry Andric uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, Error *Err) const {
222cfca06d7SDimitry Andric return getLEB128(Data, offset_ptr, Err, decodeULEB128);
223cfca06d7SDimitry Andric }
224e6d15924SDimitry Andric
getSLEB128(uint64_t * offset_ptr,Error * Err) const225cfca06d7SDimitry Andric int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr, Error *Err) const {
226cfca06d7SDimitry Andric return getLEB128(Data, offset_ptr, Err, decodeSLEB128);
22730815c53SDimitry Andric }
2281d5ae102SDimitry Andric
skip(Cursor & C,uint64_t Length) const2291d5ae102SDimitry Andric void DataExtractor::skip(Cursor &C, uint64_t Length) const {
2301d5ae102SDimitry Andric ErrorAsOutParameter ErrAsOut(&C.Err);
2311d5ae102SDimitry Andric if (isError(&C.Err))
2321d5ae102SDimitry Andric return;
2331d5ae102SDimitry Andric
234cfca06d7SDimitry Andric if (prepareRead(C.Offset, Length, &C.Err))
2351d5ae102SDimitry Andric C.Offset += Length;
2361d5ae102SDimitry Andric }
237