1cfca06d7SDimitry Andric //===-- StringExtractor.cpp -----------------------------------------------===//
2f034231aSEd Maste //
35f29bb8aSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45f29bb8aSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
55f29bb8aSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f034231aSEd Maste //
7f034231aSEd Maste //===----------------------------------------------------------------------===//
8f034231aSEd Maste
9027f1c96SDimitry Andric #include "lldb/Utility/StringExtractor.h"
10cfca06d7SDimitry Andric #include "llvm/ADT/StringExtras.h"
11f034231aSEd Maste
1214f1b3e8SDimitry Andric #include <tuple>
1374a628f7SDimitry Andric
14344a3780SDimitry Andric #include <cctype>
15344a3780SDimitry Andric #include <cstdlib>
16344a3780SDimitry Andric #include <cstring>
17f034231aSEd Maste
// Converts a single ASCII hex digit to its numeric value.
//
// Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', or -1 when ch is not a hex
// digit.
static inline int xdigit_to_sint(char ch) {
  const bool is_decimal = ch >= '0' && ch <= '9';
  if (is_decimal)
    return ch - '0';

  const bool is_lower_hex = ch >= 'a' && ch <= 'f';
  if (is_lower_hex)
    return ch - 'a' + 10;

  const bool is_upper_hex = ch >= 'A' && ch <= 'F';
  return is_upper_hex ? ch - 'A' + 10 : -1;
}
27f034231aSEd Maste
28f034231aSEd Maste // StringExtractor constructor
StringExtractor()29344a3780SDimitry Andric StringExtractor::StringExtractor() : m_packet() {}
3014f1b3e8SDimitry Andric
StringExtractor(llvm::StringRef packet_str)31145449b1SDimitry Andric StringExtractor::StringExtractor(llvm::StringRef packet_str) : m_packet() {
3214f1b3e8SDimitry Andric m_packet.assign(packet_str.begin(), packet_str.end());
33f034231aSEd Maste }
34f034231aSEd Maste
StringExtractor(const char * packet_cstr)35145449b1SDimitry Andric StringExtractor::StringExtractor(const char *packet_cstr) : m_packet() {
36f034231aSEd Maste if (packet_cstr)
37f034231aSEd Maste m_packet.assign(packet_cstr);
38f034231aSEd Maste }
39f034231aSEd Maste
40f034231aSEd Maste // Destructor
41344a3780SDimitry Andric StringExtractor::~StringExtractor() = default;
42f034231aSEd Maste
GetChar(char fail_value)4314f1b3e8SDimitry Andric char StringExtractor::GetChar(char fail_value) {
4414f1b3e8SDimitry Andric if (m_index < m_packet.size()) {
45f034231aSEd Maste char ch = m_packet[m_index];
46f034231aSEd Maste ++m_index;
47f034231aSEd Maste return ch;
48f034231aSEd Maste }
49f034231aSEd Maste m_index = UINT64_MAX;
50f034231aSEd Maste return fail_value;
51f034231aSEd Maste }
52f034231aSEd Maste
53f73363f1SDimitry Andric // If a pair of valid hex digits exist at the head of the StringExtractor they
54f73363f1SDimitry Andric // are decoded into an unsigned byte and returned by this function
55205afe67SEd Maste //
56205afe67SEd Maste // If there is not a pair of valid hex digits at the head of the
57205afe67SEd Maste // StringExtractor, it is left unchanged and -1 is returned
DecodeHexU8()5814f1b3e8SDimitry Andric int StringExtractor::DecodeHexU8() {
59f3fbd1c0SDimitry Andric SkipSpaces();
6014f1b3e8SDimitry Andric if (GetBytesLeft() < 2) {
61205afe67SEd Maste return -1;
62205afe67SEd Maste }
63205afe67SEd Maste const int hi_nibble = xdigit_to_sint(m_packet[m_index]);
64205afe67SEd Maste const int lo_nibble = xdigit_to_sint(m_packet[m_index + 1]);
6514f1b3e8SDimitry Andric if (hi_nibble == -1 || lo_nibble == -1) {
66205afe67SEd Maste return -1;
67205afe67SEd Maste }
68205afe67SEd Maste m_index += 2;
695f29bb8aSDimitry Andric return static_cast<uint8_t>((hi_nibble << 4) + lo_nibble);
70205afe67SEd Maste }
71205afe67SEd Maste
72f73363f1SDimitry Andric // Extract an unsigned character from two hex ASCII chars in the packet string,
73f73363f1SDimitry Andric // or return fail_value on failure
GetHexU8(uint8_t fail_value,bool set_eof_on_fail)7414f1b3e8SDimitry Andric uint8_t StringExtractor::GetHexU8(uint8_t fail_value, bool set_eof_on_fail) {
75f73363f1SDimitry Andric // On success, fail_value will be overwritten with the next character in the
76f73363f1SDimitry Andric // stream
77e81d9d49SDimitry Andric GetHexU8Ex(fail_value, set_eof_on_fail);
78e81d9d49SDimitry Andric return fail_value;
79e81d9d49SDimitry Andric }
80e81d9d49SDimitry Andric
GetHexU8Ex(uint8_t & ch,bool set_eof_on_fail)8114f1b3e8SDimitry Andric bool StringExtractor::GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail) {
82205afe67SEd Maste int byte = DecodeHexU8();
8314f1b3e8SDimitry Andric if (byte == -1) {
84f034231aSEd Maste if (set_eof_on_fail || m_index >= m_packet.size())
85f034231aSEd Maste m_index = UINT64_MAX;
86e81d9d49SDimitry Andric // ch should not be changed in case of failure
87e81d9d49SDimitry Andric return false;
88f034231aSEd Maste }
895f29bb8aSDimitry Andric ch = static_cast<uint8_t>(byte);
90e81d9d49SDimitry Andric return true;
91205afe67SEd Maste }
92f034231aSEd Maste
GetU32(uint32_t fail_value,int base)9314f1b3e8SDimitry Andric uint32_t StringExtractor::GetU32(uint32_t fail_value, int base) {
9414f1b3e8SDimitry Andric if (m_index < m_packet.size()) {
950cac4ca3SEd Maste char *end = nullptr;
96f034231aSEd Maste const char *start = m_packet.c_str();
97f21a844fSEd Maste const char *cstr = start + m_index;
985e95aa85SEd Maste uint32_t result = static_cast<uint32_t>(::strtoul(cstr, &end, base));
99f034231aSEd Maste
10014f1b3e8SDimitry Andric if (end && end != cstr) {
101f21a844fSEd Maste m_index = end - start;
102f21a844fSEd Maste return result;
103f21a844fSEd Maste }
104f21a844fSEd Maste }
105f21a844fSEd Maste return fail_value;
106f21a844fSEd Maste }
107f21a844fSEd Maste
GetS32(int32_t fail_value,int base)10814f1b3e8SDimitry Andric int32_t StringExtractor::GetS32(int32_t fail_value, int base) {
10914f1b3e8SDimitry Andric if (m_index < m_packet.size()) {
1100cac4ca3SEd Maste char *end = nullptr;
111f21a844fSEd Maste const char *start = m_packet.c_str();
112f21a844fSEd Maste const char *cstr = start + m_index;
1135e95aa85SEd Maste int32_t result = static_cast<int32_t>(::strtol(cstr, &end, base));
114f21a844fSEd Maste
11514f1b3e8SDimitry Andric if (end && end != cstr) {
116f21a844fSEd Maste m_index = end - start;
117f21a844fSEd Maste return result;
118f21a844fSEd Maste }
119f21a844fSEd Maste }
120f21a844fSEd Maste return fail_value;
121f21a844fSEd Maste }
122f21a844fSEd Maste
GetU64(uint64_t fail_value,int base)12314f1b3e8SDimitry Andric uint64_t StringExtractor::GetU64(uint64_t fail_value, int base) {
12414f1b3e8SDimitry Andric if (m_index < m_packet.size()) {
1250cac4ca3SEd Maste char *end = nullptr;
126f21a844fSEd Maste const char *start = m_packet.c_str();
127f21a844fSEd Maste const char *cstr = start + m_index;
128f21a844fSEd Maste uint64_t result = ::strtoull(cstr, &end, base);
129f21a844fSEd Maste
13014f1b3e8SDimitry Andric if (end && end != cstr) {
131f21a844fSEd Maste m_index = end - start;
132f21a844fSEd Maste return result;
133f21a844fSEd Maste }
134f21a844fSEd Maste }
135f21a844fSEd Maste return fail_value;
136f21a844fSEd Maste }
137f21a844fSEd Maste
GetS64(int64_t fail_value,int base)13814f1b3e8SDimitry Andric int64_t StringExtractor::GetS64(int64_t fail_value, int base) {
13914f1b3e8SDimitry Andric if (m_index < m_packet.size()) {
1400cac4ca3SEd Maste char *end = nullptr;
141f21a844fSEd Maste const char *start = m_packet.c_str();
142f21a844fSEd Maste const char *cstr = start + m_index;
143f21a844fSEd Maste int64_t result = ::strtoll(cstr, &end, base);
144f21a844fSEd Maste
14514f1b3e8SDimitry Andric if (end && end != cstr) {
146f034231aSEd Maste m_index = end - start;
147f034231aSEd Maste return result;
148f034231aSEd Maste }
149f034231aSEd Maste }
150f034231aSEd Maste return fail_value;
151f034231aSEd Maste }
152f034231aSEd Maste
GetHexMaxU32(bool little_endian,uint32_t fail_value)15314f1b3e8SDimitry Andric uint32_t StringExtractor::GetHexMaxU32(bool little_endian,
15414f1b3e8SDimitry Andric uint32_t fail_value) {
155f034231aSEd Maste uint32_t result = 0;
156f034231aSEd Maste uint32_t nibble_count = 0;
157f034231aSEd Maste
158f3fbd1c0SDimitry Andric SkipSpaces();
15914f1b3e8SDimitry Andric if (little_endian) {
160f034231aSEd Maste uint32_t shift_amount = 0;
16114f1b3e8SDimitry Andric while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
162f034231aSEd Maste // Make sure we don't exceed the size of a uint32_t...
16314f1b3e8SDimitry Andric if (nibble_count >= (sizeof(uint32_t) * 2)) {
164f034231aSEd Maste m_index = UINT64_MAX;
165f034231aSEd Maste return fail_value;
166f034231aSEd Maste }
167f034231aSEd Maste
168f034231aSEd Maste uint8_t nibble_lo;
169f034231aSEd Maste uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
170f034231aSEd Maste ++m_index;
17114f1b3e8SDimitry Andric if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
172f034231aSEd Maste nibble_lo = xdigit_to_sint(m_packet[m_index]);
173f034231aSEd Maste ++m_index;
1745f29bb8aSDimitry Andric result |= (static_cast<uint32_t>(nibble_hi) << (shift_amount + 4));
1755f29bb8aSDimitry Andric result |= (static_cast<uint32_t>(nibble_lo) << shift_amount);
176f034231aSEd Maste nibble_count += 2;
177f034231aSEd Maste shift_amount += 8;
17814f1b3e8SDimitry Andric } else {
1795f29bb8aSDimitry Andric result |= (static_cast<uint32_t>(nibble_hi) << shift_amount);
180f034231aSEd Maste nibble_count += 1;
181f034231aSEd Maste shift_amount += 4;
182f034231aSEd Maste }
183f034231aSEd Maste }
18414f1b3e8SDimitry Andric } else {
18514f1b3e8SDimitry Andric while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
186f034231aSEd Maste // Make sure we don't exceed the size of a uint32_t...
18714f1b3e8SDimitry Andric if (nibble_count >= (sizeof(uint32_t) * 2)) {
188f034231aSEd Maste m_index = UINT64_MAX;
189f034231aSEd Maste return fail_value;
190f034231aSEd Maste }
191f034231aSEd Maste
192f034231aSEd Maste uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
193f034231aSEd Maste // Big Endian
194f034231aSEd Maste result <<= 4;
195f034231aSEd Maste result |= nibble;
196f034231aSEd Maste
197f034231aSEd Maste ++m_index;
198f034231aSEd Maste ++nibble_count;
199f034231aSEd Maste }
200f034231aSEd Maste }
201f034231aSEd Maste return result;
202f034231aSEd Maste }
203f034231aSEd Maste
GetHexMaxU64(bool little_endian,uint64_t fail_value)20414f1b3e8SDimitry Andric uint64_t StringExtractor::GetHexMaxU64(bool little_endian,
20514f1b3e8SDimitry Andric uint64_t fail_value) {
206f034231aSEd Maste uint64_t result = 0;
207f034231aSEd Maste uint32_t nibble_count = 0;
208f034231aSEd Maste
209f3fbd1c0SDimitry Andric SkipSpaces();
21014f1b3e8SDimitry Andric if (little_endian) {
211f034231aSEd Maste uint32_t shift_amount = 0;
21214f1b3e8SDimitry Andric while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
213f034231aSEd Maste // Make sure we don't exceed the size of a uint64_t...
21414f1b3e8SDimitry Andric if (nibble_count >= (sizeof(uint64_t) * 2)) {
215f034231aSEd Maste m_index = UINT64_MAX;
216f034231aSEd Maste return fail_value;
217f034231aSEd Maste }
218f034231aSEd Maste
219f034231aSEd Maste uint8_t nibble_lo;
220f034231aSEd Maste uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
221f034231aSEd Maste ++m_index;
22214f1b3e8SDimitry Andric if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
223f034231aSEd Maste nibble_lo = xdigit_to_sint(m_packet[m_index]);
224f034231aSEd Maste ++m_index;
2255f29bb8aSDimitry Andric result |= (static_cast<uint64_t>(nibble_hi) << (shift_amount + 4));
2265f29bb8aSDimitry Andric result |= (static_cast<uint64_t>(nibble_lo) << shift_amount);
227f034231aSEd Maste nibble_count += 2;
228f034231aSEd Maste shift_amount += 8;
22914f1b3e8SDimitry Andric } else {
2305f29bb8aSDimitry Andric result |= (static_cast<uint64_t>(nibble_hi) << shift_amount);
231f034231aSEd Maste nibble_count += 1;
232f034231aSEd Maste shift_amount += 4;
233f034231aSEd Maste }
234f034231aSEd Maste }
23514f1b3e8SDimitry Andric } else {
23614f1b3e8SDimitry Andric while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
237f034231aSEd Maste // Make sure we don't exceed the size of a uint64_t...
23814f1b3e8SDimitry Andric if (nibble_count >= (sizeof(uint64_t) * 2)) {
239f034231aSEd Maste m_index = UINT64_MAX;
240f034231aSEd Maste return fail_value;
241f034231aSEd Maste }
242f034231aSEd Maste
243f034231aSEd Maste uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
244f034231aSEd Maste // Big Endian
245f034231aSEd Maste result <<= 4;
246f034231aSEd Maste result |= nibble;
247f034231aSEd Maste
248f034231aSEd Maste ++m_index;
249f034231aSEd Maste ++nibble_count;
250f034231aSEd Maste }
251f034231aSEd Maste }
252f034231aSEd Maste return result;
253f034231aSEd Maste }
254f034231aSEd Maste
ConsumeFront(const llvm::StringRef & str)25561b440f5SDimitry Andric bool StringExtractor::ConsumeFront(const llvm::StringRef &str) {
25661b440f5SDimitry Andric llvm::StringRef S = GetStringRef();
257312c0ed1SDimitry Andric if (!S.starts_with(str))
25861b440f5SDimitry Andric return false;
25961b440f5SDimitry Andric else
26061b440f5SDimitry Andric m_index += str.size();
26161b440f5SDimitry Andric return true;
26261b440f5SDimitry Andric }
26361b440f5SDimitry Andric
GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,uint8_t fail_fill_value)26414f1b3e8SDimitry Andric size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,
26514f1b3e8SDimitry Andric uint8_t fail_fill_value) {
266f034231aSEd Maste size_t bytes_extracted = 0;
26714f1b3e8SDimitry Andric while (!dest.empty() && GetBytesLeft() > 0) {
26814f1b3e8SDimitry Andric dest[0] = GetHexU8(fail_fill_value);
26914f1b3e8SDimitry Andric if (!IsGood())
270f034231aSEd Maste break;
27114f1b3e8SDimitry Andric ++bytes_extracted;
27214f1b3e8SDimitry Andric dest = dest.drop_front();
273f034231aSEd Maste }
274f034231aSEd Maste
27514f1b3e8SDimitry Andric if (!dest.empty())
27614f1b3e8SDimitry Andric ::memset(dest.data(), fail_fill_value, dest.size());
277f034231aSEd Maste
278f034231aSEd Maste return bytes_extracted;
279f034231aSEd Maste }
280f034231aSEd Maste
281f73363f1SDimitry Andric // Decodes all valid hex encoded bytes at the head of the StringExtractor,
282f73363f1SDimitry Andric // limited by dst_len.
283205afe67SEd Maste //
284205afe67SEd Maste // Returns the number of bytes successfully decoded
GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest)28514f1b3e8SDimitry Andric size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest) {
286205afe67SEd Maste size_t bytes_extracted = 0;
28714f1b3e8SDimitry Andric while (!dest.empty()) {
288205afe67SEd Maste int decode = DecodeHexU8();
289205afe67SEd Maste if (decode == -1)
290205afe67SEd Maste break;
2915f29bb8aSDimitry Andric dest[0] = static_cast<uint8_t>(decode);
29214f1b3e8SDimitry Andric dest = dest.drop_front();
29314f1b3e8SDimitry Andric ++bytes_extracted;
294205afe67SEd Maste }
295205afe67SEd Maste return bytes_extracted;
296205afe67SEd Maste }
297f034231aSEd Maste
GetHexByteString(std::string & str)29814f1b3e8SDimitry Andric size_t StringExtractor::GetHexByteString(std::string &str) {
299f034231aSEd Maste str.clear();
30014f1b3e8SDimitry Andric str.reserve(GetBytesLeft() / 2);
301f034231aSEd Maste char ch;
302f034231aSEd Maste while ((ch = GetHexU8()) != '\0')
303f034231aSEd Maste str.append(1, ch);
304f034231aSEd Maste return str.size();
305f034231aSEd Maste }
306f034231aSEd Maste
GetHexByteStringFixedLength(std::string & str,uint32_t nibble_length)30714f1b3e8SDimitry Andric size_t StringExtractor::GetHexByteStringFixedLength(std::string &str,
30814f1b3e8SDimitry Andric uint32_t nibble_length) {
3090cac4ca3SEd Maste str.clear();
3100cac4ca3SEd Maste
3110cac4ca3SEd Maste uint32_t nibble_count = 0;
31214f1b3e8SDimitry Andric for (const char *pch = Peek();
31314f1b3e8SDimitry Andric (nibble_count < nibble_length) && (pch != nullptr);
31414f1b3e8SDimitry Andric str.append(1, GetHexU8(0, false)), pch = Peek(), nibble_count += 2) {
31514f1b3e8SDimitry Andric }
3160cac4ca3SEd Maste
3170cac4ca3SEd Maste return str.size();
3180cac4ca3SEd Maste }
3190cac4ca3SEd Maste
GetHexByteStringTerminatedBy(std::string & str,char terminator)32014f1b3e8SDimitry Andric size_t StringExtractor::GetHexByteStringTerminatedBy(std::string &str,
32114f1b3e8SDimitry Andric char terminator) {
322f21a844fSEd Maste str.clear();
323f21a844fSEd Maste char ch;
324f21a844fSEd Maste while ((ch = GetHexU8(0, false)) != '\0')
325f21a844fSEd Maste str.append(1, ch);
326f21a844fSEd Maste if (Peek() && *Peek() == terminator)
327f21a844fSEd Maste return str.size();
3280cac4ca3SEd Maste
329f21a844fSEd Maste str.clear();
330f21a844fSEd Maste return str.size();
331f21a844fSEd Maste }
332f21a844fSEd Maste
GetNameColonValue(llvm::StringRef & name,llvm::StringRef & value)33314f1b3e8SDimitry Andric bool StringExtractor::GetNameColonValue(llvm::StringRef &name,
33414f1b3e8SDimitry Andric llvm::StringRef &value) {
335f73363f1SDimitry Andric // Read something in the form of NNNN:VVVV; where NNNN is any character that
336f73363f1SDimitry Andric // is not a colon, followed by a ':' character, then a value (one or more ';'
337f73363f1SDimitry Andric // chars), followed by a ';'
33814f1b3e8SDimitry Andric if (m_index >= m_packet.size())
33914f1b3e8SDimitry Andric return fail();
34014f1b3e8SDimitry Andric
34114f1b3e8SDimitry Andric llvm::StringRef view(m_packet);
34214f1b3e8SDimitry Andric if (view.empty())
34314f1b3e8SDimitry Andric return fail();
34414f1b3e8SDimitry Andric
34514f1b3e8SDimitry Andric llvm::StringRef a, b, c, d;
34614f1b3e8SDimitry Andric view = view.substr(m_index);
34714f1b3e8SDimitry Andric std::tie(a, b) = view.split(':');
34814f1b3e8SDimitry Andric if (a.empty() || b.empty())
34914f1b3e8SDimitry Andric return fail();
35014f1b3e8SDimitry Andric std::tie(c, d) = b.split(';');
35114f1b3e8SDimitry Andric if (b == c && d.empty())
35214f1b3e8SDimitry Andric return fail();
35314f1b3e8SDimitry Andric
35414f1b3e8SDimitry Andric name = a;
35514f1b3e8SDimitry Andric value = c;
35614f1b3e8SDimitry Andric if (d.empty())
35714f1b3e8SDimitry Andric m_index = m_packet.size();
35814f1b3e8SDimitry Andric else {
35914f1b3e8SDimitry Andric size_t bytes_consumed = d.data() - view.data();
36014f1b3e8SDimitry Andric m_index += bytes_consumed;
36114f1b3e8SDimitry Andric }
362f034231aSEd Maste return true;
363f034231aSEd Maste }
364027f1c96SDimitry Andric
SkipSpaces()36514f1b3e8SDimitry Andric void StringExtractor::SkipSpaces() {
366027f1c96SDimitry Andric const size_t n = m_packet.size();
367cfca06d7SDimitry Andric while (m_index < n && llvm::isSpace(m_packet[m_index]))
368027f1c96SDimitry Andric ++m_index;
369027f1c96SDimitry Andric }
370