1 //===-- StringExtractor.cpp -------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/StringExtractor.h"
10 
11 #include <tuple>
12 
13 #include <ctype.h>
14 #include <stdlib.h>
15 #include <string.h>
16 
// Map a single ASCII hex digit ('0'-'9', 'a'-'f', 'A'-'F') to its numeric
// value in the range [0, 15]. Any other character yields -1.
static inline int xdigit_to_sint(char ch) {
  const bool is_decimal = ('0' <= ch) && (ch <= '9');
  if (is_decimal)
    return ch - '0';

  const bool is_lower_hex = ('a' <= ch) && (ch <= 'f');
  if (is_lower_hex)
    return (ch - 'a') + 10;

  const bool is_upper_hex = ('A' <= ch) && (ch <= 'F');
  if (is_upper_hex)
    return (ch - 'A') + 10;

  return -1;
}
26 
27 // StringExtractor constructor
28 StringExtractor::StringExtractor() : m_packet(), m_index(0) {}
29 
30 StringExtractor::StringExtractor(llvm::StringRef packet_str)
31     : m_packet(), m_index(0) {
32   m_packet.assign(packet_str.begin(), packet_str.end());
33 }
34 
35 StringExtractor::StringExtractor(const char *packet_cstr)
36     : m_packet(), m_index(0) {
37   if (packet_cstr)
38     m_packet.assign(packet_cstr);
39 }
40 
41 // Destructor
42 StringExtractor::~StringExtractor() {}
43 
44 char StringExtractor::GetChar(char fail_value) {
45   if (m_index < m_packet.size()) {
46     char ch = m_packet[m_index];
47     ++m_index;
48     return ch;
49   }
50   m_index = UINT64_MAX;
51   return fail_value;
52 }
53 
54 // If a pair of valid hex digits exist at the head of the StringExtractor they
55 // are decoded into an unsigned byte and returned by this function
56 //
57 // If there is not a pair of valid hex digits at the head of the
58 // StringExtractor, it is left unchanged and -1 is returned
59 int StringExtractor::DecodeHexU8() {
60   SkipSpaces();
61   if (GetBytesLeft() < 2) {
62     return -1;
63   }
64   const int hi_nibble = xdigit_to_sint(m_packet[m_index]);
65   const int lo_nibble = xdigit_to_sint(m_packet[m_index + 1]);
66   if (hi_nibble == -1 || lo_nibble == -1) {
67     return -1;
68   }
69   m_index += 2;
70   return static_cast<uint8_t>((hi_nibble << 4) + lo_nibble);
71 }
72 
73 // Extract an unsigned character from two hex ASCII chars in the packet string,
74 // or return fail_value on failure
75 uint8_t StringExtractor::GetHexU8(uint8_t fail_value, bool set_eof_on_fail) {
76   // On success, fail_value will be overwritten with the next character in the
77   // stream
78   GetHexU8Ex(fail_value, set_eof_on_fail);
79   return fail_value;
80 }
81 
82 bool StringExtractor::GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail) {
83   int byte = DecodeHexU8();
84   if (byte == -1) {
85     if (set_eof_on_fail || m_index >= m_packet.size())
86       m_index = UINT64_MAX;
87     // ch should not be changed in case of failure
88     return false;
89   }
90   ch = static_cast<uint8_t>(byte);
91   return true;
92 }
93 
94 uint32_t StringExtractor::GetU32(uint32_t fail_value, int base) {
95   if (m_index < m_packet.size()) {
96     char *end = nullptr;
97     const char *start = m_packet.c_str();
98     const char *cstr = start + m_index;
99     uint32_t result = static_cast<uint32_t>(::strtoul(cstr, &end, base));
100 
101     if (end && end != cstr) {
102       m_index = end - start;
103       return result;
104     }
105   }
106   return fail_value;
107 }
108 
109 int32_t StringExtractor::GetS32(int32_t fail_value, int base) {
110   if (m_index < m_packet.size()) {
111     char *end = nullptr;
112     const char *start = m_packet.c_str();
113     const char *cstr = start + m_index;
114     int32_t result = static_cast<int32_t>(::strtol(cstr, &end, base));
115 
116     if (end && end != cstr) {
117       m_index = end - start;
118       return result;
119     }
120   }
121   return fail_value;
122 }
123 
124 uint64_t StringExtractor::GetU64(uint64_t fail_value, int base) {
125   if (m_index < m_packet.size()) {
126     char *end = nullptr;
127     const char *start = m_packet.c_str();
128     const char *cstr = start + m_index;
129     uint64_t result = ::strtoull(cstr, &end, base);
130 
131     if (end && end != cstr) {
132       m_index = end - start;
133       return result;
134     }
135   }
136   return fail_value;
137 }
138 
139 int64_t StringExtractor::GetS64(int64_t fail_value, int base) {
140   if (m_index < m_packet.size()) {
141     char *end = nullptr;
142     const char *start = m_packet.c_str();
143     const char *cstr = start + m_index;
144     int64_t result = ::strtoll(cstr, &end, base);
145 
146     if (end && end != cstr) {
147       m_index = end - start;
148       return result;
149     }
150   }
151   return fail_value;
152 }
153 
154 uint32_t StringExtractor::GetHexMaxU32(bool little_endian,
155                                        uint32_t fail_value) {
156   uint32_t result = 0;
157   uint32_t nibble_count = 0;
158 
159   SkipSpaces();
160   if (little_endian) {
161     uint32_t shift_amount = 0;
162     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
163       // Make sure we don't exceed the size of a uint32_t...
164       if (nibble_count >= (sizeof(uint32_t) * 2)) {
165         m_index = UINT64_MAX;
166         return fail_value;
167       }
168 
169       uint8_t nibble_lo;
170       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
171       ++m_index;
172       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
173         nibble_lo = xdigit_to_sint(m_packet[m_index]);
174         ++m_index;
175         result |= (static_cast<uint32_t>(nibble_hi) << (shift_amount + 4));
176         result |= (static_cast<uint32_t>(nibble_lo) << shift_amount);
177         nibble_count += 2;
178         shift_amount += 8;
179       } else {
180         result |= (static_cast<uint32_t>(nibble_hi) << shift_amount);
181         nibble_count += 1;
182         shift_amount += 4;
183       }
184     }
185   } else {
186     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
187       // Make sure we don't exceed the size of a uint32_t...
188       if (nibble_count >= (sizeof(uint32_t) * 2)) {
189         m_index = UINT64_MAX;
190         return fail_value;
191       }
192 
193       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
194       // Big Endian
195       result <<= 4;
196       result |= nibble;
197 
198       ++m_index;
199       ++nibble_count;
200     }
201   }
202   return result;
203 }
204 
205 uint64_t StringExtractor::GetHexMaxU64(bool little_endian,
206                                        uint64_t fail_value) {
207   uint64_t result = 0;
208   uint32_t nibble_count = 0;
209 
210   SkipSpaces();
211   if (little_endian) {
212     uint32_t shift_amount = 0;
213     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
214       // Make sure we don't exceed the size of a uint64_t...
215       if (nibble_count >= (sizeof(uint64_t) * 2)) {
216         m_index = UINT64_MAX;
217         return fail_value;
218       }
219 
220       uint8_t nibble_lo;
221       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
222       ++m_index;
223       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
224         nibble_lo = xdigit_to_sint(m_packet[m_index]);
225         ++m_index;
226         result |= (static_cast<uint64_t>(nibble_hi) << (shift_amount + 4));
227         result |= (static_cast<uint64_t>(nibble_lo) << shift_amount);
228         nibble_count += 2;
229         shift_amount += 8;
230       } else {
231         result |= (static_cast<uint64_t>(nibble_hi) << shift_amount);
232         nibble_count += 1;
233         shift_amount += 4;
234       }
235     }
236   } else {
237     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
238       // Make sure we don't exceed the size of a uint64_t...
239       if (nibble_count >= (sizeof(uint64_t) * 2)) {
240         m_index = UINT64_MAX;
241         return fail_value;
242       }
243 
244       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
245       // Big Endian
246       result <<= 4;
247       result |= nibble;
248 
249       ++m_index;
250       ++nibble_count;
251     }
252   }
253   return result;
254 }
255 
256 bool StringExtractor::ConsumeFront(const llvm::StringRef &str) {
257   llvm::StringRef S = GetStringRef();
258   if (!S.startswith(str))
259     return false;
260   else
261     m_index += str.size();
262   return true;
263 }
264 
265 size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,
266                                     uint8_t fail_fill_value) {
267   size_t bytes_extracted = 0;
268   while (!dest.empty() && GetBytesLeft() > 0) {
269     dest[0] = GetHexU8(fail_fill_value);
270     if (!IsGood())
271       break;
272     ++bytes_extracted;
273     dest = dest.drop_front();
274   }
275 
276   if (!dest.empty())
277     ::memset(dest.data(), fail_fill_value, dest.size());
278 
279   return bytes_extracted;
280 }
281 
282 // Decodes all valid hex encoded bytes at the head of the StringExtractor,
283 // limited by dst_len.
284 //
285 // Returns the number of bytes successfully decoded
286 size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest) {
287   size_t bytes_extracted = 0;
288   while (!dest.empty()) {
289     int decode = DecodeHexU8();
290     if (decode == -1)
291       break;
292     dest[0] = static_cast<uint8_t>(decode);
293     dest = dest.drop_front();
294     ++bytes_extracted;
295   }
296   return bytes_extracted;
297 }
298 
299 size_t StringExtractor::GetHexByteString(std::string &str) {
300   str.clear();
301   str.reserve(GetBytesLeft() / 2);
302   char ch;
303   while ((ch = GetHexU8()) != '\0')
304     str.append(1, ch);
305   return str.size();
306 }
307 
308 size_t StringExtractor::GetHexByteStringFixedLength(std::string &str,
309                                                     uint32_t nibble_length) {
310   str.clear();
311 
312   uint32_t nibble_count = 0;
313   for (const char *pch = Peek();
314        (nibble_count < nibble_length) && (pch != nullptr);
315        str.append(1, GetHexU8(0, false)), pch = Peek(), nibble_count += 2) {
316   }
317 
318   return str.size();
319 }
320 
321 size_t StringExtractor::GetHexByteStringTerminatedBy(std::string &str,
322                                                      char terminator) {
323   str.clear();
324   char ch;
325   while ((ch = GetHexU8(0, false)) != '\0')
326     str.append(1, ch);
327   if (Peek() && *Peek() == terminator)
328     return str.size();
329 
330   str.clear();
331   return str.size();
332 }
333 
334 bool StringExtractor::GetNameColonValue(llvm::StringRef &name,
335                                         llvm::StringRef &value) {
336   // Read something in the form of NNNN:VVVV; where NNNN is any character that
337   // is not a colon, followed by a ':' character, then a value (one or more ';'
338   // chars), followed by a ';'
339   if (m_index >= m_packet.size())
340     return fail();
341 
342   llvm::StringRef view(m_packet);
343   if (view.empty())
344     return fail();
345 
346   llvm::StringRef a, b, c, d;
347   view = view.substr(m_index);
348   std::tie(a, b) = view.split(':');
349   if (a.empty() || b.empty())
350     return fail();
351   std::tie(c, d) = b.split(';');
352   if (b == c && d.empty())
353     return fail();
354 
355   name = a;
356   value = c;
357   if (d.empty())
358     m_index = m_packet.size();
359   else {
360     size_t bytes_consumed = d.data() - view.data();
361     m_index += bytes_consumed;
362   }
363   return true;
364 }
365 
366 void StringExtractor::SkipSpaces() {
367   const size_t n = m_packet.size();
368   while (m_index < n && isspace(m_packet[m_index]))
369     ++m_index;
370 }
371