1 //===-- StringExtractor.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "lldb/Utility/StringExtractor.h"
10 #include "llvm/ADT/StringExtras.h"
11 
12 #include <tuple>
13 
14 #include <cctype>
15 #include <cstdlib>
16 #include <cstring>
17 
// Convert one ASCII hexadecimal digit to its numeric value.
//
// Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for every other
// character.
static inline int xdigit_to_sint(char ch) {
  if ('0' <= ch && ch <= '9')
    return ch - '0';
  if ('a' <= ch && ch <= 'f')
    return (ch - 'a') + 10;
  if ('A' <= ch && ch <= 'F')
    return (ch - 'A') + 10;
  return -1;
}
27 
28 // StringExtractor constructor
29 StringExtractor::StringExtractor() : m_packet() {}
30 
31 StringExtractor::StringExtractor(llvm::StringRef packet_str)
32     : m_packet(), m_index(0) {
33   m_packet.assign(packet_str.begin(), packet_str.end());
34 }
35 
36 StringExtractor::StringExtractor(const char *packet_cstr)
37     : m_packet(), m_index(0) {
38   if (packet_cstr)
39     m_packet.assign(packet_cstr);
40 }
41 
// Destructor: defaulted, so no custom cleanup is performed here beyond the
// compiler-generated destruction of the members.
StringExtractor::~StringExtractor() = default;
44 
45 char StringExtractor::GetChar(char fail_value) {
46   if (m_index < m_packet.size()) {
47     char ch = m_packet[m_index];
48     ++m_index;
49     return ch;
50   }
51   m_index = UINT64_MAX;
52   return fail_value;
53 }
54 
55 // If a pair of valid hex digits exist at the head of the StringExtractor they
56 // are decoded into an unsigned byte and returned by this function
57 //
58 // If there is not a pair of valid hex digits at the head of the
59 // StringExtractor, it is left unchanged and -1 is returned
60 int StringExtractor::DecodeHexU8() {
61   SkipSpaces();
62   if (GetBytesLeft() < 2) {
63     return -1;
64   }
65   const int hi_nibble = xdigit_to_sint(m_packet[m_index]);
66   const int lo_nibble = xdigit_to_sint(m_packet[m_index + 1]);
67   if (hi_nibble == -1 || lo_nibble == -1) {
68     return -1;
69   }
70   m_index += 2;
71   return static_cast<uint8_t>((hi_nibble << 4) + lo_nibble);
72 }
73 
74 // Extract an unsigned character from two hex ASCII chars in the packet string,
75 // or return fail_value on failure
76 uint8_t StringExtractor::GetHexU8(uint8_t fail_value, bool set_eof_on_fail) {
77   // On success, fail_value will be overwritten with the next character in the
78   // stream
79   GetHexU8Ex(fail_value, set_eof_on_fail);
80   return fail_value;
81 }
82 
83 bool StringExtractor::GetHexU8Ex(uint8_t &ch, bool set_eof_on_fail) {
84   int byte = DecodeHexU8();
85   if (byte == -1) {
86     if (set_eof_on_fail || m_index >= m_packet.size())
87       m_index = UINT64_MAX;
88     // ch should not be changed in case of failure
89     return false;
90   }
91   ch = static_cast<uint8_t>(byte);
92   return true;
93 }
94 
95 uint32_t StringExtractor::GetU32(uint32_t fail_value, int base) {
96   if (m_index < m_packet.size()) {
97     char *end = nullptr;
98     const char *start = m_packet.c_str();
99     const char *cstr = start + m_index;
100     uint32_t result = static_cast<uint32_t>(::strtoul(cstr, &end, base));
101 
102     if (end && end != cstr) {
103       m_index = end - start;
104       return result;
105     }
106   }
107   return fail_value;
108 }
109 
110 int32_t StringExtractor::GetS32(int32_t fail_value, int base) {
111   if (m_index < m_packet.size()) {
112     char *end = nullptr;
113     const char *start = m_packet.c_str();
114     const char *cstr = start + m_index;
115     int32_t result = static_cast<int32_t>(::strtol(cstr, &end, base));
116 
117     if (end && end != cstr) {
118       m_index = end - start;
119       return result;
120     }
121   }
122   return fail_value;
123 }
124 
125 uint64_t StringExtractor::GetU64(uint64_t fail_value, int base) {
126   if (m_index < m_packet.size()) {
127     char *end = nullptr;
128     const char *start = m_packet.c_str();
129     const char *cstr = start + m_index;
130     uint64_t result = ::strtoull(cstr, &end, base);
131 
132     if (end && end != cstr) {
133       m_index = end - start;
134       return result;
135     }
136   }
137   return fail_value;
138 }
139 
140 int64_t StringExtractor::GetS64(int64_t fail_value, int base) {
141   if (m_index < m_packet.size()) {
142     char *end = nullptr;
143     const char *start = m_packet.c_str();
144     const char *cstr = start + m_index;
145     int64_t result = ::strtoll(cstr, &end, base);
146 
147     if (end && end != cstr) {
148       m_index = end - start;
149       return result;
150     }
151   }
152   return fail_value;
153 }
154 
155 uint32_t StringExtractor::GetHexMaxU32(bool little_endian,
156                                        uint32_t fail_value) {
157   uint32_t result = 0;
158   uint32_t nibble_count = 0;
159 
160   SkipSpaces();
161   if (little_endian) {
162     uint32_t shift_amount = 0;
163     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
164       // Make sure we don't exceed the size of a uint32_t...
165       if (nibble_count >= (sizeof(uint32_t) * 2)) {
166         m_index = UINT64_MAX;
167         return fail_value;
168       }
169 
170       uint8_t nibble_lo;
171       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
172       ++m_index;
173       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
174         nibble_lo = xdigit_to_sint(m_packet[m_index]);
175         ++m_index;
176         result |= (static_cast<uint32_t>(nibble_hi) << (shift_amount + 4));
177         result |= (static_cast<uint32_t>(nibble_lo) << shift_amount);
178         nibble_count += 2;
179         shift_amount += 8;
180       } else {
181         result |= (static_cast<uint32_t>(nibble_hi) << shift_amount);
182         nibble_count += 1;
183         shift_amount += 4;
184       }
185     }
186   } else {
187     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
188       // Make sure we don't exceed the size of a uint32_t...
189       if (nibble_count >= (sizeof(uint32_t) * 2)) {
190         m_index = UINT64_MAX;
191         return fail_value;
192       }
193 
194       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
195       // Big Endian
196       result <<= 4;
197       result |= nibble;
198 
199       ++m_index;
200       ++nibble_count;
201     }
202   }
203   return result;
204 }
205 
206 uint64_t StringExtractor::GetHexMaxU64(bool little_endian,
207                                        uint64_t fail_value) {
208   uint64_t result = 0;
209   uint32_t nibble_count = 0;
210 
211   SkipSpaces();
212   if (little_endian) {
213     uint32_t shift_amount = 0;
214     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
215       // Make sure we don't exceed the size of a uint64_t...
216       if (nibble_count >= (sizeof(uint64_t) * 2)) {
217         m_index = UINT64_MAX;
218         return fail_value;
219       }
220 
221       uint8_t nibble_lo;
222       uint8_t nibble_hi = xdigit_to_sint(m_packet[m_index]);
223       ++m_index;
224       if (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
225         nibble_lo = xdigit_to_sint(m_packet[m_index]);
226         ++m_index;
227         result |= (static_cast<uint64_t>(nibble_hi) << (shift_amount + 4));
228         result |= (static_cast<uint64_t>(nibble_lo) << shift_amount);
229         nibble_count += 2;
230         shift_amount += 8;
231       } else {
232         result |= (static_cast<uint64_t>(nibble_hi) << shift_amount);
233         nibble_count += 1;
234         shift_amount += 4;
235       }
236     }
237   } else {
238     while (m_index < m_packet.size() && ::isxdigit(m_packet[m_index])) {
239       // Make sure we don't exceed the size of a uint64_t...
240       if (nibble_count >= (sizeof(uint64_t) * 2)) {
241         m_index = UINT64_MAX;
242         return fail_value;
243       }
244 
245       uint8_t nibble = xdigit_to_sint(m_packet[m_index]);
246       // Big Endian
247       result <<= 4;
248       result |= nibble;
249 
250       ++m_index;
251       ++nibble_count;
252     }
253   }
254   return result;
255 }
256 
257 bool StringExtractor::ConsumeFront(const llvm::StringRef &str) {
258   llvm::StringRef S = GetStringRef();
259   if (!S.startswith(str))
260     return false;
261   else
262     m_index += str.size();
263   return true;
264 }
265 
266 size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef<uint8_t> dest,
267                                     uint8_t fail_fill_value) {
268   size_t bytes_extracted = 0;
269   while (!dest.empty() && GetBytesLeft() > 0) {
270     dest[0] = GetHexU8(fail_fill_value);
271     if (!IsGood())
272       break;
273     ++bytes_extracted;
274     dest = dest.drop_front();
275   }
276 
277   if (!dest.empty())
278     ::memset(dest.data(), fail_fill_value, dest.size());
279 
280   return bytes_extracted;
281 }
282 
283 // Decodes all valid hex encoded bytes at the head of the StringExtractor,
284 // limited by dst_len.
285 //
286 // Returns the number of bytes successfully decoded
287 size_t StringExtractor::GetHexBytesAvail(llvm::MutableArrayRef<uint8_t> dest) {
288   size_t bytes_extracted = 0;
289   while (!dest.empty()) {
290     int decode = DecodeHexU8();
291     if (decode == -1)
292       break;
293     dest[0] = static_cast<uint8_t>(decode);
294     dest = dest.drop_front();
295     ++bytes_extracted;
296   }
297   return bytes_extracted;
298 }
299 
300 size_t StringExtractor::GetHexByteString(std::string &str) {
301   str.clear();
302   str.reserve(GetBytesLeft() / 2);
303   char ch;
304   while ((ch = GetHexU8()) != '\0')
305     str.append(1, ch);
306   return str.size();
307 }
308 
309 size_t StringExtractor::GetHexByteStringFixedLength(std::string &str,
310                                                     uint32_t nibble_length) {
311   str.clear();
312 
313   uint32_t nibble_count = 0;
314   for (const char *pch = Peek();
315        (nibble_count < nibble_length) && (pch != nullptr);
316        str.append(1, GetHexU8(0, false)), pch = Peek(), nibble_count += 2) {
317   }
318 
319   return str.size();
320 }
321 
322 size_t StringExtractor::GetHexByteStringTerminatedBy(std::string &str,
323                                                      char terminator) {
324   str.clear();
325   char ch;
326   while ((ch = GetHexU8(0, false)) != '\0')
327     str.append(1, ch);
328   if (Peek() && *Peek() == terminator)
329     return str.size();
330 
331   str.clear();
332   return str.size();
333 }
334 
335 bool StringExtractor::GetNameColonValue(llvm::StringRef &name,
336                                         llvm::StringRef &value) {
337   // Read something in the form of NNNN:VVVV; where NNNN is any character that
338   // is not a colon, followed by a ':' character, then a value (one or more ';'
339   // chars), followed by a ';'
340   if (m_index >= m_packet.size())
341     return fail();
342 
343   llvm::StringRef view(m_packet);
344   if (view.empty())
345     return fail();
346 
347   llvm::StringRef a, b, c, d;
348   view = view.substr(m_index);
349   std::tie(a, b) = view.split(':');
350   if (a.empty() || b.empty())
351     return fail();
352   std::tie(c, d) = b.split(';');
353   if (b == c && d.empty())
354     return fail();
355 
356   name = a;
357   value = c;
358   if (d.empty())
359     m_index = m_packet.size();
360   else {
361     size_t bytes_consumed = d.data() - view.data();
362     m_index += bytes_consumed;
363   }
364   return true;
365 }
366 
367 void StringExtractor::SkipSpaces() {
368   const size_t n = m_packet.size();
369   while (m_index < n && llvm::isSpace(m_packet[m_index]))
370     ++m_index;
371 }
372