1 //
2 // Copyright Aliaksei Levin (levlam@telegram.org), Arseny Smirnov (arseny30@gmail.com) 2014-2021
3 //
4 // Distributed under the Boost Software License, Version 1.0. (See accompanying
5 // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
6 //
7 #pragma once
8 
9 #include "td/utils/buffer.h"
10 #include "td/utils/common.h"
11 #include "td/utils/format.h"
12 #include "td/utils/logging.h"
13 #include "td/utils/Slice.h"
14 #include "td/utils/SliceBuilder.h"
15 #include "td/utils/Status.h"
16 #include "td/utils/UInt.h"
17 #include "td/utils/utf8.h"
18 
19 #include <array>
20 #include <cstring>
21 #include <limits>
22 #include <memory>
23 #include <string>
24 
25 namespace td {
26 
27 class TlParser {
28   const unsigned char *data = nullptr;
29   size_t data_len = 0;
30   size_t left_len = 0;
31   size_t error_pos = std::numeric_limits<size_t>::max();
32   std::string error;
33 
34   std::unique_ptr<int32[]> data_buf;
35   static constexpr size_t SMALL_DATA_ARRAY_SIZE = 6;
36   std::array<int32, SMALL_DATA_ARRAY_SIZE> small_data_array;
37 
38   alignas(4) static const unsigned char empty_data[sizeof(UInt256)];
39 
40  public:
41   explicit TlParser(Slice slice);
42 
43   TlParser(const TlParser &other) = delete;
44   TlParser &operator=(const TlParser &other) = delete;
45 
46   void set_error(const string &error_message);
47 
get_error()48   const char *get_error() const {
49     if (error.empty()) {
50       return nullptr;
51     }
52     return error.c_str();
53   }
54 
get_error_pos()55   size_t get_error_pos() const {
56     return error_pos;
57   }
58 
get_status()59   Status get_status() const {
60     if (error.empty()) {
61       return Status::OK();
62     }
63     return Status::Error(PSLICE() << error << " at " << error_pos);
64   }
65 
check_len(const size_t len)66   void check_len(const size_t len) {
67     if (unlikely(left_len < len)) {
68       set_error("Not enough data to read");
69     } else {
70       left_len -= len;
71     }
72   }
73 
can_prefetch_int()74   bool can_prefetch_int() const {
75     return get_left_len() >= sizeof(int32);
76   }
77 
prefetch_int_unsafe()78   int32 prefetch_int_unsafe() const {
79     int32 result;
80     std::memcpy(&result, data, sizeof(int32));
81     return result;
82   }
83 
fetch_int_unsafe()84   int32 fetch_int_unsafe() {
85     int32 result;
86     std::memcpy(&result, data, sizeof(int32));
87     data += sizeof(int32);
88     return result;
89   }
90 
fetch_int()91   int32 fetch_int() {
92     check_len(sizeof(int32));
93     return fetch_int_unsafe();
94   }
95 
fetch_long_unsafe()96   int64 fetch_long_unsafe() {
97     int64 result;
98     std::memcpy(&result, data, sizeof(int64));
99     data += sizeof(int64);
100     return result;
101   }
102 
fetch_long()103   int64 fetch_long() {
104     check_len(sizeof(int64));
105     return fetch_long_unsafe();
106   }
107 
fetch_double_unsafe()108   double fetch_double_unsafe() {
109     double result;
110     std::memcpy(&result, data, sizeof(double));
111     data += sizeof(double);
112     return result;
113   }
114 
fetch_double()115   double fetch_double() {
116     check_len(sizeof(double));
117     return fetch_double_unsafe();
118   }
119 
120   template <class T>
fetch_binary_unsafe()121   T fetch_binary_unsafe() {
122     T result;
123     std::memcpy(&result, data, sizeof(T));
124     data += sizeof(T);
125     return result;
126   }
127 
128   template <class T>
fetch_binary()129   T fetch_binary() {
130     static_assert(sizeof(T) <= sizeof(empty_data), "too big fetch_binary");
131     //static_assert(sizeof(T) % sizeof(int32) == 0, "wrong call to fetch_binary");
132     check_len(sizeof(T));
133     return fetch_binary_unsafe<T>();
134   }
135 
136   template <class T>
fetch_string()137   T fetch_string() {
138     check_len(sizeof(int32));
139     size_t result_len = *data;
140     const unsigned char *result_begin;
141     size_t result_aligned_len;
142     if (result_len < 254) {
143       result_begin = data + 1;
144       result_aligned_len = (result_len >> 2) << 2;
145       data += sizeof(int32);
146     } else if (result_len == 254) {
147       result_len = data[1] + (data[2] << 8) + (data[3] << 16);
148       result_begin = data + 4;
149       result_aligned_len = ((result_len + 3) >> 2) << 2;
150       data += sizeof(int32);
151     } else {
152       check_len(sizeof(int32));
153       auto result_len_uint64 = static_cast<uint64>(data[1]) + (static_cast<uint64>(data[2]) << 8) +
154                                (static_cast<uint64>(data[3]) << 16) + (static_cast<uint64>(data[4]) << 24) +
155                                (static_cast<uint64>(data[5]) << 32) + (static_cast<uint64>(data[6]) << 40) +
156                                (static_cast<uint64>(data[7]) << 48);
157       if (result_len_uint64 > std::numeric_limits<size_t>::max() - 3) {
158         set_error("Too big string found");
159         return T();
160       }
161       result_len = static_cast<size_t>(result_len_uint64);
162       result_begin = data + 8;
163       result_aligned_len = ((result_len + 3) >> 2) << 2;
164       data += sizeof(int64);
165     }
166     check_len(result_aligned_len);
167     if (!error.empty()) {
168       return T();
169     }
170     data += result_aligned_len;
171     return T(reinterpret_cast<const char *>(result_begin), result_len);
172   }
173 
174   template <class T>
fetch_string_raw(const size_t size)175   T fetch_string_raw(const size_t size) {
176     //CHECK(size % sizeof(int32) == 0);
177     check_len(size);
178     if (!error.empty()) {
179       return T();
180     }
181     auto result = reinterpret_cast<const char *>(data);
182     data += size;
183     return T(result, size);
184   }
185 
fetch_end()186   void fetch_end() {
187     if (left_len) {
188       set_error("Too much data to fetch");
189     }
190   }
191 
get_left_len()192   size_t get_left_len() const {
193     return left_len;
194   }
195 };
196 
197 class TlBufferParser : public TlParser {
198  public:
TlBufferParser(const BufferSlice * buffer_slice)199   explicit TlBufferParser(const BufferSlice *buffer_slice) : TlParser(buffer_slice->as_slice()), parent_(buffer_slice) {
200   }
201 
202   template <class T>
fetch_string()203   T fetch_string() {
204     auto result = TlParser::fetch_string<T>();
205     for (auto &c : result) {
206       if (c == '\0') {
207         c = ' ';
208       }
209     }
210     if (check_utf8(result)) {
211       return result;
212     }
213     CHECK(!result.empty());
214     LOG(WARNING) << "Wrong UTF-8 string [[" << result << "]] in " << format::as_hex_dump<4>(parent_->as_slice());
215 
216     // trying to remove last character
217     size_t new_size = result.size() - 1;
218     while (new_size != 0 && !is_utf8_character_first_code_unit(static_cast<unsigned char>(result[new_size]))) {
219       new_size--;
220     }
221     result.resize(new_size);
222     if (check_utf8(result)) {
223       return result;
224     }
225 
226     return T();
227   }
228 
229   template <class T>
fetch_string_raw(const size_t size)230   T fetch_string_raw(const size_t size) {
231     return TlParser::fetch_string_raw<T>(size);
232   }
233 
234  private:
235   const BufferSlice *parent_;
236 
237   BufferSlice as_buffer_slice(Slice slice);
238 };
239 
240 template <>
241 inline BufferSlice TlBufferParser::fetch_string<BufferSlice>() {
242   return as_buffer_slice(TlParser::fetch_string<Slice>());
243 }
244 
245 template <>
246 inline BufferSlice TlBufferParser::fetch_string_raw<BufferSlice>(const size_t size) {
247   return as_buffer_slice(TlParser::fetch_string_raw<Slice>(size));
248 }
249 
250 }  // namespace td
251