1 /* <!-- copyright */
2 /*
3 * aria2 - The high speed download utility
4 *
5 * Copyright (C) 2012 Tatsuhiro Tsujikawa
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 * In addition, as a special exception, the copyright holders give
22 * permission to link the code of portions of this program with the
23 * OpenSSL library under certain conditions as described in each
24 * individual source file, and distribute linked combinations
25 * including the two.
26 * You must obey the GNU General Public License in all respects
27 * for all of the code used other than OpenSSL. If you modify
28 * file(s) with this exception, you may extend this exception to your
29 * version of the file(s), but you are not obligated to do so. If you
30 * do not wish to do so, delete this exception statement from your
31 * version. If you delete this exception statement from all source
32 * files in the program, then also delete it here.
33 */
34 /* copyright --> */
35 #include "BencodeParser.h"
36
37 #include <cassert>
38
39 #include "StructParserStateMachine.h"
40 #include "util.h"
41
42 namespace aria2 {
43
44 namespace bittorrent {
45
46 namespace {
47 enum {
48 BENCODE_FINISH,
49 BENCODE_ERROR,
50 BENCODE_INITIAL,
51 BENCODE_VALUE,
52 BENCODE_DICT_KEY,
53 BENCODE_DICT_VAL,
54 BENCODE_LIST,
55 BENCODE_STRING_LEN,
56 BENCODE_STRING,
57 BENCODE_NUMBER_SIGN,
58 BENCODE_NUMBER,
59 BENCODE_FLOAT_NUMBER_IGNORE,
60 };
61 } // namespace
62
BencodeParser(StructParserStateMachine * psm)63 BencodeParser::BencodeParser(StructParserStateMachine* psm)
64 : psm_(psm),
65 currentState_(BENCODE_INITIAL),
66 strLength_(0),
67 numberSign_(1),
68 number_(0),
69 numConsumed_(0),
70 lastError_(0)
71 {
72 stateStack_.push(BENCODE_FINISH);
73 }
74
75 BencodeParser::~BencodeParser() = default;
76
parseUpdate(const char * data,size_t size)77 ssize_t BencodeParser::parseUpdate(const char* data, size_t size)
78 {
79 size_t i;
80 if (currentState_ == BENCODE_FINISH) {
81 return 0;
82 }
83 else if (currentState_ == BENCODE_ERROR) {
84 return lastError_;
85 }
86 for (i = 0; i < size && currentState_ != BENCODE_FINISH; ++i) {
87 char c = data[i];
88 switch (currentState_) {
89 case BENCODE_LIST:
90 if (c == 'e') {
91 onListEnd();
92 break;
93 }
94 else {
95 int rv = pushState(currentState_);
96 if (rv < 0) {
97 return rv;
98 }
99 currentState_ = BENCODE_VALUE;
100 runBeginCallback(STRUCT_ARRAY_DATA_T);
101 }
102 // Fall through
103 case BENCODE_INITIAL:
104 case BENCODE_VALUE:
105 switch (c) {
106 case 'd': {
107 currentState_ = BENCODE_DICT_KEY;
108 runBeginCallback(STRUCT_DICT_T);
109 break;
110 }
111 case 'l':
112 currentState_ = BENCODE_LIST;
113 runBeginCallback(STRUCT_ARRAY_T);
114 break;
115 case 'i':
116 number_ = 0;
117 numberSign_ = 1;
118 numConsumed_ = 0;
119 currentState_ = BENCODE_NUMBER_SIGN;
120 runBeginCallback(STRUCT_NUMBER_T);
121 break;
122 default:
123 if (util::isDigit(c)) {
124 strLength_ = c - '0';
125 numConsumed_ = 1;
126 currentState_ = BENCODE_STRING_LEN;
127 runBeginCallback(STRUCT_STRING_T);
128 break;
129 }
130 else {
131 currentState_ = BENCODE_ERROR;
132 return lastError_ = ERR_UNEXPECTED_CHAR_BEFORE_VAL;
133 }
134 }
135 break;
136 case BENCODE_DICT_KEY: {
137 if (c == 'e') {
138 onDictEnd();
139 break;
140 }
141 int rv = pushState(currentState_);
142 if (rv < 0) {
143 return rv;
144 }
145 strLength_ = 0;
146 numConsumed_ = 0;
147 runBeginCallback(STRUCT_DICT_KEY_T);
148 currentState_ = BENCODE_STRING_LEN;
149 // Fall through
150 }
151 case BENCODE_STRING_LEN: {
152 size_t j;
153 for (j = i; j < size && in(data[j], '0', '9'); ++j) {
154 if ((INT64_MAX - (data[j] - '0')) / 10 < strLength_) {
155 currentState_ = BENCODE_ERROR;
156 return lastError_ = ERR_STRING_LENGTH_OUT_OF_RANGE;
157 }
158 strLength_ *= 10;
159 strLength_ += data[j] - '0';
160 }
161 numConsumed_ += j - i;
162 if (j != size) {
163 if (data[j] != ':' || numConsumed_ == 0) {
164 currentState_ = BENCODE_ERROR;
165 return lastError_ = ERR_INVALID_STRING_LENGTH;
166 }
167 i = j;
168 currentState_ = BENCODE_STRING;
169 if (strLength_ == 0) {
170 runCharactersCallback(nullptr, 0);
171 onStringEnd();
172 }
173 }
174 else {
175 i = j - 1;
176 }
177 break;
178 }
179 case BENCODE_STRING: {
180 size_t nread = std::min(static_cast<int64_t>(size - i), strLength_);
181 runCharactersCallback(&data[i], nread);
182 strLength_ -= nread;
183 i += nread - 1;
184 if (strLength_ == 0) {
185 onStringEnd();
186 }
187 break;
188 }
189 case BENCODE_NUMBER_SIGN: {
190 switch (c) {
191 case '+':
192 numberSign_ = 1;
193 currentState_ = BENCODE_NUMBER;
194 break;
195 case '-':
196 numberSign_ = -1;
197 currentState_ = BENCODE_NUMBER;
198 break;
199 default:
200 if (util::isDigit(c)) {
201 number_ = c - '0';
202 numConsumed_ = 1;
203 currentState_ = BENCODE_NUMBER;
204 }
205 }
206 break;
207 }
208 case BENCODE_NUMBER: {
209 size_t j;
210 for (j = i; j < size && in(data[j], '0', '9'); ++j) {
211 if ((INT64_MAX - (data[j] - '0')) / 10 < number_) {
212 currentState_ = BENCODE_ERROR;
213 return lastError_ = ERR_NUMBER_OUT_OF_RANGE;
214 }
215 number_ *= 10;
216 number_ += data[j] - '0';
217 }
218 numConsumed_ += j - i;
219 if (j != size) {
220 if (numConsumed_ == 0) {
221 currentState_ = BENCODE_ERROR;
222 return lastError_ = ERR_INVALID_NUMBER;
223 }
224
225 auto c = data[j];
226 if (util::isDigit(c) || c == '.' || c == 'E' || c == '+' || c == '-') {
227 // some torrent generator adds floating point number in
228 // scientific notation (e.g., -1.134E+3) in integer field.
229 // In this case, just skip these bytes until we find 'e'.
230 number_ = 0;
231 numConsumed_ = 0;
232 currentState_ = BENCODE_FLOAT_NUMBER_IGNORE;
233
234 i = j;
235
236 break;
237 }
238
239 if (c != 'e') {
240 currentState_ = BENCODE_ERROR;
241 return lastError_ = ERR_INVALID_NUMBER;
242 }
243
244 i = j;
245
246 onNumberEnd();
247 }
248 else {
249 i = j - 1;
250 }
251 break;
252 }
253 case BENCODE_FLOAT_NUMBER_IGNORE: {
254 auto c = data[i];
255 if (util::isDigit(c) || c == '.' || c == 'E' || c == '+' || c == '-') {
256 continue;
257 }
258
259 if (c != 'e') {
260 currentState_ = BENCODE_ERROR;
261 return lastError_ = ERR_INVALID_FLOAT_NUMBER;
262 }
263
264 onNumberEnd();
265 break;
266 }
267 }
268 }
269 return i;
270 }
271
parseFinal(const char * data,size_t len)272 ssize_t BencodeParser::parseFinal(const char* data, size_t len)
273 {
274 ssize_t rv;
275 rv = parseUpdate(data, len);
276 if (rv >= 0) {
277 if (currentState_ != BENCODE_FINISH && currentState_ != BENCODE_INITIAL) {
278 rv = ERR_PREMATURE_DATA;
279 }
280 }
281 return rv;
282 }
283
reset()284 void BencodeParser::reset()
285 {
286 psm_->reset();
287 currentState_ = BENCODE_INITIAL;
288 lastError_ = 0;
289 while (!stateStack_.empty()) {
290 stateStack_.pop();
291 }
292 stateStack_.push(BENCODE_FINISH);
293 }
294
onStringEnd()295 void BencodeParser::onStringEnd()
296 {
297 runEndCallback(stateTop() == BENCODE_DICT_KEY ? STRUCT_DICT_KEY_T
298 : STRUCT_STRING_T);
299 onValueEnd();
300 }
301
onNumberEnd()302 void BencodeParser::onNumberEnd()
303 {
304 runNumberCallback(numberSign_ * number_);
305 runEndCallback(STRUCT_NUMBER_T);
306 onValueEnd();
307 }
308
onDictEnd()309 void BencodeParser::onDictEnd()
310 {
311 runEndCallback(STRUCT_DICT_T);
312 onValueEnd();
313 }
314
onListEnd()315 void BencodeParser::onListEnd()
316 {
317 runEndCallback(STRUCT_ARRAY_T);
318 onValueEnd();
319 }
320
onValueEnd()321 void BencodeParser::onValueEnd()
322 {
323 switch (stateTop()) {
324 case BENCODE_DICT_KEY:
325 popState();
326 pushState(BENCODE_DICT_VAL);
327 currentState_ = BENCODE_VALUE;
328 runBeginCallback(STRUCT_DICT_DATA_T);
329 break;
330 case BENCODE_DICT_VAL:
331 runEndCallback(STRUCT_DICT_DATA_T);
332 popState();
333 currentState_ = BENCODE_DICT_KEY;
334 break;
335 case BENCODE_LIST:
336 runEndCallback(STRUCT_ARRAY_DATA_T);
337 popState();
338 currentState_ = BENCODE_LIST;
339 break;
340 default:
341 assert(stateTop() == BENCODE_FINISH);
342 currentState_ = stateTop();
343 break;
344 }
345 }
346
pushState(int state)347 int BencodeParser::pushState(int state)
348 {
349 if (stateStack_.size() >= 50) {
350 return ERR_STRUCTURE_TOO_DEEP;
351 }
352 else {
353 stateStack_.push(state);
354 return 0;
355 }
356 }
357
stateTop() const358 int BencodeParser::stateTop() const { return stateStack_.top(); }
359
popState()360 int BencodeParser::popState()
361 {
362 int state = stateStack_.top();
363 stateStack_.pop();
364 return state;
365 }
366
runBeginCallback(int elementType)367 void BencodeParser::runBeginCallback(int elementType)
368 {
369 // switch(elementType) {
370 // case STRUCT_DICT_T:
371 // std::cout << "object start" << std::endl;
372 // break;
373 // case STRUCT_DICT_KEY_T:
374 // std::cout << "object key start" << std::endl;
375 // break;
376 // case STRUCT_DICT_DATA_T:
377 // std::cout << "object data start" << std::endl;
378 // break;
379 // case STRUCT_ARRAY_T:
380 // std::cout << "array start" << std::endl;
381 // break;
382 // case STRUCT_ARRAY_DATA_T:
383 // std::cout << "array data start" << std::endl;
384 // break;
385 // case STRUCT_STRING_T:
386 // std::cout << "string start" << std::endl;
387 // break;
388 // case STRUCT_NUMBER_T:
389 // std::cout << "number start" << std::endl;
390 // break;
391 // case STRUCT_BOOL_T:
392 // std::cout << "bool start" << std::endl;
393 // break;
394 // case STRUCT_NULL_T:
395 // std::cout << "null start" << std::endl;
396 // break;
397 // default:
398 // break;
399 // };
400 psm_->beginElement(elementType);
401 }
402
runEndCallback(int elementType)403 void BencodeParser::runEndCallback(int elementType)
404 {
405 // switch(elementType) {
406 // case STRUCT_DICT_T:
407 // std::cout << "object end" << std::endl;
408 // break;
409 // case STRUCT_DICT_KEY_T:
410 // std::cout << "object key end" << std::endl;
411 // break;
412 // case STRUCT_DICT_DATA_T:
413 // std::cout << "object data end" << std::endl;
414 // break;
415 // case STRUCT_ARRAY_T:
416 // std::cout << "array end" << std::endl;
417 // break;
418 // case STRUCT_ARRAY_DATA_T:
419 // std::cout << "array data end" << std::endl;
420 // break;
421 // case STRUCT_STRING_T:
422 // std::cout << "string end" << std::endl;
423 // break;
424 // case STRUCT_NUMBER_T:
425 // std::cout << "number end" << std::endl;
426 // break;
427 // case STRUCT_BOOL_T:
428 // std::cout << "bool end" << std::endl;
429 // break;
430 // case STRUCT_NULL_T:
431 // std::cout << "null end" << std::endl;
432 // break;
433 // default:
434 // break;
435 // };
436 psm_->endElement(elementType);
437 }
438
runCharactersCallback(const char * data,size_t len)439 void BencodeParser::runCharactersCallback(const char* data, size_t len)
440 {
441 psm_->charactersCallback(data, len);
442 }
443
runNumberCallback(int64_t number)444 void BencodeParser::runNumberCallback(int64_t number)
445 {
446 psm_->numberCallback(number, 0, 0);
447 }
448
449 } // namespace bittorrent
450
451 } // namespace aria2
452