1 /* <!-- copyright */
2 /*
3  * aria2 - The high speed download utility
4  *
5  * Copyright (C) 2012 Tatsuhiro Tsujikawa
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  *
21  * In addition, as a special exception, the copyright holders give
22  * permission to link the code of portions of this program with the
23  * OpenSSL library under certain conditions as described in each
24  * individual source file, and distribute linked combinations
25  * including the two.
26  * You must obey the GNU General Public License in all respects
27  * for all of the code used other than OpenSSL.  If you modify
28  * file(s) with this exception, you may extend this exception to your
29  * version of the file(s), but you are not obligated to do so.  If you
30  * do not wish to do so, delete this exception statement from your
31  * version.  If you delete this exception statement from all source
32  * files in the program, then also delete it here.
33  */
34 /* copyright --> */
35 #include "BencodeParser.h"
36 
37 #include <cassert>
38 
39 #include "StructParserStateMachine.h"
40 #include "util.h"
41 
42 namespace aria2 {
43 
44 namespace bittorrent {
45 
46 namespace {
47 enum {
48   BENCODE_FINISH,
49   BENCODE_ERROR,
50   BENCODE_INITIAL,
51   BENCODE_VALUE,
52   BENCODE_DICT_KEY,
53   BENCODE_DICT_VAL,
54   BENCODE_LIST,
55   BENCODE_STRING_LEN,
56   BENCODE_STRING,
57   BENCODE_NUMBER_SIGN,
58   BENCODE_NUMBER,
59   BENCODE_FLOAT_NUMBER_IGNORE,
60 };
61 } // namespace
62 
BencodeParser(StructParserStateMachine * psm)63 BencodeParser::BencodeParser(StructParserStateMachine* psm)
64     : psm_(psm),
65       currentState_(BENCODE_INITIAL),
66       strLength_(0),
67       numberSign_(1),
68       number_(0),
69       numConsumed_(0),
70       lastError_(0)
71 {
72   stateStack_.push(BENCODE_FINISH);
73 }
74 
75 BencodeParser::~BencodeParser() = default;
76 
parseUpdate(const char * data,size_t size)77 ssize_t BencodeParser::parseUpdate(const char* data, size_t size)
78 {
79   size_t i;
80   if (currentState_ == BENCODE_FINISH) {
81     return 0;
82   }
83   else if (currentState_ == BENCODE_ERROR) {
84     return lastError_;
85   }
86   for (i = 0; i < size && currentState_ != BENCODE_FINISH; ++i) {
87     char c = data[i];
88     switch (currentState_) {
89     case BENCODE_LIST:
90       if (c == 'e') {
91         onListEnd();
92         break;
93       }
94       else {
95         int rv = pushState(currentState_);
96         if (rv < 0) {
97           return rv;
98         }
99         currentState_ = BENCODE_VALUE;
100         runBeginCallback(STRUCT_ARRAY_DATA_T);
101       }
102     // Fall through
103     case BENCODE_INITIAL:
104     case BENCODE_VALUE:
105       switch (c) {
106       case 'd': {
107         currentState_ = BENCODE_DICT_KEY;
108         runBeginCallback(STRUCT_DICT_T);
109         break;
110       }
111       case 'l':
112         currentState_ = BENCODE_LIST;
113         runBeginCallback(STRUCT_ARRAY_T);
114         break;
115       case 'i':
116         number_ = 0;
117         numberSign_ = 1;
118         numConsumed_ = 0;
119         currentState_ = BENCODE_NUMBER_SIGN;
120         runBeginCallback(STRUCT_NUMBER_T);
121         break;
122       default:
123         if (util::isDigit(c)) {
124           strLength_ = c - '0';
125           numConsumed_ = 1;
126           currentState_ = BENCODE_STRING_LEN;
127           runBeginCallback(STRUCT_STRING_T);
128           break;
129         }
130         else {
131           currentState_ = BENCODE_ERROR;
132           return lastError_ = ERR_UNEXPECTED_CHAR_BEFORE_VAL;
133         }
134       }
135       break;
136     case BENCODE_DICT_KEY: {
137       if (c == 'e') {
138         onDictEnd();
139         break;
140       }
141       int rv = pushState(currentState_);
142       if (rv < 0) {
143         return rv;
144       }
145       strLength_ = 0;
146       numConsumed_ = 0;
147       runBeginCallback(STRUCT_DICT_KEY_T);
148       currentState_ = BENCODE_STRING_LEN;
149       // Fall through
150     }
151     case BENCODE_STRING_LEN: {
152       size_t j;
153       for (j = i; j < size && in(data[j], '0', '9'); ++j) {
154         if ((INT64_MAX - (data[j] - '0')) / 10 < strLength_) {
155           currentState_ = BENCODE_ERROR;
156           return lastError_ = ERR_STRING_LENGTH_OUT_OF_RANGE;
157         }
158         strLength_ *= 10;
159         strLength_ += data[j] - '0';
160       }
161       numConsumed_ += j - i;
162       if (j != size) {
163         if (data[j] != ':' || numConsumed_ == 0) {
164           currentState_ = BENCODE_ERROR;
165           return lastError_ = ERR_INVALID_STRING_LENGTH;
166         }
167         i = j;
168         currentState_ = BENCODE_STRING;
169         if (strLength_ == 0) {
170           runCharactersCallback(nullptr, 0);
171           onStringEnd();
172         }
173       }
174       else {
175         i = j - 1;
176       }
177       break;
178     }
179     case BENCODE_STRING: {
180       size_t nread = std::min(static_cast<int64_t>(size - i), strLength_);
181       runCharactersCallback(&data[i], nread);
182       strLength_ -= nread;
183       i += nread - 1;
184       if (strLength_ == 0) {
185         onStringEnd();
186       }
187       break;
188     }
189     case BENCODE_NUMBER_SIGN: {
190       switch (c) {
191       case '+':
192         numberSign_ = 1;
193         currentState_ = BENCODE_NUMBER;
194         break;
195       case '-':
196         numberSign_ = -1;
197         currentState_ = BENCODE_NUMBER;
198         break;
199       default:
200         if (util::isDigit(c)) {
201           number_ = c - '0';
202           numConsumed_ = 1;
203           currentState_ = BENCODE_NUMBER;
204         }
205       }
206       break;
207     }
208     case BENCODE_NUMBER: {
209       size_t j;
210       for (j = i; j < size && in(data[j], '0', '9'); ++j) {
211         if ((INT64_MAX - (data[j] - '0')) / 10 < number_) {
212           currentState_ = BENCODE_ERROR;
213           return lastError_ = ERR_NUMBER_OUT_OF_RANGE;
214         }
215         number_ *= 10;
216         number_ += data[j] - '0';
217       }
218       numConsumed_ += j - i;
219       if (j != size) {
220         if (numConsumed_ == 0) {
221           currentState_ = BENCODE_ERROR;
222           return lastError_ = ERR_INVALID_NUMBER;
223         }
224 
225         auto c = data[j];
226         if (util::isDigit(c) || c == '.' || c == 'E' || c == '+' || c == '-') {
227           // some torrent generator adds floating point number in
228           // scientific notation (e.g., -1.134E+3) in integer field.
229           // In this case, just skip these bytes until we find 'e'.
230           number_ = 0;
231           numConsumed_ = 0;
232           currentState_ = BENCODE_FLOAT_NUMBER_IGNORE;
233 
234           i = j;
235 
236           break;
237         }
238 
239         if (c != 'e') {
240           currentState_ = BENCODE_ERROR;
241           return lastError_ = ERR_INVALID_NUMBER;
242         }
243 
244         i = j;
245 
246         onNumberEnd();
247       }
248       else {
249         i = j - 1;
250       }
251       break;
252     }
253     case BENCODE_FLOAT_NUMBER_IGNORE: {
254       auto c = data[i];
255       if (util::isDigit(c) || c == '.' || c == 'E' || c == '+' || c == '-') {
256         continue;
257       }
258 
259       if (c != 'e') {
260         currentState_ = BENCODE_ERROR;
261         return lastError_ = ERR_INVALID_FLOAT_NUMBER;
262       }
263 
264       onNumberEnd();
265       break;
266     }
267     }
268   }
269   return i;
270 }
271 
parseFinal(const char * data,size_t len)272 ssize_t BencodeParser::parseFinal(const char* data, size_t len)
273 {
274   ssize_t rv;
275   rv = parseUpdate(data, len);
276   if (rv >= 0) {
277     if (currentState_ != BENCODE_FINISH && currentState_ != BENCODE_INITIAL) {
278       rv = ERR_PREMATURE_DATA;
279     }
280   }
281   return rv;
282 }
283 
reset()284 void BencodeParser::reset()
285 {
286   psm_->reset();
287   currentState_ = BENCODE_INITIAL;
288   lastError_ = 0;
289   while (!stateStack_.empty()) {
290     stateStack_.pop();
291   }
292   stateStack_.push(BENCODE_FINISH);
293 }
294 
onStringEnd()295 void BencodeParser::onStringEnd()
296 {
297   runEndCallback(stateTop() == BENCODE_DICT_KEY ? STRUCT_DICT_KEY_T
298                                                 : STRUCT_STRING_T);
299   onValueEnd();
300 }
301 
onNumberEnd()302 void BencodeParser::onNumberEnd()
303 {
304   runNumberCallback(numberSign_ * number_);
305   runEndCallback(STRUCT_NUMBER_T);
306   onValueEnd();
307 }
308 
onDictEnd()309 void BencodeParser::onDictEnd()
310 {
311   runEndCallback(STRUCT_DICT_T);
312   onValueEnd();
313 }
314 
onListEnd()315 void BencodeParser::onListEnd()
316 {
317   runEndCallback(STRUCT_ARRAY_T);
318   onValueEnd();
319 }
320 
onValueEnd()321 void BencodeParser::onValueEnd()
322 {
323   switch (stateTop()) {
324   case BENCODE_DICT_KEY:
325     popState();
326     pushState(BENCODE_DICT_VAL);
327     currentState_ = BENCODE_VALUE;
328     runBeginCallback(STRUCT_DICT_DATA_T);
329     break;
330   case BENCODE_DICT_VAL:
331     runEndCallback(STRUCT_DICT_DATA_T);
332     popState();
333     currentState_ = BENCODE_DICT_KEY;
334     break;
335   case BENCODE_LIST:
336     runEndCallback(STRUCT_ARRAY_DATA_T);
337     popState();
338     currentState_ = BENCODE_LIST;
339     break;
340   default:
341     assert(stateTop() == BENCODE_FINISH);
342     currentState_ = stateTop();
343     break;
344   }
345 }
346 
pushState(int state)347 int BencodeParser::pushState(int state)
348 {
349   if (stateStack_.size() >= 50) {
350     return ERR_STRUCTURE_TOO_DEEP;
351   }
352   else {
353     stateStack_.push(state);
354     return 0;
355   }
356 }
357 
stateTop() const358 int BencodeParser::stateTop() const { return stateStack_.top(); }
359 
popState()360 int BencodeParser::popState()
361 {
362   int state = stateStack_.top();
363   stateStack_.pop();
364   return state;
365 }
366 
runBeginCallback(int elementType)367 void BencodeParser::runBeginCallback(int elementType)
368 {
369   // switch(elementType) {
370   // case STRUCT_DICT_T:
371   //   std::cout << "object start" << std::endl;
372   //   break;
373   // case STRUCT_DICT_KEY_T:
374   //   std::cout << "object key start" << std::endl;
375   //   break;
376   // case STRUCT_DICT_DATA_T:
377   //   std::cout << "object data start" << std::endl;
378   //   break;
379   // case STRUCT_ARRAY_T:
380   //   std::cout << "array start" << std::endl;
381   //   break;
382   // case STRUCT_ARRAY_DATA_T:
383   //   std::cout << "array data start" << std::endl;
384   //   break;
385   // case STRUCT_STRING_T:
386   //   std::cout << "string start" << std::endl;
387   //   break;
388   // case STRUCT_NUMBER_T:
389   //   std::cout << "number start" << std::endl;
390   //   break;
391   // case STRUCT_BOOL_T:
392   //   std::cout << "bool start" << std::endl;
393   //   break;
394   // case STRUCT_NULL_T:
395   //   std::cout << "null start" << std::endl;
396   //   break;
397   // default:
398   //   break;
399   // };
400   psm_->beginElement(elementType);
401 }
402 
runEndCallback(int elementType)403 void BencodeParser::runEndCallback(int elementType)
404 {
405   // switch(elementType) {
406   // case STRUCT_DICT_T:
407   //   std::cout << "object end" << std::endl;
408   //   break;
409   // case STRUCT_DICT_KEY_T:
410   //   std::cout << "object key end" << std::endl;
411   //   break;
412   // case STRUCT_DICT_DATA_T:
413   //   std::cout << "object data end" << std::endl;
414   //   break;
415   // case STRUCT_ARRAY_T:
416   //   std::cout << "array end" << std::endl;
417   //   break;
418   // case STRUCT_ARRAY_DATA_T:
419   //   std::cout << "array data end" << std::endl;
420   //   break;
421   // case STRUCT_STRING_T:
422   //   std::cout << "string end" << std::endl;
423   //   break;
424   // case STRUCT_NUMBER_T:
425   //   std::cout << "number end" << std::endl;
426   //   break;
427   // case STRUCT_BOOL_T:
428   //   std::cout << "bool end" << std::endl;
429   //   break;
430   // case STRUCT_NULL_T:
431   //   std::cout << "null end" << std::endl;
432   //   break;
433   // default:
434   //   break;
435   // };
436   psm_->endElement(elementType);
437 }
438 
runCharactersCallback(const char * data,size_t len)439 void BencodeParser::runCharactersCallback(const char* data, size_t len)
440 {
441   psm_->charactersCallback(data, len);
442 }
443 
runNumberCallback(int64_t number)444 void BencodeParser::runNumberCallback(int64_t number)
445 {
446   psm_->numberCallback(number, 0, 0);
447 }
448 
449 } // namespace bittorrent
450 
451 } // namespace aria2
452