1 /* <!-- copyright */
2 /*
3  * aria2 - The high speed download utility
4  *
5  * Copyright (C) 2012 Tatsuhiro Tsujikawa
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  *
21  * In addition, as a special exception, the copyright holders give
22  * permission to link the code of portions of this program with the
23  * OpenSSL library under certain conditions as described in each
24  * individual source file, and distribute linked combinations
25  * including the two.
26  * You must obey the GNU General Public License in all respects
27  * for all of the code used other than OpenSSL.  If you modify
28  * file(s) with this exception, you may extend this exception to your
29  * version of the file(s), but you are not obligated to do so.  If you
30  * do not wish to do so, delete this exception statement from your
31  * version.  If you delete this exception statement from all source
32  * files in the program, then also delete it here.
33  */
34 /* copyright --> */
35 #include "HttpHeaderProcessor.h"
36 
37 #include <vector>
38 
39 #include "HttpHeader.h"
40 #include "message.h"
41 #include "util.h"
42 #include "DlRetryEx.h"
43 #include "DlAbortEx.h"
44 #include "A2STR.h"
45 #include "error_code.h"
46 
47 namespace aria2 {
48 
namespace {
// States of the header parser's state machine (see
// HttpHeaderProcessor::parse()).  A PREV_xxx state means "waiting for
// the first byte of xxx"; the matching xxx state means "at least one
// byte of xxx has been read and more may follow".
enum {
  // Server mode: Request-Line = method SP request-target SP HTTP-version
  PREV_METHOD,
  METHOD,
  PREV_PATH,
  PATH,
  PREV_REQ_VERSION,
  REQ_VERSION,
  // Client mode: Status-Line = HTTP-version SP status-code [SP reason-phrase]
  PREV_RES_VERSION,
  RES_VERSION,
  PREV_STATUS_CODE,
  STATUS_CODE,
  PREV_REASON_PHRASE,
  REASON_PHRASE,
  // name/value header fields
  // PREV_EOL: a CR ended the current line; the next byte must be LF.
  PREV_EOL,
  PREV_FIELD_NAME,
  FIELD_NAME,
  PREV_FIELD_VALUE,
  FIELD_VALUE,
  // End of header
  // PREV_EOH: a CR started the empty line that terminates the header;
  // the next byte must be LF.
  PREV_EOH,
  HEADERS_COMPLETE
};
} // namespace
76 
HttpHeaderProcessor(ParserMode mode)77 HttpHeaderProcessor::HttpHeaderProcessor(ParserMode mode)
78     : mode_(mode),
79       state_(mode == CLIENT_PARSER ? PREV_RES_VERSION : PREV_METHOD),
80       lastBytesProcessed_(0),
81       lastFieldHdKey_(HttpHeader::MAX_INTERESTING_HEADER),
82       result_(make_unique<HttpHeader>())
83 {
84 }
85 
// Defaulted destructor, defined out of line in this translation unit.
// NOTE(review): presumably kept here so that HttpHeader is a complete
// type where the std::unique_ptr<HttpHeader> member is destroyed --
// confirm against the class declaration.
HttpHeaderProcessor::~HttpHeaderProcessor() = default;
87 
88 namespace {
getToken(std::string & buf,const unsigned char * data,size_t length,size_t off)89 size_t getToken(std::string& buf, const unsigned char* data, size_t length,
90                 size_t off)
91 {
92   size_t j = off;
93   while (j < length && !util::isLws(data[j]) && !util::isCRLF(data[j])) {
94     ++j;
95   }
96   buf.append(&data[off], &data[j]);
97   return j - 1;
98 }
99 } // namespace
100 
101 namespace {
getFieldNameToken(std::string & buf,const unsigned char * data,size_t length,size_t off)102 size_t getFieldNameToken(std::string& buf, const unsigned char* data,
103                          size_t length, size_t off)
104 {
105   size_t j = off;
106   while (j < length && data[j] != ':' && !util::isLws(data[j]) &&
107          !util::isCRLF(data[j])) {
108     ++j;
109   }
110   buf.append(&data[off], &data[j]);
111   return j - 1;
112 }
113 } // namespace
114 
115 namespace {
getText(std::string & buf,const unsigned char * data,size_t length,size_t off)116 size_t getText(std::string& buf, const unsigned char* data, size_t length,
117                size_t off)
118 {
119   size_t j = off;
120   while (j < length && !util::isCRLF(data[j])) {
121     ++j;
122   }
123   buf.append(&data[off], &data[j]);
124   return j - 1;
125 }
126 } // namespace
127 
128 namespace {
ignoreText(std::string & buf,const unsigned char * data,size_t length,size_t off)129 size_t ignoreText(std::string& buf, const unsigned char* data, size_t length,
130                   size_t off)
131 {
132   size_t j = off;
133   while (j < length && !util::isCRLF(data[j])) {
134     ++j;
135   }
136   return j - 1;
137 }
138 } // namespace
139 
parse(const unsigned char * data,size_t length)140 bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length)
141 {
142   size_t i;
143   lastBytesProcessed_ = 0;
144   for (i = 0; i < length; ++i) {
145     unsigned char c = data[i];
146     switch (state_) {
147     case PREV_METHOD:
148       if (util::isLws(c) || util::isCRLF(c)) {
149         throw DL_ABORT_EX("Bad Request-Line: missing method");
150       }
151 
152       i = getToken(buf_, data, length, i);
153       state_ = METHOD;
154       break;
155 
156     case METHOD:
157       if (util::isLws(c)) {
158         result_->setMethod(buf_);
159         buf_.clear();
160         state_ = PREV_PATH;
161         break;
162       }
163 
164       if (util::isCRLF(c)) {
165         throw DL_ABORT_EX("Bad Request-Line: missing request-target");
166       }
167 
168       i = getToken(buf_, data, length, i);
169       break;
170 
171     case PREV_PATH:
172       if (util::isCRLF(c)) {
173         throw DL_ABORT_EX("Bad Request-Line: missing request-target");
174       }
175 
176       if (util::isLws(c)) {
177         break;
178       }
179 
180       i = getToken(buf_, data, length, i);
181       state_ = PATH;
182       break;
183 
184     case PATH:
185       if (util::isLws(c)) {
186         result_->setRequestPath(buf_);
187         buf_.clear();
188         state_ = PREV_REQ_VERSION;
189         break;
190       }
191 
192       if (util::isCRLF(c)) {
193         throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
194       }
195 
196       i = getToken(buf_, data, length, i);
197       break;
198 
199     case PREV_REQ_VERSION:
200       if (util::isCRLF(c)) {
201         throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
202       }
203 
204       if (util::isLws(c)) {
205         break;
206       }
207 
208       i = getToken(buf_, data, length, i);
209       state_ = REQ_VERSION;
210       break;
211 
212     case REQ_VERSION:
213       if (util::isCRLF(c)) {
214         result_->setVersion(buf_);
215         buf_.clear();
216         state_ = c == '\n' ? PREV_FIELD_NAME : PREV_EOL;
217         break;
218       }
219 
220       if (util::isLws(c)) {
221         throw DL_ABORT_EX("Bad Request-Line: LWS after HTTP-version");
222       }
223 
224       i = getToken(buf_, data, length, i);
225       break;
226 
227     case PREV_RES_VERSION:
228       if (util::isLws(c) || util::isCRLF(c)) {
229         throw DL_ABORT_EX("Bad Status-Line: missing HTTP-version");
230       }
231 
232       i = getToken(buf_, data, length, i);
233       state_ = RES_VERSION;
234       break;
235 
236     case RES_VERSION:
237       if (util::isLws(c)) {
238         result_->setVersion(buf_);
239         buf_.clear();
240         state_ = PREV_STATUS_CODE;
241         break;
242       }
243 
244       if (util::isCRLF(c)) {
245         throw DL_ABORT_EX("Bad Status-Line: missing status-code");
246       }
247 
248       break;
249 
250     case PREV_STATUS_CODE:
251       if (util::isCRLF(c)) {
252         throw DL_ABORT_EX("Bad Status-Line: missing status-code");
253       }
254 
255       if (!util::isLws(c)) {
256         state_ = STATUS_CODE;
257         i = getToken(buf_, data, length, i);
258       }
259 
260       break;
261 
262     case STATUS_CODE:
263       if (!util::isLws(c) && !util::isCRLF(c)) {
264         i = getToken(buf_, data, length, i);
265         break;
266       }
267 
268       {
269         int statusCode = -1;
270         if (buf_.size() == 3 && util::isNumber(buf_.begin(), buf_.end())) {
271           statusCode =
272               (buf_[0] - '0') * 100 + (buf_[1] - '0') * 10 + (buf_[2] - '0');
273         }
274         if (statusCode < 100) {
275           throw DL_ABORT_EX("Bad status code: bad status-code");
276         }
277         result_->setStatusCode(statusCode);
278         buf_.clear();
279       }
280       if (c == '\r') {
281         state_ = PREV_EOL;
282         break;
283       }
284 
285       if (c == '\n') {
286         state_ = PREV_FIELD_NAME;
287         break;
288       }
289 
290       state_ = PREV_REASON_PHRASE;
291       break;
292 
293     case PREV_REASON_PHRASE:
294       if (util::isCRLF(c)) {
295         // The reason-phrase is completely optional.
296         state_ = c == '\n' ? PREV_FIELD_NAME : PREV_EOL;
297         break;
298       }
299 
300       if (util::isLws(c)) {
301         break;
302       }
303 
304       state_ = REASON_PHRASE;
305       i = getText(buf_, data, length, i);
306       break;
307 
308     case REASON_PHRASE:
309       if (util::isCRLF(c)) {
310         result_->setReasonPhrase(buf_);
311         buf_.clear();
312         state_ = c == '\n' ? PREV_FIELD_NAME : PREV_EOL;
313         break;
314       }
315 
316       i = getText(buf_, data, length, i);
317       break;
318 
319     case PREV_EOL:
320       if (c != '\n') {
321         throw DL_ABORT_EX("Bad HTTP header: missing LF");
322       }
323 
324       state_ = PREV_FIELD_NAME;
325       break;
326 
327     case PREV_FIELD_NAME:
328       if (util::isLws(c)) {
329         if (lastFieldName_.empty()) {
330           throw DL_ABORT_EX("Bad HTTP header: field name starts with LWS");
331         }
332         // Evil Multi-line header field
333         state_ = FIELD_VALUE;
334         break;
335       }
336 
337       if (!lastFieldName_.empty()) {
338         if (lastFieldHdKey_ != HttpHeader::MAX_INTERESTING_HEADER) {
339           result_->put(lastFieldHdKey_, util::strip(buf_));
340         }
341         lastFieldName_.clear();
342         lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER;
343         buf_.clear();
344       }
345       if (c == '\n') {
346         state_ = HEADERS_COMPLETE;
347         break;
348       }
349 
350       if (c == '\r') {
351         state_ = PREV_EOH;
352         break;
353       }
354 
355       if (c == ':') {
356         throw DL_ABORT_EX("Bad HTTP header: field name starts with ':'");
357       }
358 
359       state_ = FIELD_NAME;
360       i = getFieldNameToken(lastFieldName_, data, length, i);
361       break;
362 
363     case FIELD_NAME:
364       if (util::isLws(c) || util::isCRLF(c)) {
365         throw DL_ABORT_EX("Bad HTTP header: missing ':'");
366       }
367 
368       if (c == ':') {
369         util::lowercase(lastFieldName_);
370         lastFieldHdKey_ = idInterestingHeader(lastFieldName_.c_str());
371         state_ = PREV_FIELD_VALUE;
372         break;
373       }
374 
375       i = getFieldNameToken(lastFieldName_, data, length, i);
376       break;
377 
378     case PREV_FIELD_VALUE:
379       if (c == '\r') {
380         state_ = PREV_EOL;
381         break;
382       }
383 
384       if (c == '\n') {
385         state_ = PREV_FIELD_NAME;
386         break;
387       }
388 
389       if (util::isLws(c)) {
390         break;
391       }
392 
393       state_ = FIELD_VALUE;
394       if (lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
395         i = ignoreText(buf_, data, length, i);
396         break;
397       }
398 
399       i = getText(buf_, data, length, i);
400       break;
401 
402     case FIELD_VALUE:
403       if (c == '\r') {
404         state_ = PREV_EOL;
405         break;
406       }
407 
408       if (c == '\n') {
409         state_ = PREV_FIELD_NAME;
410         break;
411       }
412 
413       if (lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
414         i = ignoreText(buf_, data, length, i);
415         break;
416       }
417 
418       i = getText(buf_, data, length, i);
419       break;
420 
421     case PREV_EOH:
422       if (c != '\n') {
423         throw DL_ABORT_EX("Bad HTTP header: "
424                           "missing LF at the end of the header");
425       }
426 
427       state_ = HEADERS_COMPLETE;
428       break;
429 
430     case HEADERS_COMPLETE:
431       goto fin;
432     }
433   }
434 
435 fin:
436   // See Apache's documentation
437   // http://httpd.apache.org/docs/2.2/en/mod/core.html about size
438   // limit of HTTP headers. The page states that the number of request
439   // fields rarely exceeds 20.
440   if (lastFieldName_.size() > 1024 || buf_.size() > 8_k) {
441     throw DL_ABORT_EX("Too large HTTP header");
442   }
443 
444   lastBytesProcessed_ = i;
445   headers_.append(&data[0], &data[i]);
446 
447   if (state_ != HEADERS_COMPLETE) {
448     return false;
449   }
450 
451   // If both transfer-encoding and (content-length or content-range)
452   // are present, delete content-length and content-range.  RFC 7230
453   // says that sender must not send both transfer-encoding and
454   // content-length.  If both present, transfer-encoding overrides
455   // content-length.  There is no text about transfer-encoding and
456   // content-range.  But there is no reason to send transfer-encoding
457   // when range is set.
458   if (result_->defined(HttpHeader::TRANSFER_ENCODING)) {
459     result_->remove(HttpHeader::CONTENT_LENGTH);
460     result_->remove(HttpHeader::CONTENT_RANGE);
461   }
462 
463   return true;
464 }
465 
parse(const std::string & data)466 bool HttpHeaderProcessor::parse(const std::string& data)
467 {
468   return parse(reinterpret_cast<const unsigned char*>(data.c_str()),
469                data.size());
470 }
471 
// Returns the number of bytes consumed by the most recent successful
// call to parse().  The value is 0 after construction, after clear(),
// and when the last parse() call threw.
size_t HttpHeaderProcessor::getLastBytesProcessed() const
{
  return lastBytesProcessed_;
}
476 
clear()477 void HttpHeaderProcessor::clear()
478 {
479   state_ = (mode_ == CLIENT_PARSER ? PREV_RES_VERSION : PREV_METHOD);
480   lastBytesProcessed_ = 0;
481   buf_.clear();
482   lastFieldName_.clear();
483   lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER;
484   result_ = make_unique<HttpHeader>();
485   headers_.clear();
486 }
487 
getResult()488 std::unique_ptr<HttpHeader> HttpHeaderProcessor::getResult()
489 {
490   return std::move(result_);
491 }
492 
// Returns a copy of the raw header bytes consumed by parse() since
// construction or the last clear().
std::string HttpHeaderProcessor::getHeaderString() const { return headers_; }
494 
495 } // namespace aria2
496