1 /* <!-- copyright */
2 /*
3 * aria2 - The high speed download utility
4 *
5 * Copyright (C) 2012 Tatsuhiro Tsujikawa
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 *
21 * In addition, as a special exception, the copyright holders give
22 * permission to link the code of portions of this program with the
23 * OpenSSL library under certain conditions as described in each
24 * individual source file, and distribute linked combinations
25 * including the two.
26 * You must obey the GNU General Public License in all respects
27 * for all of the code used other than OpenSSL. If you modify
28 * file(s) with this exception, you may extend this exception to your
29 * version of the file(s), but you are not obligated to do so. If you
30 * do not wish to do so, delete this exception statement from your
31 * version. If you delete this exception statement from all source
32 * files in the program, then also delete it here.
33 */
34 /* copyright --> */
35 #include "HttpHeaderProcessor.h"
36
37 #include <vector>
38
39 #include "HttpHeader.h"
40 #include "message.h"
41 #include "util.h"
42 #include "DlRetryEx.h"
43 #include "DlAbortEx.h"
44 #include "A2STR.h"
45 #include "error_code.h"
46
47 namespace aria2 {
48
namespace {
// States of the byte-driven state machine in HttpHeaderProcessor::parse().
//
// Naming convention: PREV_X means "waiting for the first byte of X",
// X means "currently inside X". The enumerator order is kept stable.
enum {
  // Server mode: request line "method SP request-target SP HTTP-version".
  // PREV_METHOD is the initial state of a server-mode parser.
  PREV_METHOD,
  METHOD,
  PREV_PATH,
  PATH,
  PREV_REQ_VERSION,
  REQ_VERSION,

  // Client mode: status line "HTTP-version SP status-code [SP reason]".
  // PREV_RES_VERSION is the initial state of a client-mode parser.
  PREV_RES_VERSION,
  RES_VERSION,
  PREV_STATUS_CODE,
  STATUS_CODE,
  PREV_REASON_PHRASE,
  REASON_PHRASE,

  // Header fields ("name: value"), shared by both modes.
  PREV_EOL, // CR has been seen; the next byte must be LF
  PREV_FIELD_NAME,
  FIELD_NAME,
  PREV_FIELD_VALUE,
  FIELD_VALUE,

  // End of the header block.
  PREV_EOH,        // CR seen at the start of a line; LF must follow
  HEADERS_COMPLETE // the terminating empty line has been consumed
};
} // namespace
76
HttpHeaderProcessor(ParserMode mode)77 HttpHeaderProcessor::HttpHeaderProcessor(ParserMode mode)
78 : mode_(mode),
79 state_(mode == CLIENT_PARSER ? PREV_RES_VERSION : PREV_METHOD),
80 lastBytesProcessed_(0),
81 lastFieldHdKey_(HttpHeader::MAX_INTERESTING_HEADER),
82 result_(make_unique<HttpHeader>())
83 {
84 }
85
// Defaulted destructor, defined out of line in this translation unit,
// where HttpHeader.h is included and HttpHeader is a complete type.
// NOTE(review): presumably the class header only forward-declares
// HttpHeader for the std::unique_ptr member -- confirm.
HttpHeaderProcessor::~HttpHeaderProcessor() = default;
87
88 namespace {
getToken(std::string & buf,const unsigned char * data,size_t length,size_t off)89 size_t getToken(std::string& buf, const unsigned char* data, size_t length,
90 size_t off)
91 {
92 size_t j = off;
93 while (j < length && !util::isLws(data[j]) && !util::isCRLF(data[j])) {
94 ++j;
95 }
96 buf.append(&data[off], &data[j]);
97 return j - 1;
98 }
99 } // namespace
100
101 namespace {
getFieldNameToken(std::string & buf,const unsigned char * data,size_t length,size_t off)102 size_t getFieldNameToken(std::string& buf, const unsigned char* data,
103 size_t length, size_t off)
104 {
105 size_t j = off;
106 while (j < length && data[j] != ':' && !util::isLws(data[j]) &&
107 !util::isCRLF(data[j])) {
108 ++j;
109 }
110 buf.append(&data[off], &data[j]);
111 return j - 1;
112 }
113 } // namespace
114
115 namespace {
getText(std::string & buf,const unsigned char * data,size_t length,size_t off)116 size_t getText(std::string& buf, const unsigned char* data, size_t length,
117 size_t off)
118 {
119 size_t j = off;
120 while (j < length && !util::isCRLF(data[j])) {
121 ++j;
122 }
123 buf.append(&data[off], &data[j]);
124 return j - 1;
125 }
126 } // namespace
127
128 namespace {
ignoreText(std::string & buf,const unsigned char * data,size_t length,size_t off)129 size_t ignoreText(std::string& buf, const unsigned char* data, size_t length,
130 size_t off)
131 {
132 size_t j = off;
133 while (j < length && !util::isCRLF(data[j])) {
134 ++j;
135 }
136 return j - 1;
137 }
138 } // namespace
139
parse(const unsigned char * data,size_t length)140 bool HttpHeaderProcessor::parse(const unsigned char* data, size_t length)
141 {
142 size_t i;
143 lastBytesProcessed_ = 0;
144 for (i = 0; i < length; ++i) {
145 unsigned char c = data[i];
146 switch (state_) {
147 case PREV_METHOD:
148 if (util::isLws(c) || util::isCRLF(c)) {
149 throw DL_ABORT_EX("Bad Request-Line: missing method");
150 }
151
152 i = getToken(buf_, data, length, i);
153 state_ = METHOD;
154 break;
155
156 case METHOD:
157 if (util::isLws(c)) {
158 result_->setMethod(buf_);
159 buf_.clear();
160 state_ = PREV_PATH;
161 break;
162 }
163
164 if (util::isCRLF(c)) {
165 throw DL_ABORT_EX("Bad Request-Line: missing request-target");
166 }
167
168 i = getToken(buf_, data, length, i);
169 break;
170
171 case PREV_PATH:
172 if (util::isCRLF(c)) {
173 throw DL_ABORT_EX("Bad Request-Line: missing request-target");
174 }
175
176 if (util::isLws(c)) {
177 break;
178 }
179
180 i = getToken(buf_, data, length, i);
181 state_ = PATH;
182 break;
183
184 case PATH:
185 if (util::isLws(c)) {
186 result_->setRequestPath(buf_);
187 buf_.clear();
188 state_ = PREV_REQ_VERSION;
189 break;
190 }
191
192 if (util::isCRLF(c)) {
193 throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
194 }
195
196 i = getToken(buf_, data, length, i);
197 break;
198
199 case PREV_REQ_VERSION:
200 if (util::isCRLF(c)) {
201 throw DL_ABORT_EX("Bad Request-Line: missing HTTP-version");
202 }
203
204 if (util::isLws(c)) {
205 break;
206 }
207
208 i = getToken(buf_, data, length, i);
209 state_ = REQ_VERSION;
210 break;
211
212 case REQ_VERSION:
213 if (util::isCRLF(c)) {
214 result_->setVersion(buf_);
215 buf_.clear();
216 state_ = c == '\n' ? PREV_FIELD_NAME : PREV_EOL;
217 break;
218 }
219
220 if (util::isLws(c)) {
221 throw DL_ABORT_EX("Bad Request-Line: LWS after HTTP-version");
222 }
223
224 i = getToken(buf_, data, length, i);
225 break;
226
227 case PREV_RES_VERSION:
228 if (util::isLws(c) || util::isCRLF(c)) {
229 throw DL_ABORT_EX("Bad Status-Line: missing HTTP-version");
230 }
231
232 i = getToken(buf_, data, length, i);
233 state_ = RES_VERSION;
234 break;
235
236 case RES_VERSION:
237 if (util::isLws(c)) {
238 result_->setVersion(buf_);
239 buf_.clear();
240 state_ = PREV_STATUS_CODE;
241 break;
242 }
243
244 if (util::isCRLF(c)) {
245 throw DL_ABORT_EX("Bad Status-Line: missing status-code");
246 }
247
248 break;
249
250 case PREV_STATUS_CODE:
251 if (util::isCRLF(c)) {
252 throw DL_ABORT_EX("Bad Status-Line: missing status-code");
253 }
254
255 if (!util::isLws(c)) {
256 state_ = STATUS_CODE;
257 i = getToken(buf_, data, length, i);
258 }
259
260 break;
261
262 case STATUS_CODE:
263 if (!util::isLws(c) && !util::isCRLF(c)) {
264 i = getToken(buf_, data, length, i);
265 break;
266 }
267
268 {
269 int statusCode = -1;
270 if (buf_.size() == 3 && util::isNumber(buf_.begin(), buf_.end())) {
271 statusCode =
272 (buf_[0] - '0') * 100 + (buf_[1] - '0') * 10 + (buf_[2] - '0');
273 }
274 if (statusCode < 100) {
275 throw DL_ABORT_EX("Bad status code: bad status-code");
276 }
277 result_->setStatusCode(statusCode);
278 buf_.clear();
279 }
280 if (c == '\r') {
281 state_ = PREV_EOL;
282 break;
283 }
284
285 if (c == '\n') {
286 state_ = PREV_FIELD_NAME;
287 break;
288 }
289
290 state_ = PREV_REASON_PHRASE;
291 break;
292
293 case PREV_REASON_PHRASE:
294 if (util::isCRLF(c)) {
295 // The reason-phrase is completely optional.
296 state_ = c == '\n' ? PREV_FIELD_NAME : PREV_EOL;
297 break;
298 }
299
300 if (util::isLws(c)) {
301 break;
302 }
303
304 state_ = REASON_PHRASE;
305 i = getText(buf_, data, length, i);
306 break;
307
308 case REASON_PHRASE:
309 if (util::isCRLF(c)) {
310 result_->setReasonPhrase(buf_);
311 buf_.clear();
312 state_ = c == '\n' ? PREV_FIELD_NAME : PREV_EOL;
313 break;
314 }
315
316 i = getText(buf_, data, length, i);
317 break;
318
319 case PREV_EOL:
320 if (c != '\n') {
321 throw DL_ABORT_EX("Bad HTTP header: missing LF");
322 }
323
324 state_ = PREV_FIELD_NAME;
325 break;
326
327 case PREV_FIELD_NAME:
328 if (util::isLws(c)) {
329 if (lastFieldName_.empty()) {
330 throw DL_ABORT_EX("Bad HTTP header: field name starts with LWS");
331 }
332 // Evil Multi-line header field
333 state_ = FIELD_VALUE;
334 break;
335 }
336
337 if (!lastFieldName_.empty()) {
338 if (lastFieldHdKey_ != HttpHeader::MAX_INTERESTING_HEADER) {
339 result_->put(lastFieldHdKey_, util::strip(buf_));
340 }
341 lastFieldName_.clear();
342 lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER;
343 buf_.clear();
344 }
345 if (c == '\n') {
346 state_ = HEADERS_COMPLETE;
347 break;
348 }
349
350 if (c == '\r') {
351 state_ = PREV_EOH;
352 break;
353 }
354
355 if (c == ':') {
356 throw DL_ABORT_EX("Bad HTTP header: field name starts with ':'");
357 }
358
359 state_ = FIELD_NAME;
360 i = getFieldNameToken(lastFieldName_, data, length, i);
361 break;
362
363 case FIELD_NAME:
364 if (util::isLws(c) || util::isCRLF(c)) {
365 throw DL_ABORT_EX("Bad HTTP header: missing ':'");
366 }
367
368 if (c == ':') {
369 util::lowercase(lastFieldName_);
370 lastFieldHdKey_ = idInterestingHeader(lastFieldName_.c_str());
371 state_ = PREV_FIELD_VALUE;
372 break;
373 }
374
375 i = getFieldNameToken(lastFieldName_, data, length, i);
376 break;
377
378 case PREV_FIELD_VALUE:
379 if (c == '\r') {
380 state_ = PREV_EOL;
381 break;
382 }
383
384 if (c == '\n') {
385 state_ = PREV_FIELD_NAME;
386 break;
387 }
388
389 if (util::isLws(c)) {
390 break;
391 }
392
393 state_ = FIELD_VALUE;
394 if (lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
395 i = ignoreText(buf_, data, length, i);
396 break;
397 }
398
399 i = getText(buf_, data, length, i);
400 break;
401
402 case FIELD_VALUE:
403 if (c == '\r') {
404 state_ = PREV_EOL;
405 break;
406 }
407
408 if (c == '\n') {
409 state_ = PREV_FIELD_NAME;
410 break;
411 }
412
413 if (lastFieldHdKey_ == HttpHeader::MAX_INTERESTING_HEADER) {
414 i = ignoreText(buf_, data, length, i);
415 break;
416 }
417
418 i = getText(buf_, data, length, i);
419 break;
420
421 case PREV_EOH:
422 if (c != '\n') {
423 throw DL_ABORT_EX("Bad HTTP header: "
424 "missing LF at the end of the header");
425 }
426
427 state_ = HEADERS_COMPLETE;
428 break;
429
430 case HEADERS_COMPLETE:
431 goto fin;
432 }
433 }
434
435 fin:
436 // See Apache's documentation
437 // http://httpd.apache.org/docs/2.2/en/mod/core.html about size
438 // limit of HTTP headers. The page states that the number of request
439 // fields rarely exceeds 20.
440 if (lastFieldName_.size() > 1024 || buf_.size() > 8_k) {
441 throw DL_ABORT_EX("Too large HTTP header");
442 }
443
444 lastBytesProcessed_ = i;
445 headers_.append(&data[0], &data[i]);
446
447 if (state_ != HEADERS_COMPLETE) {
448 return false;
449 }
450
451 // If both transfer-encoding and (content-length or content-range)
452 // are present, delete content-length and content-range. RFC 7230
453 // says that sender must not send both transfer-encoding and
454 // content-length. If both present, transfer-encoding overrides
455 // content-length. There is no text about transfer-encoding and
456 // content-range. But there is no reason to send transfer-encoding
457 // when range is set.
458 if (result_->defined(HttpHeader::TRANSFER_ENCODING)) {
459 result_->remove(HttpHeader::CONTENT_LENGTH);
460 result_->remove(HttpHeader::CONTENT_RANGE);
461 }
462
463 return true;
464 }
465
parse(const std::string & data)466 bool HttpHeaderProcessor::parse(const std::string& data)
467 {
468 return parse(reinterpret_cast<const unsigned char*>(data.c_str()),
469 data.size());
470 }
471
// Returns the number of bytes the most recent parse() call consumed
// from its input. The value is reset to 0 at the start of each parse()
// call and by clear(), so it stays 0 if parse() throws.
size_t HttpHeaderProcessor::getLastBytesProcessed() const
{
  return lastBytesProcessed_;
}
476
clear()477 void HttpHeaderProcessor::clear()
478 {
479 state_ = (mode_ == CLIENT_PARSER ? PREV_RES_VERSION : PREV_METHOD);
480 lastBytesProcessed_ = 0;
481 buf_.clear();
482 lastFieldName_.clear();
483 lastFieldHdKey_ = HttpHeader::MAX_INTERESTING_HEADER;
484 result_ = make_unique<HttpHeader>();
485 headers_.clear();
486 }
487
getResult()488 std::unique_ptr<HttpHeader> HttpHeaderProcessor::getResult()
489 {
490 return std::move(result_);
491 }
492
// Returns a copy of the raw header bytes consumed so far, i.e. the
// concatenation of everything successful parse() calls have appended
// since construction or the last clear().
std::string HttpHeaderProcessor::getHeaderString() const { return headers_; }
494
495 } // namespace aria2
496