1 /***************************************************************************
2 * Copyright (c) 2009-2010 Open Information Security Foundation
3 * Copyright (c) 2010-2013 Qualys, Inc.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16
17 * - Neither the name of the Qualys, Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ***************************************************************************/
33
34 /**
35 * @file
36 * @author Ivan Ristic <ivanr@webkreator.com>
37 */
38
39 #include "htp_config_auto.h"
40
41 #include "htp_private.h"
42
43 /**
44 * Determines the type of a Content-Disposition parameter.
45 *
46 * @param[in] data
47 * @param[in] startpos
48 * @param[in] pos
49 * @return CD_PARAM_OTHER, CD_PARAM_NAME or CD_PARAM_FILENAME.
50 */
htp_mpartp_cd_param_type(unsigned char * data,size_t startpos,size_t endpos)51 static int htp_mpartp_cd_param_type(unsigned char *data, size_t startpos, size_t endpos) {
52 if ((endpos - startpos) == 4) {
53 if (memcmp(data + startpos, "name", 4) == 0) return CD_PARAM_NAME;
54 } else if ((endpos - startpos) == 8) {
55 if (memcmp(data + startpos, "filename", 8) == 0) return CD_PARAM_FILENAME;
56 }
57
58 return CD_PARAM_OTHER;
59 }
60
htp_mpartp_get_multipart(htp_mpartp_t * parser)61 htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser) {
62 return &(parser->multipart);
63 }
64
65 /**
66 * Decodes a C-D header value. This is impossible to do correctly without a
67 * parsing personality because most browsers are broken:
68 * - Firefox encodes " as \", and \ is not encoded.
69 * - Chrome encodes " as %22.
70 * - IE encodes " as \", and \ is not encoded.
71 * - Opera encodes " as \" and \ as \\.
72 * @param[in] b
73 */
htp_mpart_decode_quoted_cd_value_inplace(bstr * b)74 static void htp_mpart_decode_quoted_cd_value_inplace(bstr *b) {
75 unsigned char *s = bstr_ptr(b);
76 unsigned char *d = bstr_ptr(b);
77 size_t len = bstr_len(b);
78 size_t pos = 0;
79
80 while (pos < len) {
81 // Ignore \ when before \ or ".
82 if ((*s == '\\')&&(pos + 1 < len)&&((*(s + 1) == '"')||(*(s + 1) == '\\'))) {
83 s++;
84 pos++;
85 }
86
87 *d++ = *s++;
88 pos++;
89 }
90
91 bstr_adjust_len(b, len - (s - d));
92 }
93
94 /**
95 * Parses the Content-Disposition part header.
96 *
97 * @param[in] part
98 * @return HTP_OK on success (header found and parsed), HTP_DECLINED if there is no C-D header or if
99 * it could not be processed, and HTP_ERROR on fatal error.
100 */
htp_mpart_part_parse_c_d(htp_multipart_part_t * part)101 htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part) {
102 // Find the C-D header.
103 htp_header_t *h = htp_table_get_c(part->headers, "content-disposition");
104 if (h == NULL) {
105 part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
106 return HTP_DECLINED;
107 }
108
109 // Require "form-data" at the beginning of the header.
110 if (bstr_index_of_c(h->value, "form-data") != 0) {
111 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
112 return HTP_DECLINED;
113 }
114
115 // The parsing starts here.
116 unsigned char *data = bstr_ptr(h->value);
117 size_t len = bstr_len(h->value);
118 size_t pos = 9; // Start after "form-data"
119
120 // Main parameter parsing loop (once per parameter).
121 while (pos < len) {
122 // Ignore whitespace.
123 while ((pos < len) && isspace(data[pos])) pos++;
124 if (pos == len) {
125 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
126 return HTP_DECLINED;
127 }
128
129 // Expecting a semicolon.
130 if (data[pos] != ';') {
131 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
132 return HTP_DECLINED;
133 }
134 pos++;
135
136 // Go over the whitespace before parameter name.
137 while ((pos < len) && isspace(data[pos])) pos++;
138 if (pos == len) {
139 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
140 return HTP_DECLINED;
141 }
142
143 // Found the starting position of the parameter name.
144 size_t start = pos;
145
146 // Look for the ending position.
147 while ((pos < len) && (!isspace(data[pos]) && (data[pos] != '='))) pos++;
148 if (pos == len) {
149 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
150 return HTP_DECLINED;
151 }
152
153 // Ending position is in "pos" now.
154
155 // Determine parameter type ("name", "filename", or other).
156 int param_type = htp_mpartp_cd_param_type(data, start, pos);
157
158 // Ignore whitespace after parameter name, if any.
159 while ((pos < len) && isspace(data[pos])) pos++;
160 if (pos == len) {
161 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
162 return HTP_DECLINED;
163 }
164
165 // Equals.
166 if (data[pos] != '=') {
167 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
168 return HTP_DECLINED;
169 }
170 pos++;
171
172 // Go over the whitespace before the parameter value.
173 while ((pos < len) && isspace(data[pos])) pos++;
174 if (pos == len) {
175 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
176 return HTP_DECLINED;
177 }
178
179 // Expecting a double quote.
180 if (data[pos] != '"') {
181 // Bare string or non-standard quoting, which we don't like.
182 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
183 return HTP_DECLINED;
184 }
185
186 pos++; // Over the double quote.
187
188 // We have the starting position of the value.
189 start = pos;
190
191 // Find the end of the value.
192 while ((pos < len) && (data[pos] != '"')) {
193 // Check for escaping.
194 if (data[pos] == '\\') {
195 if (pos + 1 >= len) {
196 // A backslash as the last character in the C-D header.
197 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
198 return HTP_DECLINED;
199 }
200
201 // Allow " and \ to be escaped.
202 if ((data[pos + 1] == '"')||(data[pos + 1] == '\\')) {
203 // Go over the quoted character.
204 pos++;
205 }
206 }
207
208 pos++;
209 }
210
211 // If we've reached the end of the string that means the
212 // value was not terminated properly (the second double quote is missing).
213 if (pos == len) {
214 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
215 return HTP_DECLINED;
216 }
217
218 // Expecting the terminating double quote.
219 if (data[pos] != '"') {
220 part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
221 return HTP_DECLINED;
222 }
223
224 pos++; // Over the terminating double quote.
225
226 // Finally, process the parameter value.
227
228 switch (param_type) {
229 case CD_PARAM_NAME:
230 // Check that we have not seen the name parameter already.
231 if (part->name != NULL) {
232 part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
233 return HTP_DECLINED;
234 }
235
236 part->name = bstr_dup_mem(data + start, pos - start - 1);
237 if (part->name == NULL) return HTP_ERROR;
238
239 htp_mpart_decode_quoted_cd_value_inplace(part->name);
240
241 break;
242
243 case CD_PARAM_FILENAME:
244 // Check that we have not seen the filename parameter already.
245 if (part->file != NULL) {
246 part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
247 return HTP_DECLINED;
248 }
249
250 part->file = calloc(1, sizeof (htp_file_t));
251 if (part->file == NULL) return HTP_ERROR;
252
253 part->file->fd = -1;
254 part->file->source = HTP_FILE_MULTIPART;
255
256 part->file->filename = bstr_dup_mem(data + start, pos - start - 1);
257 if (part->file->filename == NULL) {
258 free(part->file);
259 return HTP_ERROR;
260 }
261
262 htp_mpart_decode_quoted_cd_value_inplace(part->file->filename);
263
264 break;
265
266 default:
267 // Unknown parameter.
268 part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_UNKNOWN;
269 return HTP_DECLINED;
270 break;
271 }
272
273 // Continue to parse the next parameter, if any.
274 }
275
276 return HTP_OK;
277 }
278
279 /**
280 * Parses the Content-Type part header, if present.
281 *
282 * @param[in] part
283 * @return HTP_OK on success, HTP_DECLINED if the C-T header is not present, and HTP_ERROR on failure.
284 */
htp_mpart_part_parse_c_t(htp_multipart_part_t * part)285 static htp_status_t htp_mpart_part_parse_c_t(htp_multipart_part_t *part) {
286 htp_header_t *h = (htp_header_t *) htp_table_get_c(part->headers, "content-type");
287 if (h == NULL) return HTP_DECLINED;
288 return htp_parse_ct_header(h->value, &part->content_type);
289 }
290
291 /**
292 * Processes part headers.
293 *
294 * @param[in] part
295 * @return HTP_OK on success, HTP_ERROR on failure.
296 */
htp_mpart_part_process_headers(htp_multipart_part_t * part)297 htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part) {
298 if (htp_mpart_part_parse_c_d(part) == HTP_ERROR) return HTP_ERROR;
299 if (htp_mpart_part_parse_c_t(part) == HTP_ERROR) return HTP_ERROR;
300
301 return HTP_OK;
302 }
303
304 /**
305 * Parses one part header.
306 *
307 * @param[in] part
308 * @param[in] data
309 * @param[in] len
310 * @return HTP_OK on success, HTP_DECLINED on parsing error, HTP_ERROR on fatal error.
311 */
htp_mpartp_parse_header(htp_multipart_part_t * part,const unsigned char * data,size_t len)312 htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
313 size_t name_start, name_end;
314 size_t value_start, value_end;
315
316 // We do not allow NUL bytes here.
317 if (memchr(data, '\0', len) != NULL) {
318 part->parser->multipart.flags |= HTP_MULTIPART_NUL_BYTE;
319 return HTP_DECLINED;
320 }
321
322 name_start = 0;
323
324 // Look for the starting position of the name first.
325 size_t colon_pos = 0;
326
327 while ((colon_pos < len)&&(htp_is_space(data[colon_pos]))) colon_pos++;
328 if (colon_pos != 0) {
329 // Whitespace before header name.
330 part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
331 return HTP_DECLINED;
332 }
333
334 // Now look for the colon.
335 while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;
336
337 if (colon_pos == len) {
338 // Missing colon.
339 part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
340 return HTP_DECLINED;
341 }
342
343 if (colon_pos == 0) {
344 // Empty header name.
345 part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
346 return HTP_DECLINED;
347 }
348
349 name_end = colon_pos;
350
351 // Ignore LWS after header name.
352 size_t prev = name_end;
353 while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) {
354 prev--;
355 name_end--;
356
357 // LWS after field name. Not allowing for now.
358 part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
359 return HTP_DECLINED;
360 }
361
362 // Header value.
363
364 value_start = colon_pos + 1;
365
366 // Ignore LWS before value.
367 while ((value_start < len) && (htp_is_lws(data[value_start]))) value_start++;
368
369 if (value_start == len) {
370 // No header value.
371 part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
372 return HTP_DECLINED;
373 }
374
375 // Assume the value is at the end.
376 value_end = len;
377
378 // Check that the header name is a token.
379 size_t i = name_start;
380 while (i < name_end) {
381 if (!htp_is_token(data[i])) {
382 part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
383 return HTP_DECLINED;
384 }
385
386 i++;
387 }
388
389 // Now extract the name and the value.
390 htp_header_t *h = calloc(1, sizeof (htp_header_t));
391 if (h == NULL) return HTP_ERROR;
392
393 h->name = bstr_dup_mem(data + name_start, name_end - name_start);
394 if (h->name == NULL) {
395 free(h);
396 return HTP_ERROR;
397 }
398
399 h->value = bstr_dup_mem(data + value_start, value_end - value_start);
400 if (h->value == NULL) {
401 bstr_free(h->name);
402 free(h);
403 return HTP_ERROR;
404 }
405
406 if ((bstr_cmp_c_nocase(h->name, "content-disposition") != 0) && (bstr_cmp_c_nocase(h->name, "content-type") != 0)) {
407 part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_UNKNOWN;
408 }
409
410 // Check if the header already exists.
411 htp_header_t * h_existing = htp_table_get(part->headers, h->name);
412 if (h_existing != NULL) {
413 // Add to the existing header.
414 bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value)
415 + 2 + bstr_len(h->value));
416 if (new_value == NULL) {
417 bstr_free(h->name);
418 bstr_free(h->value);
419 free(h);
420 return HTP_ERROR;
421 }
422
423 h_existing->value = new_value;
424 bstr_add_mem_noex(h_existing->value, ", ", 2);
425 bstr_add_noex(h_existing->value, h->value);
426
427 // The header is no longer needed.
428 bstr_free(h->name);
429 bstr_free(h->value);
430 free(h);
431
432 // Keep track of same-name headers.
433 h_existing->flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
434 part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
435 } else {
436 // Add as a new header.
437 if (htp_table_add(part->headers, h->name, h) != HTP_OK) {
438 bstr_free(h->value);
439 bstr_free(h->name);
440 free(h);
441 return HTP_ERROR;
442 }
443 }
444
445 return HTP_OK;
446 }
447
448 /**
449 * Creates a new Multipart part.
450 *
451 * @param[in] parser
452 * @return New part instance, or NULL on memory allocation failure.
453 */
htp_mpart_part_create(htp_mpartp_t * parser)454 htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser) {
455 htp_multipart_part_t * part = calloc(1, sizeof (htp_multipart_part_t));
456 if (part == NULL) return NULL;
457
458 part->headers = htp_table_create(4);
459 if (part->headers == NULL) {
460 free(part);
461 return NULL;
462 }
463
464 part->parser = parser;
465 bstr_builder_clear(parser->part_data_pieces);
466 bstr_builder_clear(parser->part_header_pieces);
467
468 return part;
469 }
470
471 /**
472 * Destroys a part.
473 *
474 * @param[in] part
475 * @param[in] gave_up_data
476 */
htp_mpart_part_destroy(htp_multipart_part_t * part,int gave_up_data)477 void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data) {
478 if (part == NULL) return;
479
480 if (part->file != NULL) {
481 bstr_free(part->file->filename);
482
483 if (part->file->tmpname != NULL) {
484 unlink(part->file->tmpname);
485 free(part->file->tmpname);
486 }
487
488 free(part->file);
489 part->file = NULL;
490 }
491
492 if ((!gave_up_data) || (part->type != MULTIPART_PART_TEXT)) {
493 bstr_free(part->name);
494 bstr_free(part->value);
495 }
496
497 bstr_free(part->content_type);
498
499 if (part->headers != NULL) {
500 htp_header_t *h = NULL;
501 for (size_t i = 0, n = htp_table_size(part->headers); i < n; i++) {
502 h = htp_table_get_index(part->headers, i, NULL);
503 bstr_free(h->name);
504 bstr_free(h->value);
505 free(h);
506 }
507
508 htp_table_destroy(part->headers);
509 }
510
511 free(part);
512 }
513
514 /**
515 * Finalizes part processing.
516 *
517 * @param[in] part
518 * @return HTP_OK on success, HTP_ERROR on failure.
519 */
htp_mpart_part_finalize_data(htp_multipart_part_t * part)520 htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part) {
521 // Determine if this part is the epilogue.
522
523 if (part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
524 if (part->type == MULTIPART_PART_UNKNOWN) {
525 // Assume that the unknown part after the last boundary is the epilogue.
526 part->parser->current_part->type = MULTIPART_PART_EPILOGUE;
527
528 // But if we've already seen a part we thought was the epilogue,
529 // raise HTP_MULTIPART_PART_UNKNOWN. Multiple epilogues are not allowed.
530 if (part->parser->multipart.flags & HTP_MULTIPART_HAS_EPILOGUE) {
531 part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
532 }
533
534 part->parser->multipart.flags |= HTP_MULTIPART_HAS_EPILOGUE;
535 } else {
536 part->parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
537 }
538 }
539
540 // Sanity checks.
541
542 // Have we seen complete part headers? If we have not, that means that the part ended prematurely.
543 if ((part->parser->current_part->type != MULTIPART_PART_EPILOGUE) && (part->parser->current_part_mode != MODE_DATA)) {
544 part->parser->multipart.flags |= HTP_MULTIPART_PART_INCOMPLETE;
545 }
546
547 // Have we been able to determine the part type? If not, this means
548 // that the part did not contain the C-D header.
549 if (part->type == MULTIPART_PART_UNKNOWN) {
550 part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
551 }
552
553 // Finalize part value.
554
555 if (part->type == MULTIPART_PART_FILE) {
556 // Notify callbacks about the end of the file.
557 htp_mpartp_run_request_file_data_hook(part, NULL, 0);
558
559 // If we are storing the file to disk, close the file descriptor.
560 if (part->file->fd != -1) {
561 close(part->file->fd);
562 }
563 } else {
564 // Combine value pieces into a single buffer.
565 if (bstr_builder_size(part->parser->part_data_pieces) > 0) {
566 part->value = bstr_builder_to_str(part->parser->part_data_pieces);
567 bstr_builder_clear(part->parser->part_data_pieces);
568 }
569 }
570
571 return HTP_OK;
572 }
573
htp_mpartp_run_request_file_data_hook(htp_multipart_part_t * part,const unsigned char * data,size_t len)574 htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
575 if (part->parser->cfg == NULL) return HTP_OK;
576
577 // Keep track of the file length.
578 part->file->len += len;
579
580 // Package data for the callbacks.
581 htp_file_data_t file_data;
582 file_data.file = part->file;
583 file_data.data = data;
584 file_data.len = (const size_t) len;
585
586 // Send data to callbacks
587 htp_status_t rc = htp_hook_run_all(part->parser->cfg->hook_request_file_data, &file_data);
588 if (rc != HTP_OK) return rc;
589
590 return HTP_OK;
591 }
592
593 /**
594 * Handles part data.
595 *
596 * @param[in] part
597 * @param[in] data
598 * @param[in] len
599 * @param[in] is_line
600 * @return HTP_OK on success, HTP_ERROR on failure.
601 */
htp_mpart_part_handle_data(htp_multipart_part_t * part,const unsigned char * data,size_t len,int is_line)602 htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line) {
603 #if HTP_DEBUG
604 fprintf(stderr, "Part type %d mode %d is_line %d\n", part->type, part->parser->current_part_mode, is_line);
605 fprint_raw_data(stderr, "htp_mpart_part_handle_data: data chunk", data, len);
606 #endif
607
608 // Keep track of raw part length.
609 part->len += len;
610
611 // If we're processing a part that came after the last boundary, then we're not sure if it
612 // is the epilogue part or some other part (in case of evasion attempt). For that reason we
613 // will keep all its data in the part_data_pieces structure. If it ends up not being the
614 // epilogue, this structure will be cleared.
615 if ((part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) && (part->type == MULTIPART_PART_UNKNOWN)) {
616 bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
617 }
618
619 if (part->parser->current_part_mode == MODE_LINE) {
620 // Line mode.
621
622 if (is_line) {
623 // End of the line.
624
625 bstr *line = NULL;
626
627 // If this line came to us in pieces, combine them now into a single buffer.
628 if (bstr_builder_size(part->parser->part_header_pieces) > 0) {
629 bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
630 line = bstr_builder_to_str(part->parser->part_header_pieces);
631 if (line == NULL) return HTP_ERROR;
632 bstr_builder_clear(part->parser->part_header_pieces);
633
634 data = bstr_ptr(line);
635 len = bstr_len(line);
636 }
637
638 // Ignore the line endings.
639 if (len > 1) {
640 if (data[len - 1] == LF) len--;
641 if (data[len - 1] == CR) len--;
642 } else if (len > 0) {
643 if (data[len - 1] == LF) len--;
644 }
645
646 // Is it an empty line?
647 if (len == 0) {
648 // Empty line; process headers and switch to data mode.
649
650 // Process the pending header, if any.
651 if (part->parser->pending_header_line != NULL) {
652 if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
653 bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
654 {
655 bstr_free(line);
656 return HTP_ERROR;
657 }
658
659 bstr_free(part->parser->pending_header_line);
660 part->parser->pending_header_line = NULL;
661 }
662
663 if (htp_mpart_part_process_headers(part) == HTP_ERROR) {
664 bstr_free(line);
665 return HTP_ERROR;
666 }
667
668 part->parser->current_part_mode = MODE_DATA;
669 bstr_builder_clear(part->parser->part_header_pieces);
670
671 if (part->file != NULL) {
672 // Changing part type because we have a filename.
673 part->type = MULTIPART_PART_FILE;
674
675 if ((part->parser->extract_files) && (part->parser->file_count < part->parser->extract_limit)) {
676 char buf[255];
677
678 strncpy(buf, part->parser->extract_dir, 254);
679 strncat(buf, "/libhtp-multipart-file-XXXXXX", 254 - strlen(buf));
680
681 part->file->tmpname = strdup(buf);
682 if (part->file->tmpname == NULL) {
683 bstr_free(line);
684 return HTP_ERROR;
685 }
686
687 mode_t previous_mask = umask(S_IXUSR | S_IRWXG | S_IRWXO);
688 part->file->fd = mkstemp(part->file->tmpname);
689 umask(previous_mask);
690
691 if (part->file->fd < 0) {
692 bstr_free(line);
693 return HTP_ERROR;
694 }
695
696 part->parser->file_count++;
697 }
698 } else if (part->name != NULL) {
699 // Changing part type because we have a name.
700 part->type = MULTIPART_PART_TEXT;
701 bstr_builder_clear(part->parser->part_data_pieces);
702 } else {
703 // Do nothing; the type stays MULTIPART_PART_UNKNOWN.
704 }
705 } else {
706 // Not an empty line.
707
708 // Is there a pending header?
709 if (part->parser->pending_header_line == NULL) {
710 if (line != NULL) {
711 part->parser->pending_header_line = line;
712 line = NULL;
713 } else {
714 part->parser->pending_header_line = bstr_dup_mem(data, len);
715 if (part->parser->pending_header_line == NULL) return HTP_ERROR;
716 }
717 } else {
718 // Is this a folded line?
719 if (isspace(data[0])) {
720 // Folding; add to the existing line.
721 part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_FOLDING;
722 part->parser->pending_header_line = bstr_add_mem(part->parser->pending_header_line, data, len);
723 if (part->parser->pending_header_line == NULL) {
724 bstr_free(line);
725 return HTP_ERROR;
726 }
727 } else {
728 // Process the pending header line.
729 if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
730 bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
731 {
732 bstr_free(line);
733 return HTP_ERROR;
734 }
735
736 bstr_free(part->parser->pending_header_line);
737
738 if (line != NULL) {
739 part->parser->pending_header_line = line;
740 line = NULL;
741 } else {
742 part->parser->pending_header_line = bstr_dup_mem(data, len);
743 if (part->parser->pending_header_line == NULL) return HTP_ERROR;
744 }
745 }
746 }
747 }
748
749 bstr_free(line);
750 line = NULL;
751 } else {
752 // Not end of line; keep the data chunk for later.
753 bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
754 }
755 } else {
756 // Data mode; keep the data chunk for later (but not if it is a file).
757 switch (part->type) {
758 case MULTIPART_PART_EPILOGUE:
759 case MULTIPART_PART_PREAMBLE:
760 case MULTIPART_PART_TEXT:
761 case MULTIPART_PART_UNKNOWN:
762 // Make a copy of the data in RAM.
763 bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
764 break;
765
766 case MULTIPART_PART_FILE:
767 // Invoke file data callbacks.
768 htp_mpartp_run_request_file_data_hook(part, data, len);
769
770 // Optionally, store the data in a file.
771 if (part->file->fd != -1) {
772 if (write(part->file->fd, data, len) < 0) {
773 return HTP_ERROR;
774 }
775 }
776 break;
777
778 default:
779 // Internal error.
780 return HTP_ERROR;
781 break;
782 }
783 }
784
785 return HTP_OK;
786 }
787
788 /**
789 * Handles data, creating new parts as necessary.
790 *
791 * @param[in] mpartp
792 * @param[in] data
793 * @param[in] len
794 * @param[in] is_line
795 * @return HTP_OK on success, HTP_ERROR on failure.
796 */
htp_mpartp_handle_data(htp_mpartp_t * parser,const unsigned char * data,size_t len,int is_line)797 static htp_status_t htp_mpartp_handle_data(htp_mpartp_t *parser, const unsigned char *data, size_t len, int is_line) {
798 if (len == 0) return HTP_OK;
799
800 // Do we have a part already?
801 if (parser->current_part == NULL) {
802 // Create a new part.
803 parser->current_part = htp_mpart_part_create(parser);
804 if (parser->current_part == NULL) return HTP_ERROR;
805
806 if (parser->multipart.boundary_count == 0) {
807 // We haven't seen a boundary yet, so this must be the preamble part.
808 parser->current_part->type = MULTIPART_PART_PREAMBLE;
809 parser->multipart.flags |= HTP_MULTIPART_HAS_PREAMBLE;
810 parser->current_part_mode = MODE_DATA;
811 } else {
812 // Part after preamble.
813 parser->current_part_mode = MODE_LINE;
814 }
815
816 // Add part to the list.
817 htp_list_push(parser->multipart.parts, parser->current_part);
818
819 #ifdef HTP_DEBUG
820 fprintf(stderr, "Created new part type %d\n", parser->current_part->type);
821 #endif
822 }
823
824 // Send data to the part.
825 return htp_mpart_part_handle_data(parser->current_part, data, len, is_line);
826 }
827
828 /**
829 * Handles a boundary event, which means that it will finalize a part if one exists.
830 *
831 * @param[in] mpartp
832 * @return HTP_OK on success, HTP_ERROR on failure.
833 */
htp_mpartp_handle_boundary(htp_mpartp_t * parser)834 static htp_status_t htp_mpartp_handle_boundary(htp_mpartp_t *parser) {
835 #if HTP_DEBUG
836 fprintf(stderr, "htp_mpartp_handle_boundary\n");
837 #endif
838
839 if (parser->current_part != NULL) {
840 if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) {
841 return HTP_ERROR;
842 }
843
844 // We're done with this part
845 parser->current_part = NULL;
846
847 // Revert to line mode
848 parser->current_part_mode = MODE_LINE;
849 }
850
851 return HTP_OK;
852 }
853
htp_mpartp_init_boundary(htp_mpartp_t * parser,unsigned char * data,size_t len)854 static htp_status_t htp_mpartp_init_boundary(htp_mpartp_t *parser, unsigned char *data, size_t len) {
855 if ((parser == NULL) || (data == NULL)) return HTP_ERROR;
856
857 // Copy the boundary and convert it to lowercase.
858
859 parser->multipart.boundary_len = len + 4;
860 parser->multipart.boundary = malloc(parser->multipart.boundary_len + 1);
861 if (parser->multipart.boundary == NULL) return HTP_ERROR;
862
863 parser->multipart.boundary[0] = CR;
864 parser->multipart.boundary[1] = LF;
865 parser->multipart.boundary[2] = '-';
866 parser->multipart.boundary[3] = '-';
867
868 for (size_t i = 0; i < len; i++) {
869 parser->multipart.boundary[i + 4] = data[i];
870 }
871
872 parser->multipart.boundary[parser->multipart.boundary_len] = '\0';
873
874 // We're starting in boundary-matching mode. The first boundary can appear without the
875 // CRLF, and our starting state expects that. If we encounter non-boundary data, the
876 // state will switch to data mode. Then, if the data is CRLF or LF, we will go back
877 // to boundary matching. Thus, we handle all the possibilities.
878
879 parser->parser_state = STATE_BOUNDARY;
880 parser->boundary_match_pos = 2;
881
882 return HTP_OK;
883 }
884
htp_mpartp_create(htp_cfg_t * cfg,bstr * boundary,uint64_t flags)885 htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags) {
886 if ((cfg == NULL) || (boundary == NULL)) return NULL;
887
888 htp_mpartp_t *parser = calloc(1, sizeof (htp_mpartp_t));
889 if (parser == NULL) return NULL;
890
891 parser->cfg = cfg;
892
893 parser->boundary_pieces = bstr_builder_create();
894 if (parser->boundary_pieces == NULL) {
895 htp_mpartp_destroy(parser);
896 return NULL;
897 }
898
899 parser->part_data_pieces = bstr_builder_create();
900 if (parser->part_data_pieces == NULL) {
901 htp_mpartp_destroy(parser);
902 return NULL;
903 }
904
905 parser->part_header_pieces = bstr_builder_create();
906 if (parser->part_header_pieces == NULL) {
907 htp_mpartp_destroy(parser);
908 return NULL;
909 }
910
911 parser->multipart.parts = htp_list_create(64);
912 if (parser->multipart.parts == NULL) {
913 htp_mpartp_destroy(parser);
914 return NULL;
915 }
916
917 parser->multipart.flags = flags;
918 parser->parser_state = STATE_INIT;
919 parser->extract_files = cfg->extract_request_files;
920 parser->extract_dir = cfg->tmpdir;
921 if (cfg->extract_request_files_limit >= 0) {
922 parser->extract_limit = cfg->extract_request_files_limit;
923 } else {
924 parser->extract_limit = DEFAULT_FILE_EXTRACT_LIMIT;
925 }
926 parser->handle_data = htp_mpartp_handle_data;
927 parser->handle_boundary = htp_mpartp_handle_boundary;
928
929 // Initialize the boundary.
930 htp_status_t rc = htp_mpartp_init_boundary(parser, bstr_ptr(boundary), bstr_len(boundary));
931 if (rc != HTP_OK) {
932 htp_mpartp_destroy(parser);
933 return NULL;
934 }
935
936 // On success, the ownership of the boundary parameter
937 // is transferred to us. We made a copy, and so we
938 // don't need it any more.
939 bstr_free(boundary);
940
941 return parser;
942 }
943
/**
 * Destroys the parser: the boundary, the piece builders, the pending header
 * line, and all the parts in the list of parts.
 *
 * @param[in] parser  Parser to destroy; NULL is ignored.
 */
void htp_mpartp_destroy(htp_mpartp_t *parser) {
    if (parser == NULL) return;

    // free() accepts NULL, so no guard is needed.
    free(parser->multipart.boundary);

    bstr_builder_destroy(parser->boundary_pieces);
    bstr_builder_destroy(parser->part_header_pieces);
    bstr_free(parser->pending_header_line);
    bstr_builder_destroy(parser->part_data_pieces);

    // Free the parts.
    if (parser->multipart.parts != NULL) {
        const size_t part_count = htp_list_size(parser->multipart.parts);

        for (size_t idx = 0; idx < part_count; idx++) {
            htp_mpart_part_destroy(htp_list_get(parser->multipart.parts, idx), parser->gave_up_data);
        }

        htp_list_destroy(parser->multipart.parts);
    }

    free(parser);
}
968
969 /**
970 * Processes set-aside data.
971 *
972 * @param[in] mpartp
973 * @param[in] data
974 * @param[in] pos
975 * @param[in] startpos
976 * @param[in] return_pos
977 * @param[in] matched
978 * @return HTP_OK on success, HTP_ERROR on failure.
979 */
htp_martp_process_aside(htp_mpartp_t * parser,int matched)980 static htp_status_t htp_martp_process_aside(htp_mpartp_t *parser, int matched) {
981 // The stored data pieces can contain up to one line. If we're in data mode and there
982 // was no boundary match, things are straightforward -- we process everything as data.
983 // If there was a match, we need to take care to not send the line ending as data, nor
984 // anything that follows (because it's going to be a part of the boundary). Similarly,
985 // when we are in line mode, we need to split the first data chunk, processing the first
986 // part as line and the second part as data.
987
988 #ifdef HTP_DEBUG
989 fprintf(stderr, "mpartp_process_aside matched %d current_part_mode %d\n", matched, parser->current_part_mode);
990 #endif
991
992 // Do we need to do any chunk splitting?
993 if (matched || (parser->current_part_mode == MODE_LINE)) {
994 // Line mode or boundary match
995
996 // Process the CR byte, if set aside.
997 if ((!matched) && (parser->cr_aside)) {
998 // Treat as part data, when there is not a match.
999 parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
1000 parser->cr_aside = 0;
1001 } else {
1002 // Treat as boundary, when there is a match.
1003 parser->cr_aside = 0;
1004 }
1005
1006 // We know that we went to match a boundary because
1007 // we saw a new line. Now we have to find that line and
1008 // process it. It's either going to be in the current chunk,
1009 // or in the first stored chunk.
1010 if (bstr_builder_size(parser->boundary_pieces) > 0) {
1011 int first = 1;
1012 for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) {
1013 bstr *b = htp_list_get(parser->boundary_pieces->pieces, i);
1014
1015 if (first) {
1016 first = 0;
1017
1018 // Split the first chunk.
1019
1020 if (!matched) {
1021 // In line mode, we are OK with line endings.
1022 parser->handle_data(parser, bstr_ptr(b), parser->boundary_candidate_pos, /* line */ 1);
1023 } else {
1024 // But if there was a match, the line ending belongs to the boundary.
1025 unsigned char *dx = bstr_ptr(b);
1026 size_t lx = parser->boundary_candidate_pos;
1027
1028 // Remove LF or CRLF.
1029 if ((lx > 0) && (dx[lx - 1] == LF)) {
1030 lx--;
1031 // Remove CR.
1032 if ((lx > 0) && (dx[lx - 1] == CR)) {
1033 lx--;
1034 }
1035 }
1036
1037 parser->handle_data(parser, dx, lx, /* not a line */ 0);
1038 }
1039
1040 // The second part of the split chunks belongs to the boundary
1041 // when matched, data otherwise.
1042 if (!matched) {
1043 parser->handle_data(parser, bstr_ptr(b) + parser->boundary_candidate_pos,
1044 bstr_len(b) - parser->boundary_candidate_pos, /* not a line */ 0);
1045 }
1046 } else {
1047 // Do not send data if there was a boundary match. The stored
1048 // data belongs to the boundary.
1049 if (!matched) {
1050 parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0);
1051 }
1052 }
1053 }
1054
1055 bstr_builder_clear(parser->boundary_pieces);
1056 }
1057 } else {
1058 // Data mode and no match.
1059
1060 // In data mode, we process the lone CR byte as data.
1061 if (parser->cr_aside) {
1062 parser->handle_data(parser, (const unsigned char *)&"\r", 1, /* not a line */ 0);
1063 parser->cr_aside = 0;
1064 }
1065
1066 // We then process any pieces that we might have stored, also as data.
1067 if (bstr_builder_size(parser->boundary_pieces) > 0) {
1068 for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) {
1069 bstr *b = htp_list_get(parser->boundary_pieces->pieces, i);
1070 parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0);
1071 }
1072
1073 bstr_builder_clear(parser->boundary_pieces);
1074 }
1075 }
1076
1077 return HTP_OK;
1078 }
1079
htp_mpartp_finalize(htp_mpartp_t * parser)1080 htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser) {
1081 if (parser->current_part != NULL) {
1082 // Process buffered data, if any.
1083 htp_martp_process_aside(parser, 0);
1084
1085 // Finalize the last part.
1086 if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) return HTP_ERROR;
1087
1088 // It is OK to end abruptly in the epilogue part, but not in any other.
1089 if (parser->current_part->type != MULTIPART_PART_EPILOGUE) {
1090 parser->multipart.flags |= HTP_MULTIPART_INCOMPLETE;
1091 }
1092 }
1093
1094 bstr_builder_clear(parser->boundary_pieces);
1095
1096 return HTP_OK;
1097 }
1098
htp_mpartp_parse(htp_mpartp_t * parser,const void * _data,size_t len)1099 htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *_data, size_t len) {
1100 unsigned char *data = (unsigned char *) _data;
1101
1102 // The current position in the entire input buffer.
1103 size_t pos = 0;
1104
1105 // The position of the first unprocessed byte of data. We split the
1106 // input buffer into smaller chunks, according to their purpose. Once
1107 // an entire such smaller chunk is processed, we move to the next
1108 // and update startpos.
1109 size_t startpos = 0;
1110
1111 // The position of the (possible) boundary. We investigate for possible
1112 // boundaries whenever we encounter CRLF or just LF. If we don't find a
1113 // boundary we need to go back, and this is what data_return_pos helps with.
1114 size_t data_return_pos = 0;
1115
1116 #if HTP_DEBUG
1117 fprint_raw_data(stderr, "htp_mpartp_parse: data chunk", data, len);
1118 #endif
1119
1120 // While there's data in the input buffer.
1121
1122 while (pos < len) {
1123
1124 STATE_SWITCH:
1125 #if HTP_DEBUG
1126 fprintf(stderr, "htp_mpartp_parse: state %d pos %zd startpos %zd\n", parser->parser_state, pos, startpos);
1127 #endif
1128
1129 switch (parser->parser_state) {
1130
1131 case STATE_INIT:
1132 // Incomplete initialization.
1133 return HTP_ERROR;
1134 break;
1135
1136 case STATE_DATA: // Handle part data.
1137
1138 // While there's data in the input buffer.
1139
1140 while (pos < len) {
1141 // Check for a CRLF-terminated line.
1142 if (data[pos] == CR) {
1143 // We have a CR byte.
1144
1145 // Is this CR the last byte in the input buffer?
1146 if (pos + 1 == len) {
1147 // We have CR as the last byte in input. We are going to process
1148 // what we have in the buffer as data, except for the CR byte,
1149 // which we're going to leave for later. If it happens that a
1150 // CR is followed by a LF and then a boundary, the CR is going
1151 // to be discarded.
1152 pos++; // Advance over CR.
1153 parser->cr_aside = 1;
1154 } else {
1155 // We have CR and at least one more byte in the buffer, so we
1156 // are able to test for the LF byte too.
1157 if (data[pos + 1] == LF) {
1158 pos += 2; // Advance over CR and LF.
1159
1160 parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
1161
1162 // Prepare to switch to boundary testing.
1163 data_return_pos = pos;
1164 parser->boundary_candidate_pos = pos - startpos;
1165 parser->boundary_match_pos = 2; // After LF; position of the first dash.
1166 parser->parser_state = STATE_BOUNDARY;
1167
1168 goto STATE_SWITCH;
1169 } else {
1170 // This is not a new line; advance over the
1171 // byte and clear the CR set-aside flag.
1172 pos++;
1173 parser->cr_aside = 0;
1174 }
1175 }
1176 } else if (data[pos] == LF) { // Check for a LF-terminated line.
1177 pos++; // Advance over LF.
1178
1179 // Did we have a CR in the previous input chunk?
1180 if (parser->cr_aside == 0) {
1181 parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
1182 } else {
1183 parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
1184 }
1185
1186 // Prepare to switch to boundary testing.
1187 data_return_pos = pos;
1188 parser->boundary_candidate_pos = pos - startpos;
1189 parser->boundary_match_pos = 2; // After LF; position of the first dash.
1190 parser->parser_state = STATE_BOUNDARY;
1191
1192 goto STATE_SWITCH;
1193 } else {
1194 // Take one byte from input
1195 pos++;
1196
1197 // Earlier we might have set aside a CR byte not knowing if the next
1198 // byte is a LF. Now we know that it is not, and so we can release the CR.
1199 if (parser->cr_aside) {
1200 parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
1201 parser->cr_aside = 0;
1202 }
1203 }
1204 } // while
1205
1206 // No more data in the input buffer; process the data chunk.
1207 parser->handle_data(parser, data + startpos, pos - startpos - parser->cr_aside, /* not a line */ 0);
1208
1209 break;
1210
1211 case STATE_BOUNDARY: // Handle a possible boundary.
1212 while (pos < len) {
1213 #ifdef HTP_DEBUG
1214 fprintf(stderr, "boundary (len %zd pos %zd char %d) data char %d\n", parser->multipart.boundary_len,
1215 parser->boundary_match_pos, parser->multipart.boundary[parser->boundary_match_pos], tolower(data[pos]));
1216 #endif
1217
1218 // Check if the bytes match.
1219 if (!(data[pos] == parser->multipart.boundary[parser->boundary_match_pos])) {
1220 // Boundary mismatch.
1221
1222 // Process stored (buffered) data.
1223 htp_martp_process_aside(parser, /* no match */ 0);
1224
1225 // Return back where data parsing left off.
1226 if (parser->current_part_mode == MODE_LINE) {
1227 // In line mode, we process the line.
1228 parser->handle_data(parser, data + startpos, data_return_pos - startpos, /* line */ 1);
1229 startpos = data_return_pos;
1230 } else {
1231 // In data mode, we go back where we left off.
1232 pos = data_return_pos;
1233 }
1234
1235 parser->parser_state = STATE_DATA;
1236
1237 goto STATE_SWITCH;
1238 }
1239
1240 // Consume one matched boundary byte
1241 pos++;
1242 parser->boundary_match_pos++;
1243
1244 // Have we seen all boundary bytes?
1245 if (parser->boundary_match_pos == parser->multipart.boundary_len) {
1246 // Boundary match!
1247
1248 // Process stored (buffered) data.
1249 htp_martp_process_aside(parser, /* boundary match */ 1);
1250
1251 // Process data prior to the boundary in the current input buffer.
1252 // Because we know this is the last chunk before boundary, we can
1253 // remove the line endings.
1254 size_t dlen = data_return_pos - startpos;
1255 if ((dlen > 0) && (data[startpos + dlen - 1] == LF)) dlen--;
1256 if ((dlen > 0) && (data[startpos + dlen - 1] == CR)) dlen--;
1257 parser->handle_data(parser, data + startpos, dlen, /* line */ 1);
1258
1259 // Keep track of how many boundaries we've seen.
1260 parser->multipart.boundary_count++;
1261
1262 if (parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
1263 parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
1264 }
1265
1266 // Run boundary match.
1267 parser->handle_boundary(parser);
1268
1269 // We now need to check if this is the last boundary in the payload
1270 parser->parser_state = STATE_BOUNDARY_IS_LAST2;
1271
1272 goto STATE_SWITCH;
1273 }
1274 } // while
1275
1276 // No more data in the input buffer; store (buffer) the unprocessed
1277 // part for later, for after we find out if this is a boundary.
1278 bstr_builder_append_mem(parser->boundary_pieces, data + startpos, len - startpos);
1279
1280 break;
1281
1282 case STATE_BOUNDARY_IS_LAST2:
1283 // Examine the first byte after the last boundary character. If it is
1284 // a dash, then we maybe processing the last boundary in the payload. If
1285 // it is not, move to eat all bytes until the end of the line.
1286
1287 if (data[pos] == '-') {
1288 // Found one dash, now go to check the next position.
1289 pos++;
1290 parser->parser_state = STATE_BOUNDARY_IS_LAST1;
1291 } else {
1292 // This is not the last boundary. Change state but
1293 // do not advance the position, allowing the next
1294 // state to process the byte.
1295 parser->parser_state = STATE_BOUNDARY_EAT_LWS;
1296 }
1297 break;
1298
1299 case STATE_BOUNDARY_IS_LAST1:
1300 // Examine the byte after the first dash; expected to be another dash.
1301 // If not, eat all bytes until the end of the line.
1302
1303 if (data[pos] == '-') {
1304 // This is indeed the last boundary in the payload.
1305 pos++;
1306 parser->multipart.flags |= HTP_MULTIPART_SEEN_LAST_BOUNDARY;
1307 parser->parser_state = STATE_BOUNDARY_EAT_LWS;
1308 } else {
1309 // The second character is not a dash, and so this is not
1310 // the final boundary. Raise the flag for the first dash,
1311 // and change state to consume the rest of the boundary line.
1312 parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
1313 parser->parser_state = STATE_BOUNDARY_EAT_LWS;
1314 }
1315 break;
1316
1317 case STATE_BOUNDARY_EAT_LWS:
1318 if (data[pos] == CR) {
1319 // CR byte, which could indicate a CRLF line ending.
1320 pos++;
1321 parser->parser_state = STATE_BOUNDARY_EAT_LWS_CR;
1322 } else if (data[pos] == LF) {
1323 // LF line ending; we're done with boundary processing; data bytes follow.
1324 pos++;
1325 startpos = pos;
1326 parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
1327 parser->parser_state = STATE_DATA;
1328 } else {
1329 if (htp_is_lws(data[pos])) {
1330 // Linear white space is allowed here.
1331 parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_LWS_AFTER;
1332 pos++;
1333 } else {
1334 // Unexpected byte; consume, but remain in the same state.
1335 parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
1336 pos++;
1337 }
1338 }
1339 break;
1340
1341 case STATE_BOUNDARY_EAT_LWS_CR:
1342 if (data[pos] == LF) {
1343 // CRLF line ending; we're done with boundary processing; data bytes follow.
1344 pos++;
1345 startpos = pos;
1346 parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
1347 parser->parser_state = STATE_DATA;
1348 } else {
1349 // Not a line ending; start again, but do not process this byte.
1350 parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
1351 parser->parser_state = STATE_BOUNDARY_EAT_LWS;
1352 }
1353 break;
1354 } // switch
1355 }
1356
1357 return HTP_OK;
1358 }
1359
/**
 * Inspects the length and the characters of a boundary value and raises
 * HTP_MULTIPART_HBOUNDARY_INVALID or HTP_MULTIPART_HBOUNDARY_UNUSUAL in
 * the supplied flags. Flags are only ever added, never cleared.
 *
 * RFC 1341 defines the boundary parameter as 1 to 70 characters, not ending
 * with white space:
 *
 *     boundary      := 0*69<bchars> bcharsnospace
 *     bchars        := bcharsnospace / " "
 *     bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_"
 *                      / "," / "-" / "." / "/" / ":" / "=" / "?"
 *
 * Boundaries sent by browsers, for comparison:
 *
 *     Chrome:  boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD
 *     Firefox: boundary=---------------------------21071316483088
 *     MSIE:    boundary=---------------------------7dd13e11c0452
 *     Opera:   boundary=----------2JL5oh7QWEDwyBllIRc7fh
 *     Safari:  boundary=----WebKitFormBoundaryre6zL3b0BelnTY5S
 *
 * @param[in] boundary
 * @param[in,out] flags
 */
static void htp_mpartp_validate_boundary(bstr *boundary, uint64_t *flags) {
    unsigned char *data = bstr_ptr(boundary);
    size_t len = bstr_len(boundary);

    // Anything outside the 1..70 range allowed by the RFC is invalid.
    // Real-life boundaries tend to be much shorter than the upper limit.
    if ((len < 1) || (len > 70)) {
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
    }

    // The character check is stricter than the RFC: digits, letters and
    // dashes pass silently, the remaining RFC characters are reported as
    // unusual, and everything else is invalid.
    for (size_t i = 0; i < len; i++) {
        unsigned char c = data[i];

        int is_digit = (c >= '0') && (c <= '9');
        int is_lower = (c >= 'a') && (c <= 'z');
        int is_upper = (c >= 'A') && (c <= 'Z');

        if (is_digit || is_lower || is_upper || (c == '-')) continue;

        switch (c) {
            case '\'':
            case '(':
            case ')':
            case '+':
            case '_':
            case ',':
            case '.':
            case '/':
            case ':':
            case '=':
            case '?':
                // Permitted by the RFC, but not commonly seen.
                *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
                break;

            default:
                // Not a boundary character at all.
                *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
                break;
        }
    }
}
1434
/**
 * Scans the Content-Type value for occurrences of the word "boundary"
 * (in any case) that still have an equals character somewhere after them.
 * Raises HTP_MULTIPART_HBOUNDARY_INVALID when such an occurrence is not
 * written entirely in lowercase letters, or when more than one is found.
 *
 * @param[in] content_type
 * @param[in,out] flags
 */
static void htp_mpartp_validate_content_type(bstr *content_type, uint64_t *flags) {
    unsigned char *cursor = bstr_ptr(content_type);
    size_t remaining = bstr_len(content_type);
    size_t seen = 0;

    while (remaining > 0) {
        int offset = bstr_util_mem_index_of_c_nocase(cursor, remaining, "boundary");
        if (offset == -1) break;

        // Move to the start of the word.
        cursor += offset;
        remaining -= offset;

        // WebKit uses the word "boundary" inside its boundary values, e.g.
        // "multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD",
        // so an occurrence only counts when an equals character follows it.
        if (memchr(cursor, '=', remaining) == NULL) break;

        seen++;

        // Step over the 8 characters of the word, flagging any of them
        // that is not a lowercase letter.
        unsigned char *word_end = cursor + 8;
        for (; cursor < word_end; cursor++) {
            if ((*cursor < 'a') || (*cursor > 'z')) {
                *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
            }
        }

        remaining -= 8;
    }

    // More than one counted occurrence is treated as invalid.
    if (seen > 1) {
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
    }
}
1471
htp_mpartp_find_boundary(bstr * content_type,bstr ** boundary,uint64_t * flags)1472 htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *flags) {
1473 if ((content_type == NULL) || (boundary == NULL) || (flags == NULL)) return HTP_ERROR;
1474
1475 // Our approach is to ignore the MIME type and instead just look for
1476 // the boundary. This approach is more reliable in the face of various
1477 // evasion techniques that focus on submitting invalid MIME types.
1478
1479 // Reset flags.
1480 *flags = 0;
1481
1482 // Look for the boundary, case insensitive.
1483 int i = bstr_index_of_c_nocase(content_type, "boundary");
1484 if (i == -1) return HTP_DECLINED;
1485
1486 unsigned char *data = bstr_ptr(content_type) + i + 8;
1487 size_t len = bstr_len(content_type) - i - 8;
1488
1489 // Look for the boundary value.
1490 size_t pos = 0;
1491 while ((pos < len) && (data[pos] != '=')) {
1492 if (htp_is_space(data[pos])) {
1493 // It is unusual to see whitespace before the equals sign.
1494 *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
1495 } else {
1496 // But seeing a non-whitespace character may indicate evasion.
1497 *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1498 }
1499
1500 pos++;
1501 }
1502
1503 if (pos >= len) {
1504 // No equals sign in the header.
1505 *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1506 return HTP_DECLINED;
1507 }
1508
1509 // Go over the '=' character.
1510 pos++;
1511
1512 // Ignore any whitespace after the equals sign.
1513 while ((pos < len) && (htp_is_space(data[pos]))) {
1514 if (htp_is_space(data[pos])) {
1515 // It is unusual to see whitespace after
1516 // the equals sign.
1517 *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
1518 }
1519
1520 pos++;
1521 }
1522
1523 if (pos >= len) {
1524 // No value after the equals sign.
1525 *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1526 return HTP_DECLINED;
1527 }
1528
1529 if (data[pos] == '"') {
1530 // Quoted boundary.
1531
1532 // Possibly not very unusual, but let's see.
1533 *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
1534
1535 pos++; // Over the double quote.
1536 size_t startpos = pos; // Starting position of the boundary.
1537
1538 // Look for the terminating double quote.
1539 while ((pos < len) && (data[pos] != '"')) pos++;
1540
1541 if (pos >= len) {
1542 // Ran out of space without seeing
1543 // the terminating double quote.
1544 *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1545
1546 // Include the starting double quote in the boundary.
1547 startpos--;
1548 }
1549
1550 *boundary = bstr_dup_mem(data + startpos, pos - startpos);
1551 if (*boundary == NULL) return HTP_ERROR;
1552
1553 pos++; // Over the double quote.
1554 } else {
1555 // Boundary not quoted.
1556
1557 size_t startpos = pos;
1558
1559 // Find the end of the boundary. For the time being, we replicate
1560 // the behavior of PHP 5.4.x. This may result with a boundary that's
1561 // closer to what would be accepted in real life. Our subsequent
1562 // checks of boundary characters will catch irregularities.
1563 while ((pos < len) && (data[pos] != ',') && (data[pos] != ';') && (!htp_is_space(data[pos]))) pos++;
1564
1565 *boundary = bstr_dup_mem(data + startpos, pos - startpos);
1566 if (*boundary == NULL) return HTP_ERROR;
1567 }
1568
1569 // Check for a zero-length boundary.
1570 if (bstr_len(*boundary) == 0) {
1571 *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1572 bstr_free(*boundary);
1573 *boundary = NULL;
1574 return HTP_DECLINED;
1575 }
1576
1577 // Allow only whitespace characters after the boundary.
1578 int seen_space = 0, seen_non_space = 0;
1579
1580 while (pos < len) {
1581 if (!htp_is_space(data[pos])) {
1582 seen_non_space = 1;
1583 } else {
1584 seen_space = 1;
1585 }
1586
1587 pos++;
1588 }
1589
1590 // Raise INVALID if we see any non-space characters,
1591 // but raise UNUSUAL if we see _only_ space characters.
1592 if (seen_non_space) {
1593 *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1594 } else if (seen_space) {
1595 *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
1596 }
1597
1598 #ifdef HTP_DEBUG
1599 fprint_bstr(stderr, "Multipart boundary", *boundary);
1600 #endif
1601
1602 // Validate boundary characters.
1603 htp_mpartp_validate_boundary(*boundary, flags);
1604
1605 // Correlate with the MIME type. This might be a tad too
1606 // sensitive because it may catch non-browser access with sloppy
1607 // implementations, but let's go with it for now.
1608 if (bstr_begins_with_c(content_type, "multipart/form-data;") == 0) {
1609 *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1610 }
1611
1612 htp_mpartp_validate_content_type(content_type, flags);
1613
1614 return HTP_OK;
1615 }
1616