1 /***************************************************************************
2  * Copyright (c) 2009-2010 Open Information Security Foundation
3  * Copyright (c) 2010-2013 Qualys, Inc.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are
8  * met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12 
13  * - Redistributions in binary form must reproduce the above copyright
14  *   notice, this list of conditions and the following disclaimer in the
15  *   documentation and/or other materials provided with the distribution.
16 
17  * - Neither the name of the Qualys, Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  ***************************************************************************/
33 
34 /**
35  * @file
36  * @author Ivan Ristic <ivanr@webkreator.com>
37  */
38 
39 #include "htp_config_auto.h"
40 
41 #include "htp_private.h"
42 
43 /**
44  * Determines the type of a Content-Disposition parameter.
45  *
46  * @param[in] data
47  * @param[in] startpos
48  * @param[in] pos
49  * @return CD_PARAM_OTHER, CD_PARAM_NAME or CD_PARAM_FILENAME.
50  */
htp_mpartp_cd_param_type(unsigned char * data,size_t startpos,size_t endpos)51 static int htp_mpartp_cd_param_type(unsigned char *data, size_t startpos, size_t endpos) {
52     if ((endpos - startpos) == 4) {
53         if (memcmp(data + startpos, "name", 4) == 0) return CD_PARAM_NAME;
54     } else if ((endpos - startpos) == 8) {
55         if (memcmp(data + startpos, "filename", 8) == 0) return CD_PARAM_FILENAME;
56     }
57 
58     return CD_PARAM_OTHER;
59 }
60 
htp_mpartp_get_multipart(htp_mpartp_t * parser)61 htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser) {
62     return &(parser->multipart);
63 }
64 
65 /**
66  * Decodes a C-D header value. This is impossible to do correctly without a
67  * parsing personality because most browsers are broken:
68  *  - Firefox encodes " as \", and \ is not encoded.
69  *  - Chrome encodes " as %22.
70  *  - IE encodes " as \", and \ is not encoded.
71  *  - Opera encodes " as \" and \ as \\.
72  * @param[in] b
73  */
htp_mpart_decode_quoted_cd_value_inplace(bstr * b)74 static void htp_mpart_decode_quoted_cd_value_inplace(bstr *b) {
75     unsigned char *s = bstr_ptr(b);
76     unsigned char *d = bstr_ptr(b);
77     size_t len = bstr_len(b);
78     size_t pos = 0;
79 
80     while (pos < len) {
81         // Ignore \ when before \ or ".
82         if ((*s == '\\')&&(pos + 1 < len)&&((*(s + 1) == '"')||(*(s + 1) == '\\'))) {
83             s++;
84             pos++;
85         }
86 
87         *d++ = *s++;
88         pos++;
89     }
90 
91     bstr_adjust_len(b, len - (s - d));
92 }
93 
94 /**
95  * Parses the Content-Disposition part header.
96  *
97  * @param[in] part
98  * @return HTP_OK on success (header found and parsed), HTP_DECLINED if there is no C-D header or if
99  *         it could not be processed, and HTP_ERROR on fatal error.
100  */
htp_mpart_part_parse_c_d(htp_multipart_part_t * part)101 htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part) {
102     // Find the C-D header.
103     htp_header_t *h = htp_table_get_c(part->headers, "content-disposition");
104     if (h == NULL) {
105         part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
106         return HTP_DECLINED;
107     }
108 
109     // Require "form-data" at the beginning of the header.
110     if (bstr_index_of_c(h->value, "form-data") != 0) {
111         part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
112         return HTP_DECLINED;
113     }
114 
115     // The parsing starts here.
116     unsigned char *data = bstr_ptr(h->value);
117     size_t len = bstr_len(h->value);
118     size_t pos = 9; // Start after "form-data"
119 
120     // Main parameter parsing loop (once per parameter).
121     while (pos < len) {
122         // Ignore whitespace.
123         while ((pos < len) && isspace(data[pos])) pos++;
124         if (pos == len) {
125             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
126             return HTP_DECLINED;
127         }
128 
129         // Expecting a semicolon.
130         if (data[pos] != ';') {
131             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
132             return HTP_DECLINED;
133         }
134         pos++;
135 
136         // Go over the whitespace before parameter name.
137         while ((pos < len) && isspace(data[pos])) pos++;
138         if (pos == len) {
139             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
140             return HTP_DECLINED;
141         }
142 
143         // Found the starting position of the parameter name.
144         size_t start = pos;
145 
146         // Look for the ending position.
147         while ((pos < len) && (!isspace(data[pos]) && (data[pos] != '='))) pos++;
148         if (pos == len) {
149             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
150             return HTP_DECLINED;
151         }
152 
153         // Ending position is in "pos" now.
154 
155         // Determine parameter type ("name", "filename", or other).
156         int param_type = htp_mpartp_cd_param_type(data, start, pos);
157 
158         // Ignore whitespace after parameter name, if any.
159         while ((pos < len) && isspace(data[pos])) pos++;
160         if (pos == len) {
161             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
162             return HTP_DECLINED;
163         }
164 
165         // Equals.
166         if (data[pos] != '=') {
167             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
168             return HTP_DECLINED;
169         }
170         pos++;
171 
172         // Go over the whitespace before the parameter value.
173         while ((pos < len) && isspace(data[pos])) pos++;
174         if (pos == len) {
175             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
176             return HTP_DECLINED;
177         }
178 
179         // Expecting a double quote.
180         if (data[pos] != '"') {
181             // Bare string or non-standard quoting, which we don't like.
182             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
183             return HTP_DECLINED;
184         }
185 
186         pos++; // Over the double quote.
187 
188         // We have the starting position of the value.
189         start = pos;
190 
191         // Find the end of the value.
192         while ((pos < len) && (data[pos] != '"')) {
193             // Check for escaping.
194             if (data[pos] == '\\') {
195                 if (pos + 1 >= len) {
196                     // A backslash as the last character in the C-D header.
197                     part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
198                     return HTP_DECLINED;
199                 }
200 
201                 // Allow " and \ to be escaped.
202                 if ((data[pos + 1] == '"')||(data[pos + 1] == '\\')) {
203                     // Go over the quoted character.
204                     pos++;
205                 }
206             }
207 
208             pos++;
209         }
210 
211         // If we've reached the end of the string that means the
212         // value was not terminated properly (the second double quote is missing).
213         if (pos == len) {
214             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
215             return HTP_DECLINED;
216         }
217 
218         // Expecting the terminating double quote.
219         if (data[pos] != '"') {
220             part->parser->multipart.flags |= HTP_MULTIPART_CD_SYNTAX_INVALID;
221             return HTP_DECLINED;
222         }
223 
224         pos++; // Over the terminating double quote.
225 
226         // Finally, process the parameter value.
227 
228         switch (param_type) {
229             case CD_PARAM_NAME:
230                 // Check that we have not seen the name parameter already.
231                 if (part->name != NULL) {
232                     part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
233                     return HTP_DECLINED;
234                 }
235 
236                 part->name = bstr_dup_mem(data + start, pos - start - 1);
237                 if (part->name == NULL) return HTP_ERROR;
238 
239                 htp_mpart_decode_quoted_cd_value_inplace(part->name);
240 
241                 break;
242 
243             case CD_PARAM_FILENAME:
244                 // Check that we have not seen the filename parameter already.
245                 if (part->file != NULL) {
246                     part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_REPEATED;
247                     return HTP_DECLINED;
248                 }
249 
250                 part->file = calloc(1, sizeof (htp_file_t));
251                 if (part->file == NULL) return HTP_ERROR;
252 
253                 part->file->fd = -1;
254                 part->file->source = HTP_FILE_MULTIPART;
255 
256                 part->file->filename = bstr_dup_mem(data + start, pos - start - 1);
257                 if (part->file->filename == NULL) {
258                     free(part->file);
259                     return HTP_ERROR;
260                 }
261 
262                 htp_mpart_decode_quoted_cd_value_inplace(part->file->filename);
263 
264                 break;
265 
266             default:
267                 // Unknown parameter.
268                 part->parser->multipart.flags |= HTP_MULTIPART_CD_PARAM_UNKNOWN;
269                 return HTP_DECLINED;
270                 break;
271         }
272 
273         // Continue to parse the next parameter, if any.
274     }
275 
276     return HTP_OK;
277 }
278 
279 /**
280  * Parses the Content-Type part header, if present.
281  *
282  * @param[in] part
283  * @return HTP_OK on success, HTP_DECLINED if the C-T header is not present, and HTP_ERROR on failure.
284  */
htp_mpart_part_parse_c_t(htp_multipart_part_t * part)285 static htp_status_t htp_mpart_part_parse_c_t(htp_multipart_part_t *part) {
286     htp_header_t *h = (htp_header_t *) htp_table_get_c(part->headers, "content-type");
287     if (h == NULL) return HTP_DECLINED;
288     return htp_parse_ct_header(h->value, &part->content_type);
289 }
290 
291 /**
292  * Processes part headers.
293  *
294  * @param[in] part
295  * @return HTP_OK on success, HTP_ERROR on failure.
296  */
htp_mpart_part_process_headers(htp_multipart_part_t * part)297 htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part) {
298     if (htp_mpart_part_parse_c_d(part) == HTP_ERROR) return HTP_ERROR;
299     if (htp_mpart_part_parse_c_t(part) == HTP_ERROR) return HTP_ERROR;
300 
301     return HTP_OK;
302 }
303 
304 /**
305  * Parses one part header.
306  *
307  * @param[in] part
308  * @param[in] data
309  * @param[in] len
310  * @return HTP_OK on success, HTP_DECLINED on parsing error, HTP_ERROR on fatal error.
311  */
htp_mpartp_parse_header(htp_multipart_part_t * part,const unsigned char * data,size_t len)312 htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
313     size_t name_start, name_end;
314     size_t value_start, value_end;
315 
316     // We do not allow NUL bytes here.
317     if (memchr(data, '\0', len) != NULL) {
318         part->parser->multipart.flags |= HTP_MULTIPART_NUL_BYTE;
319         return HTP_DECLINED;
320     }
321 
322     name_start = 0;
323 
324     // Look for the starting position of the name first.
325     size_t colon_pos = 0;
326 
327     while ((colon_pos < len)&&(htp_is_space(data[colon_pos]))) colon_pos++;
328     if (colon_pos != 0) {
329         // Whitespace before header name.
330         part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
331         return HTP_DECLINED;
332     }
333 
334     // Now look for the colon.
335     while ((colon_pos < len) && (data[colon_pos] != ':')) colon_pos++;
336 
337     if (colon_pos == len) {
338         // Missing colon.
339         part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
340         return HTP_DECLINED;
341     }
342 
343     if (colon_pos == 0) {
344         // Empty header name.
345         part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
346         return HTP_DECLINED;
347     }
348 
349     name_end = colon_pos;
350 
351     // Ignore LWS after header name.
352     size_t prev = name_end;
353     while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) {
354         prev--;
355         name_end--;
356 
357         // LWS after field name. Not allowing for now.
358         part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
359         return HTP_DECLINED;
360     }
361 
362     // Header value.
363 
364     value_start = colon_pos + 1;
365 
366     // Ignore LWS before value.
367     while ((value_start < len) && (htp_is_lws(data[value_start]))) value_start++;
368 
369     if (value_start == len) {
370         // No header value.
371         part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
372         return HTP_DECLINED;
373     }
374 
375     // Assume the value is at the end.
376     value_end = len;
377 
378     // Check that the header name is a token.
379     size_t i = name_start;
380     while (i < name_end) {
381         if (!htp_is_token(data[i])) {
382             part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_INVALID;
383             return HTP_DECLINED;
384         }
385 
386         i++;
387     }
388 
389     // Now extract the name and the value.
390     htp_header_t *h = calloc(1, sizeof (htp_header_t));
391     if (h == NULL) return HTP_ERROR;
392 
393     h->name = bstr_dup_mem(data + name_start, name_end - name_start);
394     if (h->name == NULL) {
395         free(h);
396         return HTP_ERROR;
397     }
398 
399     h->value = bstr_dup_mem(data + value_start, value_end - value_start);
400     if (h->value == NULL) {
401         bstr_free(h->name);
402         free(h);
403         return HTP_ERROR;
404     }
405 
406     if ((bstr_cmp_c_nocase(h->name, "content-disposition") != 0) && (bstr_cmp_c_nocase(h->name, "content-type") != 0)) {
407         part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_UNKNOWN;
408     }
409 
410     // Check if the header already exists.
411     htp_header_t * h_existing = htp_table_get(part->headers, h->name);
412     if (h_existing != NULL) {
413         // Add to the existing header.
414         bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value)
415                 + 2 + bstr_len(h->value));
416         if (new_value == NULL) {
417             bstr_free(h->name);
418             bstr_free(h->value);
419             free(h);
420             return HTP_ERROR;
421         }
422 
423         h_existing->value = new_value;
424         bstr_add_mem_noex(h_existing->value, ", ", 2);
425         bstr_add_noex(h_existing->value, h->value);
426 
427         // The header is no longer needed.
428         bstr_free(h->name);
429         bstr_free(h->value);
430         free(h);
431 
432         // Keep track of same-name headers.
433         h_existing->flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
434         part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_REPEATED;
435     } else {
436         // Add as a new header.
437         if (htp_table_add(part->headers, h->name, h) != HTP_OK) {
438             bstr_free(h->value);
439             bstr_free(h->name);
440             free(h);
441             return HTP_ERROR;
442         }
443     }
444 
445     return HTP_OK;
446 }
447 
448 /**
449  * Creates a new Multipart part.
450  *
451  * @param[in] parser
452  * @return New part instance, or NULL on memory allocation failure.
453  */
htp_mpart_part_create(htp_mpartp_t * parser)454 htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser) {
455     htp_multipart_part_t * part = calloc(1, sizeof (htp_multipart_part_t));
456     if (part == NULL) return NULL;
457 
458     part->headers = htp_table_create(4);
459     if (part->headers == NULL) {
460         free(part);
461         return NULL;
462     }
463 
464     part->parser = parser;
465     bstr_builder_clear(parser->part_data_pieces);
466     bstr_builder_clear(parser->part_header_pieces);
467 
468     return part;
469 }
470 
471 /**
472  * Destroys a part.
473  *
474  * @param[in] part
475  * @param[in] gave_up_data
476  */
htp_mpart_part_destroy(htp_multipart_part_t * part,int gave_up_data)477 void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data) {
478     if (part == NULL) return;
479 
480     if (part->file != NULL) {
481         bstr_free(part->file->filename);
482 
483         if (part->file->tmpname != NULL) {
484             unlink(part->file->tmpname);
485             free(part->file->tmpname);
486         }
487 
488         free(part->file);
489         part->file = NULL;
490     }
491 
492     if ((!gave_up_data) || (part->type != MULTIPART_PART_TEXT)) {
493         bstr_free(part->name);
494         bstr_free(part->value);
495     }
496 
497     bstr_free(part->content_type);
498 
499     if (part->headers != NULL) {
500         htp_header_t *h = NULL;
501         for (size_t i = 0, n = htp_table_size(part->headers); i < n; i++) {
502             h = htp_table_get_index(part->headers, i, NULL);
503             bstr_free(h->name);
504             bstr_free(h->value);
505             free(h);
506         }
507 
508         htp_table_destroy(part->headers);
509     }
510 
511     free(part);
512 }
513 
514 /**
515  * Finalizes part processing.
516  *
517  * @param[in] part
518  * @return HTP_OK on success, HTP_ERROR on failure.
519  */
htp_mpart_part_finalize_data(htp_multipart_part_t * part)520 htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part) {
521     // Determine if this part is the epilogue.
522 
523     if (part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
524         if (part->type == MULTIPART_PART_UNKNOWN) {
525             // Assume that the unknown part after the last boundary is the epilogue.
526             part->parser->current_part->type = MULTIPART_PART_EPILOGUE;
527 
528             // But if we've already seen a part we thought was the epilogue,
529             // raise HTP_MULTIPART_PART_UNKNOWN. Multiple epilogues are not allowed.
530             if (part->parser->multipart.flags & HTP_MULTIPART_HAS_EPILOGUE) {
531                 part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
532             }
533 
534             part->parser->multipart.flags |= HTP_MULTIPART_HAS_EPILOGUE;
535         } else {
536             part->parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
537         }
538     }
539 
540     // Sanity checks.
541 
542     // Have we seen complete part headers? If we have not, that means that the part ended prematurely.
543     if ((part->parser->current_part->type != MULTIPART_PART_EPILOGUE) && (part->parser->current_part_mode != MODE_DATA)) {
544         part->parser->multipart.flags |= HTP_MULTIPART_PART_INCOMPLETE;
545     }
546 
547     // Have we been able to determine the part type? If not, this means
548     // that the part did not contain the C-D header.
549     if (part->type == MULTIPART_PART_UNKNOWN) {
550         part->parser->multipart.flags |= HTP_MULTIPART_PART_UNKNOWN;
551     }
552 
553     // Finalize part value.
554 
555     if (part->type == MULTIPART_PART_FILE) {
556         // Notify callbacks about the end of the file.
557         htp_mpartp_run_request_file_data_hook(part, NULL, 0);
558 
559         // If we are storing the file to disk, close the file descriptor.
560         if (part->file->fd != -1) {
561             close(part->file->fd);
562         }
563     } else {
564         // Combine value pieces into a single buffer.
565         if (bstr_builder_size(part->parser->part_data_pieces) > 0) {
566             part->value = bstr_builder_to_str(part->parser->part_data_pieces);
567             bstr_builder_clear(part->parser->part_data_pieces);
568         }
569     }
570 
571     return HTP_OK;
572 }
573 
htp_mpartp_run_request_file_data_hook(htp_multipart_part_t * part,const unsigned char * data,size_t len)574 htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len) {
575     if (part->parser->cfg == NULL) return HTP_OK;
576 
577     // Keep track of the file length.
578     part->file->len += len;
579 
580     // Package data for the callbacks.
581     htp_file_data_t file_data;
582     file_data.file = part->file;
583     file_data.data = data;
584     file_data.len = (const size_t) len;
585 
586     // Send data to callbacks
587     htp_status_t rc = htp_hook_run_all(part->parser->cfg->hook_request_file_data, &file_data);
588     if (rc != HTP_OK) return rc;
589 
590     return HTP_OK;
591 }
592 
593 /**
594  * Handles part data.
595  *
596  * @param[in] part
597  * @param[in] data
598  * @param[in] len
599  * @param[in] is_line
600  * @return HTP_OK on success, HTP_ERROR on failure.
601  */
htp_mpart_part_handle_data(htp_multipart_part_t * part,const unsigned char * data,size_t len,int is_line)602 htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line) {
603     #if HTP_DEBUG
604     fprintf(stderr, "Part type %d mode %d is_line %d\n", part->type, part->parser->current_part_mode, is_line);
605     fprint_raw_data(stderr, "htp_mpart_part_handle_data: data chunk", data, len);
606     #endif
607 
608     // Keep track of raw part length.
609     part->len += len;
610 
611     // If we're processing a part that came after the last boundary, then we're not sure if it
612     // is the epilogue part or some other part (in case of evasion attempt). For that reason we
613     // will keep all its data in the part_data_pieces structure. If it ends up not being the
614     // epilogue, this structure will be cleared.
615     if ((part->parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) && (part->type == MULTIPART_PART_UNKNOWN)) {
616         bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
617     }
618 
619     if (part->parser->current_part_mode == MODE_LINE) {
620         // Line mode.
621 
622         if (is_line) {
623             // End of the line.
624 
625             bstr *line = NULL;
626 
627             // If this line came to us in pieces, combine them now into a single buffer.
628             if (bstr_builder_size(part->parser->part_header_pieces) > 0) {
629                 bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
630                 line = bstr_builder_to_str(part->parser->part_header_pieces);
631                 if (line == NULL) return HTP_ERROR;
632                 bstr_builder_clear(part->parser->part_header_pieces);
633 
634                 data = bstr_ptr(line);
635                 len = bstr_len(line);
636             }
637 
638             // Ignore the line endings.
639             if (len > 1) {
640                 if (data[len - 1] == LF) len--;
641                 if (data[len - 1] == CR) len--;
642             } else if (len > 0) {
643                 if (data[len - 1] == LF) len--;
644             }
645 
646             // Is it an empty line?
647             if (len == 0) {
648                 // Empty line; process headers and switch to data mode.
649 
650                 // Process the pending header, if any.
651                 if (part->parser->pending_header_line != NULL) {
652                     if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
653                             bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
654                     {
655                         bstr_free(line);
656                         return HTP_ERROR;
657                     }
658 
659                     bstr_free(part->parser->pending_header_line);
660                     part->parser->pending_header_line = NULL;
661                 }
662 
663                 if (htp_mpart_part_process_headers(part) == HTP_ERROR) {
664                     bstr_free(line);
665                     return HTP_ERROR;
666                 }
667 
668                 part->parser->current_part_mode = MODE_DATA;
669                 bstr_builder_clear(part->parser->part_header_pieces);
670 
671                 if (part->file != NULL) {
672                     // Changing part type because we have a filename.
673                     part->type = MULTIPART_PART_FILE;
674 
675                     if ((part->parser->extract_files) && (part->parser->file_count < part->parser->extract_limit)) {
676                         char buf[255];
677 
678                         strncpy(buf, part->parser->extract_dir, 254);
679                         strncat(buf, "/libhtp-multipart-file-XXXXXX", 254 - strlen(buf));
680 
681                         part->file->tmpname = strdup(buf);
682                         if (part->file->tmpname == NULL) {
683                             bstr_free(line);
684                             return HTP_ERROR;
685                         }
686 
687                         mode_t previous_mask = umask(S_IXUSR | S_IRWXG | S_IRWXO);
688                         part->file->fd = mkstemp(part->file->tmpname);
689                         umask(previous_mask);
690 
691                         if (part->file->fd < 0) {
692                             bstr_free(line);
693                             return HTP_ERROR;
694                         }
695 
696                         part->parser->file_count++;
697                     }
698                 } else if (part->name != NULL) {
699                     // Changing part type because we have a name.
700                     part->type = MULTIPART_PART_TEXT;
701                     bstr_builder_clear(part->parser->part_data_pieces);
702                 } else {
703                     // Do nothing; the type stays MULTIPART_PART_UNKNOWN.
704                 }
705             } else {
706                 // Not an empty line.
707 
708                 // Is there a pending header?
709                 if (part->parser->pending_header_line == NULL) {
710                     if (line != NULL) {
711                         part->parser->pending_header_line = line;
712                         line = NULL;
713                     } else {
714                         part->parser->pending_header_line = bstr_dup_mem(data, len);
715                         if (part->parser->pending_header_line == NULL) return HTP_ERROR;
716                     }
717                 } else {
718                     // Is this a folded line?
719                     if (isspace(data[0])) {
720                         // Folding; add to the existing line.
721                         part->parser->multipart.flags |= HTP_MULTIPART_PART_HEADER_FOLDING;
722                         part->parser->pending_header_line = bstr_add_mem(part->parser->pending_header_line, data, len);
723                         if (part->parser->pending_header_line == NULL) {
724                             bstr_free(line);
725                             return HTP_ERROR;
726                         }
727                     } else {
728                         // Process the pending header line.
729                         if (htp_mpartp_parse_header(part, bstr_ptr(part->parser->pending_header_line),
730                                 bstr_len(part->parser->pending_header_line)) == HTP_ERROR)
731                         {
732                             bstr_free(line);
733                             return HTP_ERROR;
734                         }
735 
736                         bstr_free(part->parser->pending_header_line);
737 
738                         if (line != NULL) {
739                             part->parser->pending_header_line = line;
740                             line = NULL;
741                         } else {
742                             part->parser->pending_header_line = bstr_dup_mem(data, len);
743                             if (part->parser->pending_header_line == NULL) return HTP_ERROR;
744                         }
745                     }
746                 }
747             }
748 
749             bstr_free(line);
750             line = NULL;
751         } else {
752             // Not end of line; keep the data chunk for later.
753             bstr_builder_append_mem(part->parser->part_header_pieces, data, len);
754         }
755     } else {
756         // Data mode; keep the data chunk for later (but not if it is a file).
757         switch (part->type) {
758             case MULTIPART_PART_EPILOGUE:
759             case MULTIPART_PART_PREAMBLE:
760             case MULTIPART_PART_TEXT:
761             case MULTIPART_PART_UNKNOWN:
762                 // Make a copy of the data in RAM.
763                 bstr_builder_append_mem(part->parser->part_data_pieces, data, len);
764                 break;
765 
766             case MULTIPART_PART_FILE:
767                 // Invoke file data callbacks.
768                 htp_mpartp_run_request_file_data_hook(part, data, len);
769 
770                 // Optionally, store the data in a file.
771                 if (part->file->fd != -1) {
772                     if (write(part->file->fd, data, len) < 0) {
773                         return HTP_ERROR;
774                     }
775                 }
776                 break;
777 
778             default:
779                 // Internal error.
780                 return HTP_ERROR;
781                 break;
782         }
783     }
784 
785     return HTP_OK;
786 }
787 
788 /**
789  * Handles data, creating new parts as necessary.
790  *
791  * @param[in] mpartp
792  * @param[in] data
793  * @param[in] len
794  * @param[in] is_line
795  * @return HTP_OK on success, HTP_ERROR on failure.
796  */
htp_mpartp_handle_data(htp_mpartp_t * parser,const unsigned char * data,size_t len,int is_line)797 static htp_status_t htp_mpartp_handle_data(htp_mpartp_t *parser, const unsigned char *data, size_t len, int is_line) {
798     if (len == 0) return HTP_OK;
799 
800     // Do we have a part already?
801     if (parser->current_part == NULL) {
802         // Create a new part.
803         parser->current_part = htp_mpart_part_create(parser);
804         if (parser->current_part == NULL) return HTP_ERROR;
805 
806         if (parser->multipart.boundary_count == 0) {
807             // We haven't seen a boundary yet, so this must be the preamble part.
808             parser->current_part->type = MULTIPART_PART_PREAMBLE;
809             parser->multipart.flags |= HTP_MULTIPART_HAS_PREAMBLE;
810             parser->current_part_mode = MODE_DATA;
811         } else {
812             // Part after preamble.
813             parser->current_part_mode = MODE_LINE;
814         }
815 
816         // Add part to the list.
817         htp_list_push(parser->multipart.parts, parser->current_part);
818 
819         #ifdef HTP_DEBUG
820         fprintf(stderr, "Created new part type %d\n", parser->current_part->type);
821         #endif
822     }
823 
824     // Send data to the part.
825     return htp_mpart_part_handle_data(parser->current_part, data, len, is_line);
826 }
827 
828 /**
829  * Handles a boundary event, which means that it will finalize a part if one exists.
830  *
831  * @param[in] mpartp
832  * @return HTP_OK on success, HTP_ERROR on failure.
833  */
htp_mpartp_handle_boundary(htp_mpartp_t * parser)834 static htp_status_t htp_mpartp_handle_boundary(htp_mpartp_t *parser) {
835     #if HTP_DEBUG
836     fprintf(stderr, "htp_mpartp_handle_boundary\n");
837     #endif
838 
839     if (parser->current_part != NULL) {
840         if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) {
841             return HTP_ERROR;
842         }
843 
844         // We're done with this part
845         parser->current_part = NULL;
846 
847         // Revert to line mode
848         parser->current_part_mode = MODE_LINE;
849     }
850 
851     return HTP_OK;
852 }
853 
htp_mpartp_init_boundary(htp_mpartp_t * parser,unsigned char * data,size_t len)854 static htp_status_t htp_mpartp_init_boundary(htp_mpartp_t *parser, unsigned char *data, size_t len) {
855     if ((parser == NULL) || (data == NULL)) return HTP_ERROR;
856 
857     // Copy the boundary and convert it to lowercase.
858 
859     parser->multipart.boundary_len = len + 4;
860     parser->multipart.boundary = malloc(parser->multipart.boundary_len + 1);
861     if (parser->multipart.boundary == NULL) return HTP_ERROR;
862 
863     parser->multipart.boundary[0] = CR;
864     parser->multipart.boundary[1] = LF;
865     parser->multipart.boundary[2] = '-';
866     parser->multipart.boundary[3] = '-';
867 
868     for (size_t i = 0; i < len; i++) {
869         parser->multipart.boundary[i + 4] = data[i];
870     }
871 
872     parser->multipart.boundary[parser->multipart.boundary_len] = '\0';
873 
874     // We're starting in boundary-matching mode. The first boundary can appear without the
875     // CRLF, and our starting state expects that. If we encounter non-boundary data, the
876     // state will switch to data mode. Then, if the data is CRLF or LF, we will go back
877     // to boundary matching. Thus, we handle all the possibilities.
878 
879     parser->parser_state = STATE_BOUNDARY;
880     parser->boundary_match_pos = 2;
881 
882     return HTP_OK;
883 }
884 
htp_mpartp_create(htp_cfg_t * cfg,bstr * boundary,uint64_t flags)885 htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags) {
886     if ((cfg == NULL) || (boundary == NULL)) return NULL;
887 
888     htp_mpartp_t *parser = calloc(1, sizeof (htp_mpartp_t));
889     if (parser == NULL) return NULL;
890 
891     parser->cfg = cfg;
892 
893     parser->boundary_pieces = bstr_builder_create();
894     if (parser->boundary_pieces == NULL) {
895         htp_mpartp_destroy(parser);
896         return NULL;
897     }
898 
899     parser->part_data_pieces = bstr_builder_create();
900     if (parser->part_data_pieces == NULL) {
901         htp_mpartp_destroy(parser);
902         return NULL;
903     }
904 
905     parser->part_header_pieces = bstr_builder_create();
906     if (parser->part_header_pieces == NULL) {
907         htp_mpartp_destroy(parser);
908         return NULL;
909     }
910 
911     parser->multipart.parts = htp_list_create(64);
912     if (parser->multipart.parts == NULL) {
913         htp_mpartp_destroy(parser);
914         return NULL;
915     }
916 
917     parser->multipart.flags = flags;
918     parser->parser_state = STATE_INIT;
919     parser->extract_files = cfg->extract_request_files;
920     parser->extract_dir = cfg->tmpdir;
921     if (cfg->extract_request_files_limit >= 0) {
922         parser->extract_limit = cfg->extract_request_files_limit;
923     } else {
924         parser->extract_limit = DEFAULT_FILE_EXTRACT_LIMIT;
925     }
926     parser->handle_data = htp_mpartp_handle_data;
927     parser->handle_boundary = htp_mpartp_handle_boundary;
928 
929     // Initialize the boundary.
930     htp_status_t rc = htp_mpartp_init_boundary(parser, bstr_ptr(boundary), bstr_len(boundary));
931     if (rc != HTP_OK) {
932         htp_mpartp_destroy(parser);
933         return NULL;
934     }
935 
936     // On success, the ownership of the boundary parameter
937     // is transferred to us. We made a copy, and so we
938     // don't need it any more.
939     bstr_free(boundary);
940 
941     return parser;
942 }
943 
/**
 * Releases a parser together with its boundary copy, its working buffers and
 * every part recorded in its list of parts. A NULL parser is ignored.
 *
 * @param[in] parser
 */
void htp_mpartp_destroy(htp_mpartp_t *parser) {
    if (parser == NULL) return;

    // free() accepts NULL, so no guard is needed for a boundary
    // that was never allocated.
    free(parser->multipart.boundary);

    bstr_builder_destroy(parser->boundary_pieces);
    bstr_builder_destroy(parser->part_header_pieces);
    bstr_free(parser->pending_header_line);
    bstr_builder_destroy(parser->part_data_pieces);

    // Destroy each recorded part, then the list itself.
    if (parser->multipart.parts != NULL) {
        size_t count = htp_list_size(parser->multipart.parts);

        for (size_t idx = 0; idx < count; idx++) {
            htp_mpart_part_destroy(htp_list_get(parser->multipart.parts, idx), parser->gave_up_data);
        }

        htp_list_destroy(parser->multipart.parts);
    }

    free(parser);
}
968 
969 /**
970  * Processes set-aside data.
971  *
972  * @param[in] mpartp
973  * @param[in] data
974  * @param[in] pos
975  * @param[in] startpos
976  * @param[in] return_pos
977  * @param[in] matched
978  * @return HTP_OK on success, HTP_ERROR on failure.
979  */
htp_martp_process_aside(htp_mpartp_t * parser,int matched)980 static htp_status_t htp_martp_process_aside(htp_mpartp_t *parser, int matched) {
981     // The stored data pieces can contain up to one line. If we're in data mode and there
982     // was no boundary match, things are straightforward -- we process everything as data.
983     // If there was a match, we need to take care to not send the line ending as data, nor
984     // anything that follows (because it's going to be a part of the boundary). Similarly,
985     // when we are in line mode, we need to split the first data chunk, processing the first
986     // part as line and the second part as data.
987 
988     #ifdef HTP_DEBUG
989     fprintf(stderr, "mpartp_process_aside matched %d current_part_mode %d\n", matched, parser->current_part_mode);
990     #endif
991 
992     // Do we need to do any chunk splitting?
993     if (matched || (parser->current_part_mode == MODE_LINE)) {
994         // Line mode or boundary match
995 
996         // Process the CR byte, if set aside.
997         if ((!matched) && (parser->cr_aside)) {
998             // Treat as part data, when there is not a match.
999             parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
1000             parser->cr_aside = 0;
1001         } else {
1002             // Treat as boundary, when there is a match.
1003             parser->cr_aside = 0;
1004         }
1005 
1006         // We know that we went to match a boundary because
1007         // we saw a new line. Now we have to find that line and
1008         // process it. It's either going to be in the current chunk,
1009         // or in the first stored chunk.
1010         if (bstr_builder_size(parser->boundary_pieces) > 0) {
1011             int first = 1;
1012             for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) {
1013                 bstr *b = htp_list_get(parser->boundary_pieces->pieces, i);
1014 
1015                 if (first) {
1016                     first = 0;
1017 
1018                     // Split the first chunk.
1019 
1020                     if (!matched) {
1021                         // In line mode, we are OK with line endings.
1022                         parser->handle_data(parser, bstr_ptr(b), parser->boundary_candidate_pos, /* line */ 1);
1023                     } else {
1024                         // But if there was a match, the line ending belongs to the boundary.
1025                         unsigned char *dx = bstr_ptr(b);
1026                         size_t lx = parser->boundary_candidate_pos;
1027 
1028                         // Remove LF or CRLF.
1029                         if ((lx > 0) && (dx[lx - 1] == LF)) {
1030                             lx--;
1031                             // Remove CR.
1032                             if ((lx > 0) && (dx[lx - 1] == CR)) {
1033                                 lx--;
1034                             }
1035                         }
1036 
1037                         parser->handle_data(parser, dx, lx, /* not a line */ 0);
1038                     }
1039 
1040                     // The second part of the split chunks belongs to the boundary
1041                     // when matched, data otherwise.
1042                     if (!matched) {
1043                         parser->handle_data(parser, bstr_ptr(b) + parser->boundary_candidate_pos,
1044                                 bstr_len(b) - parser->boundary_candidate_pos, /* not a line */ 0);
1045                     }
1046                 } else {
1047                     // Do not send data if there was a boundary match. The stored
1048                     // data belongs to the boundary.
1049                     if (!matched) {
1050                         parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0);
1051                     }
1052                 }
1053             }
1054 
1055             bstr_builder_clear(parser->boundary_pieces);
1056         }
1057     } else {
1058         // Data mode and no match.
1059 
1060         // In data mode, we process the lone CR byte as data.
1061         if (parser->cr_aside) {
1062             parser->handle_data(parser, (const unsigned char *)&"\r", 1, /* not a line */ 0);
1063             parser->cr_aside = 0;
1064         }
1065 
1066         // We then process any pieces that we might have stored, also as data.
1067         if (bstr_builder_size(parser->boundary_pieces) > 0) {
1068             for (size_t i = 0, n = htp_list_size(parser->boundary_pieces->pieces); i < n; i++) {
1069                 bstr *b = htp_list_get(parser->boundary_pieces->pieces, i);
1070                 parser->handle_data(parser, bstr_ptr(b), bstr_len(b), /* not a line */ 0);
1071             }
1072 
1073             bstr_builder_clear(parser->boundary_pieces);
1074         }
1075     }
1076 
1077     return HTP_OK;
1078 }
1079 
htp_mpartp_finalize(htp_mpartp_t * parser)1080 htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser) {
1081     if (parser->current_part != NULL) {
1082         // Process buffered data, if any.
1083         htp_martp_process_aside(parser, 0);
1084 
1085         // Finalize the last part.
1086         if (htp_mpart_part_finalize_data(parser->current_part) != HTP_OK) return HTP_ERROR;
1087 
1088         // It is OK to end abruptly in the epilogue part, but not in any other.
1089         if (parser->current_part->type != MULTIPART_PART_EPILOGUE) {
1090             parser->multipart.flags |= HTP_MULTIPART_INCOMPLETE;
1091         }
1092     }
1093 
1094     bstr_builder_clear(parser->boundary_pieces);
1095 
1096     return HTP_OK;
1097 }
1098 
htp_mpartp_parse(htp_mpartp_t * parser,const void * _data,size_t len)1099 htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *_data, size_t len) {
1100     unsigned char *data = (unsigned char *) _data;
1101 
1102     // The current position in the entire input buffer.
1103     size_t pos = 0;
1104 
1105     // The position of the first unprocessed byte of data. We split the
1106     // input buffer into smaller chunks, according to their purpose. Once
1107     // an entire such smaller chunk is processed, we move to the next
1108     // and update startpos.
1109     size_t startpos = 0;
1110 
1111     // The position of the (possible) boundary. We investigate for possible
1112     // boundaries whenever we encounter CRLF or just LF. If we don't find a
1113     // boundary we need to go back, and this is what data_return_pos helps with.
1114     size_t data_return_pos = 0;
1115 
1116     #if HTP_DEBUG
1117     fprint_raw_data(stderr, "htp_mpartp_parse: data chunk", data, len);
1118     #endif
1119 
1120     // While there's data in the input buffer.
1121 
1122     while (pos < len) {
1123 
1124 STATE_SWITCH:
1125         #if HTP_DEBUG
1126         fprintf(stderr, "htp_mpartp_parse: state %d pos %zd startpos %zd\n", parser->parser_state, pos, startpos);
1127         #endif
1128 
1129         switch (parser->parser_state) {
1130 
1131             case STATE_INIT:
1132                 // Incomplete initialization.
1133                 return HTP_ERROR;
1134                 break;
1135 
1136             case STATE_DATA: // Handle part data.
1137 
1138                 // While there's data in the input buffer.
1139 
1140                 while (pos < len) {
1141                     // Check for a CRLF-terminated line.
1142                     if (data[pos] == CR) {
1143                         // We have a CR byte.
1144 
1145                         // Is this CR the last byte in the input buffer?
1146                         if (pos + 1 == len) {
1147                             // We have CR as the last byte in input. We are going to process
1148                             // what we have in the buffer as data, except for the CR byte,
1149                             // which we're going to leave for later. If it happens that a
1150                             // CR is followed by a LF and then a boundary, the CR is going
1151                             // to be discarded.
1152                             pos++; // Advance over CR.
1153                             parser->cr_aside = 1;
1154                         } else {
1155                             // We have CR and at least one more byte in the buffer, so we
1156                             // are able to test for the LF byte too.
1157                             if (data[pos + 1] == LF) {
1158                                 pos += 2; // Advance over CR and LF.
1159 
1160                                 parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
1161 
1162                                 // Prepare to switch to boundary testing.
1163                                 data_return_pos = pos;
1164                                 parser->boundary_candidate_pos = pos - startpos;
1165                                 parser->boundary_match_pos = 2; // After LF; position of the first dash.
1166                                 parser->parser_state = STATE_BOUNDARY;
1167 
1168                                 goto STATE_SWITCH;
1169                             } else {
1170                                 // This is not a new line; advance over the
1171                                 // byte and clear the CR set-aside flag.
1172                                 pos++;
1173                                 parser->cr_aside = 0;
1174                             }
1175                         }
1176                     } else if (data[pos] == LF) { // Check for a LF-terminated line.
1177                         pos++; // Advance over LF.
1178 
1179                         // Did we have a CR in the previous input chunk?
1180                         if (parser->cr_aside == 0) {
1181                             parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
1182                         } else {
1183                             parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
1184                         }
1185 
1186                         // Prepare to switch to boundary testing.
1187                         data_return_pos = pos;
1188                         parser->boundary_candidate_pos = pos - startpos;
1189                         parser->boundary_match_pos = 2; // After LF; position of the first dash.
1190                         parser->parser_state = STATE_BOUNDARY;
1191 
1192                         goto STATE_SWITCH;
1193                     } else {
1194                         // Take one byte from input
1195                         pos++;
1196 
1197                         // Earlier we might have set aside a CR byte not knowing if the next
1198                         // byte is a LF. Now we know that it is not, and so we can release the CR.
1199                         if (parser->cr_aside) {
1200                             parser->handle_data(parser, (unsigned char *) &"\r", 1, /* not a line */ 0);
1201                             parser->cr_aside = 0;
1202                         }
1203                     }
1204                 } // while
1205 
1206                 // No more data in the input buffer; process the data chunk.
1207                 parser->handle_data(parser, data + startpos, pos - startpos - parser->cr_aside, /* not a line */ 0);
1208 
1209                 break;
1210 
1211             case STATE_BOUNDARY: // Handle a possible boundary.
1212                 while (pos < len) {
1213                     #ifdef HTP_DEBUG
1214                     fprintf(stderr, "boundary (len %zd pos %zd char %d) data char %d\n", parser->multipart.boundary_len,
1215                             parser->boundary_match_pos, parser->multipart.boundary[parser->boundary_match_pos], tolower(data[pos]));
1216                     #endif
1217 
1218                     // Check if the bytes match.
1219                     if (!(data[pos] == parser->multipart.boundary[parser->boundary_match_pos])) {
1220                         // Boundary mismatch.
1221 
1222                         // Process stored (buffered) data.
1223                         htp_martp_process_aside(parser, /* no match */ 0);
1224 
1225                         // Return back where data parsing left off.
1226                         if (parser->current_part_mode == MODE_LINE) {
1227                             // In line mode, we process the line.
1228                             parser->handle_data(parser, data + startpos, data_return_pos - startpos, /* line */ 1);
1229                             startpos = data_return_pos;
1230                         } else {
1231                             // In data mode, we go back where we left off.
1232                             pos = data_return_pos;
1233                         }
1234 
1235                         parser->parser_state = STATE_DATA;
1236 
1237                         goto STATE_SWITCH;
1238                     }
1239 
1240                     // Consume one matched boundary byte
1241                     pos++;
1242                     parser->boundary_match_pos++;
1243 
1244                     // Have we seen all boundary bytes?
1245                     if (parser->boundary_match_pos == parser->multipart.boundary_len) {
1246                         // Boundary match!
1247 
1248                         // Process stored (buffered) data.
1249                         htp_martp_process_aside(parser, /* boundary match */ 1);
1250 
1251                         // Process data prior to the boundary in the current input buffer.
1252                         // Because we know this is the last chunk before boundary, we can
1253                         // remove the line endings.
1254                         size_t dlen = data_return_pos - startpos;
1255                         if ((dlen > 0) && (data[startpos + dlen - 1] == LF)) dlen--;
1256                         if ((dlen > 0) && (data[startpos + dlen - 1] == CR)) dlen--;
1257                         parser->handle_data(parser, data + startpos, dlen, /* line */ 1);
1258 
1259                         // Keep track of how many boundaries we've seen.
1260                         parser->multipart.boundary_count++;
1261 
1262                         if (parser->multipart.flags & HTP_MULTIPART_SEEN_LAST_BOUNDARY) {
1263                             parser->multipart.flags |= HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY;
1264                         }
1265 
1266                         // Run boundary match.
1267                         parser->handle_boundary(parser);
1268 
1269                         // We now need to check if this is the last boundary in the payload
1270                         parser->parser_state = STATE_BOUNDARY_IS_LAST2;
1271 
1272                         goto STATE_SWITCH;
1273                     }
1274                 } // while
1275 
1276                 // No more data in the input buffer; store (buffer) the unprocessed
1277                 // part for later, for after we find out if this is a boundary.
1278                 bstr_builder_append_mem(parser->boundary_pieces, data + startpos, len - startpos);
1279 
1280                 break;
1281 
1282             case STATE_BOUNDARY_IS_LAST2:
1283                 // Examine the first byte after the last boundary character. If it is
1284                 // a dash, then we maybe processing the last boundary in the payload. If
1285                 // it is not, move to eat all bytes until the end of the line.
1286 
1287                 if (data[pos] == '-') {
1288                     // Found one dash, now go to check the next position.
1289                     pos++;
1290                     parser->parser_state = STATE_BOUNDARY_IS_LAST1;
1291                 } else {
1292                     // This is not the last boundary. Change state but
1293                     // do not advance the position, allowing the next
1294                     // state to process the byte.
1295                     parser->parser_state = STATE_BOUNDARY_EAT_LWS;
1296                 }
1297                 break;
1298 
1299             case STATE_BOUNDARY_IS_LAST1:
1300                 // Examine the byte after the first dash; expected to be another dash.
1301                 // If not, eat all bytes until the end of the line.
1302 
1303                 if (data[pos] == '-') {
1304                     // This is indeed the last boundary in the payload.
1305                     pos++;
1306                     parser->multipart.flags |= HTP_MULTIPART_SEEN_LAST_BOUNDARY;
1307                     parser->parser_state = STATE_BOUNDARY_EAT_LWS;
1308                 } else {
1309                     // The second character is not a dash, and so this is not
1310                     // the final boundary. Raise the flag for the first dash,
1311                     // and change state to consume the rest of the boundary line.
1312                     parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
1313                     parser->parser_state = STATE_BOUNDARY_EAT_LWS;
1314                 }
1315                 break;
1316 
1317             case STATE_BOUNDARY_EAT_LWS:
1318                 if (data[pos] == CR) {
1319                     // CR byte, which could indicate a CRLF line ending.
1320                     pos++;
1321                     parser->parser_state = STATE_BOUNDARY_EAT_LWS_CR;
1322                 } else if (data[pos] == LF) {
1323                     // LF line ending; we're done with boundary processing; data bytes follow.
1324                     pos++;
1325                     startpos = pos;
1326                     parser->multipart.flags |= HTP_MULTIPART_LF_LINE;
1327                     parser->parser_state = STATE_DATA;
1328                 } else {
1329                     if (htp_is_lws(data[pos])) {
1330                         // Linear white space is allowed here.
1331                         parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_LWS_AFTER;
1332                         pos++;
1333                     } else {
1334                         // Unexpected byte; consume, but remain in the same state.
1335                         parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
1336                         pos++;
1337                     }
1338                 }
1339                 break;
1340 
1341             case STATE_BOUNDARY_EAT_LWS_CR:
1342                 if (data[pos] == LF) {
1343                     // CRLF line ending; we're done with boundary processing; data bytes follow.
1344                     pos++;
1345                     startpos = pos;
1346                     parser->multipart.flags |= HTP_MULTIPART_CRLF_LINE;
1347                     parser->parser_state = STATE_DATA;
1348                 } else {
1349                     // Not a line ending; start again, but do not process this byte.
1350                     parser->multipart.flags |= HTP_MULTIPART_BBOUNDARY_NLWS_AFTER;
1351                     parser->parser_state = STATE_BOUNDARY_EAT_LWS;
1352                 }
1353                 break;
1354         } // switch
1355     }
1356 
1357     return HTP_OK;
1358 }
1359 
/**
 * Examines a boundary value and raises flags for anything out of the ordinary:
 * HTP_MULTIPART_HBOUNDARY_INVALID for a bad length or a character outside of
 * the RFC set, HTP_MULTIPART_HBOUNDARY_UNUSUAL for characters that the RFC
 * allows but that are not commonly seen. Flags are only ever added.
 *
 * @param[in] boundary
 * @param[in,out] flags
 */
static void htp_mpartp_validate_boundary(bstr *boundary, uint64_t *flags) {
    /*

    RFC 1341:

    The only mandatory parameter for the multipart  Content-Type
    is  the  boundary  parameter,  which  consists  of  1  to 70
    characters from a set of characters known to be very  robust
    through  email  gateways,  and  NOT ending with white space.
    (If a boundary appears to end with white  space,  the  white
    space  must be presumed to have been added by a gateway, and
    should  be  deleted.)   It  is  formally  specified  by  the
    following BNF:

    boundary := 0*69<bchars> bcharsnospace

    bchars := bcharsnospace / " "

    bcharsnospace :=    DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_"
                          / "," / "-" / "." / "/" / ":" / "=" / "?"
     */

    /*
     Chrome: Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD
    Firefox: Content-Type: multipart/form-data; boundary=---------------------------21071316483088
       MSIE: Content-Type: multipart/form-data; boundary=---------------------------7dd13e11c0452
      Opera: Content-Type: multipart/form-data; boundary=----------2JL5oh7QWEDwyBllIRc7fh
     Safari: Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryre6zL3b0BelnTY5S
     */

    const unsigned char *bytes = bstr_ptr(boundary);
    const size_t total = bstr_len(boundary);

    // The RFC permits 1 to 70 characters; boundaries
    // seen in practice are usually shorter.
    if ((total < 1) || (total > 70)) {
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
    }

    // Inspect every character. This is stricter than the RFC,
    // which appears to allow many separator characters.
    for (size_t idx = 0; idx < total; idx++) {
        const unsigned char c = bytes[idx];

        // Digits, ASCII letters and the dash need no attention.
        if ((c >= '0') && (c <= '9')) continue;
        if ((c >= 'a') && (c <= 'z')) continue;
        if ((c >= 'A') && (c <= 'Z')) continue;
        if (c == '-') continue;

        if (memchr("'()+_,./:=?", c, 11) != NULL) {
            // Allowed by the RFC, but not commonly used.
            *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
        } else {
            // Not a boundary character at all.
            *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
        }
    }
}
1434 
/**
 * Looks through a Content-Type value for occurrences of the word "boundary"
 * (in any letter case) that are followed, somewhere later in the value, by an
 * equals sign. HTP_MULTIPART_HBOUNDARY_INVALID is raised if such an occurrence
 * is not written entirely in lowercase, or if there is more than one.
 *
 * @param[in] content_type
 * @param[in,out] flags
 */
static void htp_mpartp_validate_content_type(bstr *content_type, uint64_t *flags) {
    unsigned char *cursor = bstr_ptr(content_type);
    size_t remaining = bstr_len(content_type);
    size_t occurrences = 0;

    while (remaining > 0) {
        int offset = bstr_util_mem_index_of_c_nocase(cursor, remaining, "boundary");
        if (offset == -1) break;

        // Move to the start of the word.
        cursor += offset;
        remaining -= offset;

        // WebKit uses the word "boundary" inside its boundary values, e.g.
        // "multipart/form-data; boundary=----WebKitFormBoundaryT4AfwQCOgIxNVwlD".
        // To work around that, an occurrence only counts if an equals
        // character follows it.
        if (memchr(cursor, '=', remaining) == NULL) break;

        occurrences++;

        // Any of the 8 letters not being lowercase is a case variation.
        for (size_t k = 0; k < 8; k++) {
            if ((cursor[k] < 'a') || (cursor[k] > 'z')) {
                *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
            }
        }

        // Continue the search after the word.
        cursor += 8;
        remaining -= 8;
    }

    // More than one counted occurrence is not acceptable.
    if (occurrences > 1) {
        *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
    }
}
1471 
htp_mpartp_find_boundary(bstr * content_type,bstr ** boundary,uint64_t * flags)1472 htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *flags) {
1473     if ((content_type == NULL) || (boundary == NULL) || (flags == NULL)) return HTP_ERROR;
1474 
1475     // Our approach is to ignore the MIME type and instead just look for
1476     // the boundary. This approach is more reliable in the face of various
1477     // evasion techniques that focus on submitting invalid MIME types.
1478 
1479     // Reset flags.
1480     *flags = 0;
1481 
1482     // Look for the boundary, case insensitive.
1483     int i = bstr_index_of_c_nocase(content_type, "boundary");
1484     if (i == -1) return HTP_DECLINED;
1485 
1486     unsigned char *data = bstr_ptr(content_type) + i + 8;
1487     size_t len = bstr_len(content_type) - i - 8;
1488 
1489     // Look for the boundary value.
1490     size_t pos = 0;
1491     while ((pos < len) && (data[pos] != '=')) {
1492         if (htp_is_space(data[pos])) {
1493             // It is unusual to see whitespace before the equals sign.
1494             *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
1495         } else {
1496             // But seeing a non-whitespace character may indicate evasion.
1497             *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1498         }
1499 
1500         pos++;
1501     }
1502 
1503     if (pos >= len) {
1504         // No equals sign in the header.
1505         *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1506         return HTP_DECLINED;
1507     }
1508 
1509     // Go over the '=' character.
1510     pos++;
1511 
1512     // Ignore any whitespace after the equals sign.
1513     while ((pos < len) && (htp_is_space(data[pos]))) {
1514         if (htp_is_space(data[pos])) {
1515             // It is unusual to see whitespace after
1516             // the equals sign.
1517             *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
1518         }
1519 
1520         pos++;
1521     }
1522 
1523     if (pos >= len) {
1524         // No value after the equals sign.
1525         *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1526         return HTP_DECLINED;
1527     }
1528 
1529     if (data[pos] == '"') {
1530         // Quoted boundary.
1531 
1532         // Possibly not very unusual, but let's see.
1533         *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
1534 
1535         pos++; // Over the double quote.
1536         size_t startpos = pos; // Starting position of the boundary.
1537 
1538         // Look for the terminating double quote.
1539         while ((pos < len) && (data[pos] != '"')) pos++;
1540 
1541         if (pos >= len) {
1542             // Ran out of space without seeing
1543             // the terminating double quote.
1544             *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1545 
1546             // Include the starting double quote in the boundary.
1547             startpos--;
1548         }
1549 
1550         *boundary = bstr_dup_mem(data + startpos, pos - startpos);
1551         if (*boundary == NULL) return HTP_ERROR;
1552 
1553         pos++; // Over the double quote.
1554     } else {
1555         // Boundary not quoted.
1556 
1557         size_t startpos = pos;
1558 
        // Find the end of the boundary. For the time being, we replicate
        // the behavior of PHP 5.4.x. This may result in a boundary that's
        // closer to what would be accepted in real life. Our subsequent
        // checks of boundary characters will catch irregularities.
1563         while ((pos < len) && (data[pos] != ',') && (data[pos] != ';') && (!htp_is_space(data[pos]))) pos++;
1564 
1565         *boundary = bstr_dup_mem(data + startpos, pos - startpos);
1566         if (*boundary == NULL) return HTP_ERROR;
1567     }
1568 
1569     // Check for a zero-length boundary.
1570     if (bstr_len(*boundary) == 0) {
1571         *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1572         bstr_free(*boundary);
1573         *boundary = NULL;
1574         return HTP_DECLINED;
1575     }
1576 
1577     // Allow only whitespace characters after the boundary.
1578     int seen_space = 0, seen_non_space = 0;
1579 
1580     while (pos < len) {
1581         if (!htp_is_space(data[pos])) {
1582             seen_non_space = 1;
1583         } else {
1584             seen_space = 1;
1585         }
1586 
1587         pos++;
1588     }
1589 
1590     // Raise INVALID if we see any non-space characters,
1591     // but raise UNUSUAL if we see _only_ space characters.
1592     if (seen_non_space) {
1593         *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1594     } else if (seen_space) {
1595         *flags |= HTP_MULTIPART_HBOUNDARY_UNUSUAL;
1596     }
1597 
1598     #ifdef HTP_DEBUG
1599     fprint_bstr(stderr, "Multipart boundary", *boundary);
1600     #endif
1601 
1602     // Validate boundary characters.
1603     htp_mpartp_validate_boundary(*boundary, flags);
1604 
1605     // Correlate with the MIME type. This might be a tad too
1606     // sensitive because it may catch non-browser access with sloppy
1607     // implementations, but let's go with it for now.
1608     if (bstr_begins_with_c(content_type, "multipart/form-data;") == 0) {
1609         *flags |= HTP_MULTIPART_HBOUNDARY_INVALID;
1610     }
1611 
1612     htp_mpartp_validate_content_type(content_type, flags);
1613 
1614     return HTP_OK;
1615 }
1616