1 /*************************************************************************** 2 * Copyright (c) 2009-2010 Open Information Security Foundation 3 * Copyright (c) 2010-2013 Qualys, Inc. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are 8 * met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 13 * - Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 17 * - Neither the name of the Qualys, Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 ***************************************************************************/ 33 34 /** 35 * @file 36 * @author Ivan Ristic <ivanr@webkreator.com> 37 */ 38 39 #ifndef _HTP_MULTIPART_PRIVATE_H 40 #define _HTP_MULTIPART_PRIVATE_H 41 42 #ifdef __cplusplus 43 extern "C" { 44 #endif 45 46 #include "htp_multipart.h" 47 48 #define CD_PARAM_OTHER 0 49 #define CD_PARAM_NAME 1 50 #define CD_PARAM_FILENAME 2 51 52 #define DEFAULT_FILE_EXTRACT_LIMIT 16 53 54 enum htp_part_mode_t { 55 /** When in line mode, the parser is handling part headers. */ 56 MODE_LINE = 0, 57 58 /** When in data mode, the parser is consuming part data. */ 59 MODE_DATA = 1 60 }; 61 62 enum htp_multipart_state_t { 63 /** Initial state, after the parser has been created but before the boundary initialized. */ 64 STATE_INIT = 0, 65 66 /** Processing data, waiting for a new line (which might indicate a new boundary). */ 67 STATE_DATA = 1, 68 69 /** Testing a potential boundary. */ 70 STATE_BOUNDARY = 2, 71 72 /** Checking the first byte after a boundary. */ 73 STATE_BOUNDARY_IS_LAST1 = 3, 74 75 /** Checking the second byte after a boundary. */ 76 STATE_BOUNDARY_IS_LAST2 = 4, 77 78 /** Consuming linear whitespace after a boundary. */ 79 STATE_BOUNDARY_EAT_LWS = 5, 80 81 /** Used after a CR byte is detected in STATE_BOUNDARY_EAT_LWS. */ 82 STATE_BOUNDARY_EAT_LWS_CR = 6 83 }; 84 85 struct htp_mpartp_t { 86 htp_multipart_t multipart; 87 88 htp_cfg_t *cfg; 89 90 int extract_files; 91 92 int extract_limit; 93 94 char *extract_dir; 95 96 int file_count; 97 98 // Parsing callbacks 99 100 int (*handle_data)(htp_mpartp_t *mpartp, const unsigned char *data, 101 size_t len, int line_end); 102 int (*handle_boundary)(htp_mpartp_t *mpartp); 103 104 // Internal parsing fields; move into a private structure 105 106 /** 107 * Parser state; one of MULTIPART_STATE_* constants. 108 */ 109 enum htp_multipart_state_t parser_state; 110 111 /** 112 * Keeps track of the current position in the boundary matching progress. 113 * When this field reaches boundary_len, we have a boundary match. 114 */ 115 size_t boundary_match_pos; 116 117 /** 118 * Pointer to the part that is currently being processed. 119 */ 120 htp_multipart_part_t *current_part; 121 122 /** 123 * This parser consists of two layers: the outer layer is charged with 124 * finding parts, and the internal layer handles part data. There is an 125 * interesting interaction between the two parsers. Because the 126 * outer layer is seeing every line (it has to, in order to test for 127 * boundaries), it also effectively also splits input into lines. The 128 * inner parser deals with two areas: first is the headers, which are 129 * line based, followed by binary data. When parsing headers, the inner 130 * parser can reuse the lines identified by the outer parser. In this 131 * variable we keep the current parsing mode of the part, which helps 132 * us process input data more efficiently. The possible values are 133 * MULTIPART_MODE_LINE and MULTIPART_MODE_DATA. 134 */ 135 enum htp_part_mode_t current_part_mode; 136 137 /** 138 * Used for buffering when a potential boundary is fragmented 139 * across many input data buffers. On a match, the data stored here is 140 * discarded. When there is no match, the buffer is processed as data 141 * (belonging to the currently active part). 142 */ 143 bstr_builder_t *boundary_pieces; 144 145 bstr_builder_t *part_header_pieces; 146 147 bstr *pending_header_line; 148 149 /** 150 * Stores text part pieces until the entire part is seen, at which 151 * point the pieces are assembled into a single buffer, and the 152 * builder cleared. 153 */ 154 bstr_builder_t *part_data_pieces; 155 156 /** 157 * The offset of the current boundary candidate, relative to the most 158 * recent data chunk (first unprocessed chunk of data). 159 */ 160 size_t boundary_candidate_pos; 161 162 /** 163 * When we encounter a CR as the last byte in a buffer, we don't know 164 * if the byte is part of a CRLF combination. If it is, then the CR 165 * might be a part of a boundary. But if it is not, it's current 166 * part's data. Because we know how to handle everything before the 167 * CR, we do, and we use this flag to indicate that a CR byte is 168 * effectively being buffered. This is probably a case of premature 169 * optimization, but I am going to leave it in for now. 170 */ 171 int cr_aside; 172 173 /** 174 * When set, indicates that this parser no longer owns names and 175 * values of MULTIPART_PART_TEXT parts. It is used to avoid data 176 * duplication when the parser is used by LibHTP internally. 177 */ 178 int gave_up_data; 179 }; 180 181 htp_status_t htp_mpartp_run_request_file_data_hook(htp_multipart_part_t *part, const unsigned char *data, size_t len); 182 183 htp_status_t htp_mpart_part_process_headers(htp_multipart_part_t *part); 184 185 htp_status_t htp_mpartp_parse_header(htp_multipart_part_t *part, const unsigned char *data, size_t len); 186 187 htp_status_t htp_mpart_part_handle_data(htp_multipart_part_t *part, const unsigned char *data, size_t len, int is_line); 188 189 int htp_mpartp_is_boundary_character(int c); 190 191 htp_multipart_part_t *htp_mpart_part_create(htp_mpartp_t *parser); 192 193 htp_status_t htp_mpart_part_finalize_data(htp_multipart_part_t *part); 194 195 void htp_mpart_part_destroy(htp_multipart_part_t *part, int gave_up_data); 196 197 htp_status_t htp_mpart_part_parse_c_d(htp_multipart_part_t *part); 198 199 #ifdef __cplusplus 200 } 201 #endif 202 203 #endif /* _HTP_MULTIPART_PRIVATE_H */ 204