1 /*************************************************************************** 2 * Copyright (c) 2009-2010 Open Information Security Foundation 3 * Copyright (c) 2010-2013 Qualys, Inc. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions are 8 * met: 9 * 10 * - Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 13 * - Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 17 * - Neither the name of the Qualys, Inc. nor the names of its 18 * contributors may be used to endorse or promote products derived from 19 * this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 ***************************************************************************/ 33 34 /** 35 * @file 36 * @author Ivan Ristic <ivanr@webkreator.com> 37 */ 38 39 #ifndef _HTP_MULTIPART_H 40 #define _HTP_MULTIPART_H 41 42 #ifdef __cplusplus 43 extern "C" { 44 #endif 45 46 #include "bstr.h" 47 #include "htp.h" 48 #include "htp_table.h" 49 50 51 // Constants and enums. 52 53 /** 54 * Seen a LF line in the payload. LF lines are not allowed, but 55 * some clients do use them and some backends do accept them. Mixing 56 * LF and CRLF lines within some payload might be unusual. 57 */ 58 #define HTP_MULTIPART_LF_LINE 0x0001 59 60 /** Seen a CRLF line in the payload. This is normal and expected. */ 61 #define HTP_MULTIPART_CRLF_LINE 0x0002 62 63 /** Seen LWS after a boundary instance in the body. Unusual. */ 64 #define HTP_MULTIPART_BBOUNDARY_LWS_AFTER 0x0004 65 66 /** Seen non-LWS content after a boundary instance in the body. Highly unusual. */ 67 #define HTP_MULTIPART_BBOUNDARY_NLWS_AFTER 0x0008 68 69 /** 70 * Payload has a preamble part. Might not be that unusual. 71 */ 72 #define HTP_MULTIPART_HAS_PREAMBLE 0x0010 73 74 /** 75 * Payload has an epilogue part. Unusual. 76 */ 77 #define HTP_MULTIPART_HAS_EPILOGUE 0x0020 78 79 /** 80 * The last boundary was seen in the payload. Absence of the last boundary 81 * may not break parsing with some (most?) backends, but it means that the payload 82 * is not well formed. Can occur if the client gives up, or if the connection is 83 * interrupted. Incomplete payloads should be blocked whenever possible. 84 */ 85 #define HTP_MULTIPART_SEEN_LAST_BOUNDARY 0x0040 86 87 /** 88 * There was a part after the last boundary. This is highly irregular 89 * and indicative of evasion. 90 */ 91 #define HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY 0x0080 92 93 /** 94 * The payloads ends abruptly, without proper termination. Can occur if the client gives up, 95 * or if the connection is interrupted. When this flag is raised, HTP_MULTIPART_PART_INCOMPLETE 96 * will also be raised for the part that was only partially processed. (But the opposite may not 97 * always be the case -- there are other ways in which a part can be left incomplete.) 98 */ 99 #define HTP_MULTIPART_INCOMPLETE 0x0100 100 101 /** The boundary in the Content-Type header is invalid. */ 102 #define HTP_MULTIPART_HBOUNDARY_INVALID 0x0200 103 104 /** 105 * The boundary in the Content-Type header is unusual. This may mean that evasion 106 * is attempted, but it could also mean that we have encountered a client that does 107 * not do things in the way it should. 108 */ 109 #define HTP_MULTIPART_HBOUNDARY_UNUSUAL 0x0400 110 111 /** 112 * The boundary in the Content-Type header is quoted. This is very unusual, 113 * and may be indicative of an evasion attempt. 114 */ 115 #define HTP_MULTIPART_HBOUNDARY_QUOTED 0x0800 116 117 /** Header folding was used in part headers. Very unusual. */ 118 #define HTP_MULTIPART_PART_HEADER_FOLDING 0x1000 119 120 /** 121 * A part of unknown type was encountered, which probably means that the part is lacking 122 * a Content-Disposition header, or that the header is invalid. Highly unusual. 123 */ 124 #define HTP_MULTIPART_PART_UNKNOWN 0x2000 125 126 /** There was a repeated part header, possibly in an attempt to confuse the parser. Very unusual. */ 127 #define HTP_MULTIPART_PART_HEADER_REPEATED 0x4000 128 129 /** Unknown part header encountered. */ 130 #define HTP_MULTIPART_PART_HEADER_UNKNOWN 0x8000 131 132 /** Invalid part header encountered. */ 133 #define HTP_MULTIPART_PART_HEADER_INVALID 0x10000 134 135 /** Part type specified in the C-D header is neither MULTIPART_PART_TEXT nor MULTIPART_PART_FILE. */ 136 #define HTP_MULTIPART_CD_TYPE_INVALID 0x20000 137 138 /** Content-Disposition part header with multiple parameters with the same name. */ 139 #define HTP_MULTIPART_CD_PARAM_REPEATED 0x40000 140 141 /** Unknown Content-Disposition parameter. */ 142 #define HTP_MULTIPART_CD_PARAM_UNKNOWN 0x80000 143 144 /** Invalid Content-Disposition syntax. */ 145 #define HTP_MULTIPART_CD_SYNTAX_INVALID 0x100000 146 147 /** 148 * There is an abruptly terminated part. This can happen when the payload itself is abruptly 149 * terminated (in which case HTP_MULTIPART_INCOMPLETE) will be raised. However, it can also 150 * happen when a boundary is seen before any part data. 151 */ 152 #define HTP_MULTIPART_PART_INCOMPLETE 0x200000 153 154 /** A NUL byte was seen in a part header area. */ 155 #define HTP_MULTIPART_NUL_BYTE 0x400000 156 157 /** A collection of flags that all indicate an invalid C-D header. */ 158 #define HTP_MULTIPART_CD_INVALID ( \ 159 HTP_MULTIPART_CD_TYPE_INVALID | \ 160 HTP_MULTIPART_CD_PARAM_REPEATED | \ 161 HTP_MULTIPART_CD_PARAM_UNKNOWN | \ 162 HTP_MULTIPART_CD_SYNTAX_INVALID ) 163 164 /** A collection of flags that all indicate an invalid part. */ 165 #define HTP_MULTIPART_PART_INVALID ( \ 166 HTP_MULTIPART_CD_INVALID | \ 167 HTP_MULTIPART_NUL_BYTE | \ 168 HTP_MULTIPART_PART_UNKNOWN | \ 169 HTP_MULTIPART_PART_HEADER_REPEATED | \ 170 HTP_MULTIPART_PART_INCOMPLETE | \ 171 HTP_MULTIPART_PART_HEADER_UNKNOWN | \ 172 HTP_MULTIPART_PART_HEADER_INVALID ) 173 174 /** A collection of flags that all indicate an invalid Multipart payload. */ 175 #define HTP_MULTIPART_INVALID ( \ 176 HTP_MULTIPART_PART_INVALID | \ 177 HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY | \ 178 HTP_MULTIPART_INCOMPLETE | \ 179 HTP_MULTIPART_HBOUNDARY_INVALID ) 180 181 /** A collection of flags that all indicate an unusual Multipart payload. */ 182 #define HTP_MULTIPART_UNUSUAL ( \ 183 HTP_MULTIPART_INVALID | \ 184 HTP_MULTIPART_PART_HEADER_FOLDING | \ 185 HTP_MULTIPART_BBOUNDARY_NLWS_AFTER | \ 186 HTP_MULTIPART_HAS_EPILOGUE | \ 187 HTP_MULTIPART_HBOUNDARY_UNUSUAL \ 188 HTP_MULTIPART_HBOUNDARY_QUOTED ) 189 190 /** A collection of flags that all indicate an unusual Multipart payload, with a low sensitivity to irregularities. */ 191 #define HTP_MULTIPART_UNUSUAL_PARANOID ( \ 192 HTP_MULTIPART_UNUSUAL | \ 193 HTP_MULTIPART_LF_LINE | \ 194 HTP_MULTIPART_BBOUNDARY_LWS_AFTER | \ 195 HTP_MULTIPART_HAS_PREAMBLE ) 196 197 #define HTP_MULTIPART_MIME_TYPE "multipart/form-data" 198 199 enum htp_multipart_type_t { 200 201 /** Unknown part. */ 202 MULTIPART_PART_UNKNOWN = 0, 203 204 /** Text (parameter) part. */ 205 MULTIPART_PART_TEXT = 1, 206 207 /** File part. */ 208 MULTIPART_PART_FILE = 2, 209 210 /** Free-text part before the first boundary. */ 211 MULTIPART_PART_PREAMBLE = 3, 212 213 /** Free-text part after the last boundary. */ 214 MULTIPART_PART_EPILOGUE = 4 215 }; 216 217 218 // Structures 219 220 /** 221 * Holds multipart parser configuration and state. Private. 222 */ 223 typedef struct htp_mpartp_t htp_mpartp_t; 224 225 /** 226 * Holds information related to a multipart body. 227 */ 228 typedef struct htp_multipart_t { 229 /** Multipart boundary. */ 230 char *boundary; 231 232 /** Boundary length. */ 233 size_t boundary_len; 234 235 /** How many boundaries were there? */ 236 int boundary_count; 237 238 /** List of parts, in the order in which they appeared in the body. */ 239 htp_list_t *parts; 240 241 /** Parsing flags. */ 242 uint64_t flags; 243 } htp_multipart_t; 244 245 /** 246 * Holds information related to a part. 247 */ 248 typedef struct htp_multipart_part_t { 249 /** Pointer to the parser. */ 250 htp_mpartp_t *parser; 251 252 /** Part type; see the MULTIPART_PART_* constants. */ 253 enum htp_multipart_type_t type; 254 255 /** Raw part length (i.e., headers and data). */ 256 size_t len; 257 258 /** Part name, from the Content-Disposition header. Can be NULL. */ 259 bstr *name; 260 261 /** 262 * Part value; the contents depends on the type of the part: 263 * 1) NULL for files; 2) contains complete part contents for 264 * preamble and epilogue parts (they have no headers), and 265 * 3) data only (headers excluded) for text and unknown parts. 266 */ 267 bstr *value; 268 269 /** Part content type, from the Content-Type header. Can be NULL. */ 270 bstr *content_type; 271 272 /** Part headers (htp_header_t instances), using header name as the key. */ 273 htp_table_t *headers; 274 275 /** File data, available only for MULTIPART_PART_FILE parts. */ 276 htp_file_t *file; 277 } htp_multipart_part_t; 278 279 280 // Functions 281 282 /** 283 * Creates a new multipart/form-data parser. On a successful invocation, 284 * the ownership of the boundary parameter is transferred to the parser. 285 * 286 * @param[in] cfg 287 * @param[in] boundary 288 * @param[in] flags 289 * @return New parser instance, or NULL on memory allocation failure. 290 */ 291 htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags); 292 293 /** 294 * Looks for boundary in the supplied Content-Type request header. The extracted 295 * boundary will be allocated on the heap. 296 * 297 * @param[in] content_type 298 * @param[out] boundary 299 * @param[out] multipart_flags Multipart flags, which are not compatible from general LibHTP flags. 300 * @return HTP_OK on success (boundary found), HTP_DECLINED if boundary was not found, 301 * and HTP_ERROR on failure. Flags may be set on HTP_OK and HTP_DECLINED. For 302 * example, if a boundary could not be extracted but there is indication that 303 * one is present, HTP_MULTIPART_HBOUNDARY_INVALID will be set. 304 */ 305 htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *multipart_flags); 306 307 /** 308 * Returns the multipart structure created by the parser. 309 * 310 * @param[in] parser 311 * @return The main multipart structure. 312 */ 313 htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser); 314 315 /** 316 * Destroys the provided parser. 317 * 318 * @param[in] parser 319 */ 320 void htp_mpartp_destroy(htp_mpartp_t *parser); 321 322 /** 323 * Finalize parsing. 324 * 325 * @param[in] parser 326 * @returns HTP_OK on success, HTP_ERROR on failure. 327 */ 328 htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser); 329 330 /** 331 * Parses a chunk of multipart/form-data data. This function should be called 332 * as many times as necessary until all data has been consumed. 333 * 334 * @param[in] parser 335 * @param[in] data 336 * @param[in] len 337 * @return HTP_OK on success, HTP_ERROR on failure. 338 */ 339 htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *data, size_t len); 340 341 #ifdef __cplusplus 342 } 343 #endif 344 345 #endif /* _HTP_MULTIPART_H */ 346