1 /***************************************************************************
2  * Copyright (c) 2009-2010 Open Information Security Foundation
3  * Copyright (c) 2010-2013 Qualys, Inc.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are
8  * met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12 
13  * - Redistributions in binary form must reproduce the above copyright
14  *   notice, this list of conditions and the following disclaimer in the
15  *   documentation and/or other materials provided with the distribution.
16 
17  * - Neither the name of the Qualys, Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  ***************************************************************************/
33 
34 /**
35  * @file
36  * @author Ivan Ristic <ivanr@webkreator.com>
37  */
38 
39 #ifndef _HTP_MULTIPART_H
40 #define	_HTP_MULTIPART_H
41 
42 #ifdef __cplusplus
43 extern "C" {
44 #endif
45 
46 #include "bstr.h"
47 #include "htp.h"
48 #include "htp_table.h"
49 
50 
51 // Constants and enums.
52 
/**
 * An LF-terminated line was seen in the payload. LF-only lines are not
 * allowed, but some clients emit them and some backends accept them.
 * A payload that mixes LF and CRLF lines might be unusual.
 */
#define HTP_MULTIPART_LF_LINE                   0x000001

/** A CRLF-terminated line was seen in the payload. Normal and expected. */
#define HTP_MULTIPART_CRLF_LINE                 0x000002

/** LWS followed a boundary instance in the body. Unusual. */
#define HTP_MULTIPART_BBOUNDARY_LWS_AFTER       0x000004

/** Non-LWS content followed a boundary instance in the body. Highly unusual. */
#define HTP_MULTIPART_BBOUNDARY_NLWS_AFTER      0x000008
68 
/** The payload has a preamble part. Might not be that unusual. */
#define HTP_MULTIPART_HAS_PREAMBLE              0x000010

/** The payload has an epilogue part. Unusual. */
#define HTP_MULTIPART_HAS_EPILOGUE              0x000020

/**
 * The last boundary was seen in the payload. A missing last boundary may
 * not break parsing with some (most?) backends, but it does mean that the
 * payload is not well formed. This can occur if the client gives up, or if
 * the connection is interrupted. Incomplete payloads should be blocked
 * whenever possible.
 */
#define HTP_MULTIPART_SEEN_LAST_BOUNDARY        0x000040

/**
 * A part was found after the last boundary. This is highly irregular
 * and indicative of evasion.
 */
#define HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY  0x000080

/**
 * The payload ends abruptly, without proper termination. This can occur if
 * the client gives up, or if the connection is interrupted. When this flag
 * is raised, HTP_MULTIPART_PART_INCOMPLETE is also raised for the part that
 * was only partially processed. (The opposite does not always hold: there
 * are other ways in which a part can be left incomplete.)
 */
#define HTP_MULTIPART_INCOMPLETE                0x000100
100 
/** The boundary given in the Content-Type header is invalid. */
#define HTP_MULTIPART_HBOUNDARY_INVALID         0x000200

/**
 * The boundary given in the Content-Type header is unusual. This may mean
 * that evasion is being attempted, but it could also mean that the client
 * simply does not do things the way it should.
 */
#define HTP_MULTIPART_HBOUNDARY_UNUSUAL         0x000400

/**
 * The boundary given in the Content-Type header is quoted. This is very
 * unusual, and may be indicative of an evasion attempt.
 */
#define HTP_MULTIPART_HBOUNDARY_QUOTED          0x000800
116 
/** Part headers used header folding. Very unusual. */
#define HTP_MULTIPART_PART_HEADER_FOLDING       0x001000

/**
 * A part of unknown type was encountered. This probably means that the part
 * lacks a Content-Disposition header, or that the header is invalid.
 * Highly unusual.
 */
#define HTP_MULTIPART_PART_UNKNOWN              0x002000

/**
 * A part header was repeated, possibly in an attempt to confuse the parser.
 * Very unusual.
 */
#define HTP_MULTIPART_PART_HEADER_REPEATED      0x004000

/** An unknown part header was encountered. */
#define HTP_MULTIPART_PART_HEADER_UNKNOWN       0x008000

/** An invalid part header was encountered. */
#define HTP_MULTIPART_PART_HEADER_INVALID       0x010000

/**
 * The part type specified in the C-D header is neither MULTIPART_PART_TEXT
 * nor MULTIPART_PART_FILE.
 */
#define HTP_MULTIPART_CD_TYPE_INVALID           0x020000

/**
 * A Content-Disposition part header carries multiple parameters with the
 * same name.
 */
#define HTP_MULTIPART_CD_PARAM_REPEATED         0x040000

/** An unknown Content-Disposition parameter was encountered. */
#define HTP_MULTIPART_CD_PARAM_UNKNOWN          0x080000

/** The Content-Disposition syntax is invalid. */
#define HTP_MULTIPART_CD_SYNTAX_INVALID         0x100000

/**
 * A part was abruptly terminated. This can happen when the payload itself is
 * abruptly terminated (in which case HTP_MULTIPART_INCOMPLETE will be raised
 * as well). However, it can also happen when a boundary is seen before any
 * part data.
 */
#define HTP_MULTIPART_PART_INCOMPLETE           0x200000

/** A NUL byte was seen in a part header area. */
#define HTP_MULTIPART_NUL_BYTE                  0x400000
156 
/**
 * Mask combining every flag that indicates an invalid Content-Disposition
 * (C-D) header.
 */
#define HTP_MULTIPART_CD_INVALID \
    ( HTP_MULTIPART_CD_TYPE_INVALID   \
    | HTP_MULTIPART_CD_PARAM_REPEATED \
    | HTP_MULTIPART_CD_PARAM_UNKNOWN  \
    | HTP_MULTIPART_CD_SYNTAX_INVALID )
163 
/** Mask combining every flag that indicates an invalid part. */
#define HTP_MULTIPART_PART_INVALID \
    ( HTP_MULTIPART_CD_INVALID           \
    | HTP_MULTIPART_NUL_BYTE             \
    | HTP_MULTIPART_PART_UNKNOWN         \
    | HTP_MULTIPART_PART_HEADER_REPEATED \
    | HTP_MULTIPART_PART_INCOMPLETE      \
    | HTP_MULTIPART_PART_HEADER_UNKNOWN  \
    | HTP_MULTIPART_PART_HEADER_INVALID )
173 
/** Mask combining every flag that indicates an invalid Multipart payload. */
#define HTP_MULTIPART_INVALID \
    ( HTP_MULTIPART_PART_INVALID             \
    | HTP_MULTIPART_PART_AFTER_LAST_BOUNDARY \
    | HTP_MULTIPART_INCOMPLETE               \
    | HTP_MULTIPART_HBOUNDARY_INVALID )
180 
/**
 * A collection of flags that all indicate an unusual Multipart payload.
 * Everything covered by HTP_MULTIPART_INVALID is included, plus part header
 * folding, non-LWS content after a body boundary, an epilogue, and an
 * unusual or quoted boundary in the Content-Type header.
 */
#define HTP_MULTIPART_UNUSUAL ( \
    HTP_MULTIPART_INVALID | \
    HTP_MULTIPART_PART_HEADER_FOLDING | \
    HTP_MULTIPART_BBOUNDARY_NLWS_AFTER | \
    HTP_MULTIPART_HAS_EPILOGUE | \
    HTP_MULTIPART_HBOUNDARY_UNUSUAL | \
    HTP_MULTIPART_HBOUNDARY_QUOTED )
189 
/**
 * Mask combining every flag that indicates an unusual Multipart payload,
 * with a low tolerance for irregularities: on top of HTP_MULTIPART_UNUSUAL
 * it also covers LF lines, LWS after a body boundary, and a preamble.
 */
#define HTP_MULTIPART_UNUSUAL_PARANOID \
    ( HTP_MULTIPART_UNUSUAL             \
    | HTP_MULTIPART_LF_LINE             \
    | HTP_MULTIPART_BBOUNDARY_LWS_AFTER \
    | HTP_MULTIPART_HAS_PREAMBLE )
196 
/** MIME type string for multipart/form-data content. */
#define HTP_MULTIPART_MIME_TYPE "multipart/form-data"
198 
/** Kinds of parts that can appear in a multipart body. */
enum htp_multipart_type_t {
    MULTIPART_PART_UNKNOWN  = 0, /**< Unknown part. */
    MULTIPART_PART_TEXT     = 1, /**< Text (parameter) part. */
    MULTIPART_PART_FILE     = 2, /**< File part. */
    MULTIPART_PART_PREAMBLE = 3, /**< Free-text part before the first boundary. */
    MULTIPART_PART_EPILOGUE = 4  /**< Free-text part after the last boundary. */
};
216 
217 
218 // Structures
219 
/**
 * Multipart parser configuration and state. Private: only this opaque
 * forward declaration is exposed here.
 */
struct htp_mpartp_t;
typedef struct htp_mpartp_t htp_mpartp_t;
224 
225 /**
226  * Holds information related to a multipart body.
227  */
228 typedef struct htp_multipart_t {
229     /** Multipart boundary. */
230     char *boundary;
231 
232     /** Boundary length. */
233     size_t boundary_len;
234 
235     /** How many boundaries were there? */
236     int boundary_count;
237 
238     /** List of parts, in the order in which they appeared in the body. */
239     htp_list_t *parts;
240 
241     /** Parsing flags. */
242     uint64_t flags;
243 } htp_multipart_t;
244 
245 /**
246  * Holds information related to a part.
247  */
248 typedef struct htp_multipart_part_t {
249     /** Pointer to the parser. */
250     htp_mpartp_t *parser;
251 
252     /** Part type; see the MULTIPART_PART_* constants. */
253     enum htp_multipart_type_t type;
254 
255     /** Raw part length (i.e., headers and data). */
256     size_t len;
257 
258     /** Part name, from the Content-Disposition header. Can be NULL. */
259     bstr *name;
260 
261     /**
262      * Part value; the contents depends on the type of the part:
263      * 1) NULL for files; 2) contains complete part contents for
264      * preamble and epilogue parts (they have no headers), and
265      * 3) data only (headers excluded) for text and unknown parts.
266      */
267     bstr *value;
268 
269     /** Part content type, from the Content-Type header. Can be NULL. */
270     bstr *content_type;
271 
272     /** Part headers (htp_header_t instances), using header name as the key. */
273     htp_table_t *headers;
274 
275     /** File data, available only for MULTIPART_PART_FILE parts. */
276     htp_file_t *file;
277 } htp_multipart_part_t;
278 
279 
280 // Functions
281 
282 /**
283  * Creates a new multipart/form-data parser. On a successful invocation,
284  * the ownership of the boundary parameter is transferred to the parser.
285  *
286  * @param[in] cfg
287  * @param[in] boundary
288  * @param[in] flags
289  * @return New parser instance, or NULL on memory allocation failure.
290  */
291 htp_mpartp_t *htp_mpartp_create(htp_cfg_t *cfg, bstr *boundary, uint64_t flags);
292 
293 /**
294  * Looks for boundary in the supplied Content-Type request header. The extracted
295  * boundary will be allocated on the heap.
296  *
297  * @param[in] content_type
298  * @param[out] boundary
299  * @param[out] multipart_flags Multipart flags, which are not compatible from general LibHTP flags.
300  * @return HTP_OK on success (boundary found), HTP_DECLINED if boundary was not found,
301  *         and HTP_ERROR on failure. Flags may be set on HTP_OK and HTP_DECLINED. For
302  *         example, if a boundary could not be extracted but there is indication that
303  *         one is present, HTP_MULTIPART_HBOUNDARY_INVALID will be set.
304  */
305 htp_status_t htp_mpartp_find_boundary(bstr *content_type, bstr **boundary, uint64_t *multipart_flags);
306 
307 /**
308  * Returns the multipart structure created by the parser.
309  *
310  * @param[in] parser
311  * @return The main multipart structure.
312  */
313 htp_multipart_t *htp_mpartp_get_multipart(htp_mpartp_t *parser);
314 
315 /**
316  * Destroys the provided parser.
317  *
318  * @param[in] parser
319  */
320 void htp_mpartp_destroy(htp_mpartp_t *parser);
321 
322 /**
323  * Finalize parsing.
324  *
325  * @param[in] parser
326  * @returns HTP_OK on success, HTP_ERROR on failure.
327  */
328 htp_status_t htp_mpartp_finalize(htp_mpartp_t *parser);
329 
330 /**
331  * Parses a chunk of multipart/form-data data. This function should be called
332  * as many times as necessary until all data has been consumed.
333  *
334  * @param[in] parser
335  * @param[in] data
336  * @param[in] len
337  * @return HTP_OK on success, HTP_ERROR on failure.
338  */
339 htp_status_t htp_mpartp_parse(htp_mpartp_t *parser, const void *data, size_t len);
340 
341 #ifdef __cplusplus
342 }
343 #endif
344 
345 #endif	/* _HTP_MULTIPART_H */
346