1 /***************************************************************************
2  * Copyright (c) 2009-2010 Open Information Security Foundation
3  * Copyright (c) 2010-2013 Qualys, Inc.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are
8  * met:
9  *
10  * - Redistributions of source code must retain the above copyright
11  *   notice, this list of conditions and the following disclaimer.
12 
13  * - Redistributions in binary form must reproduce the above copyright
14  *   notice, this list of conditions and the following disclaimer in the
15  *   documentation and/or other materials provided with the distribution.
16 
17  * - Neither the name of the Qualys, Inc. nor the names of its
18  *   contributors may be used to endorse or promote products derived from
19  *   this software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  ***************************************************************************/
33 
34 /**
35  * @file
36  * @author Ivan Ristic <ivanr@webkreator.com>
37  */
38 
39 #include "htp_config_auto.h"
40 
41 #include "htp_private.h"
42 
43 /**
44  * Extract one request header. A header can span multiple lines, in
45  * which case they will be folded into one before parsing is attempted.
46  *
47  * @param[in] connp
48  * @param[in] data
49  * @param[in] len
50  * @return HTP_OK or HTP_ERROR
51  */
htp_process_request_header_generic(htp_connp_t * connp,unsigned char * data,size_t len)52 htp_status_t htp_process_request_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) {
53     // Create a new header structure.
54     htp_header_t *h = calloc(1, sizeof (htp_header_t));
55     if (h == NULL) return HTP_ERROR;
56 
57     // Now try to parse the header.
58     if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) {
59         free(h);
60         return HTP_ERROR;
61     }
62 
63     #ifdef HTP_DEBUG
64     fprint_bstr(stderr, "Header name", h->name);
65     fprint_bstr(stderr, "Header value", h->value);
66     #endif
67 
68     // Do we already have a header with the same name?
69     htp_header_t *h_existing = htp_table_get(connp->in_tx->request_headers, h->name);
70     if (h_existing != NULL) {
71         // TODO Do we want to have a list of the headers that are
72         //      allowed to be combined in this way?
73         if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) {
74             // This is the second occurence for this header.
75             htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header");
76         } else {
77             // For simplicity reasons, we count the repetitions of all headers
78             if (connp->in_tx->req_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) {
79                 connp->in_tx->req_header_repetitions++;
80             } else {
81                 bstr_free(h->name);
82                 bstr_free(h->value);
83                 free(h);
84                 return HTP_OK;
85             }
86         }
87         // Keep track of repeated same-name headers.
88         h_existing->flags |= HTP_FIELD_REPEATED;
89 
90         // Having multiple C-L headers is against the RFC but
91         // servers may ignore the subsequent headers if the values are the same.
92         if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) {
93             // Don't use string comparison here because we want to
94             // ignore small formatting differences.
95 
96             int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL);
97             int64_t new_cl = htp_parse_content_length(h->value, NULL);
98             // Ambiguous response C-L value.
99             if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) {
100                 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value");
101             }
102             // Ignoring the new C-L header that has the same value as the previous ones.
103         } else {
104             // Add to the existing header.
105             bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value));
106             if (new_value == NULL) {
107                 bstr_free(h->name);
108                 bstr_free(h->value);
109                 free(h);
110                 return HTP_ERROR;
111             }
112 
113             h_existing->value = new_value;
114             bstr_add_mem_noex(h_existing->value, ", ", 2);
115             bstr_add_noex(h_existing->value, h->value);
116         }
117 
118         // The new header structure is no longer needed.
119         bstr_free(h->name);
120         bstr_free(h->value);
121         free(h);
122     } else {
123         // Add as a new header.
124         if (htp_table_add(connp->in_tx->request_headers, h->name, h) != HTP_OK) {
125             bstr_free(h->name);
126             bstr_free(h->value);
127             free(h);
128         }
129     }
130 
131     return HTP_OK;
132 }
133 
134 /**
135  * Generic request header parser.
136  *
137  * @param[in] connp
138  * @param[in] h
139  * @param[in] data
140  * @param[in] len
141  * @return HTP_OK or HTP_ERROR
142  */
htp_parse_request_header_generic(htp_connp_t * connp,htp_header_t * h,unsigned char * data,size_t len)143 htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
144     size_t name_start, name_end;
145     size_t value_start, value_end;
146 
147     htp_chomp(data, &len);
148 
149     name_start = 0;
150 
151     // Look for the colon.
152     size_t colon_pos = 0;
153     while ((colon_pos < len) && (data[colon_pos] != '\0') && (data[colon_pos] != ':')) colon_pos++;
154 
155     if ((colon_pos == len) || (data[colon_pos] == '\0')) {
156         // Missing colon.
157 
158         h->flags |= HTP_FIELD_UNPARSEABLE;
159 
160         // Log only once per transaction.
161         if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
162             connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
163             htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: colon missing");
164         }
165 
166         // We handle this case as a header with an empty name, with the value equal
167         // to the entire input string.
168 
169         // TODO Apache will respond to this problem with a 400.
170 
171         // Now extract the name and the value
172         h->name = bstr_dup_c("");
173         if (h->name == NULL) return HTP_ERROR;
174 
175         h->value = bstr_dup_mem(data, len);
176         if (h->value == NULL) {
177             bstr_free(h->name);
178             return HTP_ERROR;
179         }
180 
181         return HTP_OK;
182     }
183 
184     if (colon_pos == 0) {
185         // Empty header name.
186 
187         h->flags |= HTP_FIELD_INVALID;
188 
189         // Log only once per transaction.
190         if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
191             connp->in_tx->flags |= HTP_FIELD_INVALID;
192             htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
193         }
194     }
195 
196     name_end = colon_pos;
197 
198     // Ignore LWS after field-name.
199     size_t prev = name_end;
200     while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) {
201         // LWS after header name.
202 
203         prev--;
204         name_end--;
205 
206         h->flags |= HTP_FIELD_INVALID;
207 
208         // Log only once per transaction.
209         if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
210             connp->in_tx->flags |= HTP_FIELD_INVALID;
211             htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
212         }
213     }
214 
215     // Header value.
216 
217     value_start = colon_pos;
218 
219     // Go over the colon.
220     if (value_start < len) {
221         value_start++;
222     }
223 
224     // Ignore LWS before field-content.
225     while ((value_start < len) && (htp_is_lws(data[value_start]))) {
226         value_start++;
227     }
228 
229     // Look for the end of field-content.
230     value_end = value_start;
231     while ((value_end < len) && (data[value_end] != '\0')) value_end++;
232 
233     // Ignore LWS after field-content.
234     prev = value_end - 1;
235     while ((prev > value_start) && (htp_is_lws(data[prev]))) {
236         prev--;
237         value_end--;
238     }
239 
240     // Check that the header name is a token.
241     size_t i = name_start;
242     while (i < name_end) {
243         if (!htp_is_token(data[i])) {
244             // Incorrectly formed header name.
245 
246             h->flags |= HTP_FIELD_INVALID;
247 
248             // Log only once per transaction.
249             if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
250                 connp->in_tx->flags |= HTP_FIELD_INVALID;
251                 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
252             }
253 
254             break;
255         }
256 
257         i++;
258     }
259 
260     // Now extract the name and the value
261     h->name = bstr_dup_mem(data + name_start, name_end - name_start);
262     if (h->name == NULL) return HTP_ERROR;
263 
264     h->value = bstr_dup_mem(data + value_start, value_end - value_start);
265     if (h->value == NULL) {
266         bstr_free(h->name);
267         return HTP_ERROR;
268     }
269 
270     return HTP_OK;
271 }
272 
273 /**
274  * Generic request line parser.
275  *
276  * @param[in] connp
277  * @return HTP_OK or HTP_ERROR
278  */
htp_parse_request_line_generic(htp_connp_t * connp)279 htp_status_t htp_parse_request_line_generic(htp_connp_t *connp) {
280     return htp_parse_request_line_generic_ex(connp, 0 /* NUL does not terminates line */);
281 }
282 
htp_parse_request_line_generic_ex(htp_connp_t * connp,int nul_terminates)283 htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates) {
284     htp_tx_t *tx = connp->in_tx;
285     unsigned char *data = bstr_ptr(tx->request_line);
286     size_t len = bstr_len(tx->request_line);
287     size_t pos = 0;
288     size_t mstart = 0;
289     size_t start;
290     size_t bad_delim;
291 
292     if (nul_terminates) {
293         // The line ends with the first NUL byte.
294 
295         size_t newlen = 0;
296         while ((pos < len) && (data[pos] != '\0')) {
297             pos++;
298             newlen++;
299         }
300 
301         // Start again, with the new length.
302         len = newlen;
303         pos = 0;
304     }
305 
306     // skip past leading whitespace. IIS allows this
307     while ((pos < len) && htp_is_space(data[pos])) pos++;
308     if (pos) {
309         htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: leading whitespace");
310         mstart = pos;
311 
312         if (connp->cfg->requestline_leading_whitespace_unwanted != HTP_UNWANTED_IGNORE) {
313             // reset mstart so that we copy the whitespace into the method
314             mstart = 0;
315             // set expected response code to this anomaly
316             tx->response_status_expected_number = connp->cfg->requestline_leading_whitespace_unwanted;
317         }
318     }
319 
320     // The request method starts at the beginning of the
321     // line and ends with the first whitespace character.
322     while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
323 
324     // No, we don't care if the method is empty.
325 
326     tx->request_method = bstr_dup_mem(data + mstart, pos - mstart);
327     if (tx->request_method == NULL) return HTP_ERROR;
328 
329     #ifdef HTP_DEBUG
330     fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_method), bstr_len(tx->request_method));
331     #endif
332 
333     tx->request_method_number = htp_convert_method_to_number(tx->request_method);
334 
335     bad_delim = 0;
336     // Ignore whitespace after request method. The RFC allows
337     // for only one SP, but then suggests any number of SP and HT
338     // should be permitted. Apache uses isspace(), which is even
339     // more permitting, so that's what we use here.
340     while ((pos < len) && (isspace(data[pos]))) {
341         if (!bad_delim && data[pos] != 0x20) {
342             bad_delim++;
343         }
344         pos++;
345     }
346 // Too much performance overhead for fuzzing
347 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
348     if (bad_delim) {
349         htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI");
350     }
351 #endif
352 
353     // Is there anything after the request method?
354     if (pos == len) {
355         // No, this looks like a HTTP/0.9 request.
356 
357         tx->is_protocol_0_9 = 1;
358         tx->request_protocol_number = HTP_PROTOCOL_0_9;
359         if (tx->request_method_number == HTP_M_UNKNOWN)
360             htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only");
361 
362         return HTP_OK;
363     }
364 
365     start = pos;
366     bad_delim = 0;
367 
368     // The URI ends with the first whitespace.
369     while ((pos < len) && (data[pos] != 0x20)) {
370         if (!bad_delim && htp_is_space(data[pos])) {
371             bad_delim++;
372         }
373         pos++;
374     }
375     /* if we've seen some 'bad' delimiters, we retry with those */
376     if (bad_delim && pos == len) {
377         // special case: even though RFC's allow only SP (0x20), many
378         // implementations allow other delimiters, like tab or other
379         // characters that isspace() accepts.
380         pos = start;
381         while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
382     }
383 // Too much performance overhead for fuzzing
384 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
385     if (bad_delim) {
386         // warn regardless if we've seen non-compliant chars
387         htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
388     }
389 #endif
390 
391     tx->request_uri = bstr_dup_mem(data + start, pos - start);
392     if (tx->request_uri == NULL) return HTP_ERROR;
393 
394     #ifdef HTP_DEBUG
395     fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_uri), bstr_len(tx->request_uri));
396     #endif
397 
398     // Ignore whitespace after URI.
399     while ((pos < len) && (htp_is_space(data[pos]))) pos++;
400 
401     // Is there protocol information available?
402     if (pos == len) {
403         // No, this looks like a HTTP/0.9 request.
404 
405         tx->is_protocol_0_9 = 1;
406         tx->request_protocol_number = HTP_PROTOCOL_0_9;
407         if (tx->request_method_number == HTP_M_UNKNOWN)
408             htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol");
409 
410         return HTP_OK;
411     }
412 
413     // The protocol information continues until the end of the line.
414     tx->request_protocol = bstr_dup_mem(data + pos, len - pos);
415     if (tx->request_protocol == NULL) return HTP_ERROR;
416 
417     tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);
418     if (tx->request_method_number == HTP_M_UNKNOWN && tx->request_protocol_number == HTP_PROTOCOL_INVALID)
419         htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol");
420 
421     #ifdef HTP_DEBUG
422     fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol));
423     #endif
424 
425     return HTP_OK;
426 }
427 
428