1 /***************************************************************************
2 * Copyright (c) 2009-2010 Open Information Security Foundation
3 * Copyright (c) 2010-2013 Qualys, Inc.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are
8 * met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16
17 * - Neither the name of the Qualys, Inc. nor the names of its
18 * contributors may be used to endorse or promote products derived from
19 * this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ***************************************************************************/
33
34 /**
35 * @file
36 * @author Ivan Ristic <ivanr@webkreator.com>
37 */
38
39 #include "htp_config_auto.h"
40
41 #include "htp_private.h"
42
43 /**
44 * Extract one request header. A header can span multiple lines, in
45 * which case they will be folded into one before parsing is attempted.
46 *
47 * @param[in] connp
48 * @param[in] data
49 * @param[in] len
50 * @return HTP_OK or HTP_ERROR
51 */
htp_process_request_header_generic(htp_connp_t * connp,unsigned char * data,size_t len)52 htp_status_t htp_process_request_header_generic(htp_connp_t *connp, unsigned char *data, size_t len) {
53 // Create a new header structure.
54 htp_header_t *h = calloc(1, sizeof (htp_header_t));
55 if (h == NULL) return HTP_ERROR;
56
57 // Now try to parse the header.
58 if (htp_parse_request_header_generic(connp, h, data, len) != HTP_OK) {
59 free(h);
60 return HTP_ERROR;
61 }
62
63 #ifdef HTP_DEBUG
64 fprint_bstr(stderr, "Header name", h->name);
65 fprint_bstr(stderr, "Header value", h->value);
66 #endif
67
68 // Do we already have a header with the same name?
69 htp_header_t *h_existing = htp_table_get(connp->in_tx->request_headers, h->name);
70 if (h_existing != NULL) {
71 // TODO Do we want to have a list of the headers that are
72 // allowed to be combined in this way?
73 if ((h_existing->flags & HTP_FIELD_REPEATED) == 0) {
74 // This is the second occurence for this header.
75 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Repetition for header");
76 } else {
77 // For simplicity reasons, we count the repetitions of all headers
78 if (connp->in_tx->req_header_repetitions < HTP_MAX_HEADERS_REPETITIONS) {
79 connp->in_tx->req_header_repetitions++;
80 } else {
81 bstr_free(h->name);
82 bstr_free(h->value);
83 free(h);
84 return HTP_OK;
85 }
86 }
87 // Keep track of repeated same-name headers.
88 h_existing->flags |= HTP_FIELD_REPEATED;
89
90 // Having multiple C-L headers is against the RFC but
91 // servers may ignore the subsequent headers if the values are the same.
92 if (bstr_cmp_c_nocase(h->name, "Content-Length") == 0) {
93 // Don't use string comparison here because we want to
94 // ignore small formatting differences.
95
96 int64_t existing_cl = htp_parse_content_length(h_existing->value, NULL);
97 int64_t new_cl = htp_parse_content_length(h->value, NULL);
98 // Ambiguous response C-L value.
99 if ((existing_cl == -1) || (new_cl == -1) || (existing_cl != new_cl)) {
100 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Ambiguous request C-L value");
101 }
102 // Ignoring the new C-L header that has the same value as the previous ones.
103 } else {
104 // Add to the existing header.
105 bstr *new_value = bstr_expand(h_existing->value, bstr_len(h_existing->value) + 2 + bstr_len(h->value));
106 if (new_value == NULL) {
107 bstr_free(h->name);
108 bstr_free(h->value);
109 free(h);
110 return HTP_ERROR;
111 }
112
113 h_existing->value = new_value;
114 bstr_add_mem_noex(h_existing->value, ", ", 2);
115 bstr_add_noex(h_existing->value, h->value);
116 }
117
118 // The new header structure is no longer needed.
119 bstr_free(h->name);
120 bstr_free(h->value);
121 free(h);
122 } else {
123 // Add as a new header.
124 if (htp_table_add(connp->in_tx->request_headers, h->name, h) != HTP_OK) {
125 bstr_free(h->name);
126 bstr_free(h->value);
127 free(h);
128 }
129 }
130
131 return HTP_OK;
132 }
133
134 /**
135 * Generic request header parser.
136 *
137 * @param[in] connp
138 * @param[in] h
139 * @param[in] data
140 * @param[in] len
141 * @return HTP_OK or HTP_ERROR
142 */
htp_parse_request_header_generic(htp_connp_t * connp,htp_header_t * h,unsigned char * data,size_t len)143 htp_status_t htp_parse_request_header_generic(htp_connp_t *connp, htp_header_t *h, unsigned char *data, size_t len) {
144 size_t name_start, name_end;
145 size_t value_start, value_end;
146
147 htp_chomp(data, &len);
148
149 name_start = 0;
150
151 // Look for the colon.
152 size_t colon_pos = 0;
153 while ((colon_pos < len) && (data[colon_pos] != '\0') && (data[colon_pos] != ':')) colon_pos++;
154
155 if ((colon_pos == len) || (data[colon_pos] == '\0')) {
156 // Missing colon.
157
158 h->flags |= HTP_FIELD_UNPARSEABLE;
159
160 // Log only once per transaction.
161 if (!(connp->in_tx->flags & HTP_FIELD_UNPARSEABLE)) {
162 connp->in_tx->flags |= HTP_FIELD_UNPARSEABLE;
163 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: colon missing");
164 }
165
166 // We handle this case as a header with an empty name, with the value equal
167 // to the entire input string.
168
169 // TODO Apache will respond to this problem with a 400.
170
171 // Now extract the name and the value
172 h->name = bstr_dup_c("");
173 if (h->name == NULL) return HTP_ERROR;
174
175 h->value = bstr_dup_mem(data, len);
176 if (h->value == NULL) {
177 bstr_free(h->name);
178 return HTP_ERROR;
179 }
180
181 return HTP_OK;
182 }
183
184 if (colon_pos == 0) {
185 // Empty header name.
186
187 h->flags |= HTP_FIELD_INVALID;
188
189 // Log only once per transaction.
190 if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
191 connp->in_tx->flags |= HTP_FIELD_INVALID;
192 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: empty name");
193 }
194 }
195
196 name_end = colon_pos;
197
198 // Ignore LWS after field-name.
199 size_t prev = name_end;
200 while ((prev > name_start) && (htp_is_lws(data[prev - 1]))) {
201 // LWS after header name.
202
203 prev--;
204 name_end--;
205
206 h->flags |= HTP_FIELD_INVALID;
207
208 // Log only once per transaction.
209 if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
210 connp->in_tx->flags |= HTP_FIELD_INVALID;
211 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request field invalid: LWS after name");
212 }
213 }
214
215 // Header value.
216
217 value_start = colon_pos;
218
219 // Go over the colon.
220 if (value_start < len) {
221 value_start++;
222 }
223
224 // Ignore LWS before field-content.
225 while ((value_start < len) && (htp_is_lws(data[value_start]))) {
226 value_start++;
227 }
228
229 // Look for the end of field-content.
230 value_end = value_start;
231 while ((value_end < len) && (data[value_end] != '\0')) value_end++;
232
233 // Ignore LWS after field-content.
234 prev = value_end - 1;
235 while ((prev > value_start) && (htp_is_lws(data[prev]))) {
236 prev--;
237 value_end--;
238 }
239
240 // Check that the header name is a token.
241 size_t i = name_start;
242 while (i < name_end) {
243 if (!htp_is_token(data[i])) {
244 // Incorrectly formed header name.
245
246 h->flags |= HTP_FIELD_INVALID;
247
248 // Log only once per transaction.
249 if (!(connp->in_tx->flags & HTP_FIELD_INVALID)) {
250 connp->in_tx->flags |= HTP_FIELD_INVALID;
251 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request header name is not a token");
252 }
253
254 break;
255 }
256
257 i++;
258 }
259
260 // Now extract the name and the value
261 h->name = bstr_dup_mem(data + name_start, name_end - name_start);
262 if (h->name == NULL) return HTP_ERROR;
263
264 h->value = bstr_dup_mem(data + value_start, value_end - value_start);
265 if (h->value == NULL) {
266 bstr_free(h->name);
267 return HTP_ERROR;
268 }
269
270 return HTP_OK;
271 }
272
273 /**
274 * Generic request line parser.
275 *
276 * @param[in] connp
277 * @return HTP_OK or HTP_ERROR
278 */
htp_parse_request_line_generic(htp_connp_t * connp)279 htp_status_t htp_parse_request_line_generic(htp_connp_t *connp) {
280 return htp_parse_request_line_generic_ex(connp, 0 /* NUL does not terminates line */);
281 }
282
htp_parse_request_line_generic_ex(htp_connp_t * connp,int nul_terminates)283 htp_status_t htp_parse_request_line_generic_ex(htp_connp_t *connp, int nul_terminates) {
284 htp_tx_t *tx = connp->in_tx;
285 unsigned char *data = bstr_ptr(tx->request_line);
286 size_t len = bstr_len(tx->request_line);
287 size_t pos = 0;
288 size_t mstart = 0;
289 size_t start;
290 size_t bad_delim;
291
292 if (nul_terminates) {
293 // The line ends with the first NUL byte.
294
295 size_t newlen = 0;
296 while ((pos < len) && (data[pos] != '\0')) {
297 pos++;
298 newlen++;
299 }
300
301 // Start again, with the new length.
302 len = newlen;
303 pos = 0;
304 }
305
306 // skip past leading whitespace. IIS allows this
307 while ((pos < len) && htp_is_space(data[pos])) pos++;
308 if (pos) {
309 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: leading whitespace");
310 mstart = pos;
311
312 if (connp->cfg->requestline_leading_whitespace_unwanted != HTP_UNWANTED_IGNORE) {
313 // reset mstart so that we copy the whitespace into the method
314 mstart = 0;
315 // set expected response code to this anomaly
316 tx->response_status_expected_number = connp->cfg->requestline_leading_whitespace_unwanted;
317 }
318 }
319
320 // The request method starts at the beginning of the
321 // line and ends with the first whitespace character.
322 while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
323
324 // No, we don't care if the method is empty.
325
326 tx->request_method = bstr_dup_mem(data + mstart, pos - mstart);
327 if (tx->request_method == NULL) return HTP_ERROR;
328
329 #ifdef HTP_DEBUG
330 fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_method), bstr_len(tx->request_method));
331 #endif
332
333 tx->request_method_number = htp_convert_method_to_number(tx->request_method);
334
335 bad_delim = 0;
336 // Ignore whitespace after request method. The RFC allows
337 // for only one SP, but then suggests any number of SP and HT
338 // should be permitted. Apache uses isspace(), which is even
339 // more permitting, so that's what we use here.
340 while ((pos < len) && (isspace(data[pos]))) {
341 if (!bad_delim && data[pos] != 0x20) {
342 bad_delim++;
343 }
344 pos++;
345 }
346 // Too much performance overhead for fuzzing
347 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
348 if (bad_delim) {
349 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: non-compliant delimiter between Method and URI");
350 }
351 #endif
352
353 // Is there anything after the request method?
354 if (pos == len) {
355 // No, this looks like a HTTP/0.9 request.
356
357 tx->is_protocol_0_9 = 1;
358 tx->request_protocol_number = HTP_PROTOCOL_0_9;
359 if (tx->request_method_number == HTP_M_UNKNOWN)
360 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method only");
361
362 return HTP_OK;
363 }
364
365 start = pos;
366 bad_delim = 0;
367
368 // The URI ends with the first whitespace.
369 while ((pos < len) && (data[pos] != 0x20)) {
370 if (!bad_delim && htp_is_space(data[pos])) {
371 bad_delim++;
372 }
373 pos++;
374 }
375 /* if we've seen some 'bad' delimiters, we retry with those */
376 if (bad_delim && pos == len) {
377 // special case: even though RFC's allow only SP (0x20), many
378 // implementations allow other delimiters, like tab or other
379 // characters that isspace() accepts.
380 pos = start;
381 while ((pos < len) && (!htp_is_space(data[pos]))) pos++;
382 }
383 // Too much performance overhead for fuzzing
384 #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
385 if (bad_delim) {
386 // warn regardless if we've seen non-compliant chars
387 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: URI contains non-compliant delimiter");
388 }
389 #endif
390
391 tx->request_uri = bstr_dup_mem(data + start, pos - start);
392 if (tx->request_uri == NULL) return HTP_ERROR;
393
394 #ifdef HTP_DEBUG
395 fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_uri), bstr_len(tx->request_uri));
396 #endif
397
398 // Ignore whitespace after URI.
399 while ((pos < len) && (htp_is_space(data[pos]))) pos++;
400
401 // Is there protocol information available?
402 if (pos == len) {
403 // No, this looks like a HTTP/0.9 request.
404
405 tx->is_protocol_0_9 = 1;
406 tx->request_protocol_number = HTP_PROTOCOL_0_9;
407 if (tx->request_method_number == HTP_M_UNKNOWN)
408 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and no protocol");
409
410 return HTP_OK;
411 }
412
413 // The protocol information continues until the end of the line.
414 tx->request_protocol = bstr_dup_mem(data + pos, len - pos);
415 if (tx->request_protocol == NULL) return HTP_ERROR;
416
417 tx->request_protocol_number = htp_parse_protocol(tx->request_protocol);
418 if (tx->request_method_number == HTP_M_UNKNOWN && tx->request_protocol_number == HTP_PROTOCOL_INVALID)
419 htp_log(connp, HTP_LOG_MARK, HTP_LOG_WARNING, 0, "Request line: unknown method and invalid protocol");
420
421 #ifdef HTP_DEBUG
422 fprint_raw_data(stderr, __func__, bstr_ptr(tx->request_protocol), bstr_len(tx->request_protocol));
423 #endif
424
425 return HTP_OK;
426 }
427
428