1 //--------------------------------------------------------------------------
2 // Copyright (C) 2014-2021 Cisco and/or its affiliates. All rights reserved.
3 //
4 // This program is free software; you can redistribute it and/or modify it
5 // under the terms of the GNU General Public License Version 2 as published
6 // by the Free Software Foundation. You may not use, modify or distribute
7 // this program under any other version of the GNU General Public License.
8 //
9 // This program is distributed in the hope that it will be useful, but
10 // WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License along
15 // with this program; if not, write to the Free Software Foundation, Inc.,
16 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 //--------------------------------------------------------------------------
18 // http_cutter.cc author Tom Peters <thopeter@cisco.com>
19
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23
24 #include "http_cutter.h"
25
26 #include "http_common.h"
27 #include "http_enum.h"
28 #include "http_flow_data.h"
29 #include "http_module.h"
30
31 using namespace HttpEnums;
32
cut(const uint8_t * buffer,uint32_t length,HttpInfractions * infractions,HttpEventGen * events,uint32_t,bool,HttpEnums::H2BodyState)33 ScanResult HttpStartCutter::cut(const uint8_t* buffer, uint32_t length,
34 HttpInfractions* infractions, HttpEventGen* events, uint32_t, bool, HttpEnums::H2BodyState)
35 {
36 for (uint32_t k = 0; k < length; k++)
37 {
38 // Discard magic six white space characters CR, LF, Tab, VT, FF, and SP when they occur
39 // before the start line.
40 // If we have seen nothing but white space so far ...
41 if (num_crlf == octets_seen + k)
42 {
43 if (is_sp_tab_cr_lf_vt_ff[buffer[k]])
44 {
45 if (!is_cr_lf[buffer[k]])
46 {
47 // tab, VT, FF, or space between messages
48 *infractions += INF_WS_BETWEEN_MSGS;
49 events->create_event(EVENT_WS_BETWEEN_MSGS);
50 }
51 if (num_crlf < MAX_LEADING_WHITESPACE)
52 {
53 num_crlf++;
54 continue;
55 }
56 else
57 {
58 *infractions += INF_TOO_MUCH_LEADING_WS;
59 events->generate_misformatted_http(buffer, length);
60 return SCAN_ABORT;
61 }
62 }
63 if (num_crlf > 0)
64 {
65 num_flush = k; // current octet not flushed with white space
66 return SCAN_DISCARD;
67 }
68 }
69
70 // If we get this far then the leading white space issue is behind us and num_crlf was
71 // reset to zero
72 if (!validated)
73 {
74 // The purpose of validate() is to quickly and efficiently dispose of obviously wrong
75 // bindings. Passing is no guarantee that the connection is really HTTP, but failing
76 // makes it clear that it isn't.
77 switch (validate(buffer[k], infractions, events))
78 {
79 case V_GOOD:
80 validated = true;
81 break;
82 case V_BAD:
83 *infractions += INF_NOT_HTTP;
84 events->generate_misformatted_http(buffer, length);
85 return SCAN_ABORT;
86 case V_TBD:
87 break;
88 }
89 }
90 if (buffer[k] == '\n')
91 {
92 num_crlf++;
93 if (num_crlf == 1)
94 {
95 // There was no CR before this
96 *infractions += INF_LF_WITHOUT_CR;
97 events->create_event(EVENT_LF_WITHOUT_CR);
98 }
99 num_flush = k+1;
100 return SCAN_FOUND;
101 }
102 if (num_crlf == 1)
103 { // CR not followed by LF
104 *infractions += INF_CR_WITHOUT_LF;
105 events->create_event(EVENT_CR_WITHOUT_LF);
106 num_flush = k; // current octet not flushed
107 return SCAN_FOUND;
108 }
109 if (buffer[k] == '\r')
110 {
111 num_crlf = 1;
112 }
113 }
114 octets_seen += length;
115 return SCAN_NOT_FOUND;
116 }
117
validate(uint8_t octet,HttpInfractions * infractions,HttpEventGen *)118 HttpStartCutter::ValidationResult HttpRequestCutter::validate(uint8_t octet,
119 HttpInfractions* infractions, HttpEventGen*)
120 {
121 // Request line must begin with a method. There is no list of all possible methods because
122 // extension is allowed, so there is no absolute way to tell whether something is a method.
123 // Instead we verify that all its characters are drawn from the RFC list of valid token
124 // characters, that it is followed by a whitespace character, and that it is at most 80
125 // characters long. There is nothing special or specified about 80. It is just more than any
126 // reasonable method name would be. Additionally we check for the first 16 bytes of the HTTP/2
127 // connection preface, which would otherwise pass the aforementioned check.
128
129 static const int max_method_length = 80;
130 static const int preface_len = 16;
131 static const int h1_test_len_in_preface = 4;
132 static const uint8_t h2_connection_preface[] = { 'P', 'R', 'I', ' ', '*', ' ', 'H', 'T', 'T',
133 'P', '/', '2', '.', '0', '\r', '\n' };
134
135 if (check_h2)
136 {
137 if (octet == h2_connection_preface[octets_checked])
138 {
139 octets_checked++;
140 if (octets_checked >= preface_len)
141 {
142 *infractions += INF_HTTP2_IN_HI;
143 return V_BAD;
144 }
145 return V_TBD;
146 }
147 else
148 {
149 if (octets_checked >= h1_test_len_in_preface)
150 return V_GOOD;
151 check_h2 = false;
152 }
153 }
154 if ((octet == ' ') || (octet == '\t'))
155 return V_GOOD;
156 if (!token_char[octet] || ++octets_checked > max_method_length)
157 return V_BAD;
158 return V_TBD;
159 }
160
validate(uint8_t octet,HttpInfractions * infractions,HttpEventGen * events)161 HttpStartCutter::ValidationResult HttpStatusCutter::validate(uint8_t octet,
162 HttpInfractions* infractions, HttpEventGen* events)
163 {
164 // Status line must begin "HTTP/"
165 static const int match_size = 5;
166 static const uint8_t primary_match[match_size] = { 'H', 'T', 'T', 'P', '/' };
167 static const uint8_t secondary_match[match_size] = { 'h', 't', 't', 'p', '/' };
168
169 if (octet != primary_match[octets_checked])
170 {
171 if (octet == secondary_match[octets_checked])
172 {
173 // Lower case is wrong but we can still parse the message
174 *infractions += INF_VERSION_NOT_UPPERCASE;
175 events->create_event(EVENT_VERSION_NOT_UPPERCASE);
176 }
177 else
178 return V_BAD;
179 }
180 if (++octets_checked >= match_size)
181 return V_GOOD;
182 return V_TBD;
183 }
184
cut(const uint8_t * buffer,uint32_t length,HttpInfractions * infractions,HttpEventGen * events,uint32_t,bool,HttpEnums::H2BodyState)185 ScanResult HttpHeaderCutter::cut(const uint8_t* buffer, uint32_t length,
186 HttpInfractions* infractions, HttpEventGen* events, uint32_t, bool, HttpEnums::H2BodyState)
187 {
188 // Header separators: leading \r\n, leading \n, nonleading \r\n\r\n, nonleading \n\r\n,
189 // nonleading \r\n\n, and nonleading \n\n. The separator itself becomes num_excess which is
190 // discarded during reassemble().
191 // \r without \n can (improperly) end the start line or a header line, but not the entire
192 // header block.
193 for (uint32_t k = 0; k < length; k++)
194 {
195 switch (state)
196 {
197 case ZERO:
198 if (buffer[k] == '\r')
199 {
200 state = HALF;
201 num_crlf++;
202 }
203 else if (buffer[k] == '\n')
204 {
205 *infractions += INF_LF_WITHOUT_CR;
206 events->create_event(EVENT_LF_WITHOUT_CR);
207 state = ONE;
208 num_crlf++;
209 }
210 break;
211 case HALF:
212 if (buffer[k] == '\r')
213 {
214 *infractions += INF_CR_WITHOUT_LF;
215 events->create_event(EVENT_CR_WITHOUT_LF);
216 state = THREEHALF;
217 num_crlf++;
218 }
219 else if (buffer[k] == '\n')
220 {
221 state = ONE;
222 num_crlf++;
223 }
224 else
225 {
226 *infractions += INF_CR_WITHOUT_LF;
227 events->create_event(EVENT_CR_WITHOUT_LF);
228 state = ZERO;
229 num_crlf = 0;
230 num_head_lines++;
231 }
232 break;
233 case ONE:
234 if (buffer[k] == '\r')
235 {
236 state = THREEHALF;
237 num_crlf++;
238 }
239 else if (buffer[k] == '\n')
240 {
241 *infractions += INF_LF_WITHOUT_CR;
242 events->create_event(EVENT_LF_WITHOUT_CR);
243 num_crlf++;
244 num_flush = k + 1;
245 return SCAN_FOUND;
246 }
247 else
248 {
249 state = ZERO;
250 num_crlf = 0;
251 num_head_lines++;
252 }
253 break;
254 case THREEHALF:
255 if (buffer[k] == '\r')
256 {
257 *infractions += INF_CR_WITHOUT_LF;
258 events->create_event(EVENT_CR_WITHOUT_LF);
259 num_crlf++;
260 }
261 else if (buffer[k] == '\n')
262 {
263 num_crlf++;
264 num_flush = k + 1;
265 return SCAN_FOUND;
266 }
267 else
268 {
269 *infractions += INF_CR_WITHOUT_LF;
270 events->create_event(EVENT_CR_WITHOUT_LF);
271 state = ZERO;
272 num_crlf = 0;
273 num_head_lines++;
274 }
275 break;
276 }
277 }
278 octets_seen += length;
279 return SCAN_NOT_FOUND;
280 }
281
HttpBodyCutter(bool accelerated_blocking_,ScriptFinder * finder_,CompressId compression_)282 HttpBodyCutter::HttpBodyCutter(bool accelerated_blocking_, ScriptFinder* finder_,
283 CompressId compression_)
284 : accelerated_blocking(accelerated_blocking_), compression(compression_), finder(finder_)
285 {
286 if (accelerated_blocking)
287 {
288 if ((compression == CMP_GZIP) || (compression == CMP_DEFLATE))
289 {
290 compress_stream = new z_stream;
291 compress_stream->zalloc = Z_NULL;
292 compress_stream->zfree = Z_NULL;
293 compress_stream->next_in = Z_NULL;
294 compress_stream->avail_in = 0;
295 const int window_bits = (compression == CMP_GZIP) ?
296 GZIP_WINDOW_BITS : DEFLATE_WINDOW_BITS;
297 if (inflateInit2(compress_stream, window_bits) != Z_OK)
298 {
299 assert(false);
300 compression = CMP_NONE;
301 delete compress_stream;
302 compress_stream = nullptr;
303 }
304 }
305
306 static const uint8_t inspect_string[] = { '<', '/', 's', 'c', 'r', 'i', 'p', 't', '>' };
307 static const uint8_t inspect_upper[] = { '<', '/', 'S', 'C', 'R', 'I', 'P', 'T', '>' };
308
309 match_string = inspect_string;
310 match_string_upper = inspect_upper;
311 string_length = sizeof(inspect_string);
312 }
313 }
314
~HttpBodyCutter()315 HttpBodyCutter::~HttpBodyCutter()
316 {
317 if (compress_stream != nullptr)
318 {
319 inflateEnd(compress_stream);
320 delete compress_stream;
321 }
322 }
323
cut(const uint8_t * buffer,uint32_t length,HttpInfractions *,HttpEventGen *,uint32_t flow_target,bool stretch,HttpEnums::H2BodyState)324 ScanResult HttpBodyClCutter::cut(const uint8_t* buffer, uint32_t length, HttpInfractions*,
325 HttpEventGen*, uint32_t flow_target, bool stretch, HttpEnums::H2BodyState)
326 {
327 assert(remaining > octets_seen);
328
329 // Are we skipping to the next message?
330 if (flow_target == 0)
331 {
332 if (remaining <= length)
333 {
334 num_flush = remaining;
335 remaining = 0;
336 return SCAN_DISCARD;
337 }
338 else
339 {
340 num_flush = length;
341 remaining -= num_flush;
342 return SCAN_DISCARD_PIECE;
343 }
344 }
345
346 // A target that is bigger than the entire rest of the message body makes no sense
347 if (remaining <= flow_target)
348 {
349 flow_target = remaining;
350 stretch = false;
351 }
352
353 if (octets_seen + length < flow_target)
354 {
355 octets_seen += length;
356 return need_accelerated_blocking(buffer, length) ?
357 SCAN_NOT_FOUND_ACCELERATE : SCAN_NOT_FOUND;
358 }
359
360 if (!stretch)
361 {
362 remaining -= flow_target;
363 num_flush = flow_target - octets_seen;
364 if (remaining > 0)
365 {
366 need_accelerated_blocking(buffer, num_flush);
367 return SCAN_FOUND_PIECE;
368 }
369 else
370 return SCAN_FOUND;
371 }
372
373 if (octets_seen + length < remaining)
374 {
375 // The message body continues beyond this segment
376 // Stretch the section to include this entire segment provided it is not too big
377 if (octets_seen + length <= flow_target + MAX_SECTION_STRETCH)
378 num_flush = length;
379 else
380 num_flush = flow_target - octets_seen;
381 remaining -= octets_seen + num_flush;
382 need_accelerated_blocking(buffer, num_flush);
383 return SCAN_FOUND_PIECE;
384 }
385
386 if (remaining - flow_target <= MAX_SECTION_STRETCH)
387 {
388 // Stretch the section to finish the message body
389 num_flush = remaining - octets_seen;
390 remaining = 0;
391 return SCAN_FOUND;
392 }
393
394 // Cannot stretch to the end of the message body. Cut at the original target.
395 num_flush = flow_target - octets_seen;
396 remaining -= flow_target;
397 need_accelerated_blocking(buffer, num_flush);
398 return SCAN_FOUND_PIECE;
399 }
400
cut(const uint8_t * buffer,uint32_t length,HttpInfractions *,HttpEventGen *,uint32_t flow_target,bool stretch,HttpEnums::H2BodyState)401 ScanResult HttpBodyOldCutter::cut(const uint8_t* buffer, uint32_t length, HttpInfractions*,
402 HttpEventGen*, uint32_t flow_target, bool stretch, HttpEnums::H2BodyState)
403 {
404 if (flow_target == 0)
405 {
406 // FIXIT-P Need StreamSplitter::END
407 // With other types of body we would skip to the trailers and/or next message now. But this
408 // will run to connection close so we should just stop processing this flow. But there is
409 // no way to ask stream to do that so we must skip through the rest of the message
410 // ourselves.
411 num_flush = length;
412 return SCAN_DISCARD_PIECE;
413 }
414
415 if (octets_seen + length < flow_target)
416 {
417 // Not enough data yet to create a message section
418 octets_seen += length;
419 return need_accelerated_blocking(buffer, length) ?
420 SCAN_NOT_FOUND_ACCELERATE : SCAN_NOT_FOUND;
421 }
422 else if (stretch && (octets_seen + length <= flow_target + MAX_SECTION_STRETCH))
423 {
424 // Cut the section at the end of this TCP segment to avoid splitting a packet
425 num_flush = length;
426 need_accelerated_blocking(buffer, num_flush);
427 return SCAN_FOUND_PIECE;
428 }
429 else
430 {
431 // Cut the section at the target length. Either stretching is not allowed or the end of
432 // the segment is too far away.
433 num_flush = flow_target - octets_seen;
434 need_accelerated_blocking(buffer, num_flush);
435 return SCAN_FOUND_PIECE;
436 }
437 }
438
transition_to_chunk_bad(bool & accelerate_this_packet)439 void HttpBodyChunkCutter::transition_to_chunk_bad(bool& accelerate_this_packet)
440 {
441 curr_state = CHUNK_BAD;
442 accelerate_this_packet = true;
443 zero_chunk = false;
444 }
445
// Scans one segment of a chunked message body, walking the chunk framing one octet at a time
// with a state machine (curr_state) that persists between calls.
//
// flow_target - desired amount of chunk data per section; zero means the rest of the body is
//               being discarded
// stretch     - whether a section may grow past flow_target by up to MAX_SECTION_STRETCH
//
// Returns:
//   SCAN_FOUND / SCAN_DISCARD  - the terminating zero-length chunk header was completed
//   SCAN_FOUND_PIECE           - a section was cut; data_seen is reset to zero
//   SCAN_DISCARD_PIECE         - discarding and the body continues
//   SCAN_NOT_FOUND[_ACCELERATE] - more data is needed; octets_seen is advanced
//
// Several states finish with k-- so that the same octet is examined again in the new state on
// the next loop iteration. That is why k is signed.
ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
    HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, bool stretch,
    HttpEnums::H2BodyState)
{
    // Are we skipping through the rest of this chunked body to the trailers and the next message?
    const bool discard_mode = (flow_target == 0);

    // Largest amount of chunk data allowed in one section
    const uint32_t adjusted_target = stretch ? MAX_SECTION_STRETCH + flow_target : flow_target;

    // Set when any part of this segment needs accelerated blocking
    bool accelerate_this_packet = false;

    for (int32_t k=0; k < static_cast<int32_t>(length); k++)
    {
        switch (curr_state)
        {
        case CHUNK_NEWLINES:
            zero_chunk = true;
            // Looking for improper CRLFs before the chunk header
            if (is_cr_lf[buffer[k]])
            {
                *infractions += INF_CHUNK_BAD_SEP;
                events->create_event(EVENT_CHUNK_BAD_SEP);
                break;
            }
            curr_state = CHUNK_LEADING_WS;
            k--; // Reprocess this octet in the next state
            break;
        case CHUNK_LEADING_WS:
            // Looking for whitespace before the chunk size
            if (is_sp_tab[buffer[k]])
            {
                *infractions += INF_CHUNK_LEADING_WS;
                events->create_event(EVENT_CHUNK_WHITESPACE);
                num_leading_ws++;
                // Five leading whitespace octets are treated as a broken chunk
                if (num_leading_ws == 5)
                {
                    events->create_event(EVENT_BROKEN_CHUNK);
                    transition_to_chunk_bad(accelerate_this_packet);
                    k--;
                }
                break;
            }
            curr_state = CHUNK_ZEROS;
            k--;
            break;
        case CHUNK_ZEROS:
            // Looking for leading zeros in the chunk size.
            if (buffer[k] == '0')
            {
                num_zeros++;
                // Alert once, when the fifth leading zero is seen
                if (num_zeros == 5)
                {
                    *infractions += INF_CHUNK_ZEROS;
                    events->create_event(EVENT_CHUNK_ZEROS);
                }
                break;
            }
            curr_state = CHUNK_NUMBER;
            k--;
            break;
        case CHUNK_NUMBER:
            // Reading the chunk size
            if (buffer[k] == '\r')
            {
                curr_state = CHUNK_HCRLF;
            }
            else if (buffer[k] == '\n')
            {
                *infractions += INF_CHUNK_BARE_LF;
                events->create_event(EVENT_CHUNK_BARE_LF);
                curr_state = CHUNK_HCRLF;
                k--; // Let CHUNK_HCRLF consume this LF
            }
            else if (is_sp_tab[buffer[k]])
            {
                *infractions += INF_CHUNK_WHITESPACE;
                events->create_event(EVENT_CHUNK_WHITESPACE);
                curr_state = CHUNK_TRAILING_WS;
            }
            else if (buffer[k] == ';')
            {
                *infractions += INF_CHUNK_OPTIONS;
                events->create_event(EVENT_CHUNK_OPTIONS);
                curr_state = CHUNK_OPTIONS;
            }
            else if (as_hex[buffer[k]] == -1)
            {
                // illegal character present in chunk length
                *infractions += INF_CHUNK_BAD_CHAR;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
            }
            else
            {
                // Accumulate one more hexadecimal digit of the chunk length
                expected = expected * 16 + as_hex[buffer[k]];
                if ((++digits_seen > 8) || (expected > maximum_chunk_length))
                {
                    // alert for exceeding configurable limit
                    *infractions += INF_CHUNK_OVER_MAXIMUM;
                    events->create_event(EVENT_LARGE_CHUNK);
                    if (digits_seen > 8)
                    {
                        // overflow protection: absolutely must fit into 32 bits
                        *infractions += INF_CHUNK_TOO_LARGE;
                        events->create_event(EVENT_BROKEN_CHUNK);
                        transition_to_chunk_bad(accelerate_this_packet);
                        k--;
                    }
                }
                if (expected != 0)
                    zero_chunk = false;
            }
            break;
        case CHUNK_TRAILING_WS:
            // Skipping over improper whitespace following the chunk size
            if (buffer[k] == '\r')
            {
                curr_state = CHUNK_HCRLF;
            }
            else if (buffer[k] == '\n')
            {
                *infractions += INF_CHUNK_BARE_LF;
                events->create_event(EVENT_CHUNK_BARE_LF);
                curr_state = CHUNK_HCRLF;
                k--;
            }
            else if (buffer[k] == ';')
            {
                *infractions += INF_CHUNK_OPTIONS;
                events->create_event(EVENT_CHUNK_OPTIONS);
                curr_state = CHUNK_OPTIONS;
            }
            else if (!is_sp_tab[buffer[k]])
            {
                // illegal character present in chunk length
                *infractions += INF_CHUNK_BAD_CHAR;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
            }
            break;
        case CHUNK_OPTIONS:
            // The RFC permits options to follow the chunk size. No one normally does this.
            // Everything up to the end of the line is skipped.
            if (buffer[k] == '\r')
            {
                curr_state = CHUNK_HCRLF;
            }
            else if (buffer[k] == '\n')
            {
                *infractions += INF_CHUNK_BARE_LF;
                events->create_event(EVENT_CHUNK_BARE_LF);
                curr_state = CHUNK_HCRLF;
                k--;
            }
            break;
        case CHUNK_HCRLF:
            // The chunk header should end in CRLF and this should be the LF
            if (buffer[k] != '\n')
            {
                // This is qualitatively different from similar bare CR issues because it doesn't
                // provide a transparent data channel. A recipient is much less likely to implement
                // tolerance for this irregularity because a chunk that begins with LF is
                // ambiguous.
                *infractions += INF_CHUNK_LONE_CR;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
                break;
            }
            if (expected > 0)
            {
                curr_state = CHUNK_DATA;
            }
            else if (num_zeros > 0)
            {
                // Terminating zero-length chunk
                num_good_chunks++;
                num_flush = k+1;
                return !discard_mode ? SCAN_FOUND : SCAN_DISCARD;
            }
            else
            {
                // The header had no digits at all
                *infractions += INF_CHUNK_NO_LENGTH;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
            }
            break;
        case CHUNK_DATA:
            // Moving through the chunk data
            {
                // Take whichever is smaller: the rest of this segment or the rest of the chunk
                uint32_t skip_amount = (length-k <= expected) ? length-k : expected;
                if (!discard_mode && (skip_amount > adjusted_target-data_seen))
                { // Do not exceed requested section size (including stretching)
                    skip_amount = adjusted_target-data_seen;
                }

                accelerate_this_packet = need_accelerated_blocking(buffer+k, skip_amount) ||
                    accelerate_this_packet;

                // Minus one because the loop increment adds one
                k += skip_amount - 1;
                if ((expected -= skip_amount) == 0)
                {
                    curr_state = CHUNK_DCRLF1;
                }
                if ((data_seen += skip_amount) == adjusted_target)
                {
                    data_seen = 0;
                    num_flush = k+1;
                    return SCAN_FOUND_PIECE;
                }
                break;
            }
        case CHUNK_DCRLF1:
            // The CR from the end-of-chunk CRLF should be here
            if (buffer[k] == '\r')
            {
                curr_state = CHUNK_DCRLF2;
            }
            else if (buffer[k] == '\n')
            {
                *infractions += INF_CHUNK_BAD_SEP;
                events->create_event(EVENT_CHUNK_BAD_SEP);
                curr_state = CHUNK_DCRLF2;
                k--;
            }
            else
            {
                *infractions += INF_CHUNK_BAD_END;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
            }
            break;
        case CHUNK_DCRLF2:
            // The LF from the end-of-chunk CRLF should be here
            // The chunk is complete either way: reset the per-chunk counters
            num_good_chunks++;
            num_leading_ws = 0;
            num_zeros = 0;
            expected = 0;
            digits_seen = 0;
            curr_state = CHUNK_NEWLINES;
            if (buffer[k] == '\n')
                break;
            *infractions += INF_CHUNK_BAD_SEP;
            events->create_event(EVENT_CHUNK_BAD_SEP);
            // A second CR is consumed here; anything else is reprocessed as the next chunk header
            if (buffer[k] != '\r')
                k--;
            break;
        case CHUNK_BAD:
            // Chunk reassembly has failed. This is a terminal state but inspection of the body
            // must go on.
            // If we are skipping to the trailers and next message the broken chunk thwarts us
            if (discard_mode)
            {
                // FIXIT-P Need StreamSplitter::END
                // With the broken chunk this will run to connection close so we should just stop
                // processing this flow. But there is no way to ask stream to do that so we must
                // skip through the rest of the message ourselves.
                num_flush = length;
                return SCAN_DISCARD_PIECE;
            }

            // When chunk parsing breaks down and we first enter CHUNK_BAD state, it may happen
            // that there were chunk header bytes between the last good chunk and the point where
            // the failure occurred. These will not have been counted in data_seen because we
            // planned to delete them during reassembly. Because they are not part of a valid chunk
            // they will be reassembled after all. This will overrun the adjusted_target making the
            // message section a little bigger than planned. It's not important.
            uint32_t skip_amount = length-k;
            skip_amount = (skip_amount <= adjusted_target-data_seen) ? skip_amount :
                adjusted_target-data_seen;
            accelerate_this_packet = need_accelerated_blocking(buffer+k, skip_amount) ||
                accelerate_this_packet;
            k += skip_amount - 1;
            if ((data_seen += skip_amount) == adjusted_target)
            {
                data_seen = 0;
                num_flush = k+1;
                return SCAN_FOUND_PIECE;
            }
            break;
        }
    }

    // The whole segment was consumed without reaching a cut point inside the loop
    if (discard_mode)
    {
        num_flush = length;
        return SCAN_DISCARD_PIECE;
    }

    if (data_seen >= flow_target)
    {
        // We passed the flow_target and stretched to the end of the segment
        data_seen = 0;
        num_flush = length;
        return SCAN_FOUND_PIECE;
    }

    octets_seen += length;

    if (accelerate_this_packet || (zero_chunk && data_seen))
        return SCAN_NOT_FOUND_ACCELERATE;

    return SCAN_NOT_FOUND;
}
752
cut(const uint8_t * buffer,uint32_t length,HttpInfractions * infractions,HttpEventGen * events,uint32_t flow_target,bool stretch,H2BodyState state)753 ScanResult HttpBodyH2Cutter::cut(const uint8_t* buffer, uint32_t length,
754 HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, bool stretch,
755 H2BodyState state)
756 {
757 // If the headers included a content length header (expected length >= 0), check it against the
758 // actual message body length. Alert if it does not match at the end of the message body or if
759 // it overflows during the body (alert once then stop computing).
760 if (expected_body_length >= 0)
761 {
762 if ((total_octets_scanned + length) > expected_body_length)
763 {
764 *infractions += INF_H2_DATA_OVERRUNS_CL;
765 events->create_event(EVENT_H2_DATA_OVERRUNS_CL);
766 expected_body_length = HttpCommon::STAT_NOT_COMPUTE;
767 }
768 else if (state != H2_BODY_NOT_COMPLETE and
769 ((total_octets_scanned + length) < expected_body_length))
770 {
771 *infractions += INF_H2_DATA_UNDERRUNS_CL;
772 events->create_event(EVENT_H2_DATA_UNDERRUNS_CL);
773 }
774 }
775
776 if (flow_target == 0)
777 {
778 num_flush = length;
779 total_octets_scanned += length;
780 if (state != H2_BODY_NOT_COMPLETE)
781 return SCAN_DISCARD;
782
783 return SCAN_DISCARD_PIECE;
784 }
785
786 if (state == H2_BODY_NOT_COMPLETE)
787 {
788 if (octets_seen + length < flow_target)
789 {
790 // Not enough data yet to create a message section
791 octets_seen += length;
792 total_octets_scanned += length;
793 return need_accelerated_blocking(buffer, length) ?
794 SCAN_NOT_FOUND_ACCELERATE : SCAN_NOT_FOUND;
795 }
796 else
797 {
798 if (stretch && (octets_seen + length <= flow_target + MAX_SECTION_STRETCH))
799 num_flush = length;
800 else
801 num_flush = flow_target - octets_seen;
802 total_octets_scanned += num_flush;
803 need_accelerated_blocking(buffer, num_flush);
804 return SCAN_FOUND_PIECE;
805 }
806 }
807 else if (state == H2_BODY_LAST_SEG)
808 {
809 const uint32_t adjusted_target = stretch ? MAX_SECTION_STRETCH + flow_target : flow_target;
810 if (octets_seen + length <= adjusted_target)
811 num_flush = length;
812 else
813 num_flush = flow_target - octets_seen;
814
815 total_octets_scanned += num_flush;
816 if (num_flush == length)
817 return SCAN_FOUND;
818 else
819 return SCAN_FOUND_PIECE;
820 }
821 else
822 {
823 // To end message body when trailers are received or a 0 length data frame with
824 // end of stream set is received, a zero-length buffer is sent to flush
825 assert(length == 0);
826 num_flush = 0;
827 return SCAN_FOUND;
828 }
829 }
830
831 // This method searches the input stream looking for a script or other dangerous content that
832 // requires script detection. Exactly what we are looking for is encapsulated in dangerous().
833 //
834 // Return value true indicates a match and enables the packet that completes the matching sequence
835 // to be sent for partial inspection.
836 //
837 // Any attempt to optimize this code should be mindful that once you skip any part of the message
838 // body, dangerous() loses the ability to unzip subsequent data.
839
need_accelerated_blocking(const uint8_t * data,uint32_t length)840 bool HttpBodyCutter::need_accelerated_blocking(const uint8_t* data, uint32_t length)
841 {
842 const bool need_accelerated_blocking = accelerated_blocking && dangerous(data, length);
843 if (need_accelerated_blocking)
844 HttpModule::increment_peg_counts(PEG_SCRIPT_DETECTION);
845 return need_accelerated_blocking;
846 }
847
find_partial(const uint8_t * input_buf,uint32_t input_length,bool end)848 bool HttpBodyCutter::find_partial(const uint8_t* input_buf, uint32_t input_length, bool end)
849 {
850 for (uint32_t k = 0; k < input_length; k++)
851 {
852 // partial_match is persistent, enabling matches that cross data boundaries
853 if ((input_buf[k] == match_string[partial_match]) ||
854 (input_buf[k] == match_string_upper[partial_match]))
855 {
856 if (++partial_match == string_length)
857 {
858 partial_match = 0;
859 return true;
860 }
861 }
862 else
863 {
864 partial_match = 0;
865 if ( end )
866 return false;
867 }
868 }
869 return false;
870 }
871
872 // Currently we do accelerated blocking when we see a javascript
dangerous(const uint8_t * data,uint32_t length)873 bool HttpBodyCutter::dangerous(const uint8_t* data, uint32_t length)
874 {
875 const uint8_t* input_buf = data;
876 uint32_t input_length = length;
877 uint8_t* decomp_output = nullptr;
878
879 // Zipped flows must be decompressed before we can check them. Unzipping for accelerated
880 // blocking is completely separate from the unzipping done later in reassemble().
881 if ((compression == CMP_GZIP) || (compression == CMP_DEFLATE))
882 {
883 // Previous decompression failures make it impossible to search for scripts
884 if (decompress_failed)
885 return true;
886
887 const uint32_t decomp_buffer_size = MAX_OCTETS;
888 decomp_output = new uint8_t[decomp_buffer_size];
889
890 compress_stream->next_in = const_cast<Bytef*>(data);
891 compress_stream->avail_in = length;
892 compress_stream->next_out = decomp_output;
893 compress_stream->avail_out = decomp_buffer_size;
894
895 int ret_val = inflate(compress_stream, Z_SYNC_FLUSH);
896
897 // Not going to be subtle about this and try to fix decompression problems. If it doesn't
898 // work out we assume it could be dangerous.
899 if (((ret_val != Z_OK) && (ret_val != Z_STREAM_END)) || (compress_stream->avail_in > 0))
900 {
901 decompress_failed = true;
902 delete[] decomp_output;
903 return true;
904 }
905
906 input_buf = decomp_output;
907 input_length = decomp_buffer_size - compress_stream->avail_out;
908 }
909
910 std::unique_ptr<uint8_t[]> uniq(decomp_output);
911
912 if ( input_length > string_length )
913 {
914 if ( partial_match and find_partial(input_buf, input_length, true) )
915 return true;
916
917 if ( finder->search(input_buf, input_length) >= 0 )
918 return true;
919
920 uint32_t delta = input_length - string_length + 1;
921 input_buf += delta;
922 input_length -= delta;
923 }
924
925 if ( find_partial(input_buf, input_length, false) )
926 return true;
927
928 return false;
929 }
930
931