1 //--------------------------------------------------------------------------
2 // Copyright (C) 2014-2021 Cisco and/or its affiliates. All rights reserved.
3 //
4 // This program is free software; you can redistribute it and/or modify it
5 // under the terms of the GNU General Public License Version 2 as published
6 // by the Free Software Foundation.  You may not use, modify or distribute
7 // this program under any other version of the GNU General Public License.
8 //
9 // This program is distributed in the hope that it will be useful, but
10 // WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 // General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License along
15 // with this program; if not, write to the Free Software Foundation, Inc.,
16 // 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17 //--------------------------------------------------------------------------
18 // http_cutter.cc author Tom Peters <thopeter@cisco.com>
19 
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23 
24 #include "http_cutter.h"
25 
26 #include "http_common.h"
27 #include "http_enum.h"
28 #include "http_flow_data.h"
29 #include "http_module.h"
30 
31 using namespace HttpEnums;
32 
cut(const uint8_t * buffer,uint32_t length,HttpInfractions * infractions,HttpEventGen * events,uint32_t,bool,HttpEnums::H2BodyState)33 ScanResult HttpStartCutter::cut(const uint8_t* buffer, uint32_t length,
34     HttpInfractions* infractions, HttpEventGen* events, uint32_t, bool, HttpEnums::H2BodyState)
35 {
36     for (uint32_t k = 0; k < length; k++)
37     {
38         // Discard magic six white space characters CR, LF, Tab, VT, FF, and SP when they occur
39         // before the start line.
40         // If we have seen nothing but white space so far ...
41         if (num_crlf == octets_seen + k)
42         {
43             if (is_sp_tab_cr_lf_vt_ff[buffer[k]])
44             {
45                 if (!is_cr_lf[buffer[k]])
46                 {
47                     // tab, VT, FF, or space between messages
48                     *infractions += INF_WS_BETWEEN_MSGS;
49                     events->create_event(EVENT_WS_BETWEEN_MSGS);
50                 }
51                 if (num_crlf < MAX_LEADING_WHITESPACE)
52                 {
53                     num_crlf++;
54                     continue;
55                 }
56                 else
57                 {
58                     *infractions += INF_TOO_MUCH_LEADING_WS;
59                     events->generate_misformatted_http(buffer, length);
60                     return SCAN_ABORT;
61                 }
62             }
63             if (num_crlf > 0)
64             {
65                 num_flush = k;     // current octet not flushed with white space
66                 return SCAN_DISCARD;
67             }
68         }
69 
70         // If we get this far then the leading white space issue is behind us and num_crlf was
71         // reset to zero
72         if (!validated)
73         {
74             // The purpose of validate() is to quickly and efficiently dispose of obviously wrong
75             // bindings. Passing is no guarantee that the connection is really HTTP, but failing
76             // makes it clear that it isn't.
77             switch (validate(buffer[k], infractions, events))
78             {
79             case V_GOOD:
80                 validated = true;
81                 break;
82             case V_BAD:
83                 *infractions += INF_NOT_HTTP;
84                 events->generate_misformatted_http(buffer, length);
85                 return SCAN_ABORT;
86             case V_TBD:
87                 break;
88             }
89         }
90         if (buffer[k] == '\n')
91         {
92             num_crlf++;
93             if (num_crlf == 1)
94             {
95                 // There was no CR before this
96                 *infractions += INF_LF_WITHOUT_CR;
97                 events->create_event(EVENT_LF_WITHOUT_CR);
98             }
99             num_flush = k+1;
100             return SCAN_FOUND;
101         }
102         if (num_crlf == 1)
103         {   // CR not followed by LF
104             *infractions += INF_CR_WITHOUT_LF;
105             events->create_event(EVENT_CR_WITHOUT_LF);
106             num_flush = k;                      // current octet not flushed
107             return SCAN_FOUND;
108         }
109         if (buffer[k] == '\r')
110         {
111             num_crlf = 1;
112         }
113     }
114     octets_seen += length;
115     return SCAN_NOT_FOUND;
116 }
117 
validate(uint8_t octet,HttpInfractions * infractions,HttpEventGen *)118 HttpStartCutter::ValidationResult HttpRequestCutter::validate(uint8_t octet,
119     HttpInfractions* infractions, HttpEventGen*)
120 {
121     // Request line must begin with a method. There is no list of all possible methods because
122     // extension is allowed, so there is no absolute way to tell whether something is a method.
123     // Instead we verify that all its characters are drawn from the RFC list of valid token
124     // characters, that it is followed by a whitespace character, and that it is at most 80
125     // characters long. There is nothing special or specified about 80. It is just more than any
126     // reasonable method name would be. Additionally we check for the first 16 bytes of the HTTP/2
127     // connection preface, which would otherwise pass the aforementioned check.
128 
129     static const int max_method_length = 80;
130     static const int preface_len = 16;
131     static const int h1_test_len_in_preface = 4;
132     static const uint8_t h2_connection_preface[] = { 'P', 'R', 'I', ' ', '*', ' ', 'H', 'T', 'T',
133         'P', '/', '2', '.', '0', '\r', '\n' };
134 
135     if (check_h2)
136     {
137         if (octet == h2_connection_preface[octets_checked])
138         {
139             octets_checked++;
140             if (octets_checked >= preface_len)
141             {
142                 *infractions += INF_HTTP2_IN_HI;
143                 return V_BAD;
144             }
145             return V_TBD;
146         }
147         else
148         {
149             if (octets_checked >= h1_test_len_in_preface)
150                 return V_GOOD;
151             check_h2 = false;
152         }
153     }
154     if ((octet == ' ') || (octet == '\t'))
155         return V_GOOD;
156     if (!token_char[octet] || ++octets_checked > max_method_length)
157         return V_BAD;
158     return V_TBD;
159 }
160 
validate(uint8_t octet,HttpInfractions * infractions,HttpEventGen * events)161 HttpStartCutter::ValidationResult HttpStatusCutter::validate(uint8_t octet,
162     HttpInfractions* infractions, HttpEventGen* events)
163 {
164     // Status line must begin "HTTP/"
165     static const int match_size = 5;
166     static const uint8_t primary_match[match_size] = { 'H', 'T', 'T', 'P', '/' };
167     static const uint8_t secondary_match[match_size] = { 'h', 't', 't', 'p', '/' };
168 
169     if (octet != primary_match[octets_checked])
170     {
171         if (octet == secondary_match[octets_checked])
172         {
173             // Lower case is wrong but we can still parse the message
174             *infractions += INF_VERSION_NOT_UPPERCASE;
175             events->create_event(EVENT_VERSION_NOT_UPPERCASE);
176         }
177         else
178             return V_BAD;
179     }
180     if (++octets_checked >= match_size)
181         return V_GOOD;
182     return V_TBD;
183 }
184 
cut(const uint8_t * buffer,uint32_t length,HttpInfractions * infractions,HttpEventGen * events,uint32_t,bool,HttpEnums::H2BodyState)185 ScanResult HttpHeaderCutter::cut(const uint8_t* buffer, uint32_t length,
186     HttpInfractions* infractions, HttpEventGen* events, uint32_t, bool, HttpEnums::H2BodyState)
187 {
188     // Header separators: leading \r\n, leading \n, nonleading \r\n\r\n, nonleading \n\r\n,
189     // nonleading \r\n\n, and nonleading \n\n. The separator itself becomes num_excess which is
190     // discarded during reassemble().
191     // \r without \n can (improperly) end the start line or a header line, but not the entire
192     // header block.
193     for (uint32_t k = 0; k < length; k++)
194     {
195         switch (state)
196         {
197         case ZERO:
198             if (buffer[k] == '\r')
199             {
200                 state = HALF;
201                 num_crlf++;
202             }
203             else if (buffer[k] == '\n')
204             {
205                 *infractions += INF_LF_WITHOUT_CR;
206                 events->create_event(EVENT_LF_WITHOUT_CR);
207                 state = ONE;
208                 num_crlf++;
209             }
210             break;
211         case HALF:
212             if (buffer[k] == '\r')
213             {
214                 *infractions += INF_CR_WITHOUT_LF;
215                 events->create_event(EVENT_CR_WITHOUT_LF);
216                 state = THREEHALF;
217                 num_crlf++;
218             }
219             else if (buffer[k] == '\n')
220             {
221                 state = ONE;
222                 num_crlf++;
223             }
224             else
225             {
226                 *infractions += INF_CR_WITHOUT_LF;
227                 events->create_event(EVENT_CR_WITHOUT_LF);
228                 state = ZERO;
229                 num_crlf = 0;
230                 num_head_lines++;
231             }
232             break;
233         case ONE:
234             if (buffer[k] == '\r')
235             {
236                 state = THREEHALF;
237                 num_crlf++;
238             }
239             else if (buffer[k] == '\n')
240             {
241                 *infractions += INF_LF_WITHOUT_CR;
242                 events->create_event(EVENT_LF_WITHOUT_CR);
243                 num_crlf++;
244                 num_flush = k + 1;
245                 return SCAN_FOUND;
246             }
247             else
248             {
249                 state = ZERO;
250                 num_crlf = 0;
251                 num_head_lines++;
252             }
253             break;
254         case THREEHALF:
255             if (buffer[k] == '\r')
256             {
257                 *infractions += INF_CR_WITHOUT_LF;
258                 events->create_event(EVENT_CR_WITHOUT_LF);
259                 num_crlf++;
260             }
261             else if (buffer[k] == '\n')
262             {
263                 num_crlf++;
264                 num_flush = k + 1;
265                 return SCAN_FOUND;
266             }
267             else
268             {
269                 *infractions += INF_CR_WITHOUT_LF;
270                 events->create_event(EVENT_CR_WITHOUT_LF);
271                 state = ZERO;
272                 num_crlf = 0;
273                 num_head_lines++;
274             }
275             break;
276         }
277     }
278     octets_seen += length;
279     return SCAN_NOT_FOUND;
280 }
281 
HttpBodyCutter(bool accelerated_blocking_,ScriptFinder * finder_,CompressId compression_)282 HttpBodyCutter::HttpBodyCutter(bool accelerated_blocking_, ScriptFinder* finder_,
283     CompressId compression_)
284     : accelerated_blocking(accelerated_blocking_), compression(compression_), finder(finder_)
285 {
286     if (accelerated_blocking)
287     {
288         if ((compression == CMP_GZIP) || (compression == CMP_DEFLATE))
289         {
290             compress_stream = new z_stream;
291             compress_stream->zalloc = Z_NULL;
292             compress_stream->zfree = Z_NULL;
293             compress_stream->next_in = Z_NULL;
294             compress_stream->avail_in = 0;
295             const int window_bits = (compression == CMP_GZIP) ?
296                 GZIP_WINDOW_BITS : DEFLATE_WINDOW_BITS;
297             if (inflateInit2(compress_stream, window_bits) != Z_OK)
298             {
299                 assert(false);
300                 compression = CMP_NONE;
301                 delete compress_stream;
302                 compress_stream = nullptr;
303             }
304         }
305 
306         static const uint8_t inspect_string[] = { '<', '/', 's', 'c', 'r', 'i', 'p', 't', '>' };
307         static const uint8_t inspect_upper[] = { '<', '/', 'S', 'C', 'R', 'I', 'P', 'T', '>' };
308 
309         match_string = inspect_string;
310         match_string_upper = inspect_upper;
311         string_length = sizeof(inspect_string);
312     }
313 }
314 
~HttpBodyCutter()315 HttpBodyCutter::~HttpBodyCutter()
316 {
317     if (compress_stream != nullptr)
318     {
319         inflateEnd(compress_stream);
320         delete compress_stream;
321     }
322 }
323 
cut(const uint8_t * buffer,uint32_t length,HttpInfractions *,HttpEventGen *,uint32_t flow_target,bool stretch,HttpEnums::H2BodyState)324 ScanResult HttpBodyClCutter::cut(const uint8_t* buffer, uint32_t length, HttpInfractions*,
325     HttpEventGen*, uint32_t flow_target, bool stretch, HttpEnums::H2BodyState)
326 {
327     assert(remaining > octets_seen);
328 
329     // Are we skipping to the next message?
330     if (flow_target == 0)
331     {
332         if (remaining <= length)
333         {
334             num_flush = remaining;
335             remaining = 0;
336             return SCAN_DISCARD;
337         }
338         else
339         {
340             num_flush = length;
341             remaining -= num_flush;
342             return SCAN_DISCARD_PIECE;
343         }
344     }
345 
346     // A target that is bigger than the entire rest of the message body makes no sense
347     if (remaining <= flow_target)
348     {
349         flow_target = remaining;
350         stretch = false;
351     }
352 
353     if (octets_seen + length < flow_target)
354     {
355         octets_seen += length;
356         return need_accelerated_blocking(buffer, length) ?
357             SCAN_NOT_FOUND_ACCELERATE : SCAN_NOT_FOUND;
358     }
359 
360     if (!stretch)
361     {
362         remaining -= flow_target;
363         num_flush = flow_target - octets_seen;
364         if (remaining > 0)
365         {
366             need_accelerated_blocking(buffer, num_flush);
367             return SCAN_FOUND_PIECE;
368         }
369         else
370             return SCAN_FOUND;
371     }
372 
373     if (octets_seen + length < remaining)
374     {
375         // The message body continues beyond this segment
376         // Stretch the section to include this entire segment provided it is not too big
377         if (octets_seen + length <= flow_target + MAX_SECTION_STRETCH)
378             num_flush = length;
379         else
380             num_flush = flow_target - octets_seen;
381         remaining -= octets_seen + num_flush;
382         need_accelerated_blocking(buffer, num_flush);
383         return SCAN_FOUND_PIECE;
384     }
385 
386     if (remaining - flow_target <= MAX_SECTION_STRETCH)
387     {
388         // Stretch the section to finish the message body
389         num_flush = remaining - octets_seen;
390         remaining = 0;
391         return SCAN_FOUND;
392     }
393 
394     // Cannot stretch to the end of the message body. Cut at the original target.
395     num_flush = flow_target - octets_seen;
396     remaining -= flow_target;
397     need_accelerated_blocking(buffer, num_flush);
398     return SCAN_FOUND_PIECE;
399 }
400 
cut(const uint8_t * buffer,uint32_t length,HttpInfractions *,HttpEventGen *,uint32_t flow_target,bool stretch,HttpEnums::H2BodyState)401 ScanResult HttpBodyOldCutter::cut(const uint8_t* buffer, uint32_t length, HttpInfractions*,
402     HttpEventGen*, uint32_t flow_target, bool stretch, HttpEnums::H2BodyState)
403 {
404     if (flow_target == 0)
405     {
406         // FIXIT-P Need StreamSplitter::END
407         // With other types of body we would skip to the trailers and/or next message now. But this
408         // will run to connection close so we should just stop processing this flow. But there is
409         // no way to ask stream to do that so we must skip through the rest of the message
410         // ourselves.
411         num_flush = length;
412         return SCAN_DISCARD_PIECE;
413     }
414 
415     if (octets_seen + length < flow_target)
416     {
417         // Not enough data yet to create a message section
418         octets_seen += length;
419         return need_accelerated_blocking(buffer, length) ?
420             SCAN_NOT_FOUND_ACCELERATE : SCAN_NOT_FOUND;
421     }
422     else if (stretch && (octets_seen + length <= flow_target + MAX_SECTION_STRETCH))
423     {
424         // Cut the section at the end of this TCP segment to avoid splitting a packet
425         num_flush = length;
426         need_accelerated_blocking(buffer, num_flush);
427         return SCAN_FOUND_PIECE;
428     }
429     else
430     {
431         // Cut the section at the target length. Either stretching is not allowed or the end of
432         // the segment is too far away.
433         num_flush = flow_target - octets_seen;
434         need_accelerated_blocking(buffer, num_flush);
435         return SCAN_FOUND_PIECE;
436     }
437 }
438 
transition_to_chunk_bad(bool & accelerate_this_packet)439 void HttpBodyChunkCutter::transition_to_chunk_bad(bool& accelerate_this_packet)
440 {
441     curr_state = CHUNK_BAD;
442     accelerate_this_packet = true;
443     zero_chunk = false;
444 }
445 
// Scans a chunked message body and decides where to cut message sections.
//
// The parser is a state machine whose state (curr_state, expected, data_seen, and the various
// counters) is kept in the object, so a chunk header or chunk data may be split across calls.
//
// buffer, length: the octets to scan.
// infractions, events: receive the irregularities found in the chunk framing.
// flow_target: requested section size. Zero selects discard mode.
// stretch: allows a section to grow by up to MAX_SECTION_STRETCH octets beyond flow_target.
//
// Returns SCAN_FOUND (SCAN_DISCARD in discard mode) at the terminating zero-length chunk,
// SCAN_FOUND_PIECE when a section is cut, SCAN_DISCARD_PIECE when a whole buffer is skipped in
// discard mode, and SCAN_NOT_FOUND or SCAN_NOT_FOUND_ACCELERATE when the section is not full.
ScanResult HttpBodyChunkCutter::cut(const uint8_t* buffer, uint32_t length,
    HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, bool stretch,
    HttpEnums::H2BodyState)
{
    // Are we skipping through the rest of this chunked body to the trailers and the next message?
    const bool discard_mode = (flow_target == 0);

    // Largest amount of chunk data that may go into one section
    const uint32_t adjusted_target = stretch ? MAX_SECTION_STRETCH + flow_target : flow_target;

    bool accelerate_this_packet = false;

    // k is signed because several states decrement it to reprocess the current octet in the
    // next state, and that may happen when k is zero.
    for (int32_t k=0; k < static_cast<int32_t>(length); k++)
    {
        switch (curr_state)
        {
        case CHUNK_NEWLINES:
            zero_chunk = true;
            // Looking for improper CRLFs before the chunk header
            if (is_cr_lf[buffer[k]])
            {
                *infractions += INF_CHUNK_BAD_SEP;
                events->create_event(EVENT_CHUNK_BAD_SEP);
                break;
            }
            curr_state = CHUNK_LEADING_WS;
            k--; // Reprocess this octet in the next state
            break;
        case CHUNK_LEADING_WS:
            // Looking for whitespace before the chunk size
            if (is_sp_tab[buffer[k]])
            {
                *infractions += INF_CHUNK_LEADING_WS;
                events->create_event(EVENT_CHUNK_WHITESPACE);
                num_leading_ws++;
                // The fifth leading whitespace character breaks the chunk
                if (num_leading_ws == 5)
                {
                    events->create_event(EVENT_BROKEN_CHUNK);
                    transition_to_chunk_bad(accelerate_this_packet);
                    k--;
                }
                break;
            }
            curr_state = CHUNK_ZEROS;
            k--;
            break;
        case CHUNK_ZEROS:
            // Looking for leading zeros in the chunk size.
            if (buffer[k] == '0')
            {
                num_zeros++;
                // Alert once, when the fifth zero is seen
                if (num_zeros == 5)
                {
                    *infractions += INF_CHUNK_ZEROS;
                    events->create_event(EVENT_CHUNK_ZEROS);
                }
                break;
            }
            curr_state = CHUNK_NUMBER;
            k--;
            break;
        case CHUNK_NUMBER:
            // Reading the chunk size
            if (buffer[k] == '\r')
            {
                curr_state = CHUNK_HCRLF;
            }
            else if (buffer[k] == '\n')
            {
                // LF without CR: let CHUNK_HCRLF process this LF as the end of the header
                *infractions += INF_CHUNK_BARE_LF;
                events->create_event(EVENT_CHUNK_BARE_LF);
                curr_state = CHUNK_HCRLF;
                k--;
            }
            else if (is_sp_tab[buffer[k]])
            {
                *infractions += INF_CHUNK_WHITESPACE;
                events->create_event(EVENT_CHUNK_WHITESPACE);
                curr_state = CHUNK_TRAILING_WS;
            }
            else if (buffer[k] == ';')
            {
                *infractions += INF_CHUNK_OPTIONS;
                events->create_event(EVENT_CHUNK_OPTIONS);
                curr_state = CHUNK_OPTIONS;
            }
            else if (as_hex[buffer[k]] == -1)
            {
                // illegal character present in chunk length
                *infractions += INF_CHUNK_BAD_CHAR;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
            }
            else
            {
                // Accumulate the next hexadecimal digit of the chunk size
                expected = expected * 16 + as_hex[buffer[k]];
                if ((++digits_seen > 8) || (expected > maximum_chunk_length))
                {
                    // alert for exceeding configurable limit
                    *infractions += INF_CHUNK_OVER_MAXIMUM;
                    events->create_event(EVENT_LARGE_CHUNK);
                    if (digits_seen > 8)
                    {
                        // overflow protection: absolutely must fit into 32 bits
                        *infractions += INF_CHUNK_TOO_LARGE;
                        events->create_event(EVENT_BROKEN_CHUNK);
                        transition_to_chunk_bad(accelerate_this_packet);
                        k--;
                    }
                }
                if (expected != 0)
                    zero_chunk = false;
            }
            break;
        case CHUNK_TRAILING_WS:
            // Skipping over improper whitespace following the chunk size
            if (buffer[k] == '\r')
            {
                curr_state = CHUNK_HCRLF;
            }
            else if (buffer[k] == '\n')
            {
                *infractions += INF_CHUNK_BARE_LF;
                events->create_event(EVENT_CHUNK_BARE_LF);
                curr_state = CHUNK_HCRLF;
                k--;
            }
            else if (buffer[k] == ';')
            {
                *infractions += INF_CHUNK_OPTIONS;
                events->create_event(EVENT_CHUNK_OPTIONS);
                curr_state = CHUNK_OPTIONS;
            }
            else if (!is_sp_tab[buffer[k]])
            {
                // illegal character present in chunk length
                *infractions += INF_CHUNK_BAD_CHAR;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
            }
            break;
        case CHUNK_OPTIONS:
            // The RFC permits options to follow the chunk size. No one normally does this.
            // Everything up to the end of the line is skipped without examination.
            if (buffer[k] == '\r')
            {
                curr_state = CHUNK_HCRLF;
            }
            else if (buffer[k] == '\n')
            {
                *infractions += INF_CHUNK_BARE_LF;
                events->create_event(EVENT_CHUNK_BARE_LF);
                curr_state = CHUNK_HCRLF;
                k--;
            }
            break;
        case CHUNK_HCRLF:
            // The chunk header should end in CRLF and this should be the LF
            if (buffer[k] != '\n')
            {
                // This is qualitatively different from similar bare CR issues because it doesn't
                // provide a transparent data channel. A recipient is much less likely to implement
                // tolerance for this irregularity because a chunk that begins with LF is
                // ambiguous.
                *infractions += INF_CHUNK_LONE_CR;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
                break;
            }
            if (expected > 0)
            {
                curr_state = CHUNK_DATA;
            }
            else if (num_zeros > 0)
            {
                // Terminating zero-length chunk
                num_good_chunks++;
                num_flush = k+1;
                return !discard_mode ? SCAN_FOUND : SCAN_DISCARD;
            }
            else
            {
                // The header contained neither a nonzero size nor any zero digit
                *infractions += INF_CHUNK_NO_LENGTH;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
            }
            break;
        case CHUNK_DATA:
            // Moving through the chunk data
          {
            // Take whichever is smaller: the rest of the buffer or the rest of the chunk
            uint32_t skip_amount = (length-k <= expected) ? length-k : expected;
            if (!discard_mode && (skip_amount > adjusted_target-data_seen))
            { // Do not exceed requested section size (including stretching)
                skip_amount = adjusted_target-data_seen;
            }

            accelerate_this_packet = need_accelerated_blocking(buffer+k, skip_amount) ||
                accelerate_this_packet;

            // Leave k on the last octet consumed. The loop increment moves past it.
            k += skip_amount - 1;
            if ((expected -= skip_amount) == 0)
            {
                curr_state = CHUNK_DCRLF1;
            }
            if ((data_seen += skip_amount) == adjusted_target)
            {
                // The section is full: cut here and start counting the next section
                data_seen = 0;
                num_flush = k+1;
                return SCAN_FOUND_PIECE;
            }
            break;
          }
        case CHUNK_DCRLF1:
            // The CR from the end-of-chunk CRLF should be here
            if (buffer[k] == '\r')
            {
                curr_state = CHUNK_DCRLF2;
            }
            else if (buffer[k] == '\n')
            {
                *infractions += INF_CHUNK_BAD_SEP;
                events->create_event(EVENT_CHUNK_BAD_SEP);
                curr_state = CHUNK_DCRLF2;
                k--;
            }
            else
            {
                *infractions += INF_CHUNK_BAD_END;
                events->create_event(EVENT_BROKEN_CHUNK);
                transition_to_chunk_bad(accelerate_this_packet);
                k--;
            }
            break;
        case CHUNK_DCRLF2:
            // The LF from the end-of-chunk CRLF should be here
            // The chunk is counted as good and the per-chunk state is reset either way
            num_good_chunks++;
            num_leading_ws = 0;
            num_zeros = 0;
            expected = 0;
            digits_seen = 0;
            curr_state = CHUNK_NEWLINES;
            if (buffer[k] == '\n')
                break;
            *infractions += INF_CHUNK_BAD_SEP;
            events->create_event(EVENT_CHUNK_BAD_SEP);
            // A second CR is consumed here. Anything else is reprocessed by CHUNK_NEWLINES.
            if (buffer[k] != '\r')
                k--;
            break;
        case CHUNK_BAD:
            // Chunk reassembly has failed. This is a terminal state but inspection of the body
            // must go on.
            // If we are skipping to the trailers and next message the broken chunk thwarts us
            if (discard_mode)
            {
                // FIXIT-P Need StreamSplitter::END
                // With the broken chunk this will run to connection close so we should just stop
                // processing this flow. But there is no way to ask stream to do that so we must
                // skip through the rest of the message ourselves.
                num_flush = length;
                return SCAN_DISCARD_PIECE;
            }

            // When chunk parsing breaks down and we first enter CHUNK_BAD state, it may happen
            // that there were chunk header bytes between the last good chunk and the point where
            // the failure occurred. These will not have been counted in data_seen because we
            // planned to delete them during reassembly. Because they are not part of a valid chunk
            // they will be reassembled after all. This will overrun the adjusted_target making the
            // message section a little bigger than planned. It's not important.
            uint32_t skip_amount = length-k;
            skip_amount = (skip_amount <= adjusted_target-data_seen) ? skip_amount :
                adjusted_target-data_seen;
            accelerate_this_packet = need_accelerated_blocking(buffer+k, skip_amount) ||
                accelerate_this_packet;
            // Leave k on the last octet consumed. The loop increment moves past it.
            k += skip_amount - 1;
            if ((data_seen += skip_amount) == adjusted_target)
            {
                data_seen = 0;
                num_flush = k+1;
                return SCAN_FOUND_PIECE;
            }
            break;
        }
    }
    // The whole buffer has been examined without reaching a cut point inside it
    if (discard_mode)
    {
        num_flush = length;
        return SCAN_DISCARD_PIECE;
    }

    if (data_seen >= flow_target)
    {
        // We passed the flow_target and stretched to the end of the segment
        data_seen = 0;
        num_flush = length;
        return SCAN_FOUND_PIECE;
    }

    octets_seen += length;

    if (accelerate_this_packet || (zero_chunk && data_seen))
        return SCAN_NOT_FOUND_ACCELERATE;

    return SCAN_NOT_FOUND;
}
752 
cut(const uint8_t * buffer,uint32_t length,HttpInfractions * infractions,HttpEventGen * events,uint32_t flow_target,bool stretch,H2BodyState state)753 ScanResult HttpBodyH2Cutter::cut(const uint8_t* buffer, uint32_t length,
754     HttpInfractions* infractions, HttpEventGen* events, uint32_t flow_target, bool stretch,
755     H2BodyState state)
756 {
757     // If the headers included a content length header (expected length >= 0), check it against the
758     // actual message body length. Alert if it does not match at the end of the message body or if
759     // it overflows during the body (alert once then stop computing).
760     if (expected_body_length >= 0)
761     {
762         if ((total_octets_scanned + length) > expected_body_length)
763         {
764             *infractions += INF_H2_DATA_OVERRUNS_CL;
765             events->create_event(EVENT_H2_DATA_OVERRUNS_CL);
766             expected_body_length = HttpCommon::STAT_NOT_COMPUTE;
767         }
768         else if (state != H2_BODY_NOT_COMPLETE and
769             ((total_octets_scanned + length) < expected_body_length))
770         {
771             *infractions += INF_H2_DATA_UNDERRUNS_CL;
772             events->create_event(EVENT_H2_DATA_UNDERRUNS_CL);
773         }
774     }
775 
776     if (flow_target == 0)
777     {
778         num_flush = length;
779         total_octets_scanned += length;
780         if (state != H2_BODY_NOT_COMPLETE)
781             return SCAN_DISCARD;
782 
783         return SCAN_DISCARD_PIECE;
784     }
785 
786     if (state == H2_BODY_NOT_COMPLETE)
787     {
788         if (octets_seen + length < flow_target)
789         {
790             // Not enough data yet to create a message section
791             octets_seen += length;
792             total_octets_scanned += length;
793             return need_accelerated_blocking(buffer, length) ?
794                 SCAN_NOT_FOUND_ACCELERATE : SCAN_NOT_FOUND;
795         }
796         else
797         {
798             if (stretch && (octets_seen + length <= flow_target + MAX_SECTION_STRETCH))
799                 num_flush = length;
800             else
801                 num_flush = flow_target - octets_seen;
802             total_octets_scanned += num_flush;
803             need_accelerated_blocking(buffer, num_flush);
804             return SCAN_FOUND_PIECE;
805         }
806     }
807     else if (state == H2_BODY_LAST_SEG)
808     {
809         const uint32_t adjusted_target = stretch ? MAX_SECTION_STRETCH + flow_target : flow_target;
810         if (octets_seen + length <= adjusted_target)
811             num_flush = length;
812         else
813             num_flush = flow_target - octets_seen;
814 
815         total_octets_scanned += num_flush;
816         if (num_flush == length)
817             return SCAN_FOUND;
818         else
819             return SCAN_FOUND_PIECE;
820     }
821     else
822     {
823         // To end message body when trailers are received or a 0 length data frame with
824         // end of stream set is received, a zero-length buffer is sent to flush
825         assert(length == 0);
826         num_flush = 0;
827         return SCAN_FOUND;
828     }
829 }
830 
831 // This method searches the input stream looking for a script or other dangerous content that
832 // requires script detection. Exactly what we are looking for is encapsulated in dangerous().
833 //
834 // Return value true indicates a match and enables the packet that completes the matching sequence
835 // to be sent for partial inspection.
836 //
837 // Any attempt to optimize this code should be mindful that once you skip any part of the message
838 // body, dangerous() loses the ability to unzip subsequent data.
839 
need_accelerated_blocking(const uint8_t * data,uint32_t length)840 bool HttpBodyCutter::need_accelerated_blocking(const uint8_t* data, uint32_t length)
841 {
842     const bool need_accelerated_blocking = accelerated_blocking && dangerous(data, length);
843     if (need_accelerated_blocking)
844         HttpModule::increment_peg_counts(PEG_SCRIPT_DETECTION);
845     return need_accelerated_blocking;
846 }
847 
find_partial(const uint8_t * input_buf,uint32_t input_length,bool end)848 bool HttpBodyCutter::find_partial(const uint8_t* input_buf, uint32_t input_length, bool end)
849 {
850     for (uint32_t k = 0; k < input_length; k++)
851     {
852         // partial_match is persistent, enabling matches that cross data boundaries
853         if ((input_buf[k] == match_string[partial_match]) ||
854             (input_buf[k] == match_string_upper[partial_match]))
855         {
856             if (++partial_match == string_length)
857             {
858                 partial_match = 0;
859                 return true;
860             }
861         }
862         else
863         {
864             partial_match = 0;
865             if ( end )
866                 return false;
867         }
868     }
869     return false;
870 }
871 
// Currently we do accelerated blocking when we see JavaScript
dangerous(const uint8_t * data,uint32_t length)873 bool HttpBodyCutter::dangerous(const uint8_t* data, uint32_t length)
874 {
875     const uint8_t* input_buf = data;
876     uint32_t input_length = length;
877     uint8_t* decomp_output = nullptr;
878 
879     // Zipped flows must be decompressed before we can check them. Unzipping for accelerated
880     // blocking is completely separate from the unzipping done later in reassemble().
881     if ((compression == CMP_GZIP) || (compression == CMP_DEFLATE))
882     {
883         // Previous decompression failures make it impossible to search for scripts
884         if (decompress_failed)
885             return true;
886 
887         const uint32_t decomp_buffer_size = MAX_OCTETS;
888         decomp_output = new uint8_t[decomp_buffer_size];
889 
890         compress_stream->next_in = const_cast<Bytef*>(data);
891         compress_stream->avail_in = length;
892         compress_stream->next_out = decomp_output;
893         compress_stream->avail_out = decomp_buffer_size;
894 
895         int ret_val = inflate(compress_stream, Z_SYNC_FLUSH);
896 
897         // Not going to be subtle about this and try to fix decompression problems. If it doesn't
898         // work out we assume it could be dangerous.
899         if (((ret_val != Z_OK) && (ret_val != Z_STREAM_END)) || (compress_stream->avail_in > 0))
900         {
901             decompress_failed = true;
902             delete[] decomp_output;
903             return true;
904         }
905 
906         input_buf = decomp_output;
907         input_length = decomp_buffer_size - compress_stream->avail_out;
908     }
909 
910     std::unique_ptr<uint8_t[]> uniq(decomp_output);
911 
912     if ( input_length > string_length )
913     {
914         if ( partial_match and find_partial(input_buf, input_length, true) )
915             return true;
916 
917         if ( finder->search(input_buf, input_length) >= 0 )
918             return true;
919 
920         uint32_t delta = input_length - string_length + 1;
921         input_buf += delta;
922         input_length -= delta;
923     }
924 
925     if ( find_partial(input_buf, input_length, false) )
926         return true;
927 
928     return false;
929 }
930 
931