1 /*
2  ** Copyright (C) 2014-2021 Cisco and/or its affiliates. All rights reserved.
3  ** Copyright (C) 1998-2013 Sourcefire, Inc.
4  **
 ** Written by Bhagyashree Bantwal <bbantwal@cisco.com>
6  **
7  ** This program is free software; you can redistribute it and/or modify
8  ** it under the terms of the GNU General Public License Version 2 as
9  ** published by the Free Software Foundation.  You may not use, modify or
10  ** distribute this program under any other version of the GNU General
11  ** Public License.
12  **
13  ** This program is distributed in the hope that it will be useful,
14  ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15  ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  ** GNU General Public License for more details.
17  **
18  ** You should have received a copy of the GNU General Public License
19  ** along with this program; if not, write to the Free Software
20  ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
21  */
22 
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26 
27 #include "util_unfold.h"
28 
29 
/* Copies one logical header line from inbuf to outbuf, removing header
 * folding (a line break followed by a space or tab).
 *
 * Copying stops at the end of the header, i.e. when a line break is followed
 * by something other than a space or tab, when the input is exhausted, or
 * when outbuf is full.  CR and LF bytes are never copied.
 *
 * Parameters:
 *   inbuf, inbuf_size   - input bytes; inbuf must not be NULL.
 *   outbuf, outbuf_size - destination and its capacity in bytes; outbuf must
 *                         not be NULL.  At most outbuf_size bytes are written.
 *                         When outbuf_size > 0 the result is NUL-terminated;
 *                         if the copied data fills the buffer completely the
 *                         last copied byte is replaced by the terminator.
 *   output_bytes        - optional; receives the number of bytes copied
 *                         (never more than outbuf_size).  When the buffer was
 *                         filled completely this equals outbuf_size even
 *                         though the last byte was replaced by the NUL.
 *   trim_spaces         - non-zero: spaces and tabs are dropped everywhere;
 *                         zero: they are copied, except for the first
 *                         space/tab of each fold, which is always dropped.
 *   folded              - optional; receives a counter that is zero when no
 *                         fold was removed.  It is incremented once per fold
 *                         and once for every space/tab seen after the first
 *                         fold.
 *
 * Returns 0 on success, -1 if inbuf or outbuf is NULL (the optional outputs
 * are then set to 0).
 */
int sf_unfold_header(const uint8_t *inbuf, uint32_t inbuf_size, uint8_t *outbuf,
                     uint32_t outbuf_size, uint32_t *output_bytes, int trim_spaces, int *folded)
{
    const uint8_t *cursor;
    const uint8_t *endofinbuf;
    uint32_t n = 0;             /* bytes stored in outbuf so far */
    int num_spaces = 0;
    int httpheaderfolding = 0;  /* 0: inside a line, 1: LF seen, 2: CR seen */
    int folding_present = 0;    /* set once the first fold has been removed */

    if (!inbuf || !outbuf)
    {
        if (output_bytes)
            *output_bytes = 0;
        if (folded)
            *folded = 0;
        return -1;
    }

    cursor = inbuf;
    endofinbuf = inbuf + inbuf_size;

    /* Keep adding chars until we get to the end of the line.  If the next
     * line starts with a tab or space it is a continuation: swallow that
     * whitespace and keep reading.  Otherwise the header has ended. */
    while ((cursor < endofinbuf) && (n < outbuf_size))
    {
        const uint8_t c = *cursor;

        if ((c == ' ') || (c == '\t'))
        {
            if (folding_present)
                num_spaces++;

            if (httpheaderfolding)
            {
                /* Whitespace right after a line break: this is a fold. */
                num_spaces++;
                folding_present = 1;
                httpheaderfolding = 0;
            }
            else if (!trim_spaces)
            {
                /* Ordinary whitespace.  It must count against the output
                 * limit like any other byte, otherwise whitespace-heavy
                 * input would run past the end of outbuf. */
                outbuf[n++] = c;
            }
        }
        else if ((c == '\n') && (httpheaderfolding != 1))
        {
            /* Can't have multiple LFs in a row; a single LF must be
             * followed by at least one space to continue the header. */
            httpheaderfolding = 1;
        }
        else if ((c == '\r') && !httpheaderfolding)
        {
            /* CR needs to be followed by LF and can't start a line. */
            httpheaderfolding = 2;
        }
        else if (!httpheaderfolding)
        {
            outbuf[n++] = c;
        }
        else
        {
            /* Line break followed by a non-whitespace byte (or a repeated
             * CR/LF): end of this header. */
            break;
        }
        cursor++;
    }

    /* Terminate without ever indexing outside the buffer; a zero-sized
     * buffer gets no terminator at all. */
    if (n < outbuf_size)
        outbuf[n] = '\0';
    else if (outbuf_size > 0)
        outbuf[outbuf_size - 1] = '\0';

    if (output_bytes)
        *output_bytes = n;
    if (folded)
        *folded = num_spaces;
    return 0;
}
106 
107 
/* Copies inbuf to outbuf, leaving out every CR and LF byte.
 *
 * At most outbuf_size bytes are written and the output is not
 * NUL-terminated.  If output_bytes is non-NULL it receives the number of
 * bytes written.
 *
 * Returns 0 on success, -1 if inbuf or outbuf is NULL.
 */
int sf_strip_CRLF(const uint8_t *inbuf, uint32_t inbuf_size, uint8_t *outbuf,
                  uint32_t outbuf_size, uint32_t *output_bytes)
{
    uint32_t src_idx;
    uint32_t copied = 0;

    if (!(inbuf && outbuf))
        return -1;

    for (src_idx = 0; src_idx < inbuf_size && copied < outbuf_size; src_idx++)
    {
        const uint8_t ch = inbuf[src_idx];

        /* Line-break bytes are simply skipped. */
        if (ch == '\r' || ch == '\n')
            continue;

        outbuf[copied++] = ch;
    }

    if (output_bytes)
        *output_bytes = copied;

    return 0;
}
138 
/* Copies inbuf to outbuf, removing linear whitespace (spaces and tabs) that
 * sits directly in front of a CR or LF.  Whitespace elsewhere, including
 * trailing whitespace not followed by a line break, is copied unchanged,
 * and the CR/LF bytes themselves are kept.
 *
 * At most outbuf_size bytes are written and the output is not
 * NUL-terminated.  If output_bytes is non-NULL it receives the number of
 * bytes written.
 *
 * Returns 0 on success, -1 if inbuf or outbuf is NULL.
 */
int sf_strip_LWS(const uint8_t *inbuf, uint32_t inbuf_size, uint8_t *outbuf,
                 uint32_t outbuf_size, uint32_t *output_bytes)
{
    uint32_t src_idx;
    uint32_t out_len = 0;
    int prev_was_lws = 0;   /* was the last copied non-CR/LF byte a space or tab? */

    if (!(inbuf && outbuf))
        return -1;

    for (src_idx = 0; src_idx < inbuf_size && out_len < outbuf_size; src_idx++)
    {
        const uint8_t ch = inbuf[src_idx];

        if (ch == '\n' || ch == '\r')
        {
            if (prev_was_lws)
            {
                /* Rewind over the run of spaces/tabs already written so the
                 * line break lands right after the last non-blank byte. */
                while (out_len > 0 &&
                       (outbuf[out_len - 1] == ' ' || outbuf[out_len - 1] == '\t'))
                {
                    out_len--;
                }
                prev_was_lws = 0;
            }
        }
        else
        {
            prev_was_lws = (ch == ' ' || ch == '\t');
        }

        /* Every input byte is emitted; only previously written blanks are
         * ever taken back. */
        outbuf[out_len++] = ch;
    }

    if (output_bytes)
        *output_bytes = out_len;

    return 0;
}
193