1 /*
2 ** Copyright (C) 2014-2021 Cisco and/or its affiliates. All rights reserved.
3 ** Copyright (C) 1998-2013 Sourcefire, Inc.
4 **
5 ** Written by Bhagyashree Bantwal <bbantwal@cisco.com>
6 **
7 ** This program is free software; you can redistribute it and/or modify
8 ** it under the terms of the GNU General Public License Version 2 as
9 ** published by the Free Software Foundation. You may not use, modify or
10 ** distribute this program under any other version of the GNU General
11 ** Public License.
12 **
13 ** This program is distributed in the hope that it will be useful,
14 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ** GNU General Public License for more details.
17 **
18 ** You should have received a copy of the GNU General Public License
19 ** along with this program; if not, write to the Free Software
20 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 */
22
23 #ifdef HAVE_CONFIG_H
24 #include "config.h"
25 #endif
26
27 #include "util_unfold.h"
28
29
/* Copies a single, possibly folded, HTTP-style header from inbuf to outbuf
 * with the folding removed.
 *
 * A fold is a line break (LF, optionally preceded by CR) followed by a space
 * or tab. The line break and the first whitespace byte after it are dropped
 * and copying continues. The header ends at the first byte that follows a
 * line break and is not a space or tab; that byte is not consumed. A second
 * CR or LF directly after a line break also ends the header.
 *
 * inbuf        - input bytes; may be NULL only when inbuf_size is 0
 * inbuf_size   - number of bytes available in inbuf
 * outbuf       - destination buffer; must not be NULL
 * outbuf_size  - capacity of outbuf in bytes; at most this many bytes are
 *                ever written, terminator included
 * output_bytes - optional; receives the number of bytes copied
 * trim_spaces  - when non-zero, every space and tab is dropped from the output
 * folded       - optional; receives a count that is non-zero only if at
 *                least one fold was seen
 *
 * The output is NUL-terminated. When the copied data fills outbuf completely,
 * the terminator overwrites the last copied byte while output_bytes still
 * reports outbuf_size.
 *
 * Returns 0 on success, -1 if outbuf is NULL or inbuf is NULL with a
 * non-zero inbuf_size.
 */
int sf_unfold_header(const uint8_t *inbuf, uint32_t inbuf_size, uint8_t *outbuf,
        uint32_t outbuf_size, uint32_t *output_bytes, int trim_spaces, int *folded)
{
    int num_spaces = 0;
    uint32_t i;
    uint32_t n = 0;             /* bytes written to outbuf so far */
    int httpheaderfolding = 0;  /* 0: inside a line, 1: LF just seen, 2: CR just seen */
    int folding_present = 0;

    /* Give the optional outputs a defined value on every return path. */
    if (output_bytes)
        *output_bytes = 0;
    if (folded)
        *folded = 0;

    if (!outbuf || (!inbuf && (inbuf_size != 0)))
        return -1;

    /* No room for data or terminator; writing outbuf[outbuf_size - 1]
     * below would land before the start of the buffer. */
    if (outbuf_size == 0)
        return 0;

    /* Keep adding chars until we get to the end of the line. If the next
     * line starts with a tab or space it is a continuation, so keep reading.
     * Otherwise stop, because that is the end of the header. */
    for (i = 0; (i < inbuf_size) && (n < outbuf_size); i++)
    {
        const uint8_t c = inbuf[i];

        if ((c == ' ') || (c == '\t'))
        {
            if (folding_present)
                num_spaces++;

            if (httpheaderfolding)
            {
                /* Whitespace right after a line break: this is a fold.
                 * The whitespace byte itself is not copied. */
                num_spaces++;
                folding_present = 1;
                httpheaderfolding = 0;
            }
            else if (!trim_spaces)
            {
                /* Every byte stored must be counted in n, otherwise the
                 * loop bound does not protect outbuf. */
                outbuf[n++] = c;
            }
        }
        else if ((c == '\n') && (httpheaderfolding != 1))
        {
            /* Can't have multiple LFs in a row, but if we get one it
             * needs to be followed by at least one space */
            httpheaderfolding = 1;
        }
        else if ((c == '\r') && !httpheaderfolding)
        {
            /* CR needs to be followed by LF and can't start a line */
            httpheaderfolding = 2;
        }
        else if (!httpheaderfolding)
        {
            outbuf[n++] = c;
        }
        else
        {
            /* We have reached the end of the header */
            break;
        }
    }

    if (n < outbuf_size)
        outbuf[n] = '\0';
    else
        outbuf[outbuf_size - 1] = '\0';

    if (output_bytes)
        *output_bytes = n;
    if (folded)
        *folded = num_spaces;

    return 0;
}
106
107
/* Copies inbuf to outbuf, leaving out every CR and LF byte.
 *
 * Copying stops when the input is exhausted or outbuf_size bytes have been
 * written, whichever comes first. The output is not NUL-terminated.
 *
 * output_bytes is optional; when given it receives the number of bytes
 * written to outbuf.
 *
 * Returns 0 on success, -1 if inbuf or outbuf is NULL.
 */
int sf_strip_CRLF(const uint8_t *inbuf, uint32_t inbuf_size, uint8_t *outbuf,
        uint32_t outbuf_size, uint32_t *output_bytes)
{
    uint32_t src;
    uint32_t written = 0;

    if (!inbuf || !outbuf)
        return -1;

    for (src = 0; (src < inbuf_size) && (written < outbuf_size); src++)
    {
        const uint8_t ch = inbuf[src];

        /* Line-break bytes are consumed but never copied. */
        if ((ch == '\n') || (ch == '\r'))
            continue;

        outbuf[written++] = ch;
    }

    if (output_bytes)
        *output_bytes = written;

    return 0;
}
138
/* Copies inbuf to outbuf, removing the run of spaces and tabs that sits
 * directly in front of a CR or LF.
 *
 * The CR and LF bytes themselves are kept. Whitespace elsewhere, including
 * whitespace at the very end of the input with no line break after it, is
 * copied unchanged. Copying stops when the input is exhausted or outbuf_size
 * bytes are held in outbuf. The output is not NUL-terminated.
 *
 * output_bytes is optional; when given it receives the number of bytes
 * left in outbuf.
 *
 * Returns 0 on success, -1 if inbuf or outbuf is NULL.
 */
int sf_strip_LWS(const uint8_t *inbuf, uint32_t inbuf_size, uint8_t *outbuf,
        uint32_t outbuf_size, uint32_t *output_bytes)
{
    uint32_t src;
    uint32_t written = 0;
    int trailing_lws = 0;   /* set while the last byte stored is a space or tab */

    if (!inbuf || !outbuf)
        return -1;

    for (src = 0; (src < inbuf_size) && (written < outbuf_size); src++)
    {
        const uint8_t ch = inbuf[src];
        const int is_eol = (ch == '\n') || (ch == '\r');

        if (is_eol && trailing_lws)
        {
            /* Rewind over the whitespace already stored so the line break
             * overwrites it. */
            while ((written > 0) &&
                   ((outbuf[written - 1] == ' ') || (outbuf[written - 1] == '\t')))
            {
                written--;
            }
        }

        trailing_lws = !is_eol && ((ch == ' ') || (ch == '\t'));
        outbuf[written++] = ch;
    }

    if (output_bytes)
        *output_bytes = written;

    return 0;
}
193