1 /*
2  * Authored by Alex Hultman, 2018-2020.
3  * Intellectual property of third-party.
4 
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8 
9  *     http://www.apache.org/licenses/LICENSE-2.0
10 
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 
18 #ifndef UWS_HTTPPARSER_H
19 #define UWS_HTTPPARSER_H
20 
21 // todo: HttpParser is in need of a few clean-ups and refactorings
22 
23 /* The HTTP parser is an independent module subject to unit testing / fuzz testing */
24 
#include <algorithm>
#include <cstring>
#include <limits>
#include <string>
#include <string_view>
#include <utility>
#include "MoveOnlyFunction.h"

#include "BloomFilter.h"
#include "ProxyParser.h"
#include "QueryParser.h"
33 
34 namespace uWS {
35 
/* Smallest amount of post padding (in bytes) a buffer must have after its data.
 * The parser writes a fence byte just past the data and peeks one byte beyond that. */
static constexpr unsigned int MINIMUM_HTTP_POST_PADDING = 32;
38 
39 struct HttpRequest {
40 
41     friend struct HttpParser;
42 
43 private:
44     const static int MAX_HEADERS = 50;
45     struct Header {
46         std::string_view key, value;
47     } headers[MAX_HEADERS];
48     bool ancientHttp;
49     unsigned int querySeparator;
50     bool didYield;
51     BloomFilter bf;
52     std::pair<int, std::string_view *> currentParameters;
53 
54 public:
isAncientHttpRequest55     bool isAncient() {
56         return ancientHttp;
57     }
58 
getYieldHttpRequest59     bool getYield() {
60         return didYield;
61     }
62 
63     /* Iteration over headers (key, value) */
64     struct HeaderIterator {
65         Header *ptr;
66 
67         bool operator!=(const HeaderIterator &other) const {
68             /* Comparison with end is a special case */
69             if (ptr != other.ptr) {
70                 return other.ptr || ptr->key.length();
71             }
72             return false;
73         }
74 
75         HeaderIterator &operator++() {
76             ptr++;
77             return *this;
78         }
79 
80         std::pair<std::string_view, std::string_view> operator*() const {
81             return {ptr->key, ptr->value};
82         }
83     };
84 
beginHttpRequest85     HeaderIterator begin() {
86         return {headers + 1};
87     }
88 
endHttpRequest89     HeaderIterator end() {
90         return {nullptr};
91     }
92 
93     /* If you do not want to handle this route */
setYieldHttpRequest94     void setYield(bool yield) {
95         didYield = yield;
96     }
97 
getHeaderHttpRequest98     std::string_view getHeader(std::string_view lowerCasedHeader) {
99         if (bf.mightHave(lowerCasedHeader)) {
100             for (Header *h = headers; (++h)->key.length(); ) {
101                 if (h->key.length() == lowerCasedHeader.length() && !strncmp(h->key.data(), lowerCasedHeader.data(), lowerCasedHeader.length())) {
102                     return h->value;
103                 }
104             }
105         }
106         return std::string_view(nullptr, 0);
107     }
108 
getUrlHttpRequest109     std::string_view getUrl() {
110         return std::string_view(headers->value.data(), querySeparator);
111     }
112 
getMethodHttpRequest113     std::string_view getMethod() {
114         return std::string_view(headers->key.data(), headers->key.length());
115     }
116 
117     /* Returns the raw querystring as a whole, still encoded */
getQueryHttpRequest118     std::string_view getQuery() {
119         if (querySeparator < headers->value.length()) {
120             /* Strip the initial ? */
121             return std::string_view(headers->value.data() + querySeparator + 1, headers->value.length() - querySeparator - 1);
122         } else {
123             return std::string_view(nullptr, 0);
124         }
125     }
126 
127     /* Finds and decodes the URI component. */
getQueryHttpRequest128     std::string_view getQuery(std::string_view key) {
129         /* Raw querystring including initial '?' sign */
130         std::string_view queryString = std::string_view(headers->value.data() + querySeparator, headers->value.length() - querySeparator);
131 
132         return getDecodedQueryValue(key, queryString);
133     }
134 
setParametersHttpRequest135     void setParameters(std::pair<int, std::string_view *> parameters) {
136         currentParameters = parameters;
137     }
138 
getParameterHttpRequest139     std::string_view getParameter(unsigned short index) {
140         if (currentParameters.first < (int) index) {
141             return {};
142         } else {
143             return currentParameters.second[index];
144         }
145     }
146 
147 };
148 
149 struct HttpParser {
150 
151 private:
152     std::string fallback;
153     unsigned int remainingStreamingBytes = 0;
154 
155     const size_t MAX_FALLBACK_SIZE = 1024 * 4;
156 
toUnsignedIntegerHttpParser157     static unsigned int toUnsignedInteger(std::string_view str) {
158         unsigned int unsignedIntegerValue = 0;
159         for (char c : str) {
160             unsignedIntegerValue = unsignedIntegerValue * 10u + ((unsigned int) c - (unsigned int) '0');
161         }
162         return unsignedIntegerValue;
163     }
164 
getHeadersHttpParser165     static unsigned int getHeaders(char *postPaddedBuffer, char *end, struct HttpRequest::Header *headers, void *reserved) {
166         char *preliminaryKey, *preliminaryValue, *start = postPaddedBuffer;
167 
168         #ifdef UWS_WITH_PROXY
169             /* ProxyParser is passed as reserved parameter */
170             ProxyParser *pp = (ProxyParser *) reserved;
171 
172             /* Parse PROXY protocol */
173             auto [done, offset] = pp->parse({start, (size_t) (end - postPaddedBuffer)});
174             if (!done) {
175                 /* We do not reset the ProxyParser (on filure) since it is tied to this
176                 * connection, which is really only supposed to ever get one PROXY frame
177                 * anyways. We do however allow multiple PROXY frames to be sent (overwrites former). */
178                 return 0;
179             } else {
180                 /* We have consumed this data so skip it */
181                 start += offset;
182             }
183         #else
184             /* This one is unused */
185             (void) reserved;
186         #endif
187 
188         /* It is critical for fallback buffering logic that we only return with success
189          * if we managed to parse a complete HTTP request (minus data). Returning success
190          * for PROXY means we can end up succeeding, yet leaving bytes in the fallback buffer
191          * which is then removed, and our counters to flip due to overflow and we end up with a crash */
192 
193         for (unsigned int i = 0; i < HttpRequest::MAX_HEADERS; i++) {
194             for (preliminaryKey = postPaddedBuffer; (*postPaddedBuffer != ':') & (*postPaddedBuffer > 32); *(postPaddedBuffer++) |= 32);
195             if (*postPaddedBuffer == '\r') {
196                 if ((postPaddedBuffer != end) & (postPaddedBuffer[1] == '\n') & (i > 0)) {
197                     headers->key = std::string_view(nullptr, 0);
198                     return (unsigned int) ((postPaddedBuffer + 2) - start);
199                 } else {
200                     return 0;
201                 }
202             } else {
203                 headers->key = std::string_view(preliminaryKey, (size_t) (postPaddedBuffer - preliminaryKey));
204                 for (postPaddedBuffer++; (*postPaddedBuffer == ':' || *postPaddedBuffer < 33) && *postPaddedBuffer != '\r'; postPaddedBuffer++);
205                 preliminaryValue = postPaddedBuffer;
206                 postPaddedBuffer = (char *) memchr(postPaddedBuffer, '\r', (size_t) (end - postPaddedBuffer));
207                 if (postPaddedBuffer && postPaddedBuffer[1] == '\n') {
208                     headers->value = std::string_view(preliminaryValue, (size_t) (postPaddedBuffer - preliminaryValue));
209                     postPaddedBuffer += 2;
210                     headers++;
211                 } else {
212                     return 0;
213                 }
214             }
215         }
216         return 0;
217     }
218 
219     // the only caller of getHeaders
220     template <int CONSUME_MINIMALLY>
fenceAndConsumePostPaddedHttpParser221     std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) {
222 
223         /* How much data we CONSUMED (to throw away) */
224         unsigned int consumedTotal = 0;
225 
226         /* Fence one byte past end of our buffer (buffer has post padded margins) */
227         data[length] = '\r';
228 
229         for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved)); ) {
230             data += consumed;
231             length -= consumed;
232             consumedTotal += consumed;
233 
234             /* Store HTTP version (ancient 1.0 or 1.1) */
235             req->ancientHttp = req->headers->value.length() && (req->headers->value[req->headers->value.length() - 1] == '0');
236 
237             /* Strip away tail of first "header value" aka URL */
238             req->headers->value = std::string_view(req->headers->value.data(), (size_t) std::max<int>(0, (int) req->headers->value.length() - 9));
239 
240             /* Add all headers to bloom filter */
241             req->bf.reset();
242             for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) {
243                 req->bf.add(h->key);
244             }
245 
246             /* Parse query */
247             const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length());
248             req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data());
249 
250             /* If returned socket is not what we put in we need
251              * to break here as we either have upgraded to
252              * WebSockets or otherwise closed the socket. */
253             void *returnedUser = requestHandler(user, req);
254             if (returnedUser != user) {
255                 /* We are upgraded to WebSocket or otherwise broken */
256                 return {consumedTotal, returnedUser};
257             }
258 
259             // todo: do not check this for GET (get should not have a body)
260             // todo: also support reading chunked streams
261             std::string_view contentLengthString = req->getHeader("content-length");
262             if (contentLengthString.length()) {
263                 remainingStreamingBytes = toUnsignedInteger(contentLengthString);
264 
265                 if (!CONSUME_MINIMALLY) {
266                     unsigned int emittable = std::min<unsigned int>(remainingStreamingBytes, length);
267                     dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes);
268                     remainingStreamingBytes -= emittable;
269 
270                     data += emittable;
271                     length -= emittable;
272                     consumedTotal += emittable;
273                 }
274             } else {
275                 /* Still emit an empty data chunk to signal no data */
276                 dataHandler(user, {}, true);
277             }
278 
279             if (CONSUME_MINIMALLY) {
280                 break;
281             }
282         }
283         return {consumedTotal, user};
284     }
285 
286 public:
consumePostPaddedHttpParser287     void *consumePostPadded(char *data, unsigned int length, void *user, void *reserved, MoveOnlyFunction<void *(void *, HttpRequest *)> &&requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &&dataHandler, MoveOnlyFunction<void *(void *)> &&errorHandler) {
288 
289         /* This resets BloomFilter by construction, but later we also reset it again.
290          * Optimize this to skip resetting twice (req could be made global) */
291         HttpRequest req;
292 
293         if (remainingStreamingBytes) {
294 
295             // this is exactly the same as below!
296             // todo: refactor this
297             if (remainingStreamingBytes >= length) {
298                 void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == length);
299                 remainingStreamingBytes -= length;
300                 return returnedUser;
301             } else {
302                 void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);
303 
304                 data += remainingStreamingBytes;
305                 length -= remainingStreamingBytes;
306 
307                 remainingStreamingBytes = 0;
308 
309                 if (returnedUser != user) {
310                     return returnedUser;
311                 }
312             }
313 
314         } else if (fallback.length()) {
315             unsigned int had = (unsigned int) fallback.length();
316 
317             size_t maxCopyDistance = std::min(MAX_FALLBACK_SIZE - fallback.length(), (size_t) length);
318 
319             /* We don't want fallback to be short string optimized, since we want to move it */
320             fallback.reserve(fallback.length() + maxCopyDistance + std::max<unsigned int>(MINIMUM_HTTP_POST_PADDING, sizeof(std::string)));
321             fallback.append(data, maxCopyDistance);
322 
323             // break here on break
324             std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<true>(fallback.data(), (unsigned int) fallback.length(), user, reserved, &req, requestHandler, dataHandler);
325             if (consumed.second != user) {
326                 return consumed.second;
327             }
328 
329             if (consumed.first) {
330 
331                 /* This logic assumes that we consumed everything in fallback buffer.
332                  * This is critically important, as we will get an integer overflow in case
333                  * of "had" being larger than what we consumed, and that we would drop data */
334                 fallback.clear();
335                 data += consumed.first - had;
336                 length -= consumed.first - had;
337 
338                 if (remainingStreamingBytes) {
339                     // this is exactly the same as above!
340                     if (remainingStreamingBytes >= (unsigned int) length) {
341                         void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == (unsigned int) length);
342                         remainingStreamingBytes -= length;
343                         return returnedUser;
344                     } else {
345                         void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true);
346 
347                         data += remainingStreamingBytes;
348                         length -= remainingStreamingBytes;
349 
350                         remainingStreamingBytes = 0;
351 
352                         if (returnedUser != user) {
353                             return returnedUser;
354                         }
355                     }
356                 }
357 
358             } else {
359                 if (fallback.length() == MAX_FALLBACK_SIZE) {
360                     // note: you don't really need error handler, just return something strange!
361                     // we could have it return a constant pointer to denote error!
362                     return errorHandler(user);
363                 }
364                 return user;
365             }
366         }
367 
368         std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<false>(data, length, user, reserved, &req, requestHandler, dataHandler);
369         if (consumed.second != user) {
370             return consumed.second;
371         }
372 
373         data += consumed.first;
374         length -= consumed.first;
375 
376         if (length) {
377             if (length < MAX_FALLBACK_SIZE) {
378                 fallback.append(data, length);
379             } else {
380                 return errorHandler(user);
381             }
382         }
383 
384         // added for now
385         return user;
386     }
387 };
388 
389 }
390 
391 #endif // UWS_HTTPPARSER_H
392