1 /* 2 * Authored by Alex Hultman, 2018-2020. 3 * Intellectual property of third-party. 4 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 9 * http://www.apache.org/licenses/LICENSE-2.0 10 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 #ifndef UWS_HTTPPARSER_H 19 #define UWS_HTTPPARSER_H 20 21 // todo: HttpParser is in need of a few clean-ups and refactorings 22 23 /* The HTTP parser is an independent module subject to unit testing / fuzz testing */ 24 25 #include <string> 26 #include <cstring> 27 #include <algorithm> 28 #include "MoveOnlyFunction.h" 29 30 #include "BloomFilter.h" 31 #include "ProxyParser.h" 32 #include "QueryParser.h" 33 34 namespace uWS { 35 36 /* We require at least this much post padding */ 37 static const unsigned int MINIMUM_HTTP_POST_PADDING = 32; 38 39 struct HttpRequest { 40 41 friend struct HttpParser; 42 43 private: 44 const static int MAX_HEADERS = 50; 45 struct Header { 46 std::string_view key, value; 47 } headers[MAX_HEADERS]; 48 bool ancientHttp; 49 unsigned int querySeparator; 50 bool didYield; 51 BloomFilter bf; 52 std::pair<int, std::string_view *> currentParameters; 53 54 public: isAncientHttpRequest55 bool isAncient() { 56 return ancientHttp; 57 } 58 getYieldHttpRequest59 bool getYield() { 60 return didYield; 61 } 62 63 /* Iteration over headers (key, value) */ 64 struct HeaderIterator { 65 Header *ptr; 66 67 bool operator!=(const HeaderIterator &other) const { 68 /* Comparison with end is a special case */ 69 if (ptr != other.ptr) { 70 return other.ptr || ptr->key.length(); 71 } 72 return false; 73 } 74 75 HeaderIterator &operator++() { 76 ptr++; 77 return *this; 78 } 79 80 std::pair<std::string_view, std::string_view> operator*() const { 81 return {ptr->key, ptr->value}; 82 } 83 }; 84 beginHttpRequest85 HeaderIterator begin() { 86 return {headers + 1}; 87 } 88 endHttpRequest89 HeaderIterator end() { 90 return {nullptr}; 91 } 92 93 /* If you do not want to handle this route */ setYieldHttpRequest94 void setYield(bool yield) { 95 didYield = yield; 96 } 97 getHeaderHttpRequest98 std::string_view getHeader(std::string_view lowerCasedHeader) { 99 if (bf.mightHave(lowerCasedHeader)) { 100 for (Header *h = headers; (++h)->key.length(); ) { 101 if (h->key.length() == lowerCasedHeader.length() && !strncmp(h->key.data(), lowerCasedHeader.data(), lowerCasedHeader.length())) { 102 return h->value; 103 } 104 } 105 } 106 return std::string_view(nullptr, 0); 107 } 108 getUrlHttpRequest109 std::string_view getUrl() { 110 return std::string_view(headers->value.data(), querySeparator); 111 } 112 getMethodHttpRequest113 std::string_view getMethod() { 114 return std::string_view(headers->key.data(), headers->key.length()); 115 } 116 117 /* Returns the raw querystring as a whole, still encoded */ getQueryHttpRequest118 std::string_view getQuery() { 119 if (querySeparator < headers->value.length()) { 120 /* Strip the initial ? */ 121 return std::string_view(headers->value.data() + querySeparator + 1, headers->value.length() - querySeparator - 1); 122 } else { 123 return std::string_view(nullptr, 0); 124 } 125 } 126 127 /* Finds and decodes the URI component. */ getQueryHttpRequest128 std::string_view getQuery(std::string_view key) { 129 /* Raw querystring including initial '?' sign */ 130 std::string_view queryString = std::string_view(headers->value.data() + querySeparator, headers->value.length() - querySeparator); 131 132 return getDecodedQueryValue(key, queryString); 133 } 134 setParametersHttpRequest135 void setParameters(std::pair<int, std::string_view *> parameters) { 136 currentParameters = parameters; 137 } 138 getParameterHttpRequest139 std::string_view getParameter(unsigned short index) { 140 if (currentParameters.first < (int) index) { 141 return {}; 142 } else { 143 return currentParameters.second[index]; 144 } 145 } 146 147 }; 148 149 struct HttpParser { 150 151 private: 152 std::string fallback; 153 unsigned int remainingStreamingBytes = 0; 154 155 const size_t MAX_FALLBACK_SIZE = 1024 * 4; 156 toUnsignedIntegerHttpParser157 static unsigned int toUnsignedInteger(std::string_view str) { 158 unsigned int unsignedIntegerValue = 0; 159 for (char c : str) { 160 unsignedIntegerValue = unsignedIntegerValue * 10u + ((unsigned int) c - (unsigned int) '0'); 161 } 162 return unsignedIntegerValue; 163 } 164 getHeadersHttpParser165 static unsigned int getHeaders(char *postPaddedBuffer, char *end, struct HttpRequest::Header *headers, void *reserved) { 166 char *preliminaryKey, *preliminaryValue, *start = postPaddedBuffer; 167 168 #ifdef UWS_WITH_PROXY 169 /* ProxyParser is passed as reserved parameter */ 170 ProxyParser *pp = (ProxyParser *) reserved; 171 172 /* Parse PROXY protocol */ 173 auto [done, offset] = pp->parse({start, (size_t) (end - postPaddedBuffer)}); 174 if (!done) { 175 /* We do not reset the ProxyParser (on filure) since it is tied to this 176 * connection, which is really only supposed to ever get one PROXY frame 177 * anyways. We do however allow multiple PROXY frames to be sent (overwrites former). */ 178 return 0; 179 } else { 180 /* We have consumed this data so skip it */ 181 start += offset; 182 } 183 #else 184 /* This one is unused */ 185 (void) reserved; 186 #endif 187 188 /* It is critical for fallback buffering logic that we only return with success 189 * if we managed to parse a complete HTTP request (minus data). Returning success 190 * for PROXY means we can end up succeeding, yet leaving bytes in the fallback buffer 191 * which is then removed, and our counters to flip due to overflow and we end up with a crash */ 192 193 for (unsigned int i = 0; i < HttpRequest::MAX_HEADERS; i++) { 194 for (preliminaryKey = postPaddedBuffer; (*postPaddedBuffer != ':') & (*postPaddedBuffer > 32); *(postPaddedBuffer++) |= 32); 195 if (*postPaddedBuffer == '\r') { 196 if ((postPaddedBuffer != end) & (postPaddedBuffer[1] == '\n') & (i > 0)) { 197 headers->key = std::string_view(nullptr, 0); 198 return (unsigned int) ((postPaddedBuffer + 2) - start); 199 } else { 200 return 0; 201 } 202 } else { 203 headers->key = std::string_view(preliminaryKey, (size_t) (postPaddedBuffer - preliminaryKey)); 204 for (postPaddedBuffer++; (*postPaddedBuffer == ':' || *postPaddedBuffer < 33) && *postPaddedBuffer != '\r'; postPaddedBuffer++); 205 preliminaryValue = postPaddedBuffer; 206 postPaddedBuffer = (char *) memchr(postPaddedBuffer, '\r', (size_t) (end - postPaddedBuffer)); 207 if (postPaddedBuffer && postPaddedBuffer[1] == '\n') { 208 headers->value = std::string_view(preliminaryValue, (size_t) (postPaddedBuffer - preliminaryValue)); 209 postPaddedBuffer += 2; 210 headers++; 211 } else { 212 return 0; 213 } 214 } 215 } 216 return 0; 217 } 218 219 // the only caller of getHeaders 220 template <int CONSUME_MINIMALLY> fenceAndConsumePostPaddedHttpParser221 std::pair<unsigned int, void *> fenceAndConsumePostPadded(char *data, unsigned int length, void *user, void *reserved, HttpRequest *req, MoveOnlyFunction<void *(void *, HttpRequest *)> &requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &dataHandler) { 222 223 /* How much data we CONSUMED (to throw away) */ 224 unsigned int consumedTotal = 0; 225 226 /* Fence one byte past end of our buffer (buffer has post padded margins) */ 227 data[length] = '\r'; 228 229 for (unsigned int consumed; length && (consumed = getHeaders(data, data + length, req->headers, reserved)); ) { 230 data += consumed; 231 length -= consumed; 232 consumedTotal += consumed; 233 234 /* Store HTTP version (ancient 1.0 or 1.1) */ 235 req->ancientHttp = req->headers->value.length() && (req->headers->value[req->headers->value.length() - 1] == '0'); 236 237 /* Strip away tail of first "header value" aka URL */ 238 req->headers->value = std::string_view(req->headers->value.data(), (size_t) std::max<int>(0, (int) req->headers->value.length() - 9)); 239 240 /* Add all headers to bloom filter */ 241 req->bf.reset(); 242 for (HttpRequest::Header *h = req->headers; (++h)->key.length(); ) { 243 req->bf.add(h->key); 244 } 245 246 /* Parse query */ 247 const char *querySeparatorPtr = (const char *) memchr(req->headers->value.data(), '?', req->headers->value.length()); 248 req->querySeparator = (unsigned int) ((querySeparatorPtr ? querySeparatorPtr : req->headers->value.data() + req->headers->value.length()) - req->headers->value.data()); 249 250 /* If returned socket is not what we put in we need 251 * to break here as we either have upgraded to 252 * WebSockets or otherwise closed the socket. */ 253 void *returnedUser = requestHandler(user, req); 254 if (returnedUser != user) { 255 /* We are upgraded to WebSocket or otherwise broken */ 256 return {consumedTotal, returnedUser}; 257 } 258 259 // todo: do not check this for GET (get should not have a body) 260 // todo: also support reading chunked streams 261 std::string_view contentLengthString = req->getHeader("content-length"); 262 if (contentLengthString.length()) { 263 remainingStreamingBytes = toUnsignedInteger(contentLengthString); 264 265 if (!CONSUME_MINIMALLY) { 266 unsigned int emittable = std::min<unsigned int>(remainingStreamingBytes, length); 267 dataHandler(user, std::string_view(data, emittable), emittable == remainingStreamingBytes); 268 remainingStreamingBytes -= emittable; 269 270 data += emittable; 271 length -= emittable; 272 consumedTotal += emittable; 273 } 274 } else { 275 /* Still emit an empty data chunk to signal no data */ 276 dataHandler(user, {}, true); 277 } 278 279 if (CONSUME_MINIMALLY) { 280 break; 281 } 282 } 283 return {consumedTotal, user}; 284 } 285 286 public: consumePostPaddedHttpParser287 void *consumePostPadded(char *data, unsigned int length, void *user, void *reserved, MoveOnlyFunction<void *(void *, HttpRequest *)> &&requestHandler, MoveOnlyFunction<void *(void *, std::string_view, bool)> &&dataHandler, MoveOnlyFunction<void *(void *)> &&errorHandler) { 288 289 /* This resets BloomFilter by construction, but later we also reset it again. 290 * Optimize this to skip resetting twice (req could be made global) */ 291 HttpRequest req; 292 293 if (remainingStreamingBytes) { 294 295 // this is exactly the same as below! 296 // todo: refactor this 297 if (remainingStreamingBytes >= length) { 298 void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == length); 299 remainingStreamingBytes -= length; 300 return returnedUser; 301 } else { 302 void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true); 303 304 data += remainingStreamingBytes; 305 length -= remainingStreamingBytes; 306 307 remainingStreamingBytes = 0; 308 309 if (returnedUser != user) { 310 return returnedUser; 311 } 312 } 313 314 } else if (fallback.length()) { 315 unsigned int had = (unsigned int) fallback.length(); 316 317 size_t maxCopyDistance = std::min(MAX_FALLBACK_SIZE - fallback.length(), (size_t) length); 318 319 /* We don't want fallback to be short string optimized, since we want to move it */ 320 fallback.reserve(fallback.length() + maxCopyDistance + std::max<unsigned int>(MINIMUM_HTTP_POST_PADDING, sizeof(std::string))); 321 fallback.append(data, maxCopyDistance); 322 323 // break here on break 324 std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<true>(fallback.data(), (unsigned int) fallback.length(), user, reserved, &req, requestHandler, dataHandler); 325 if (consumed.second != user) { 326 return consumed.second; 327 } 328 329 if (consumed.first) { 330 331 /* This logic assumes that we consumed everything in fallback buffer. 332 * This is critically important, as we will get an integer overflow in case 333 * of "had" being larger than what we consumed, and that we would drop data */ 334 fallback.clear(); 335 data += consumed.first - had; 336 length -= consumed.first - had; 337 338 if (remainingStreamingBytes) { 339 // this is exactly the same as above! 340 if (remainingStreamingBytes >= (unsigned int) length) { 341 void *returnedUser = dataHandler(user, std::string_view(data, length), remainingStreamingBytes == (unsigned int) length); 342 remainingStreamingBytes -= length; 343 return returnedUser; 344 } else { 345 void *returnedUser = dataHandler(user, std::string_view(data, remainingStreamingBytes), true); 346 347 data += remainingStreamingBytes; 348 length -= remainingStreamingBytes; 349 350 remainingStreamingBytes = 0; 351 352 if (returnedUser != user) { 353 return returnedUser; 354 } 355 } 356 } 357 358 } else { 359 if (fallback.length() == MAX_FALLBACK_SIZE) { 360 // note: you don't really need error handler, just return something strange! 361 // we could have it return a constant pointer to denote error! 362 return errorHandler(user); 363 } 364 return user; 365 } 366 } 367 368 std::pair<unsigned int, void *> consumed = fenceAndConsumePostPadded<false>(data, length, user, reserved, &req, requestHandler, dataHandler); 369 if (consumed.second != user) { 370 return consumed.second; 371 } 372 373 data += consumed.first; 374 length -= consumed.first; 375 376 if (length) { 377 if (length < MAX_FALLBACK_SIZE) { 378 fallback.append(data, length); 379 } else { 380 return errorHandler(user); 381 } 382 } 383 384 // added for now 385 return user; 386 } 387 }; 388 389 } 390 391 #endif // UWS_HTTPPARSER_H 392