1 /* 2 * Copyright (C) 2003-2005 Tommi Maekitalo 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * As a special exception, you may use this file as part of a free 10 * software library without restriction. Specifically, if other files 11 * instantiate templates or use macros or inline functions from this 12 * file, or you compile this file and link it with other files to 13 * produce an executable, this file does not by itself cause the 14 * resulting executable to be covered by the GNU General Public 15 * License. This exception does not however invalidate any other 16 * reasons why the executable file might be covered by the GNU Library 17 * General Public License. 18 * 19 * This library is distributed in the hope that it will be useful, 20 * but WITHOUT ANY WARRANTY; without even the implied warranty of 21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 * Lesser General Public License for more details. 23 * 24 * You should have received a copy of the GNU Lesser General Public 25 * License along with this library; if not, write to the Free Software 26 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 27 */ 28 29 30 #include <tnt/httpparser.h> 31 #include <tnt/httperror.h> 32 #include <tnt/httpheader.h> 33 #include <tnt/tntconfig.h> 34 #include <cxxtools/log.h> 35 #include <sstream> 36 #include <algorithm> 37 38 #define SET_STATE(new_state) state = &Parser::new_state 39 40 namespace tnt 41 { 42 namespace 43 { chartoprint(char ch)44 std::string chartoprint(char ch) 45 { 46 const static char hex[] = "0123456789abcdef"; 47 if (std::isprint(ch)) 48 return std::string(1, '\'') + ch + '\''; 49 else 50 return std::string("'\\x") + hex[(ch >> 4) & 0xf] + hex[ch & 0xf] + '\''; 51 } 52 istokenchar(char ch)53 inline bool istokenchar(char ch) 54 { 55 static const char s[] = "\"(),/:;<=>?@[\\]{}"; 56 return std::isalpha(ch) || std::binary_search(s, s + sizeof(s) - 1, ch); 57 } 58 isHexDigit(char ch)59 inline bool isHexDigit(char ch) 60 { 61 return (ch >= '0' && ch <= '9') 62 || (ch >= 'A' && ch <= 'Z') 63 || (ch >= 'a' && ch <= 'z'); 64 } 65 valueOfHexDigit(char ch)66 inline unsigned valueOfHexDigit(char ch) 67 { 68 return ch >= '0' && ch <= '9' ? ch - '0' 69 : ch >= 'a' && ch <= 'z' ? ch - 'a' + 10 70 : ch >= 'A' && ch <= 'Z' ? ch - 'A' + 10 71 : 0; 72 } 73 } 74 75 log_define("tntnet.httpmessage.parser") 76 post(bool ret)77 bool RequestSizeMonitor::post(bool ret) 78 { 79 if (++requestSize > TntConfig::it().maxRequestSize 80 && TntConfig::it().maxRequestSize > 0) 81 { 82 requestSizeExceeded(); 83 return true; 84 } 85 return ret; 86 } 87 requestSizeExceeded()88 void RequestSizeMonitor::requestSizeExceeded() 89 { } 90 reset()91 void HttpRequest::Parser::reset() 92 { 93 message.clear(); 94 SET_STATE(state_cmd0); 95 httpCode = HTTP_OK; 96 failedFlag = false; 97 RequestSizeMonitor::reset(); 98 headerParser.reset(); 99 } 100 state_cmd0(char ch)101 bool HttpRequest::Parser::state_cmd0(char ch) 102 { 103 if (istokenchar(ch)) 104 { 105 message.method[0] = ch; 106 message.methodLen = 1; 107 SET_STATE(state_cmd); 108 } 109 else if (ch != ' ' && ch != '\t') 110 { 111 log_warn("invalid character " << chartoprint(ch) << " in method"); 112 httpCode = HTTP_BAD_REQUEST; 113 failedFlag = true; 114 } 115 return failedFlag; 116 } 117 state_cmd(char ch)118 bool HttpRequest::Parser::state_cmd(char ch) 119 { 120 if (istokenchar(ch)) 121 { 122 if (message.methodLen >= sizeof(message.method) - 1) 123 { 124 log_debug("invalid method field; method=" << std::string(message.method, message.methodLen) << ", len=" << message.methodLen); 125 throw HttpError(HTTP_BAD_REQUEST, "invalid method field"); 126 } 127 message.method[message.methodLen++] = ch; 128 } 129 else if (ch == ' ') 130 { 131 message.method[message.methodLen] = '\0'; 132 log_debug("method=" << message.method); 133 SET_STATE(state_url0); 134 } 135 else 136 { 137 log_warn("invalid character " << chartoprint(ch) << " in method"); 138 httpCode = HTTP_BAD_REQUEST; 139 failedFlag = true; 140 } 141 return failedFlag; 142 } 143 state_url0(char ch)144 bool HttpRequest::Parser::state_url0(char ch) 145 { 146 if (ch == ' ' || ch == '\t') 147 { 148 } 149 else if (ch == '/') 150 { 151 message.url.clear(); 152 message.url.reserve(32); 153 message.url += ch; 154 SET_STATE(state_url); 155 } 156 else if (std::isalpha(ch)) 157 { 158 SET_STATE(state_protocol); 159 } 160 else 161 { 162 log_warn("invalid character " << chartoprint(ch) << " in url"); 163 httpCode = HTTP_BAD_REQUEST; 164 failedFlag = true; 165 } 166 167 return failedFlag; 168 } 169 state_protocol(char ch)170 bool HttpRequest::Parser::state_protocol(char ch) 171 { 172 if (ch == ':') 173 SET_STATE(state_protocol_slash1); 174 else if (!std::isalpha(ch)) 175 { 176 log_warn("invalid character " << chartoprint(ch) << " in url"); 177 httpCode = HTTP_BAD_REQUEST; 178 failedFlag = true; 179 } 180 181 return failedFlag; 182 } 183 state_protocol_slash1(char ch)184 bool HttpRequest::Parser::state_protocol_slash1(char ch) 185 { 186 if (ch == '/') 187 SET_STATE(state_protocol_slash2); 188 else 189 { 190 log_warn("invalid character " << chartoprint(ch) << " in url"); 191 httpCode = HTTP_BAD_REQUEST; 192 failedFlag = true; 193 } 194 195 return failedFlag; 196 } 197 state_protocol_slash2(char ch)198 bool HttpRequest::Parser::state_protocol_slash2(char ch) 199 { 200 if (ch == '/') 201 SET_STATE(state_protocol_host); 202 else 203 { 204 log_warn("invalid character " << chartoprint(ch) << " in url"); 205 httpCode = HTTP_BAD_REQUEST; 206 failedFlag = true; 207 } 208 209 return failedFlag; 210 } 211 state_protocol_host(char ch)212 bool HttpRequest::Parser::state_protocol_host(char ch) 213 { 214 if (ch == '/') 215 { 216 message.url.clear(); 217 message.url.reserve(32); 218 message.url += ch; 219 SET_STATE(state_url); 220 } 221 else if (!std::isalpha(ch) 222 && !std::isdigit(ch) 223 && ch != '[' 224 && ch != ']' 225 && ch != '.' 226 && ch != ':') 227 { 228 log_warn("invalid character " << chartoprint(ch) << " in url"); 229 httpCode = HTTP_BAD_REQUEST; 230 failedFlag = true; 231 } 232 233 return failedFlag; 234 } 235 state_url(char ch)236 bool HttpRequest::Parser::state_url(char ch) 237 { 238 if (ch == '?') 239 { 240 log_debug("url=" << message.url); 241 SET_STATE(state_qparam); 242 } 243 else if (ch == '\r') 244 { 245 log_debug("url=" << message.url); 246 SET_STATE(state_end0); 247 } 248 else if (ch == '\n') 249 { 250 log_debug("url=" << message.url); 251 SET_STATE(state_header); 252 } 253 else if (ch == ' ' || ch == '\t') 254 { 255 log_debug("url=" << message.url); 256 SET_STATE(state_version); 257 } 258 else if (ch == '%') 259 { 260 SET_STATE(state_urlesc); 261 message.url += ch; 262 } 263 else if (ch > ' ') 264 message.url += ch; 265 else 266 { 267 log_warn("invalid character " << chartoprint(ch) << " in url"); 268 httpCode = HTTP_BAD_REQUEST; 269 failedFlag = true; 270 } 271 return failedFlag; 272 } 273 state_urlesc(char ch)274 bool HttpRequest::Parser::state_urlesc(char ch) 275 { 276 if (isHexDigit(ch)) 277 { 278 if (message.url.size() >= 2 && message.url[message.url.size() - 2] == '%') 279 { 280 unsigned v = (valueOfHexDigit(message.url[message.url.size() - 1]) << 4) | valueOfHexDigit(ch); 281 message.url[message.url.size() - 2] = static_cast<char>(v); 282 message.url.resize(message.url.size() - 1); 283 SET_STATE(state_url); 284 } 285 else 286 { 287 message.url += ch; 288 } 289 return false; 290 } 291 else 292 { 293 SET_STATE(state_url); 294 return state_url(ch); 295 } 296 } 297 state_qparam(char ch)298 bool HttpRequest::Parser::state_qparam(char ch) 299 { 300 if (ch == ' ' || ch == '\t') 301 { 302 log_debug("queryString=" << message.queryString); 303 SET_STATE(state_version); 304 } 305 else 306 message.queryString += ch; 307 return false; 308 } 309 state_version(char ch)310 bool HttpRequest::Parser::state_version(char ch) 311 { 312 if (ch == '/') 313 { 314 message.setVersion(0, 0); 315 skipWs(&Parser::state_version_major); 316 } 317 else if (ch == '\r') 318 { 319 log_warn("invalid character " << chartoprint(ch) << " in version"); 320 httpCode = HTTP_BAD_REQUEST; 321 failedFlag = true; 322 } 323 return failedFlag; 324 } 325 state_version_major(char ch)326 bool HttpRequest::Parser::state_version_major(char ch) 327 { 328 if (ch == '.') 329 SET_STATE(state_version_minor0); 330 else if (std::isdigit(ch)) 331 message.setVersion(message.getMajorVersion() * 10 + (ch - '0'), message.getMinorVersion()); 332 else if (ch == ' ' || ch == '\t') 333 SET_STATE(state_version_major_sp); 334 else 335 { 336 log_warn("invalid character " << chartoprint(ch) << " in version-major"); 337 httpCode = HTTP_BAD_REQUEST; 338 failedFlag = true; 339 } 340 return failedFlag; 341 } 342 state_version_major_sp(char ch)343 bool HttpRequest::Parser::state_version_major_sp(char ch) 344 { 345 if (ch == '.') 346 SET_STATE(state_version_minor0); 347 else 348 { 349 log_warn("invalid character " << chartoprint(ch) << " in version-major"); 350 httpCode = HTTP_BAD_REQUEST; 351 failedFlag = true; 352 } 353 return failedFlag; 354 } 355 state_version_minor0(char ch)356 bool HttpRequest::Parser::state_version_minor0(char ch) 357 { 358 return ch == ' ' || ch == '\t' ? failedFlag 359 : state_version_minor(ch); 360 } 361 state_version_minor(char ch)362 bool HttpRequest::Parser::state_version_minor(char ch) 363 { 364 if (ch == '\n') 365 SET_STATE(state_header); 366 else if (ch == ' ' || ch == '\t' || ch == '\r') 367 SET_STATE(state_end0); 368 else if (std::isdigit(ch)) 369 message.setVersion(message.getMajorVersion(), message.getMinorVersion() * 10 + (ch - '0')); 370 else 371 { 372 log_warn("invalid character " << chartoprint(ch) << " in version-minor"); 373 httpCode = HTTP_BAD_REQUEST; 374 failedFlag = true; 375 } 376 return failedFlag; 377 } 378 state_end0(char ch)379 bool HttpRequest::Parser::state_end0(char ch) 380 { 381 if (ch == '\n') 382 SET_STATE(state_header); 383 else if (ch != ' ' && ch != '\t') 384 { 385 log_warn("invalid character " << chartoprint(ch) << " in end"); 386 httpCode = HTTP_BAD_REQUEST; 387 failedFlag = true; 388 } 389 return failedFlag; 390 } 391 state_header(char ch)392 bool HttpRequest::Parser::state_header(char ch) 393 { 394 if (headerParser.parse(ch)) 395 { 396 if (headerParser.failed()) 397 { 398 httpCode = HTTP_BAD_REQUEST; 399 failedFlag = true; 400 return true; 401 } 402 403 const char* content_length_header = message.getHeader(httpheader::contentLength); 404 if (*content_length_header) 405 { 406 bodySize = 0; 407 for (const char* c = content_length_header; *c; ++c) 408 { 409 if (*c > '9' || *c < '0') 410 throw HttpError(HTTP_BAD_REQUEST, "invalid Content-Length"); 411 bodySize = bodySize * 10 + *c - '0'; 412 } 413 414 if (TntConfig::it().maxRequestSize > 0 415 && getCurrentRequestSize() + bodySize > TntConfig::it().maxRequestSize) 416 { 417 requestSizeExceeded(); 418 return true; 419 } 420 421 message.contentSize = bodySize; 422 if (bodySize == 0) 423 return true; 424 else 425 { 426 SET_STATE(state_body); 427 message.body.reserve(bodySize); 428 return false; 429 } 430 } 431 432 return true; 433 } 434 435 return false; 436 } 437 state_body(char ch)438 bool HttpRequest::Parser::state_body(char ch) 439 { 440 message.body += ch; 441 return --bodySize == 0; 442 } 443 requestSizeExceeded()444 void HttpRequest::Parser::requestSizeExceeded() 445 { 446 log_warn("max request size " << TntConfig::it().maxRequestSize << " exceeded"); 447 httpCode = HTTP_REQUEST_ENTITY_TOO_LARGE; 448 failedFlag = true; 449 } 450 } 451