1 /* 2 * Copyright (C) 2009 by Marc Boris Duerner, Tommi Maekitalo 3 * 4 * This library is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU Lesser General Public 6 * License as published by the Free Software Foundation; either 7 * version 2.1 of the License, or (at your option) any later version. 8 * 9 * As a special exception, you may use this file as part of a free 10 * software library without restriction. Specifically, if other files 11 * instantiate templates or use macros or inline functions from this 12 * file, or you compile this file and link it with other files to 13 * produce an executable, this file does not by itself cause the 14 * resulting executable to be covered by the GNU General Public 15 * License. This exception does not however invalidate any other 16 * reasons why the executable file might be covered by the GNU Library 17 * General Public License. 18 * 19 * This library is distributed in the hope that it will be useful, 20 * but WITHOUT ANY WARRANTY; without even the implied warranty of 21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 * Lesser General Public License for more details. 23 * 24 * You should have received a copy of the GNU Lesser General Public 25 * License along with this library; if not, write to the Free Software 26 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 27 */ 28 29 #include "parser.h" 30 #include <cxxtools/http/messageheader.h> 31 #include <cxxtools/log.h> 32 #include <cctype> 33 #include <algorithm> 34 #include <string.h> 35 36 log_define("cxxtools.http.parser") 37 38 namespace cxxtools { 39 40 namespace http { 41 42 namespace 43 { chartoprint(char ch)44 std::string chartoprint(char ch) 45 { 46 const static char hex[] = "0123456789abcdef"; 47 if (std::isprint(ch)) 48 return std::string(1, '\'') + ch + '\''; 49 else 50 return std::string("'\\x") + hex[(ch >> 4) & 0xf] + hex[ch & 0xf] + '\''; 51 } 52 istokenchar(char ch)53 inline bool istokenchar(char ch) 54 { 55 static const char s[] = "\"(),/:;<=>?@[\\]{}"; 56 return std::isalpha(ch) || std::binary_search(s, s + sizeof(s) - 1, ch); 57 } 58 isHexDigit(char ch)59 inline bool isHexDigit(char ch) 60 { 61 return (ch >= '0' && ch <= '9') 62 || (ch >= 'A' && ch <= 'Z') 63 || (ch >= 'a' && ch <= 'z'); 64 } 65 valueOfHexDigit(char ch)66 inline unsigned valueOfHexDigit(char ch) 67 { 68 return ch >= '0' && ch <= '9' ? ch - '0' 69 : ch >= 'a' && ch <= 'z' ? ch - 'a' + 10 70 : ch >= 'A' && ch <= 'Z' ? ch - 'A' + 10 71 : 0; 72 } 73 } 74 onMethod(const std::string & method)75 void HeaderParser::Event::onMethod(const std::string& method) 76 { 77 } 78 onUrl(const std::string & url)79 void HeaderParser::Event::onUrl(const std::string& url) 80 { 81 } 82 onUrlParam(const std::string & q)83 void HeaderParser::Event::onUrlParam(const std::string& q) 84 { 85 } 86 onHttpVersion(unsigned major,unsigned minor)87 void HeaderParser::Event::onHttpVersion(unsigned major, unsigned minor) 88 { 89 } 90 onKey(const std::string & key)91 void HeaderParser::Event::onKey(const std::string& key) 92 { 93 } 94 onValue(const std::string & value)95 void HeaderParser::Event::onValue(const std::string& value) 96 { 97 } 98 onHttpReturn(unsigned ret,const std::string & text)99 void HeaderParser::Event::onHttpReturn(unsigned ret, const std::string& text) 100 { 101 } 102 onEnd()103 void HeaderParser::Event::onEnd() 104 { 105 } 106 onHttpVersion(unsigned major,unsigned minor)107 void HeaderParser::MessageHeaderEvent::onHttpVersion(unsigned major, unsigned minor) 108 { 109 _header.httpVersion(major, minor); 110 } 111 onKey(const std::string & key)112 void HeaderParser::MessageHeaderEvent::onKey(const std::string& key) 113 { 114 strncpy(_key, key.c_str(), MessageHeader::MAXHEADERSIZE); 115 } 116 onValue(const std::string & value)117 void HeaderParser::MessageHeaderEvent::onValue(const std::string& value) 118 { 119 _header.addHeader(_key, value.c_str()); 120 } 121 advance(std::streambuf & sb)122 std::size_t HeaderParser::advance(std::streambuf& sb) 123 { 124 std::size_t ret = 0; 125 126 while (sb.in_avail() > 0) 127 { 128 ++ret; 129 if (parse(sb.sbumpc())) 130 return ret; 131 } 132 133 return ret; 134 } 135 state_cmd0(char ch)136 void HeaderParser::state_cmd0(char ch) 137 { 138 if (istokenchar(ch)) 139 { 140 token.reserve(32); 141 token = ch; 142 state = &HeaderParser::state_cmd; 143 return; 144 } 145 else if (ch != ' ' && ch != '\t') 146 { 147 log_warn("invalid character " << chartoprint(ch) << " in method"); 148 state = &HeaderParser::state_error; 149 return; 150 } 151 else 152 { 153 state = &HeaderParser::state_cmd; 154 return; 155 } 156 } 157 state_cmd(char ch)158 void HeaderParser::state_cmd(char ch) 159 { 160 if (istokenchar(ch)) 161 { 162 token += ch; 163 return; 164 } 165 else if (ch == ' ') 166 { 167 log_debug("method=" << token); 168 ev.onMethod(token); 169 state = &HeaderParser::state_url0; 170 return; 171 } 172 else 173 { 174 log_warn("invalid character " << chartoprint(ch) << " in method"); 175 state = &HeaderParser::state_error; 176 return; 177 } 178 } 179 state_url0(char ch)180 void HeaderParser::state_url0(char ch) 181 { 182 if (ch == ' ' || ch == '\t') 183 { 184 return; 185 } 186 else if (ch == '/' || ch == '*') 187 { 188 token.reserve(32); 189 token = ch; 190 state = &HeaderParser::state_url; 191 return; 192 } 193 else if (std::isalpha(ch)) 194 { 195 token.reserve(32); 196 token = ch; 197 state = &HeaderParser::state_uri_protocol; 198 return; 199 } 200 else 201 { 202 log_warn("invalid character " << chartoprint(ch) << " in url"); 203 state = &HeaderParser::state_error; 204 return; 205 } 206 } 207 state_uri_protocol(char ch)208 void HeaderParser::state_uri_protocol(char ch) 209 { 210 if (std::isalpha(ch)) 211 { 212 } 213 else if (ch == ':') 214 { 215 token.clear(); 216 state = &HeaderParser::state_uri_protocol_e; 217 } 218 else 219 { 220 log_warn("invalid character " << chartoprint(ch) << " in url"); 221 state = &HeaderParser::state_error; 222 } 223 } 224 state_uri_protocol_e(char ch)225 void HeaderParser::state_uri_protocol_e(char ch) 226 { 227 if (token.size() < 2 && ch == '/') 228 { 229 token += ch; 230 } 231 else if (token.size() == 2 && std::isalpha(ch)) 232 { 233 token = ch; 234 state = &HeaderParser::state_uri_host; 235 } 236 else 237 { 238 log_warn("invalid character " << chartoprint(ch) << " in url"); 239 state = &HeaderParser::state_error; 240 } 241 } 242 state_uri_host(char ch)243 void HeaderParser::state_uri_host(char ch) 244 { 245 if (std::isalnum(ch) || ch == '.' || ch == ':' || ch == '[' || ch == ']') 246 { 247 } 248 else if (ch == '/') 249 { 250 token = ch; 251 state = &HeaderParser::state_url; 252 } 253 else 254 { 255 log_warn("invalid character " << chartoprint(ch) << " in url"); 256 state = &HeaderParser::state_error; 257 } 258 } 259 state_url(char ch)260 void HeaderParser::state_url(char ch) 261 { 262 if (ch == '?') 263 { 264 log_debug("url=" << token); 265 ev.onUrl(token); 266 token.clear(); 267 token.reserve(32); 268 state = &HeaderParser::state_qparam; 269 return; 270 } 271 else if (ch == ' ' || ch == '\t') 272 { 273 log_debug("url=" << token); 274 ev.onUrl(token); 275 token.clear(); 276 token.reserve(32); 277 state = &HeaderParser::state_protocol0; 278 return; 279 } 280 else if (ch == '+') 281 { 282 token += ' '; 283 return; 284 } 285 else if (ch == '%') 286 { 287 token += ch; 288 state = &HeaderParser::state_urlesc; 289 return; 290 } 291 else if (ch > ' ') 292 { 293 token += ch; 294 return; 295 } 296 else 297 { 298 log_warn("invalid character " << chartoprint(ch) << " in url"); 299 state = &HeaderParser::state_error; 300 return; 301 } 302 } 303 state_urlesc(char ch)304 void HeaderParser::state_urlesc(char ch) 305 { 306 if (isHexDigit(ch)) 307 { 308 if (token.size() >= 2 && token[token.size() - 2] == '%') 309 { 310 unsigned v = (valueOfHexDigit(token[token.size() - 1]) << 4) | valueOfHexDigit(ch); 311 token[token.size() - 2] = static_cast<char>(v); 312 token.resize(token.size() - 1); 313 state = &HeaderParser::state_url; 314 return; 315 } 316 else 317 { 318 token += ch; 319 return; 320 } 321 } 322 else 323 { 324 log_warn("invalid hex digit " << chartoprint(ch) << " in url"); 325 state = &HeaderParser::state_error; 326 return; 327 } 328 } 329 state_qparam(char ch)330 void HeaderParser::state_qparam(char ch) 331 { 332 if (ch == ' ' || ch == '\t') 333 { 334 log_debug("queryString=" << token); 335 ev.onUrlParam(token); 336 token.clear(); 337 token.reserve(32); 338 state = &HeaderParser::state_protocol0; 339 return; 340 } 341 else 342 { 343 token += ch; 344 return; 345 } 346 } 347 state_protocol0(char ch)348 void HeaderParser::state_protocol0(char ch) 349 { 350 if (ch == ' ' || ch == '\t') 351 { 352 return; 353 } 354 else if (std::isalpha(ch)) 355 { 356 token.reserve(32); 357 token = ch; 358 state = &HeaderParser::state_protocol; 359 return; 360 } 361 else 362 { 363 log_warn("invalid character " << chartoprint(ch) << " in http protocol field"); 364 state = &HeaderParser::state_error; 365 return; 366 } 367 } 368 state_protocol(char ch)369 void HeaderParser::state_protocol(char ch) 370 { 371 if (ch == ' ' || ch == '\t' || ch == '/') 372 { 373 if (token != "HTTP") 374 { 375 log_warn("invalid protocol " << token << " in http protocol field"); 376 state = &HeaderParser::state_error; 377 return; 378 } 379 else 380 { 381 state = (ch == '/' ? &HeaderParser::state_version_major : &HeaderParser::state_version0); 382 return; 383 } 384 } 385 else if (std::isalpha(ch)) 386 { 387 token += std::toupper(ch); 388 return; 389 } 390 else 391 { 392 log_warn("invalid character " << chartoprint(ch) << " in http protocol field"); 393 state = &HeaderParser::state_error; 394 return; 395 } 396 } 397 state_version0(char ch)398 void HeaderParser::state_version0(char ch) 399 { 400 if (ch == ' ' || ch == '\t') 401 { 402 return; 403 } 404 else if (ch == '/') 405 { 406 state = &HeaderParser::state_version_major; 407 return; 408 } 409 else 410 { 411 log_warn("invalid character " << chartoprint(ch) << " in http version field"); 412 state = &HeaderParser::state_error; 413 return; 414 } 415 } 416 state_version_major(char ch)417 void HeaderParser::state_version_major(char ch) 418 { 419 if (ch == ' ' || ch == '\t') 420 { 421 return; 422 } 423 else if (ch == '1') 424 { 425 state = &HeaderParser::state_version_major_e; 426 return; 427 } 428 else 429 { 430 log_warn("invalid character " << chartoprint(ch) << " in http version field"); 431 state = &HeaderParser::state_error; 432 return; 433 } 434 } 435 state_version_major_e(char ch)436 void HeaderParser::state_version_major_e(char ch) 437 { 438 if (ch == ' ' || ch == '\t') 439 { 440 state = &HeaderParser::state_version_major_e; 441 return; 442 } 443 else if (ch == '.') 444 { 445 state = &HeaderParser::state_version_minor; 446 return; 447 } 448 else 449 { 450 log_warn("invalid character " << chartoprint(ch) << " in http version field"); 451 state = &HeaderParser::state_error; 452 return; 453 } 454 } 455 state_version_minor(char ch)456 void HeaderParser::state_version_minor(char ch) 457 { 458 if (ch == ' ' || ch == '\t') 459 { 460 return; 461 } 462 else if (ch == '0' || ch == '1') 463 { 464 ev.onHttpVersion(1, ch - '0'); 465 state = &HeaderParser::state_end0; 466 return; 467 } 468 else 469 { 470 log_warn("invalid character " << chartoprint(ch) << " in http version field"); 471 state = &HeaderParser::state_error; 472 return; 473 } 474 } 475 state_end0(char ch)476 void HeaderParser::state_end0(char ch) 477 { 478 if (ch == '\n') 479 { 480 state = &HeaderParser::state_h0; 481 return; 482 } 483 else if (ch == ' ' || ch == '\t' || ch == '\r') 484 { 485 return; 486 } 487 else 488 { 489 log_warn("invalid character " << chartoprint(ch) << " in http request line"); 490 state = &HeaderParser::state_error; 491 return; 492 } 493 } 494 state_h0(char ch)495 void HeaderParser::state_h0(char ch) 496 { 497 if (ch == ' ' || ch == '\t') 498 { 499 return; 500 } 501 else if (ch > 32 && ch < 127) 502 { 503 token.reserve(32); 504 token = ch; 505 state = &HeaderParser::state_hfieldname; 506 return; 507 } 508 else if (ch == '\r') 509 { 510 state = &HeaderParser::state_hcr; 511 return; 512 } 513 else if (ch == '\n') 514 { 515 ev.onEnd(); 516 state = &HeaderParser::state_end; 517 return; 518 } 519 else 520 { 521 log_warn("invalid character " << chartoprint(ch) << " in http header"); 522 state = &HeaderParser::state_error; 523 return; 524 } 525 } 526 state_hcr(char ch)527 void HeaderParser::state_hcr(char ch) 528 { 529 if (ch == '\n') 530 { 531 ev.onEnd(); 532 state = &HeaderParser::state_end; 533 return; 534 } 535 else 536 { 537 log_warn("invalid character " << chartoprint(ch) << " in http header"); 538 state = &HeaderParser::state_error; 539 return; 540 } 541 } 542 state_hfieldname(char ch)543 void HeaderParser::state_hfieldname(char ch) 544 { 545 if (ch == ':') 546 { 547 ev.onKey(token); 548 state = &HeaderParser::state_hfieldbody0; 549 return; 550 } 551 else if (ch == ' ' || ch == '\t') 552 { 553 ev.onKey(token); 554 state = &HeaderParser::state_hfieldnamespace; 555 return; 556 } 557 else if (ch > 32 && ch < 127) 558 { 559 token += ch; 560 return; 561 } 562 else 563 { 564 log_warn("invalid character " << chartoprint(ch) << " in fieldname"); 565 state = &HeaderParser::state_error; 566 return; 567 } 568 } 569 state_hfieldnamespace(char ch)570 void HeaderParser::state_hfieldnamespace(char ch) 571 { 572 if (ch == ':') 573 { 574 state = &HeaderParser::state_hfieldbody0; 575 return; 576 } 577 else if (ch == ' ' || ch == '\t') 578 { 579 return; 580 } 581 else 582 { 583 log_warn("invalid character " << chartoprint(ch) << " in fieldname"); 584 state = &HeaderParser::state_error; 585 return; 586 } 587 } 588 state_hfieldbody0(char ch)589 void HeaderParser::state_hfieldbody0(char ch) 590 { 591 if (ch == '\r') 592 { 593 state = &HeaderParser::state_hfieldbody_cr; 594 return; 595 } 596 else if (ch == '\n') 597 { 598 state = &HeaderParser::state_hfieldbody_crlf; 599 return; 600 } 601 else if (std::isspace(ch)) 602 { 603 return; 604 } 605 else if (!std::isspace(ch)) 606 { 607 token.reserve(32); 608 token = ch; 609 state = &HeaderParser::state_hfieldbody; 610 return; 611 } 612 } 613 state_hfieldbody(char ch)614 void HeaderParser::state_hfieldbody(char ch) 615 { 616 if (ch == '\r') 617 { 618 state = &HeaderParser::state_hfieldbody_cr; 619 return; 620 } 621 else if (ch == '\n') 622 { 623 state = &HeaderParser::state_hfieldbody_crlf; 624 return; 625 } 626 else 627 { 628 token += ch; 629 return; 630 } 631 } 632 state_hfieldbody_cr(char ch)633 void HeaderParser::state_hfieldbody_cr(char ch) 634 { 635 if (ch == '\n') 636 { 637 state = &HeaderParser::state_hfieldbody_crlf; 638 return; 639 } 640 else 641 { 642 log_warn("invalid character " << chartoprint(ch) << " in fieldbody"); 643 state = &HeaderParser::state_error; 644 return; 645 } 646 647 } 648 state_hfieldbody_crlf(char ch)649 void HeaderParser::state_hfieldbody_crlf(char ch) 650 { 651 if (ch == '\r') 652 { 653 ev.onValue(token); 654 state = &HeaderParser::state_hend_cr; 655 return; 656 } 657 else if (ch == '\n') 658 { 659 ev.onValue(token); 660 ev.onEnd(); 661 state = &HeaderParser::state_end; 662 return; 663 } 664 else if (ch == ' ' || ch == '\t') 665 { 666 token += ch; 667 state = &HeaderParser::state_hfieldbody; 668 return; 669 } 670 else if (ch > 32 && ch < 127) 671 { 672 ev.onValue(token); 673 token.reserve(32); 674 token = ch; 675 state = &HeaderParser::state_hfieldname; 676 return; 677 } 678 else 679 { 680 log_warn("invalid character " << chartoprint(ch) << " in fieldbody"); 681 state = &HeaderParser::state_error; 682 return; 683 } 684 } 685 state_hend_cr(char ch)686 void HeaderParser::state_hend_cr(char ch) 687 { 688 if (ch == '\n') 689 { 690 ev.onEnd(); 691 state = &HeaderParser::state_end; 692 return; 693 } 694 else 695 { 696 log_warn("invalid character " << chartoprint(ch) << " in fieldbody"); 697 state = &HeaderParser::state_error; 698 return; 699 } 700 } 701 state_cl_protocol0(char ch)702 void HeaderParser::state_cl_protocol0(char ch) 703 { 704 if (ch == ' ' || ch == '\t') 705 { 706 return; 707 } 708 else if (std::isalpha(ch)) 709 { 710 token.reserve(32); 711 token = ch; 712 state = &HeaderParser::state_cl_protocol; 713 return; 714 } 715 else 716 { 717 log_warn("invalid character " << chartoprint(ch) << " in http protocol field"); 718 state = &HeaderParser::state_error; 719 return; 720 } 721 } 722 state_cl_protocol(char ch)723 void HeaderParser::state_cl_protocol(char ch) 724 { 725 if (ch == ' ' || ch == '\t' || ch == '/') 726 { 727 if (token != "HTTP") 728 { 729 log_warn("invalid protocol " << token << " in http protocol field"); 730 state = &HeaderParser::state_error; 731 return; 732 } 733 else 734 { 735 state = (ch == '/' ? &HeaderParser::state_cl_version_major : &HeaderParser::state_cl_version0); 736 return; 737 } 738 } 739 else if (std::isalpha(ch)) 740 { 741 token += std::toupper(ch); 742 return; 743 } 744 else 745 { 746 log_warn("invalid character " << chartoprint(ch) << " in http protocol field"); 747 state = &HeaderParser::state_error; 748 return; 749 } 750 } 751 state_cl_version0(char ch)752 void HeaderParser::state_cl_version0(char ch) 753 { 754 if (ch == ' ' || ch == '\t') 755 { 756 return; 757 } 758 else if (ch == '/') 759 { 760 state = &HeaderParser::state_cl_version_major; 761 return; 762 } 763 else 764 { 765 log_warn("invalid character " << chartoprint(ch) << " in http version field"); 766 state = &HeaderParser::state_error; 767 return; 768 } 769 } 770 state_cl_version_major(char ch)771 void HeaderParser::state_cl_version_major(char ch) 772 { 773 if (ch == ' ' || ch == '\t') 774 { 775 return; 776 } 777 else if (ch == '1') 778 { 779 state = &HeaderParser::state_cl_version_major_e; 780 return; 781 } 782 else 783 { 784 log_warn("invalid character " << chartoprint(ch) << " in http version field"); 785 state = &HeaderParser::state_error; 786 return; 787 } 788 } 789 state_cl_version_major_e(char ch)790 void HeaderParser::state_cl_version_major_e(char ch) 791 { 792 if (ch == ' ' || ch == '\t') 793 { 794 state = &HeaderParser::state_cl_version_major_e; 795 return; 796 } 797 else if (ch == '.') 798 { 799 state = &HeaderParser::state_cl_version_minor; 800 return; 801 } 802 else 803 { 804 log_warn("invalid character " << chartoprint(ch) << " in http version field"); 805 state = &HeaderParser::state_error; 806 return; 807 } 808 } 809 state_cl_version_minor(char ch)810 void HeaderParser::state_cl_version_minor(char ch) 811 { 812 if (ch == ' ' || ch == '\t') 813 { 814 return; 815 } 816 else if (ch == '0' || ch == '1') 817 { 818 ev.onHttpVersion(1, ch - '0'); 819 state = &HeaderParser::state_cl_httpresult0; 820 return; 821 } 822 else 823 { 824 log_warn("invalid character " << chartoprint(ch) << " in http result"); 825 state = &HeaderParser::state_error; 826 return; 827 } 828 } 829 state_cl_httpresult0(char ch)830 void HeaderParser::state_cl_httpresult0(char ch) 831 { 832 if (ch == ' ' || ch == '\t') 833 { 834 return; 835 } 836 else if (std::isdigit(ch)) 837 { 838 value = (ch - '0'); 839 state = &HeaderParser::state_cl_httpresult; 840 return; 841 } 842 else 843 { 844 log_warn("invalid character " << chartoprint(ch) << " in http result"); 845 state = &HeaderParser::state_error; 846 return; 847 } 848 } 849 state_cl_httpresult(char ch)850 void HeaderParser::state_cl_httpresult(char ch) 851 { 852 if (std::isdigit(ch)) 853 { 854 value = value * 10 + (ch - '0'); 855 return; 856 } 857 else if (ch == ' ' || ch == '\t') 858 { 859 token.clear(); 860 token.reserve(32); 861 state = &HeaderParser::state_cl_httpresulttext; 862 } 863 } 864 state_cl_httpresulttext(char ch)865 void HeaderParser::state_cl_httpresulttext(char ch) 866 { 867 if (ch == '\r') 868 { 869 ev.onHttpReturn(value, token); 870 state = &HeaderParser::state_cl_httpresult_cr; 871 return; 872 } 873 else if (ch == '\n') 874 { 875 ev.onHttpReturn(value, token); 876 state = &HeaderParser::state_h0; 877 return; 878 } 879 else if (token.empty() && (ch == ' ' || ch == '\t')) 880 { 881 return; 882 } 883 else 884 { 885 token += ch; 886 return; 887 } 888 } 889 state_cl_httpresult_cr(char ch)890 void HeaderParser::state_cl_httpresult_cr(char ch) 891 { 892 if (ch == '\n') 893 { 894 state = &HeaderParser::state_h0; 895 return; 896 } 897 else 898 { 899 log_warn("invalid character " << chartoprint(ch) << " in requestheader"); 900 state = &HeaderParser::state_error; 901 return; 902 } 903 } 904 state_end(char ch)905 void HeaderParser::state_end(char ch) 906 { 907 return; 908 } 909 state_error(char ch)910 void HeaderParser::state_error(char ch) 911 { 912 return; 913 } 914 915 } 916 917 } 918