1 /*
2  * Copyright (C) 2009 by Marc Boris Duerner, Tommi Maekitalo
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * As a special exception, you may use this file as part of a free
10  * software library without restriction. Specifically, if other files
11  * instantiate templates or use macros or inline functions from this
12  * file, or you compile this file and link it with other files to
13  * produce an executable, this file does not by itself cause the
14  * resulting executable to be covered by the GNU General Public
15  * License. This exception does not however invalidate any other
16  * reasons why the executable file might be covered by the GNU Library
17  * General Public License.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
27  */
28 
29 #include "parser.h"
30 #include <cxxtools/http/messageheader.h>
31 #include <cxxtools/log.h>
32 #include <cctype>
33 #include <algorithm>
34 #include <string.h>
35 
36 log_define("cxxtools.http.parser")
37 
38 namespace cxxtools {
39 
40 namespace http {
41 
42     namespace
43     {
chartoprint(char ch)44         std::string chartoprint(char ch)
45         {
46             const static char hex[] = "0123456789abcdef";
47             if (std::isprint(ch))
48                 return std::string(1, '\'') + ch + '\'';
49             else
50                 return std::string("'\\x") + hex[(ch >> 4) & 0xf] + hex[ch & 0xf] + '\'';
51         }
52 
istokenchar(char ch)53         inline bool istokenchar(char ch)
54         {
55             static const char s[] = "\"(),/:;<=>?@[\\]{}";
56             return std::isalpha(ch) || std::binary_search(s, s + sizeof(s) - 1, ch);
57         }
58 
isHexDigit(char ch)59         inline bool isHexDigit(char ch)
60         {
61             return (ch >= '0' && ch <= '9')
62                 || (ch >= 'A' && ch <= 'Z')
63                 || (ch >= 'a' && ch <= 'z');
64         }
65 
valueOfHexDigit(char ch)66         inline unsigned valueOfHexDigit(char ch)
67         {
68             return ch >= '0' && ch <= '9' ? ch - '0'
69                  : ch >= 'a' && ch <= 'z' ? ch - 'a' + 10
70                  : ch >= 'A' && ch <= 'Z' ? ch - 'A' + 10
71                  : 0;
72         }
73     }
74 
onMethod(const std::string & method)75     void HeaderParser::Event::onMethod(const std::string& method)
76     {
77     }
78 
onUrl(const std::string & url)79     void HeaderParser::Event::onUrl(const std::string& url)
80     {
81     }
82 
onUrlParam(const std::string & q)83     void HeaderParser::Event::onUrlParam(const std::string& q)
84     {
85     }
86 
onHttpVersion(unsigned major,unsigned minor)87     void HeaderParser::Event::onHttpVersion(unsigned major, unsigned minor)
88     {
89     }
90 
onKey(const std::string & key)91     void HeaderParser::Event::onKey(const std::string& key)
92     {
93     }
94 
onValue(const std::string & value)95     void HeaderParser::Event::onValue(const std::string& value)
96     {
97     }
98 
onHttpReturn(unsigned ret,const std::string & text)99     void HeaderParser::Event::onHttpReturn(unsigned ret, const std::string& text)
100     {
101     }
102 
onEnd()103     void HeaderParser::Event::onEnd()
104     {
105     }
106 
onHttpVersion(unsigned major,unsigned minor)107     void HeaderParser::MessageHeaderEvent::onHttpVersion(unsigned major, unsigned minor)
108     {
109          _header.httpVersion(major, minor);
110     }
111 
onKey(const std::string & key)112     void HeaderParser::MessageHeaderEvent::onKey(const std::string& key)
113     {
114         strncpy(_key, key.c_str(), MessageHeader::MAXHEADERSIZE);
115     }
116 
onValue(const std::string & value)117     void HeaderParser::MessageHeaderEvent::onValue(const std::string& value)
118     {
119         _header.addHeader(_key, value.c_str());
120     }
121 
advance(std::streambuf & sb)122     std::size_t HeaderParser::advance(std::streambuf& sb)
123     {
124         std::size_t ret = 0;
125 
126         while (sb.in_avail() > 0)
127         {
128             ++ret;
129             if (parse(sb.sbumpc()))
130                 return ret;
131         }
132 
133         return ret;
134     }
135 
state_cmd0(char ch)136     void HeaderParser::state_cmd0(char ch)
137     {
138         if (istokenchar(ch))
139         {
140             token.reserve(32);
141             token = ch;
142             state = &HeaderParser::state_cmd;
143             return;
144         }
145         else if (ch != ' ' && ch != '\t')
146         {
147             log_warn("invalid character " << chartoprint(ch) << " in method");
148             state = &HeaderParser::state_error;
149             return;
150         }
151         else
152         {
153             state = &HeaderParser::state_cmd;
154             return;
155         }
156     }
157 
state_cmd(char ch)158     void HeaderParser::state_cmd(char ch)
159     {
160         if (istokenchar(ch))
161         {
162             token += ch;
163             return;
164         }
165         else if (ch == ' ')
166         {
167             log_debug("method=" << token);
168             ev.onMethod(token);
169             state = &HeaderParser::state_url0;
170             return;
171         }
172         else
173         {
174             log_warn("invalid character " << chartoprint(ch) << " in method");
175             state = &HeaderParser::state_error;
176             return;
177         }
178     }
179 
state_url0(char ch)180     void HeaderParser::state_url0(char ch)
181     {
182         if (ch == ' ' || ch == '\t')
183         {
184             return;
185         }
186         else if (ch == '/' || ch == '*')
187         {
188             token.reserve(32);
189             token = ch;
190             state = &HeaderParser::state_url;
191             return;
192         }
193         else if (std::isalpha(ch))
194         {
195             token.reserve(32);
196             token = ch;
197             state = &HeaderParser::state_uri_protocol;
198             return;
199         }
200         else
201         {
202             log_warn("invalid character " << chartoprint(ch) << " in url");
203             state = &HeaderParser::state_error;
204             return;
205         }
206     }
207 
state_uri_protocol(char ch)208     void HeaderParser::state_uri_protocol(char ch)
209     {
210         if (std::isalpha(ch))
211         {
212         }
213         else if (ch == ':')
214         {
215             token.clear();
216             state = &HeaderParser::state_uri_protocol_e;
217         }
218         else
219         {
220             log_warn("invalid character " << chartoprint(ch) << " in url");
221             state = &HeaderParser::state_error;
222         }
223     }
224 
state_uri_protocol_e(char ch)225     void HeaderParser::state_uri_protocol_e(char ch)
226     {
227         if (token.size() < 2 && ch == '/')
228         {
229             token += ch;
230         }
231         else if (token.size() == 2 && std::isalpha(ch))
232         {
233             token = ch;
234             state = &HeaderParser::state_uri_host;
235         }
236         else
237         {
238             log_warn("invalid character " << chartoprint(ch) << " in url");
239             state = &HeaderParser::state_error;
240         }
241     }
242 
state_uri_host(char ch)243     void HeaderParser::state_uri_host(char ch)
244     {
245         if (std::isalnum(ch) || ch == '.' || ch == ':' || ch == '[' || ch == ']')
246         {
247         }
248         else if (ch == '/')
249         {
250             token = ch;
251             state = &HeaderParser::state_url;
252         }
253         else
254         {
255             log_warn("invalid character " << chartoprint(ch) << " in url");
256             state = &HeaderParser::state_error;
257         }
258     }
259 
state_url(char ch)260     void HeaderParser::state_url(char ch)
261     {
262         if (ch == '?')
263         {
264             log_debug("url=" << token);
265             ev.onUrl(token);
266             token.clear();
267             token.reserve(32);
268             state = &HeaderParser::state_qparam;
269             return;
270         }
271         else if (ch == ' ' || ch == '\t')
272         {
273             log_debug("url=" << token);
274             ev.onUrl(token);
275             token.clear();
276             token.reserve(32);
277             state = &HeaderParser::state_protocol0;
278             return;
279         }
280         else if (ch == '+')
281         {
282             token += ' ';
283             return;
284         }
285         else if (ch == '%')
286         {
287             token += ch;
288             state = &HeaderParser::state_urlesc;
289             return;
290         }
291         else if (ch > ' ')
292         {
293             token += ch;
294             return;
295         }
296         else
297         {
298             log_warn("invalid character " << chartoprint(ch) << " in url");
299             state = &HeaderParser::state_error;
300             return;
301         }
302     }
303 
state_urlesc(char ch)304     void HeaderParser::state_urlesc(char ch)
305     {
306         if (isHexDigit(ch))
307         {
308             if (token.size() >= 2 && token[token.size() - 2] == '%')
309             {
310                 unsigned v = (valueOfHexDigit(token[token.size() - 1]) << 4) | valueOfHexDigit(ch);
311                 token[token.size() - 2] = static_cast<char>(v);
312                 token.resize(token.size() - 1);
313                 state = &HeaderParser::state_url;
314                 return;
315             }
316             else
317             {
318                 token += ch;
319                 return;
320             }
321         }
322         else
323         {
324             log_warn("invalid hex digit " << chartoprint(ch) << " in url");
325             state = &HeaderParser::state_error;
326             return;
327         }
328     }
329 
state_qparam(char ch)330     void HeaderParser::state_qparam(char ch)
331     {
332         if (ch == ' ' || ch == '\t')
333         {
334             log_debug("queryString=" << token);
335             ev.onUrlParam(token);
336             token.clear();
337             token.reserve(32);
338             state = &HeaderParser::state_protocol0;
339             return;
340         }
341         else
342         {
343             token += ch;
344             return;
345         }
346     }
347 
state_protocol0(char ch)348     void HeaderParser::state_protocol0(char ch)
349     {
350         if (ch == ' ' || ch == '\t')
351         {
352             return;
353         }
354         else if (std::isalpha(ch))
355         {
356             token.reserve(32);
357             token = ch;
358             state = &HeaderParser::state_protocol;
359             return;
360         }
361         else
362         {
363             log_warn("invalid character " << chartoprint(ch) << " in http protocol field");
364             state = &HeaderParser::state_error;
365             return;
366         }
367     }
368 
state_protocol(char ch)369     void HeaderParser::state_protocol(char ch)
370     {
371         if (ch == ' ' || ch == '\t' || ch == '/')
372         {
373             if (token != "HTTP")
374             {
375                 log_warn("invalid protocol " << token << " in http protocol field");
376                 state = &HeaderParser::state_error;
377                 return;
378             }
379             else
380             {
381                 state = (ch == '/' ? &HeaderParser::state_version_major : &HeaderParser::state_version0);
382                 return;
383             }
384         }
385         else if (std::isalpha(ch))
386         {
387             token += std::toupper(ch);
388             return;
389         }
390         else
391         {
392             log_warn("invalid character " << chartoprint(ch) << " in http protocol field");
393             state = &HeaderParser::state_error;
394             return;
395         }
396     }
397 
state_version0(char ch)398     void HeaderParser::state_version0(char ch)
399     {
400         if (ch == ' ' || ch == '\t')
401         {
402             return;
403         }
404         else if (ch == '/')
405         {
406             state = &HeaderParser::state_version_major;
407             return;
408         }
409         else
410         {
411             log_warn("invalid character " << chartoprint(ch) << " in http version field");
412             state = &HeaderParser::state_error;
413             return;
414         }
415     }
416 
state_version_major(char ch)417     void HeaderParser::state_version_major(char ch)
418     {
419         if (ch == ' ' || ch == '\t')
420         {
421             return;
422         }
423         else if (ch == '1')
424         {
425             state = &HeaderParser::state_version_major_e;
426             return;
427         }
428         else
429         {
430             log_warn("invalid character " << chartoprint(ch) << " in http version field");
431             state = &HeaderParser::state_error;
432             return;
433         }
434     }
435 
state_version_major_e(char ch)436     void HeaderParser::state_version_major_e(char ch)
437     {
438         if (ch == ' ' || ch == '\t')
439         {
440             state = &HeaderParser::state_version_major_e;
441             return;
442         }
443         else if (ch == '.')
444         {
445             state = &HeaderParser::state_version_minor;
446             return;
447         }
448         else
449         {
450             log_warn("invalid character " << chartoprint(ch) << " in http version field");
451             state = &HeaderParser::state_error;
452             return;
453         }
454     }
455 
state_version_minor(char ch)456     void HeaderParser::state_version_minor(char ch)
457     {
458         if (ch == ' ' || ch == '\t')
459         {
460             return;
461         }
462         else if (ch == '0' || ch == '1')
463         {
464             ev.onHttpVersion(1, ch - '0');
465             state = &HeaderParser::state_end0;
466             return;
467         }
468         else
469         {
470             log_warn("invalid character " << chartoprint(ch) << " in http version field");
471             state = &HeaderParser::state_error;
472             return;
473         }
474     }
475 
state_end0(char ch)476     void HeaderParser::state_end0(char ch)
477     {
478         if (ch == '\n')
479         {
480             state = &HeaderParser::state_h0;
481             return;
482         }
483         else if (ch == ' ' || ch == '\t' || ch == '\r')
484         {
485             return;
486         }
487         else
488         {
489             log_warn("invalid character " << chartoprint(ch) << " in http request line");
490             state = &HeaderParser::state_error;
491             return;
492         }
493     }
494 
state_h0(char ch)495     void HeaderParser::state_h0(char ch)
496     {
497         if (ch == ' ' || ch == '\t')
498         {
499             return;
500         }
501         else if (ch > 32 && ch < 127)
502         {
503             token.reserve(32);
504             token = ch;
505             state = &HeaderParser::state_hfieldname;
506             return;
507         }
508         else if (ch == '\r')
509         {
510             state = &HeaderParser::state_hcr;
511             return;
512         }
513         else if (ch == '\n')
514         {
515             ev.onEnd();
516             state = &HeaderParser::state_end;
517             return;
518         }
519         else
520         {
521             log_warn("invalid character " << chartoprint(ch) << " in http header");
522             state = &HeaderParser::state_error;
523             return;
524         }
525     }
526 
state_hcr(char ch)527     void HeaderParser::state_hcr(char ch)
528     {
529         if (ch == '\n')
530         {
531             ev.onEnd();
532             state = &HeaderParser::state_end;
533             return;
534         }
535         else
536         {
537             log_warn("invalid character " << chartoprint(ch) << " in http header");
538             state = &HeaderParser::state_error;
539             return;
540         }
541     }
542 
state_hfieldname(char ch)543     void HeaderParser::state_hfieldname(char ch)
544     {
545         if (ch == ':')
546         {
547             ev.onKey(token);
548             state = &HeaderParser::state_hfieldbody0;
549             return;
550         }
551         else if (ch == ' ' || ch == '\t')
552         {
553             ev.onKey(token);
554             state = &HeaderParser::state_hfieldnamespace;
555             return;
556         }
557         else if (ch > 32 && ch < 127)
558         {
559             token += ch;
560             return;
561         }
562         else
563         {
564             log_warn("invalid character " << chartoprint(ch) << " in fieldname");
565             state = &HeaderParser::state_error;
566             return;
567         }
568     }
569 
state_hfieldnamespace(char ch)570     void HeaderParser::state_hfieldnamespace(char ch)
571     {
572         if (ch == ':')
573         {
574             state = &HeaderParser::state_hfieldbody0;
575             return;
576         }
577         else if (ch == ' ' || ch == '\t')
578         {
579             return;
580         }
581         else
582         {
583             log_warn("invalid character " << chartoprint(ch) << " in fieldname");
584             state = &HeaderParser::state_error;
585             return;
586         }
587     }
588 
state_hfieldbody0(char ch)589     void HeaderParser::state_hfieldbody0(char ch)
590     {
591         if (ch == '\r')
592         {
593             state = &HeaderParser::state_hfieldbody_cr;
594             return;
595         }
596         else if (ch == '\n')
597         {
598             state = &HeaderParser::state_hfieldbody_crlf;
599             return;
600         }
601         else if (std::isspace(ch))
602         {
603             return;
604         }
605         else if (!std::isspace(ch))
606         {
607             token.reserve(32);
608             token = ch;
609             state = &HeaderParser::state_hfieldbody;
610             return;
611         }
612     }
613 
state_hfieldbody(char ch)614     void HeaderParser::state_hfieldbody(char ch)
615     {
616         if (ch == '\r')
617         {
618             state = &HeaderParser::state_hfieldbody_cr;
619             return;
620         }
621         else if (ch == '\n')
622         {
623             state = &HeaderParser::state_hfieldbody_crlf;
624             return;
625         }
626         else
627         {
628             token += ch;
629             return;
630         }
631     }
632 
state_hfieldbody_cr(char ch)633     void HeaderParser::state_hfieldbody_cr(char ch)
634     {
635         if (ch == '\n')
636         {
637             state = &HeaderParser::state_hfieldbody_crlf;
638             return;
639         }
640         else
641         {
642             log_warn("invalid character " << chartoprint(ch) << " in fieldbody");
643             state = &HeaderParser::state_error;
644             return;
645         }
646 
647     }
648 
state_hfieldbody_crlf(char ch)649     void HeaderParser::state_hfieldbody_crlf(char ch)
650     {
651         if (ch == '\r')
652         {
653             ev.onValue(token);
654             state = &HeaderParser::state_hend_cr;
655             return;
656         }
657         else if (ch == '\n')
658         {
659             ev.onValue(token);
660             ev.onEnd();
661             state = &HeaderParser::state_end;
662             return;
663         }
664         else if (ch == ' ' || ch == '\t')
665         {
666             token += ch;
667             state = &HeaderParser::state_hfieldbody;
668             return;
669         }
670         else if (ch > 32 && ch < 127)
671         {
672             ev.onValue(token);
673             token.reserve(32);
674             token = ch;
675             state = &HeaderParser::state_hfieldname;
676             return;
677         }
678         else
679         {
680             log_warn("invalid character " << chartoprint(ch) << " in fieldbody");
681             state = &HeaderParser::state_error;
682             return;
683         }
684     }
685 
state_hend_cr(char ch)686     void HeaderParser::state_hend_cr(char ch)
687     {
688         if (ch == '\n')
689         {
690             ev.onEnd();
691             state = &HeaderParser::state_end;
692             return;
693         }
694         else
695         {
696             log_warn("invalid character " << chartoprint(ch) << " in fieldbody");
697             state = &HeaderParser::state_error;
698             return;
699         }
700     }
701 
state_cl_protocol0(char ch)702     void HeaderParser::state_cl_protocol0(char ch)
703     {
704         if (ch == ' ' || ch == '\t')
705         {
706             return;
707         }
708         else if (std::isalpha(ch))
709         {
710             token.reserve(32);
711             token = ch;
712             state = &HeaderParser::state_cl_protocol;
713             return;
714         }
715         else
716         {
717             log_warn("invalid character " << chartoprint(ch) << " in http protocol field");
718             state = &HeaderParser::state_error;
719             return;
720         }
721     }
722 
state_cl_protocol(char ch)723     void HeaderParser::state_cl_protocol(char ch)
724     {
725         if (ch == ' ' || ch == '\t' || ch == '/')
726         {
727             if (token != "HTTP")
728             {
729                 log_warn("invalid protocol " << token << " in http protocol field");
730                 state = &HeaderParser::state_error;
731                 return;
732             }
733             else
734             {
735                 state = (ch == '/' ? &HeaderParser::state_cl_version_major : &HeaderParser::state_cl_version0);
736                 return;
737             }
738         }
739         else if (std::isalpha(ch))
740         {
741             token += std::toupper(ch);
742             return;
743         }
744         else
745         {
746             log_warn("invalid character " << chartoprint(ch) << " in http protocol field");
747             state = &HeaderParser::state_error;
748             return;
749         }
750     }
751 
state_cl_version0(char ch)752     void HeaderParser::state_cl_version0(char ch)
753     {
754         if (ch == ' ' || ch == '\t')
755         {
756             return;
757         }
758         else if (ch == '/')
759         {
760             state = &HeaderParser::state_cl_version_major;
761             return;
762         }
763         else
764         {
765             log_warn("invalid character " << chartoprint(ch) << " in http version field");
766             state = &HeaderParser::state_error;
767             return;
768         }
769     }
770 
state_cl_version_major(char ch)771     void HeaderParser::state_cl_version_major(char ch)
772     {
773         if (ch == ' ' || ch == '\t')
774         {
775             return;
776         }
777         else if (ch == '1')
778         {
779             state = &HeaderParser::state_cl_version_major_e;
780             return;
781         }
782         else
783         {
784             log_warn("invalid character " << chartoprint(ch) << " in http version field");
785             state = &HeaderParser::state_error;
786             return;
787         }
788     }
789 
state_cl_version_major_e(char ch)790     void HeaderParser::state_cl_version_major_e(char ch)
791     {
792         if (ch == ' ' || ch == '\t')
793         {
794             state = &HeaderParser::state_cl_version_major_e;
795             return;
796         }
797         else if (ch == '.')
798         {
799             state = &HeaderParser::state_cl_version_minor;
800             return;
801         }
802         else
803         {
804             log_warn("invalid character " << chartoprint(ch) << " in http version field");
805             state = &HeaderParser::state_error;
806             return;
807         }
808     }
809 
state_cl_version_minor(char ch)810     void HeaderParser::state_cl_version_minor(char ch)
811     {
812         if (ch == ' ' || ch == '\t')
813         {
814             return;
815         }
816         else if (ch == '0' || ch == '1')
817         {
818             ev.onHttpVersion(1, ch - '0');
819             state = &HeaderParser::state_cl_httpresult0;
820             return;
821         }
822         else
823         {
824             log_warn("invalid character " << chartoprint(ch) << " in http result");
825             state = &HeaderParser::state_error;
826             return;
827         }
828     }
829 
state_cl_httpresult0(char ch)830     void HeaderParser::state_cl_httpresult0(char ch)
831     {
832         if (ch == ' ' || ch == '\t')
833         {
834             return;
835         }
836         else if (std::isdigit(ch))
837         {
838             value = (ch - '0');
839             state = &HeaderParser::state_cl_httpresult;
840             return;
841         }
842         else
843         {
844             log_warn("invalid character " << chartoprint(ch) << " in http result");
845             state = &HeaderParser::state_error;
846             return;
847         }
848     }
849 
state_cl_httpresult(char ch)850     void HeaderParser::state_cl_httpresult(char ch)
851     {
852         if (std::isdigit(ch))
853         {
854             value = value * 10 + (ch - '0');
855             return;
856         }
857         else if (ch == ' ' || ch == '\t')
858         {
859             token.clear();
860             token.reserve(32);
861             state = &HeaderParser::state_cl_httpresulttext;
862         }
863     }
864 
state_cl_httpresulttext(char ch)865     void HeaderParser::state_cl_httpresulttext(char ch)
866     {
867         if (ch == '\r')
868         {
869             ev.onHttpReturn(value, token);
870             state = &HeaderParser::state_cl_httpresult_cr;
871             return;
872         }
873         else if (ch == '\n')
874         {
875             ev.onHttpReturn(value, token);
876             state = &HeaderParser::state_h0;
877             return;
878         }
879         else if (token.empty() && (ch == ' ' || ch == '\t'))
880         {
881             return;
882         }
883         else
884         {
885             token += ch;
886             return;
887         }
888     }
889 
state_cl_httpresult_cr(char ch)890     void HeaderParser::state_cl_httpresult_cr(char ch)
891     {
892         if (ch == '\n')
893         {
894             state = &HeaderParser::state_h0;
895             return;
896         }
897         else
898         {
899             log_warn("invalid character " << chartoprint(ch) << " in requestheader");
900             state = &HeaderParser::state_error;
901             return;
902         }
903     }
904 
state_end(char ch)905     void HeaderParser::state_end(char ch)
906     {
907         return;
908     }
909 
state_error(char ch)910     void HeaderParser::state_error(char ch)
911     {
912         return;
913     }
914 
915 }
916 
917 }
918