1 /*
2  * Copyright (C) 2003-2005 Tommi Maekitalo
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * As a special exception, you may use this file as part of a free
10  * software library without restriction. Specifically, if other files
11  * instantiate templates or use macros or inline functions from this
12  * file, or you compile this file and link it with other files to
13  * produce an executable, this file does not by itself cause the
14  * resulting executable to be covered by the GNU General Public
15  * License. This exception does not however invalidate any other
16  * reasons why the executable file might be covered by the GNU Library
17  * General Public License.
18  *
19  * This library is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22  * Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public
25  * License along with this library; if not, write to the Free Software
26  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
27  */
28 
29 
#include <tnt/httpparser.h>
#include <tnt/httperror.h>
#include <tnt/httpheader.h>
#include <tnt/tntconfig.h>
#include <cxxtools/log.h>
#include <algorithm>
#include <cctype>
#include <sstream>
#include <string>
37 
// Makes the named Parser member function the handler for the next input
// character by storing its address in the member `state`.
// The expansion is parenthesised so it behaves as one expression wherever
// the macro is used.
#define SET_STATE(new_state)  (state = &Parser::new_state)
39 
40 namespace tnt
41 {
42   namespace
43   {
// Formats a character for log messages.
//
// Printable characters are returned in single quotes ('a'); everything else
// is returned as a quoted two-digit lower-case hex escape ('\x0a').
//
// The character is converted to unsigned char before it is handed to
// std::isprint: passing a negative plain char to the <cctype> functions is
// undefined behaviour, and bytes >= 0x80 do arrive here from the network.
std::string chartoprint(char ch)
{
  static const char hex[] = "0123456789abcdef";
  const unsigned char uch = static_cast<unsigned char>(ch);

  std::string ret(1, '\'');
  if (std::isprint(uch))
  {
    ret += ch;
  }
  else
  {
    ret += "\\x";
    ret += hex[uch >> 4];
    ret += hex[uch & 0xf];
  }
  ret += '\'';
  return ret;
}
52 
// Tells whether ch is accepted as part of the request method.
//
// Accepted are alphabetic characters and the characters listed in
// `separators` below.
//
// The character is converted to unsigned char before calling std::isalpha,
// because passing a negative plain char to <cctype> functions is undefined
// behaviour.
//
// NOTE(review): RFC 7230 defines a token as the characters *excluding* these
// separators, and allows non-letters such as '-' or digits. The accepted set
// is kept exactly as it was; confirm the intent before changing it.
inline bool istokenchar(char ch)
{
  // must stay sorted by character code, std::binary_search relies on it
  static const char separators[] = "\"(),/:;<=>?@[\\]{}";
  const char* const separatorsEnd = separators + sizeof(separators) - 1;

  if (std::isalpha(static_cast<unsigned char>(ch)))
    return true;

  return std::binary_search(separators, separatorsEnd, ch);
}
58 
// Tells whether ch is a hexadecimal digit (0-9, a-f, A-F).
//
// The letter ranges end at 'f'/'F'; the former ranges up to 'z'/'Z' made
// sequences such as "%zz" look like valid percent escapes.
inline bool isHexDigit(char ch)
{
  return (ch >= '0' && ch <= '9')
      || (ch >= 'A' && ch <= 'F')
      || (ch >= 'a' && ch <= 'f');
}
65 
// Returns the numeric value (0..15) of a hexadecimal digit.
//
// Any character that is not a hex digit yields 0. The letter ranges are
// limited to a-f / A-F so that the result always fits into 4 bits; the
// former ranges up to 'z'/'Z' produced values up to 35.
inline unsigned valueOfHexDigit(char ch)
{
  if (ch >= '0' && ch <= '9')
    return static_cast<unsigned>(ch - '0');
  if (ch >= 'a' && ch <= 'f')
    return static_cast<unsigned>(ch - 'a' + 10);
  if (ch >= 'A' && ch <= 'F')
    return static_cast<unsigned>(ch - 'A' + 10);
  return 0;
}
73   }
74 
75   log_define("tntnet.httpmessage.parser")
76 
post(bool ret)77   bool RequestSizeMonitor::post(bool ret)
78   {
79     if (++requestSize > TntConfig::it().maxRequestSize
80       && TntConfig::it().maxRequestSize > 0)
81     {
82       requestSizeExceeded();
83       return true;
84     }
85     return ret;
86   }
87 
requestSizeExceeded()88   void RequestSizeMonitor::requestSizeExceeded()
89   { }
90 
reset()91   void HttpRequest::Parser::reset()
92   {
93     message.clear();
94     SET_STATE(state_cmd0);
95     httpCode = HTTP_OK;
96     failedFlag = false;
97     RequestSizeMonitor::reset();
98     headerParser.reset();
99   }
100 
state_cmd0(char ch)101   bool HttpRequest::Parser::state_cmd0(char ch)
102   {
103     if (istokenchar(ch))
104     {
105       message.method[0] = ch;
106       message.methodLen = 1;
107       SET_STATE(state_cmd);
108     }
109     else if (ch != ' ' && ch != '\t')
110     {
111       log_warn("invalid character " << chartoprint(ch) << " in method");
112       httpCode = HTTP_BAD_REQUEST;
113       failedFlag = true;
114     }
115     return failedFlag;
116   }
117 
state_cmd(char ch)118   bool HttpRequest::Parser::state_cmd(char ch)
119   {
120     if (istokenchar(ch))
121     {
122       if (message.methodLen >= sizeof(message.method) - 1)
123       {
124         log_debug("invalid method field; method=" << std::string(message.method, message.methodLen) << ", len=" << message.methodLen);
125         throw HttpError(HTTP_BAD_REQUEST, "invalid method field");
126       }
127       message.method[message.methodLen++] = ch;
128     }
129     else if (ch == ' ')
130     {
131       message.method[message.methodLen] = '\0';
132       log_debug("method=" << message.method);
133       SET_STATE(state_url0);
134     }
135     else
136     {
137       log_warn("invalid character " << chartoprint(ch) << " in method");
138       httpCode = HTTP_BAD_REQUEST;
139       failedFlag = true;
140     }
141     return failedFlag;
142   }
143 
state_url0(char ch)144   bool HttpRequest::Parser::state_url0(char ch)
145   {
146     if (ch == ' ' || ch == '\t')
147     {
148     }
149     else if (ch == '/')
150     {
151       message.url.clear();
152       message.url.reserve(32);
153       message.url += ch;
154       SET_STATE(state_url);
155     }
156     else if (std::isalpha(ch))
157     {
158       SET_STATE(state_protocol);
159     }
160     else
161     {
162       log_warn("invalid character " << chartoprint(ch) << " in url");
163       httpCode = HTTP_BAD_REQUEST;
164       failedFlag = true;
165     }
166 
167     return failedFlag;
168   }
169 
state_protocol(char ch)170   bool HttpRequest::Parser::state_protocol(char ch)
171   {
172     if (ch == ':')
173       SET_STATE(state_protocol_slash1);
174     else if (!std::isalpha(ch))
175     {
176       log_warn("invalid character " << chartoprint(ch) << " in url");
177       httpCode = HTTP_BAD_REQUEST;
178       failedFlag = true;
179     }
180 
181     return failedFlag;
182   }
183 
state_protocol_slash1(char ch)184   bool HttpRequest::Parser::state_protocol_slash1(char ch)
185   {
186     if (ch == '/')
187       SET_STATE(state_protocol_slash2);
188     else
189     {
190       log_warn("invalid character " << chartoprint(ch) << " in url");
191       httpCode = HTTP_BAD_REQUEST;
192       failedFlag = true;
193     }
194 
195     return failedFlag;
196   }
197 
state_protocol_slash2(char ch)198   bool HttpRequest::Parser::state_protocol_slash2(char ch)
199   {
200     if (ch == '/')
201       SET_STATE(state_protocol_host);
202     else
203     {
204       log_warn("invalid character " << chartoprint(ch) << " in url");
205       httpCode = HTTP_BAD_REQUEST;
206       failedFlag = true;
207     }
208 
209     return failedFlag;
210   }
211 
state_protocol_host(char ch)212   bool HttpRequest::Parser::state_protocol_host(char ch)
213   {
214     if (ch == '/')
215     {
216       message.url.clear();
217       message.url.reserve(32);
218       message.url += ch;
219       SET_STATE(state_url);
220     }
221     else if (!std::isalpha(ch)
222            && !std::isdigit(ch)
223            && ch != '['
224            && ch != ']'
225            && ch != '.'
226            && ch != ':')
227     {
228       log_warn("invalid character " << chartoprint(ch) << " in url");
229       httpCode = HTTP_BAD_REQUEST;
230       failedFlag = true;
231     }
232 
233     return failedFlag;
234   }
235 
state_url(char ch)236   bool HttpRequest::Parser::state_url(char ch)
237   {
238     if (ch == '?')
239     {
240       log_debug("url=" << message.url);
241       SET_STATE(state_qparam);
242     }
243     else if (ch == '\r')
244     {
245       log_debug("url=" << message.url);
246       SET_STATE(state_end0);
247     }
248     else if (ch == '\n')
249     {
250       log_debug("url=" << message.url);
251       SET_STATE(state_header);
252     }
253     else if (ch == ' ' || ch == '\t')
254     {
255       log_debug("url=" << message.url);
256       SET_STATE(state_version);
257     }
258     else if (ch == '%')
259     {
260       SET_STATE(state_urlesc);
261       message.url += ch;
262     }
263     else if (ch > ' ')
264       message.url += ch;
265     else
266     {
267       log_warn("invalid character " << chartoprint(ch) << " in url");
268       httpCode = HTTP_BAD_REQUEST;
269       failedFlag = true;
270     }
271     return failedFlag;
272   }
273 
state_urlesc(char ch)274   bool HttpRequest::Parser::state_urlesc(char ch)
275   {
276     if (isHexDigit(ch))
277     {
278       if (message.url.size() >= 2 && message.url[message.url.size() - 2] == '%')
279       {
280         unsigned v = (valueOfHexDigit(message.url[message.url.size() - 1]) << 4) | valueOfHexDigit(ch);
281         message.url[message.url.size() - 2] = static_cast<char>(v);
282         message.url.resize(message.url.size() - 1);
283         SET_STATE(state_url);
284       }
285       else
286       {
287         message.url += ch;
288       }
289       return false;
290     }
291     else
292     {
293       SET_STATE(state_url);
294       return state_url(ch);
295     }
296   }
297 
state_qparam(char ch)298   bool HttpRequest::Parser::state_qparam(char ch)
299   {
300     if (ch == ' ' || ch == '\t')
301     {
302       log_debug("queryString=" << message.queryString);
303       SET_STATE(state_version);
304     }
305     else
306       message.queryString += ch;
307     return false;
308   }
309 
state_version(char ch)310   bool HttpRequest::Parser::state_version(char ch)
311   {
312     if (ch == '/')
313     {
314       message.setVersion(0, 0);
315       skipWs(&Parser::state_version_major);
316     }
317     else if (ch == '\r')
318     {
319       log_warn("invalid character " << chartoprint(ch) << " in version");
320       httpCode = HTTP_BAD_REQUEST;
321       failedFlag = true;
322     }
323     return failedFlag;
324   }
325 
state_version_major(char ch)326   bool HttpRequest::Parser::state_version_major(char ch)
327   {
328     if (ch == '.')
329       SET_STATE(state_version_minor0);
330     else if (std::isdigit(ch))
331       message.setVersion(message.getMajorVersion() * 10 + (ch - '0'), message.getMinorVersion());
332     else if (ch == ' ' || ch == '\t')
333       SET_STATE(state_version_major_sp);
334     else
335     {
336       log_warn("invalid character " << chartoprint(ch) << " in version-major");
337       httpCode = HTTP_BAD_REQUEST;
338       failedFlag = true;
339     }
340     return failedFlag;
341   }
342 
state_version_major_sp(char ch)343   bool HttpRequest::Parser::state_version_major_sp(char ch)
344   {
345     if (ch == '.')
346       SET_STATE(state_version_minor0);
347     else
348     {
349       log_warn("invalid character " << chartoprint(ch) << " in version-major");
350       httpCode = HTTP_BAD_REQUEST;
351       failedFlag = true;
352     }
353     return failedFlag;
354   }
355 
state_version_minor0(char ch)356   bool HttpRequest::Parser::state_version_minor0(char ch)
357   {
358     return ch == ' ' || ch == '\t' ? failedFlag
359                                    : state_version_minor(ch);
360   }
361 
state_version_minor(char ch)362   bool HttpRequest::Parser::state_version_minor(char ch)
363   {
364     if (ch == '\n')
365       SET_STATE(state_header);
366     else if (ch == ' ' || ch == '\t' || ch == '\r')
367       SET_STATE(state_end0);
368     else if (std::isdigit(ch))
369       message.setVersion(message.getMajorVersion(), message.getMinorVersion() * 10 + (ch - '0'));
370     else
371     {
372       log_warn("invalid character " << chartoprint(ch) << " in version-minor");
373       httpCode = HTTP_BAD_REQUEST;
374       failedFlag = true;
375     }
376     return failedFlag;
377   }
378 
state_end0(char ch)379   bool HttpRequest::Parser::state_end0(char ch)
380   {
381     if (ch == '\n')
382       SET_STATE(state_header);
383     else if (ch != ' ' && ch != '\t')
384     {
385       log_warn("invalid character " << chartoprint(ch) << " in end");
386       httpCode = HTTP_BAD_REQUEST;
387       failedFlag = true;
388     }
389     return failedFlag;
390   }
391 
state_header(char ch)392   bool HttpRequest::Parser::state_header(char ch)
393   {
394     if (headerParser.parse(ch))
395     {
396       if (headerParser.failed())
397       {
398         httpCode = HTTP_BAD_REQUEST;
399         failedFlag = true;
400         return true;
401       }
402 
403       const char* content_length_header = message.getHeader(httpheader::contentLength);
404       if (*content_length_header)
405       {
406         bodySize = 0;
407         for (const char* c = content_length_header; *c; ++c)
408         {
409           if (*c > '9' || *c < '0')
410             throw HttpError(HTTP_BAD_REQUEST, "invalid Content-Length");
411           bodySize = bodySize * 10 + *c - '0';
412         }
413 
414         if (TntConfig::it().maxRequestSize > 0
415           && getCurrentRequestSize() + bodySize > TntConfig::it().maxRequestSize)
416         {
417           requestSizeExceeded();
418           return true;
419         }
420 
421         message.contentSize = bodySize;
422         if (bodySize == 0)
423           return true;
424         else
425         {
426           SET_STATE(state_body);
427           message.body.reserve(bodySize);
428           return false;
429         }
430       }
431 
432       return true;
433     }
434 
435     return false;
436   }
437 
state_body(char ch)438   bool HttpRequest::Parser::state_body(char ch)
439   {
440     message.body += ch;
441     return --bodySize == 0;
442   }
443 
requestSizeExceeded()444   void HttpRequest::Parser::requestSizeExceeded()
445   {
446     log_warn("max request size " << TntConfig::it().maxRequestSize << " exceeded");
447     httpCode = HTTP_REQUEST_ENTITY_TOO_LARGE;
448     failedFlag = true;
449   }
450 }
451