1 /*
2 * Copyright (C) 2008 Emweb bv, Herent, Belgium.
3 *
4 * See the LICENSE file for terms of use.
5 * In addition to these terms, permission is also granted to use and
6 * modify these two files (CgiParser.C and CgiParser.h) so long as the
7 * copyright above is maintained, modifications are documented, and
8 * credit is given for any use of the library.
9 *
10 * CGI parser modelled after the PERL implementation cgi-lib.pl 2.18 by
11 * Steven E. Brenner with the following original copyright:
12
13 # Perl Routines to Manipulate CGI input
14 # cgi-lib@pobox.com
15 #
16 # Copyright (c) 1993-1999 Steven E. Brenner
17 # Unpublished work.
18 # Permission granted to use and modify this library so long as the
19 # copyright above is maintained, modifications are documented, and
20 # credit is given for any use of the library.
21 #
22 # Thanks are due to many people for reporting bugs and suggestions
23
24 # For more information, see:
25 # http://cgi-lib.stanford.edu/cgi-lib/
26
27 */
28
29 #include <fstream>
30 #include <stdlib.h>
31
32 #ifdef WT_HAVE_GNU_REGEX
33 #include <regex.h>
34 #else
35 #include <regex>
36 #endif // WT_HAVE_GNU_REGEX
37
38 #include "CgiParser.h"
39 #include "WebRequest.h"
40 #include "WebUtils.h"
41 #include "FileUtils.h"
42
43 #include "Wt/WException.h"
44 #include "Wt/WLogger.h"
45 #include "Wt/Http/Request.h"
46
47 using std::memmove;
48 using std::strcpy;
49 using std::strtol;
50
51 namespace {
52 #ifndef WT_HAVE_GNU_REGEX
/*
 * Case-insensitive patterns used to pick apart multipart/form-data headers
 * (std::regex flavour).
 *
 * The value-extracting patterns (boundary, name, filename, content) all have
 * two capture groups: group 1 holds a double-quoted value (without the
 * quotes), group 2 an unquoted value. Only one of them participates in a
 * match.
 */

// boundary=... parameter: quoted, or unquoted up to the next whitespace.
const std::regex boundary_e("\\bboundary=(?:(?:\"([^\"]+)\")|(\\S+))",
                            std::regex::icase);
// name=... parameter: quoted, or unquoted up to whitespace, ':' or ';'.
const std::regex name_e("\\bname=(?:(?:\"([^\"]+)\")|([^\\s:;]+))",
                        std::regex::icase);
// filename=... parameter; unlike name, a quoted value may be empty.
const std::regex filename_e("\\bfilename=(?:(?:\"([^\"]*)\")|([^\\s:;]+))",
                            std::regex::icase);
// Value following a "Content-type:" header at the start of a line.
const std::regex content_e("^\\s*Content-type:"
                           "\\s*(?:(?:\"([^\"]+)\")|([^\\s:;]+))",
                           std::regex::icase);
// Detects a "Content-Disposition:" header line (no captures).
const std::regex content_disposition_e("^\\s*Content-Disposition:",
                                       std::regex::icase);
// Detects a "Content-Type:" header line (no captures).
const std::regex content_type_e("^\\s*Content-Type:",
                                std::regex::icase);
66
/*
 * Searches text for the expression e and, on a match, stores the
 * concatenation of capture groups 1 and 2 in result.
 *
 * Returns true when e matched; result is left untouched otherwise.
 */
bool fishValue(const std::string& text,
               const std::regex& e, std::string& result)
{
  std::smatch groups;

  if (!std::regex_search(text, groups, e))
    return false;

  // A group that did not participate in the match contributes an empty
  // string, so concatenating yields whichever alternative matched.
  result = groups[1].str() + groups[2].str();

  return true;
}
78
/*
 * Returns whether the expression e matches anywhere in text.
 */
bool regexMatch(const std::string& text, const std::regex& e)
{
  const bool found = std::regex_search(text, e);

  return found;
}
83
84 #else
/*
 * Compiled POSIX regular expressions; they are compiled from the pattern
 * strings below by RegInitializer::setup() and released by
 * RegInitializer::cleanup().
 */
regex_t boundary_e, name_e, filename_e, content_e,
  content_disposition_e, content_type_e;

/*
 * Pattern sources (compiled with REG_ICASE | REG_EXTENDED).
 *
 * In the value-extracting patterns, group 3 is the content of a
 * double-quoted value and group 4 an unquoted value; these are the groups
 * read by fishValue().
 */
const char *boundary_ep = "\\bboundary=((\"([^\"]*)\")|([^ \t]*))";
const char *name_ep = "\\bname=((\"([^\"]*)\")|([^ \t:;]*))";
const char *filename_ep = "\\bfilename=((\"([^\"]*)\")|([^ \t:;]*))";
const char *content_ep = "^[ \t]*Content-type:"
  "[ \t]*((\"([^\"]*)\")|([^ \t:;]*))";
// Header-line detectors (no value extraction).
const char *content_disposition_ep = "^[ \t]*Content-Disposition:";
const char *content_type_ep = "^[ \t]*Content-Type:";
95
/*
 * Runs the compiled expression e1 against text and, on a match, copies the
 * matched value into result: capture group 3 if it participated, then
 * capture group 4 if it participated (the latter wins when both are set).
 *
 * Returns true when e1 matched, false otherwise (result is then untouched).
 */
bool fishValue(const std::string& text,
               regex_t& e1, std::string& result)
{
  regmatch_t groups[5];

  if (regexec(&e1, text.c_str(), 5, groups, 0) != 0)
    return false;

  // Groups 3 and 4 hold the quoted and unquoted alternative respectively.
  for (int g = 3; g <= 4; ++g) {
    const regmatch_t& m = groups[g];

    if (m.rm_so != -1)
      result = text.substr(m.rm_so, m.rm_eo - m.rm_so);
  }

  return true;
}
114
/*
 * Returns whether the compiled expression e matches anywhere in text.
 */
bool regexMatch(const std::string& text, regex_t& e)
{
  regmatch_t whole[1];
  const int status = regexec(&e, text.c_str(), 1, whole, 0);

  return status == 0;
}
121
122 class RegInitializer {
123 protected:
124 static bool regInitialized_;
125
126 public:
127 RegInitializer()
128 {}
129
130 ~RegInitializer() {
131 cleanup();
132 }
133
134 static void setup() {
135 if (!regInitialized_) {
136 regcomp(&boundary_e, boundary_ep, REG_ICASE | REG_EXTENDED);
137 regcomp(&name_e, name_ep, REG_ICASE | REG_EXTENDED);
138 regcomp(&filename_e, filename_ep, REG_ICASE | REG_EXTENDED);
139 regcomp(&content_e, content_ep, REG_ICASE | REG_EXTENDED);
140 regcomp(&content_disposition_e, content_disposition_ep,
141 REG_ICASE | REG_EXTENDED);
142 regcomp(&content_type_e, content_type_ep, REG_ICASE | REG_EXTENDED);
143 regInitialized_ = true;
144 }
145 }
146
147 static void cleanup() {
148 if (regInitialized_) {
149 regfree(&boundary_e);
150 regfree(&name_e);
151 regfree(&filename_e);
152 regfree(&content_e);
153 regfree(&content_disposition_e);
154 regfree(&content_type_e);
155 regInitialized_ = false;
156 }
157 }
158 };
159
160 bool RegInitializer::regInitialized_ = false;
161
162 static RegInitializer regInitializer;
163 #endif
164 }
165
166 namespace Wt {
167
168 LOGGER("CgiParser");
169
init()170 void CgiParser::init()
171 {
172 #ifdef WT_HAVE_GNU_REGEX
173 RegInitializer::setup();
174 #endif
175 }
176
CgiParser(::int64_t maxRequestSize,::int64_t maxFormData)177 CgiParser::CgiParser(::int64_t maxRequestSize, ::int64_t maxFormData)
178 : maxFormData_(maxFormData),
179 maxRequestSize_(maxRequestSize)
180 { }
181
parse(WebRequest & request,ReadOption readOption)182 void CgiParser::parse(WebRequest& request, ReadOption readOption)
183 {
184 /*
185 * TODO: optimize this ...
186 */
187 request_ = &request;
188
189 ::int64_t len = request.contentLength();
190 const char *type = request.contentType();
191 const char *meth = request.requestMethod();
192
193 request.postDataExceeded_ = (len > maxRequestSize_ ? len : 0);
194
195 std::string queryString = request.queryString();
196
197 LOG_DEBUG("queryString (len=" << len << "): " << queryString);
198
199 if (!queryString.empty() && request_->parameters_.empty()) {
200 Http::Request::parseFormUrlEncoded(queryString, request_->parameters_);
201 }
202
203 // XDomainRequest cannot set a contentType header, we therefore pass it
204 // as a request parameter
205 if (readOption != ReadHeadersOnly &&
206 strcmp(meth, "POST") == 0 &&
207 ((type && strstr(type, "application/x-www-form-urlencoded") == type) ||
208 (queryString.find("&contentType=x-www-form-urlencoded") !=
209 std::string::npos))) {
210 /*
211 * TODO: parse this stream-based to avoid the malloc here. For now
212 * we protect the maximum that can be POST'ed as form data.
213 */
214 if (len > maxFormData_)
215 throw WException("Oversized application/x-www-form-urlencoded ("
216 + std::to_string(len) + ")");
217
218 auto buf = std::unique_ptr<char[]>(new char[len + 1]);
219
220 request.in().read(buf.get(), len);
221
222 if (request.in().gcount() != (int)len) {
223 throw WException("Unexpected short read.");
224 }
225
226 buf[len] = 0;
227
228 // This is a special Wt feature, I do not think it standard.
229 // For POST, parameters in url-encoded URL are still parsed.
230
231 std::string formQueryString = buf.get();
232
233 LOG_DEBUG("formQueryString (len=" << len << "): " << formQueryString);
234 if (!formQueryString.empty()) {
235 Http::Request::parseFormUrlEncoded(formQueryString, request_->parameters_);
236 }
237 Http::ParameterMap::const_iterator it = request_->parameters_.find("Wt-params");
238 if (it != request_->parameters_.end() && it->second.size() == 1) {
239 Http::Request::parseFormUrlEncoded(it->second[0], request_->parameters_);
240 }
241 }
242
243 if (readOption != ReadHeadersOnly &&
244 type && strstr(type, "multipart/form-data") == type) {
245 if (strcmp(meth, "POST") != 0) {
246 throw WException("Invalid method for multipart/form-data: "
247 + std::string(meth));
248 }
249
250 if (!request.postDataExceeded_)
251 readMultipartData(request, type, len);
252 else if (readOption == ReadBodyAnyway) {
253 for (;len > 0;) {
254 ::int64_t toRead = std::min(::int64_t(BUFSIZE), len);
255 request.in().read(buf_, toRead);
256 if (request.in().gcount() != (::int64_t)toRead)
257 throw WException("CgiParser: short read");
258 len -= toRead;
259 }
260 }
261 }
262 }
263
readMultipartData(WebRequest & request,const std::string type,::int64_t len)264 void CgiParser::readMultipartData(WebRequest& request,
265 const std::string type, ::int64_t len)
266 {
267 std::string boundary;
268
269 if (!fishValue(type, boundary_e, boundary))
270 throw WException("Could not find a boundary for multipart data.");
271
272 boundary = "--" + boundary;
273
274 buflen_ = 0;
275 left_ = len;
276 spoolStream_ = 0;
277 currentKey_.clear();
278
279 if (!parseBody(request, boundary))
280 return;
281
282 for (;;) {
283 if (!parseHead(request))
284 break;
285 if (!parseBody(request,boundary))
286 break;
287 }
288 }
289
290 /*
291 * Read until finding the boundary, saving to resultString or
292 * resultFile. The boundary itself is not consumed.
293 *
294 * tossAtBoundary controls how many characters extra (<0)
295 * or few (>0) are saved at the start of the boundary in the result.
296 */
readUntilBoundary(WebRequest & request,const std::string boundary,int tossAtBoundary,std::string * resultString,std::ostream * resultFile)297 void CgiParser::readUntilBoundary(WebRequest& request,
298 const std::string boundary,
299 int tossAtBoundary,
300 std::string *resultString,
301 std::ostream *resultFile)
302 {
303 int bpos;
304
305 while ((bpos = index(boundary)) == -1) {
306 /*
307 * If we couldn't find it. We need to wind the buffer, but only save
308 * not including the boundary length.
309 */
310 if (left_ == 0)
311 throw WException("CgiParser: reached end of input while seeking end of "
312 "headers or content. Format of CGI input is wrong");
313
314 /* save (up to) BUFSIZE from buffer to file or value string, but
315 * mind the boundary length */
316 int save = std::min((buflen_ - (int)boundary.length()), (int)BUFSIZE);
317
318 if (save > 0) {
319 if (resultString)
320 *resultString += std::string(buf_, save);
321 if (resultFile)
322 resultFile->write(buf_, save);
323
324 /* wind buffer */
325 windBuffer(save);
326 }
327
328 unsigned amt = static_cast<unsigned>
329 (std::min(left_,
330 static_cast< ::int64_t >(BUFSIZE + MAXBOUND - buflen_)));
331
332 request.in().read(buf_ + buflen_, amt);
333 if (request.in().gcount() != (int)amt)
334 throw WException("CgiParser: short read");
335
336 left_ -= amt;
337 buflen_ += amt;
338 }
339
340 if (resultString)
341 *resultString += std::string(buf_, bpos - tossAtBoundary);
342 if (resultFile)
343 resultFile->write(buf_, bpos - tossAtBoundary);
344
345 /* wind buffer */
346 windBuffer(bpos);
347 }
348
windBuffer(int offset)349 void CgiParser::windBuffer(int offset)
350 {
351 if (offset < buflen_) {
352 memmove(buf_, buf_ + offset, buflen_ - offset);
353 buflen_ -= offset;
354 } else
355 buflen_ = 0;
356 }
357
index(const std::string search)358 int CgiParser::index(const std::string search)
359 {
360 std::string bufS = std::string(buf_, buflen_);
361
362 std::string::size_type i = bufS.find(search);
363
364 if (i == std::string::npos)
365 return -1;
366 else
367 return i;
368 }
369
parseHead(WebRequest & request)370 bool CgiParser::parseHead(WebRequest& request)
371 {
372 std::string head;
373 readUntilBoundary(request, "\r\n\r\n", -2, &head, 0);
374
375 std::string name;
376 std::string fn;
377 std::string ctype;
378
379 for (unsigned current = 0; current < head.length();) {
380 /* read line by line */
381 std::string::size_type i = head.find("\r\n", current);
382 const std::string text = head.substr(current, (i == std::string::npos
383 ? std::string::npos
384 : i - current));
385
386 if (regexMatch(text, content_disposition_e)) {
387 fishValue(text, name_e, name);
388 fishValue(text, filename_e, fn);
389 }
390
391 if (regexMatch(text, content_type_e)) {
392 fishValue(text, content_e, ctype);
393 }
394
395 current = i + 2;
396 }
397
398 LOG_DEBUG("name: " << name << " ct: " << ctype << " fn: " << fn);
399
400 currentKey_ = name;
401
402 if (!fn.empty()) {
403 if (!request.postDataExceeded_) {
404 /*
405 * It is not easy to create a std::ostream pointing to a
406 * temporary file name.
407 */
408 std::string spool = FileUtils::createTempFileName();
409
410 spoolStream_ = new std::ofstream(spool.c_str(),
411 std::ios::out | std::ios::binary);
412
413 request_->files_.insert
414 (std::make_pair(name, Http::UploadedFile(spool, fn, ctype)));
415
416 LOG_DEBUG("spooling file to " << spool.c_str());
417
418 } else {
419 spoolStream_ = 0;
420 // Clear currentKey so that file we don't do harm by reading this
421 // giant blob in memory
422 currentKey_ = "";
423 }
424 }
425
426 windBuffer(4);
427
428 return true;
429 }
430
parseBody(WebRequest & request,const std::string boundary)431 bool CgiParser::parseBody(WebRequest& request, const std::string boundary)
432 {
433 std::string value;
434
435 readUntilBoundary(request, boundary, 2,
436 spoolStream_ ? 0 : (!currentKey_.empty() ? &value : 0),
437 spoolStream_);
438
439 if (spoolStream_) {
440 LOG_DEBUG("completed spooling");
441 delete spoolStream_;
442 spoolStream_ = 0;
443 } else {
444 if (!currentKey_.empty()) {
445 LOG_DEBUG("value: \"" << value << "\"");
446 request_->parameters_[currentKey_].push_back(value);
447 }
448 }
449
450 currentKey_.clear();
451
452 if (std::string(buf_ + boundary.length(), 2) == "--")
453 return false;
454
455 windBuffer(boundary.length() + 2);
456
457 return true;
458 }
459
460 } // namespace Wt
461