1 // url.c -- Object representing uniform resource locators
2 // Copyright (C) 2008-2010 Markus Gutschke <markus@shellinabox.com>
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License version 2 as
6 // published by the Free Software Foundation.
7 //
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 // GNU General Public License for more details.
12 //
13 // You should have received a copy of the GNU General Public License along
14 // with this program; if not, write to the Free Software Foundation, Inc.,
15 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 //
17 // In addition to these license terms, the author grants the following
18 // additional rights:
19 //
20 // If you modify this program, or any covered work, by linking or
21 // combining it with the OpenSSL project's OpenSSL library (or a
22 // modified version of that library), containing parts covered by the
23 // terms of the OpenSSL or SSLeay licenses, the author
24 // grants you additional permission to convey the resulting work.
25 // Corresponding Source for a non-source form of such a combination
26 // shall include the source code for the parts of OpenSSL used as well
27 // as that of the covered work.
28 //
29 // You may at your option choose to remove this additional permission from
30 // the work, or from any part of it.
31 //
32 // It is possible to build this program in a way that it loads OpenSSL
33 // libraries at run-time. If doing so, the following notices are required
34 // by the OpenSSL and SSLeay licenses:
35 //
36 // This product includes software developed by the OpenSSL Project
37 // for use in the OpenSSL Toolkit. (http://www.openssl.org/)
38 //
39 // This product includes cryptographic software written by Eric Young
40 // (eay@cryptsoft.com)
41 //
42 //
43 // The most up-to-date version of this program is always available from
44 // http://shellinabox.com
45 
46 #include "config.h"
47 
48 #define _XOPEN_SOURCE 500
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 
53 #ifdef HAVE_STRINGS_H
54 #include <strings.h> // for strncasecmp()
55 #endif
56 
57 #include "libhttp/url.h"
58 
59 #include "logging/logging.h"
60 
// Helpers for marking intentionally unused function parameters.
//
// When the build configuration defines HAVE_UNUSED, parameters are tagged
// with the "unused" attribute and UNUSED() expands to an empty statement.
// Otherwise ATTR_UNUSED expands to nothing and UNUSED() references its
// argument through a void cast so that it counts as used.
#ifdef HAVE_UNUSED
#define ATTR_UNUSED __attribute__((unused))
#define UNUSED(x)   do { } while (0)
#else
#define ATTR_UNUSED
#define UNUSED(x)    do { (void)(x); } while (0)
#endif
68 
// Returns the numeric value (0..15) of the hexadecimal digit "c", accepting
// both upper and lower case letters, or -1 if "c" is not a hex digit.
static int urlHexDigit(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  // Clearing bit 0x20 maps 'a'..'f' onto 'A'..'F'.
  c &= ~0x20;
  if (c >= 'A' && c <= 'F') {
    return c - 'A' + 10;
  }
  return -1;
}

// Decodes URL-encoded text in place and returns "s".
//
// '+' becomes a space and "%XY" becomes the byte with hexadecimal value XY.
// Malformed escapes are handled leniently:
//  - '%' not followed by a hex digit is copied through unchanged;
//  - '%' followed by a single hex digit yields the value of that digit, and
//    decoding resumes at the character after it.
// A warning is logged for the first malformed escape only. The output never
// grows, so the (possibly shorter) result is written over the input.
static char *urlUnescape(char *s) {
  char *const result = s;
  char *out          = s;
  int complained     = 0;
  for (const char *in = s; *in; ) {
    char ch          = *in++;
    if (ch == '+') {
      ch             = ' ';
    } else if (ch == '%') {
      int malformed  = 1;
      const int hi   = urlHexDigit(*in);
      if (hi >= 0) {
        ch           = hi;
        const int lo = urlHexDigit(*++in);
        if (lo >= 0) {
          ch         = (hi << 4) + lo;
          ++in;
          malformed  = 0;
        }
      }
      if (malformed && !complained++) {
        warn("[http] Malformed URL encoded data \"%s\"!", result);
      }
    }
    *out++           = ch;
  }
  *out               = '\000';
  return result;
}
96 
// Destructor callback handed to initHashMap()/newHashMap() in this file.
// Both the key and the value of an entry are heap-allocated strings, so both
// are released here. "arg" is the opaque callback argument and is not used.
static void urlDestroyHashMapEntry(void *arg ATTR_UNUSED, char *key,
                                   char *value) {
  UNUSED(arg);
  free(value);
  free(key);
}
103 
// Returns a freshly allocated, NUL-terminated copy of the first "len" bytes
// of "buf", or NULL if "buf" is NULL. The caller owns the returned memory.
// The allocation is guarded with check().
static char *urlMakeString(const char *buf, int len) {
  if (!buf) {
    return NULL;
  }
  char *copy;
  check(copy = malloc(len + 1));
  memcpy(copy, buf, len);
  copy[len] = '\000';
  return copy;
}
115 
// Splits "query" (at most "len" bytes) into '&'-separated "key=value" pairs,
// URL-decodes keys and values, and adds them to "hashmap".
//
//  - Only the first '=' of a pair separates key from value.
//  - A pair without '=' is stored with a NULL value.
//  - Pairs with an empty key are skipped.
//  - The end of the buffer terminates the last pair. A NUL byte found before
//    "len" bytes were consumed stops parsing without storing the pair that
//    was being scanned.
static void urlParseQueryString(struct HashMap *hashmap, const char *query, int len) {
  const char *pairStart  = query;
  const char *valueStart = NULL;
  for (int i = 0; i <= len; i++) {
    // Position "len" acts as a virtual terminator and is never dereferenced.
    const int atEnd      = i == len;
    const char *cur      = query + i;
    const char ch        = atEnd ? '\000' : *cur;
    if (ch == '=' && !valueStart) {
      valueStart         = cur + 1;
    } else if (ch == '&' || atEnd) {
      const char *keyEnd = valueStart ? valueStart - 1 : cur;
      const int keyLen   = keyEnd - pairStart;
      if (keyLen) {
        char *k          = urlMakeString(pairStart, keyLen);
        urlUnescape(k);
        char *v          = NULL;
        if (valueStart) {
          v              = urlMakeString(valueStart, cur - valueStart);
          urlUnescape(v);
        }
        addToHashMap(hashmap, k, v);
      }
      pairStart          = cur + 1;
      valueStart         = NULL;
    }
    if (!ch) {
      break;
    }
  }
}
144 
// Parses a ';'-separated header value, such as
//   multipart/form-data; boundary="xyz"
// and adds one entry per field to "hashmap".
//
//  - "s"/"len" describe the text to parse; a NULL "s" or non-positive "len"
//    adds nothing.
//  - Each field is either "key" (stored with a NULL value) or "key=value";
//    only the first '=' of a field separates key from value.
//  - Keys are folded to lower case, because callers look them up with
//    lower-case literals ("form-data", "name", "boundary", ...).
//  - A value enclosed in double quotes is stored without the quotes.
//  - Fields with an empty key are skipped.
//
// Limitation: a ';' inside a quoted value is still treated as a separator.
static void urlParseHeaderLine(struct HashMap *hashmap, const char *s,
                               int len) {
  while (s && len > 0) {
    // Skip separators and blanks in front of the next field.
    while (len > 0 && (*s == ' ' || *s == ';')) {
      s++;
      len--;
    }
    const char *key   = s;
    const char *value = NULL;
    while (len > 0 && *s != ';') {
      if (*s == '=' && value == NULL) {
        value         = s + 1;
      }
      s++;
      len--;
    }
    int kl            = (value ? value-1 : s) - key;
    int vl            = value ? s - value : 0;
    if (kl) {
      char *k         = urlMakeString(key, kl);
      // Fold upper-case ASCII letters to lower case.
      for (char *t = k; *t; t++) {
        if (*t >= 'A' && *t <= 'Z') {
          *t         |= 0x20;
        }
      }
      char *v         = NULL;
      if (value) {
        if (vl >= 2 && value[0] == '"' && value[vl-1] == '"') {
          // Drop both the opening and the closing quote.
          value++;
          vl         -= 2;
        }
        v             = urlMakeString(value, vl);
      }
      addToHashMap(hashmap, k, v);
    }
  }
}
182 
// Finds the first occurrence of the NUL-terminated string "s" within the
// first "len" bytes of "buf". Unlike strstr(), "buf" does not have to be
// NUL-terminated and may contain NUL bytes.
//
// Returns a pointer to the match, "buf" itself if "s" is empty, or NULL if
// there is no match (including when "len" is shorter than "s").
static const char *urlMemstr(const char *buf, int len, const char *s) {
  int sLen              = strlen(s);
  if (!sLen) {
    return buf;
  }
  while (len >= sLen) {
    // A match can start at any offset in 0 .. len - sLen (inclusive), so the
    // first byte has to be searched for in len - sLen + 1 positions.
    const char *first   = memchr(buf, *s, len - sLen + 1);
    if (!first) {
      return NULL;
    }
    len                -= first - buf;
    buf                 = first;
    if (!memcmp(buf, s, sLen)) {
      return buf;
    }
    buf++;
    len--;
  }
  return NULL;
}
205 
// Tests whether the buffer "buf" of "len" bytes starts with the
// NUL-terminated string "s".
//
// Returns 0 if it does. If the buffer is shorter than "s", returns the first
// character of "s" that lies beyond the buffer (always non-zero); otherwise
// returns the memcmp() result for the first strlen(s) bytes.
static int urlMemcmp(const char *buf, int len, const char *s) {
  const int needed = strlen(s);
  return len < needed ? s[len] : memcmp(buf, s, needed);
}
214 
// Case-insensitive variant of urlMemcmp(): tests whether the buffer "buf" of
// "len" bytes starts with the NUL-terminated string "s", ignoring case.
//
// Returns 0 if it does. If the buffer is shorter than "s", returns the first
// character of "s" that lies beyond the buffer (always non-zero); otherwise
// returns the strncasecmp() result for the first strlen(s) bytes.
static int urlMemcasecmp(const char *buf, int len, const char *s) {
  const int needed = strlen(s);
  return len < needed ? s[len] : strncasecmp(buf, s, needed);
}
223 
urlParsePart(struct URL * url,const char * buf,int len)224 static void urlParsePart(struct URL *url, const char *buf, int len) {
225   // Most browsers seem to forget quoting data in the header fields. This
226   // means, it is quite possible for an HTML form to cause the submission of
227   // unparseable "multipart/form-data". If this happens, we just give up
228   // and ignore the malformed data.
229   // Example:
230   // <form method="POST" enctype="multipart/form-data">
231   //   <input type="file" name="&quot;&#13;&#10;X: x=&quot;">
232   //   <input type="submit">
233   // </form>
234   char *name           = NULL;
235   for (const char *eol; !!(eol = urlMemstr(buf, len, "\r\n")); ) {
236     if (buf == eol) {
237       buf             += 2;
238       len             -= 2;
239       if (name) {
240         char *value    = len ? urlMakeString(buf, len) : NULL;
241         addToHashMap(&url->args, name, value);
242         name           = NULL;
243       }
244       break;
245     } else {
246       if (!name && !urlMemcasecmp(buf, len, "content-disposition:")) {
247         struct HashMap fields;
248         initHashMap(&fields, urlDestroyHashMapEntry, NULL);
249         urlParseHeaderLine(&fields, buf + 20, eol - buf - 20);
250         if (getRefFromHashMap(&fields, "form-data")) {
251           // We currently don't bother to deal with binary files (e.g. files
252           // that include NUL characters). If this ever becomes necessary,
253           // we could check for the existence of a "filename" field and use
254           // that as an indicator to store the payload in something other
255           // than "url->args".
256           name         = (char *)getFromHashMap(&fields, "name");
257           if (name && *name) {
258             check(name = strdup(name));
259           }
260         }
261         destroyHashMap(&fields);
262       }
263       len             -= eol - buf + 2;
264       buf              = eol + 2;
265     }
266   }
267   free(name);
268 }
269 
urlParsePostBody(struct URL * url,const struct HttpConnection * http,const char * buf,int len)270 static void urlParsePostBody(struct URL *url,
271                              const struct HttpConnection *http,
272                              const char *buf, int len) {
273   struct HashMap contentType;
274   initHashMap(&contentType, urlDestroyHashMapEntry, NULL);
275   const char *ctHeader     = getFromHashMap(&http->header, "content-type");
276   urlParseHeaderLine(&contentType, ctHeader, ctHeader ? strlen(ctHeader) : 0);
277   if (getRefFromHashMap(&contentType, "application/x-www-form-urlencoded")) {
278     urlParseQueryString(&url->args, buf, len);
279   } else if (getRefFromHashMap(&contentType, "multipart/form-data")) {
280     const char *boundary   = getFromHashMap(&contentType, "boundary");
281     if (boundary && *boundary) {
282       const char *lastPart = NULL;
283       for (const char *part = buf; len > 0; ) {
284         const char *ptr;
285         if ((part == buf && (ptr = urlMemstr(part, len, "--")) != NULL) ||
286             (ptr = urlMemstr(part, len, "\r\n--")) != NULL) {
287           len             -= ptr - part + (part == buf ? 2 : 4);
288           part             = ptr + (part == buf ? 2 : 4);
289           if (!urlMemcmp(part, len, boundary)) {
290             int i          = strlen(boundary);
291             len           -= i;
292             part          += i;
293             if (!urlMemcmp(part, len, "\r\n")) {
294               len         -= 2;
295               part        += 2;
296               if (lastPart) {
297                 urlParsePart(url, lastPart, ptr - lastPart);
298               } else {
299                 if (ptr != buf) {
300                   info("[http] Ignoring prologue before \"multipart/form-data\"!");
301                 }
302               }
303               lastPart     = part;
304             } else if (!urlMemcmp(part, len, "--\r\n")) {
305               len         -= 4;
306               part        += 4;
307               urlParsePart(url, lastPart, ptr - lastPart);
308               lastPart     = NULL;
309               if (len > 0) {
310                 info("[http] Ignoring epilogue past end of \"multipart/"
311 				     "form-data\"!");
312               }
313             }
314           }
315         } else {
316            warn("[http] broken multipart/form-data!");
317            break;
318         }
319       }
320       if (lastPart) {
321         warn("[http] Missing final \"boundary\" for \"multipart/form-data\"!");
322       }
323     } else {
324       warn("[http] Missing \"boundary\" information for \"multipart/form-data\"!");
325     }
326   }
327   destroyHashMap(&contentType);
328 }
329 
newURL(const struct HttpConnection * http,const char * buf,int len)330 struct URL *newURL(const struct HttpConnection *http,
331                    const char *buf, int len) {
332   struct URL *url;
333   check(url = malloc(sizeof(struct URL)));
334   initURL(url, http, buf, len);
335   return url;
336 }
337 
initURL(struct URL * url,const struct HttpConnection * http,const char * buf,int len)338 void initURL(struct URL *url, const struct HttpConnection *http,
339              const char *buf, int len) {
340   url->protocol              = strdup(httpGetProtocol(http));
341   url->user                  = NULL;
342   url->password              = NULL;
343   url->host                  = strdup(httpGetHost(http));
344   url->port                  = httpGetPort(http);
345   url->path                  = strdup(httpGetPath(http));
346   url->pathinfo              = strdup(httpGetPathInfo(http));
347   url->query                 = strdup(httpGetQuery(http));
348   url->anchor                = NULL;
349   url->url                   = NULL;
350   initHashMap(&url->args, urlDestroyHashMapEntry, NULL);
351   if (!strcmp(http->method, "GET")) {
352     check(url->query);
353     urlParseQueryString(&url->args, url->query, strlen(url->query));
354   } else if (!strcmp(http->method, "POST")) {
355     urlParsePostBody(url, http, buf, len);
356   }
357 }
358 
destroyURL(struct URL * url)359 void destroyURL(struct URL *url) {
360   if (url) {
361     free(url->protocol);
362     free(url->user);
363     free(url->password);
364     free(url->host);
365     free(url->path);
366     free(url->pathinfo);
367     free(url->query);
368     free(url->anchor);
369     free(url->url);
370     destroyHashMap(&url->args);
371   }
372 }
373 
// Counterpart of newURL(): releases the contents of "url" with destroyURL()
// and then frees the struct itself. A NULL "url" is ignored.
void deleteURL(struct URL *url) {
  if (url) {
    destroyURL(url);
    free(url);
  }
}
378 
urlGetProtocol(struct URL * url)379 const char *urlGetProtocol(struct URL *url) {
380   return url->protocol;
381 }
382 
urlGetUser(struct URL * url)383 const char *urlGetUser(struct URL *url) {
384   return url->user;
385 }
386 
urlGetPassword(struct URL * url)387 const char *urlGetPassword(struct URL *url) {
388   return url->password;
389 }
390 
urlGetHost(struct URL * url)391 const char *urlGetHost(struct URL *url) {
392   return url->host;
393 }
394 
urlGetPort(struct URL * url)395 int urlGetPort(struct URL *url) {
396   return url->port;
397 }
398 
urlGetPath(struct URL * url)399 const char *urlGetPath(struct URL *url) {
400   return url->path;
401 }
402 
urlGetPathInfo(struct URL * url)403 const char *urlGetPathInfo(struct URL *url) {
404   return url->pathinfo;
405 }
406 
urlGetQuery(struct URL * url)407 const char *urlGetQuery(struct URL *url) {
408   return url->query;
409 }
410 
urlGetAnchor(struct URL * url)411 const char *urlGetAnchor(struct URL *url) {
412   return url->anchor;
413 }
414 
urlGetURL(struct URL * url)415 const char *urlGetURL(struct URL *url) {
416   if (!url->url) {
417     const char *host           = urlGetHost(url);
418     int s_size                 = 8 + strlen(host) + 25 + strlen(url->path);
419     check(*(char **)&url->url  = malloc(s_size + 1));
420     *url->url                  = '\000';
421     strncat(url->url, url->protocol, s_size);
422     strncat(url->url, "://", s_size);
423     strncat(url->url, host, s_size);
424     if (url->port != (strcmp(url->protocol, "http") ? 443 : 80)) {
425       snprintf(strrchr(url->url, '\000'), 25, ":%d", url->port);
426     }
427     strncat(url->url, url->path, s_size);
428   }
429   return url->url;
430 }
431 
urlGetArgs(struct URL * url)432 const struct HashMap *urlGetArgs(struct URL *url) {
433   return &url->args;
434 }
435 
urlParseQuery(const char * buf,int len)436 struct HashMap *urlParseQuery(const char *buf, int len) {
437   struct HashMap *hashmap = newHashMap(urlDestroyHashMapEntry, NULL);
438   urlParseQueryString(hashmap, buf, len);
439   return hashmap;
440 }
441