1 // url.c -- Object representing uniform resource locators
2 // Copyright (C) 2008-2010 Markus Gutschke <markus@shellinabox.com>
3 //
4 // This program is free software; you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License version 2 as
6 // published by the Free Software Foundation.
7 //
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 // GNU General Public License for more details.
12 //
13 // You should have received a copy of the GNU General Public License along
14 // with this program; if not, write to the Free Software Foundation, Inc.,
15 // 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
16 //
17 // In addition to these license terms, the author grants the following
18 // additional rights:
19 //
20 // If you modify this program, or any covered work, by linking or
21 // combining it with the OpenSSL project's OpenSSL library (or a
22 // modified version of that library), containing parts covered by the
23 // terms of the OpenSSL or SSLeay licenses, the author
24 // grants you additional permission to convey the resulting work.
25 // Corresponding Source for a non-source form of such a combination
26 // shall include the source code for the parts of OpenSSL used as well
27 // as that of the covered work.
28 //
29 // You may at your option choose to remove this additional permission from
30 // the work, or from any part of it.
31 //
32 // It is possible to build this program in a way that it loads OpenSSL
33 // libraries at run-time. If doing so, the following notices are required
34 // by the OpenSSL and SSLeay licenses:
35 //
36 // This product includes software developed by the OpenSSL Project
37 // for use in the OpenSSL Toolkit. (http://www.openssl.org/)
38 //
39 // This product includes cryptographic software written by Eric Young
40 // (eay@cryptsoft.com)
41 //
42 //
43 // The most up-to-date version of this program is always available from
44 // http://shellinabox.com
45
46 #include "config.h"
47
48 #define _XOPEN_SOURCE 500
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52
53 #ifdef HAVE_STRINGS_H
54 #include <strings.h> // for strncasecmp()
55 #endif
56
57 #include "libhttp/url.h"
58
59 #include "logging/logging.h"
60
// Helpers for marking function parameters that are intentionally unused.
// When the build reports support for the "unused" attribute (HAVE_UNUSED),
// the attribute itself silences the compiler and UNUSED() expands to an empty
// statement. Otherwise ATTR_UNUSED expands to nothing and UNUSED() evaluates
// its argument as a void expression.
#ifdef HAVE_UNUSED
#define ATTR_UNUSED __attribute__((unused))
#define UNUSED(x) do { } while (0)
#else
#define ATTR_UNUSED
#define UNUSED(x) do { (void)(x); } while (0)
#endif
68
// Returns the numeric value (0..15) of a hexadecimal digit, accepting both
// upper and lower case letters. Returns -1 for any other character.
static int urlHexValue(char c) {
  if (c >= '0' && c <= '9') {
    return c - '0';
  }
  c &= ~0x20;                       // Fold lower case onto upper case
  if (c >= 'A' && c <= 'F') {
    return c - 'A' + 10;
  }
  return -1;
}

// Decodes URL-encoded data in place and returns the start of the buffer.
// '+' is turned into a space, and "%XY" into the byte with hex value XY.
// Malformed escapes are tolerated: a '%' that is not followed by a hex digit
// is copied verbatim, and a '%' followed by a single hex digit yields the
// value of just that digit. A warning is logged for the first malformed
// escape only. The result is never longer than the input.
static char *urlUnescape(char *s) {
  char *const start = s;
  char *out = s;
  const char *in = s;
  int warned = 0;
  while (*in) {
    char ch = *in++;
    if (ch == '+') {
      ch = ' ';
    } else if (ch == '%') {
      const int hi = urlHexValue(*in);
      int malformed = hi < 0;
      if (!malformed) {
        ch = (char)hi;
        const int lo = urlHexValue(*++in);
        if (lo >= 0) {
          ch = (char)((hi << 4) + lo);
          ++in;
        } else {
          malformed = 1;
        }
      }
      if (malformed && !warned) {
        warned = 1;
        warn("[http] Malformed URL encoded data \"%s\"!", start);
      }
    }
    *out++ = ch;
  }
  *out = '\000';
  return start;
}
96
// Destructor callback handed to the hash maps created in this file. Both the
// key and the value of an entry are heap-allocated strings, so both are
// released here. The opaque callback argument is not used.
static void urlDestroyHashMapEntry(void *arg ATTR_UNUSED, char *key,
                                   char *value) {
  UNUSED(arg);
  free(value);
  free(key);
}
103
// Returns a freshly allocated, NUL-terminated copy of the first "len" bytes
// of "buf", or NULL if "buf" is NULL. The caller owns the returned string.
static char *urlMakeString(const char *buf, int len) {
  if (!buf) {
    return NULL;
  }
  char *s;
  check(s = malloc(len + 1));
  memcpy(s, buf, len);
  s[len] = '\000';
  return s;
}
115
// Splits "query" (which is "len" bytes long and need not be NUL-terminated)
// into "key=value" pairs separated by '&', URL-decodes each key and value,
// and adds them to "hashmap". A key without '=' is stored with a NULL value;
// pairs with an empty key are skipped. Only the first '=' of a pair separates
// key from value. Parsing stops early at an embedded NUL byte.
static void urlParseQueryString(struct HashMap *hashmap, const char *query, int len) {
  const char *keyStart = query;
  const char *valueStart = NULL;
  // The loop deliberately runs one step past the data, so that the end of
  // the input acts like a final separator.
  for (int i = 0; i <= len; i++) {
    const char *cur = query + i;
    const int atEnd = i == len;
    const char ch = atEnd ? '\000' : *cur;
    if (ch == '=' && !valueStart) {
      valueStart = cur + 1;
    } else if (ch == '&' || atEnd) {
      const int keyLen = (valueStart ? valueStart - 1 : cur) - keyStart;
      const int valueLen = valueStart ? cur - valueStart : 0;
      if (keyLen) {
        char *k = urlMakeString(keyStart, keyLen);
        urlUnescape(k);
        char *v = NULL;
        if (valueStart) {
          v = urlMakeString(valueStart, valueLen);
          urlUnescape(v);
        }
        addToHashMap(hashmap, k, v);
      }
      keyStart = cur + 1;
      valueStart = NULL;
    }
    if (!ch) {
      break;
    }
  }
}
144
// Parses the value of a structured header such as
//   multipart/form-data; boundary="xyz"
// into "hashmap". The "len" bytes at "s" are split at ';'. Each field is
// either a bare token (stored with a NULL value) or "key=value". Keys are
// converted to lower case, so that callers can look them up with lower case
// literals; values that are enclosed in double quotes are stored without the
// quotes. Fields with an empty key are skipped. A NULL "s" is accepted and
// results in no entries.
static void urlParseHeaderLine(struct HashMap *hashmap, const char *s,
                               int len) {
  while (s && len > 0) {
    // Skip separators and leading blanks in front of the next field
    while (len > 0 && (*s == ' ' || *s == ';')) {
      s++;
      len--;
    }
    const char *key = s;
    const char *value = NULL;
    // Scan to the end of the field, remembering where the value starts
    while (len > 0 && *s != ';') {
      if (*s == '=' && value == NULL) {
        value = s + 1;
      }
      s++;
      len--;
    }
    int kl = (value ? value-1 : s) - key;
    int vl = value ? s - value : 0;
    if (kl) {
      char *k = urlMakeString(key, kl);
      for (char *t = k; *t; t++) {
        // Fold upper case ASCII letters to lower case
        if (*t >= 'A' && *t <= 'Z') {
          *t |= 0x20;
        }
      }
      char *v = NULL;
      if (value) {
        if (vl >= 2 && value[0] == '"' && value[vl-1] == '"') {
          // Drop both the opening and the closing quote
          value++;
          vl -= 2;
        }
        v = urlMakeString(value, vl);
      }
      addToHashMap(hashmap, k, v);
    }
  }
}
182
// Finds the first occurrence of the NUL-terminated string "s" within the
// "len" bytes starting at "buf". The buffer does not have to be
// NUL-terminated and may contain NUL bytes. Returns a pointer to the start of
// the match, or NULL if there is none. An empty "s" matches at "buf".
static const char *urlMemstr(const char *buf, int len, const char *s) {
  const int sLen = strlen(s);
  if (!sLen) {
    return buf;
  }
  while (len >= sLen) {
    // A match can start at any offset from 0 up to and including
    // "len - sLen", so that many plus one bytes have to be scanned for the
    // first character.
    const char *first = memchr(buf, *s, len - sLen + 1);
    if (!first) {
      return NULL;
    }
    len -= first - buf;
    buf = first;
    if (!memcmp(buf, s, sLen)) {
      return buf;
    }
    buf++;
    len--;
  }
  return NULL;
}
205
// Checks whether the "len" bytes at "buf" start with the NUL-terminated
// string "s". Returns zero if they do. If the buffer is shorter than "s", the
// first character of "s" that lies beyond the buffer is returned; otherwise
// the result is that of memcmp() over the length of "s".
static int urlMemcmp(const char *buf, int len, const char *s) {
  const int prefixLen = strlen(s);
  return len < prefixLen ? s[len] : memcmp(buf, s, prefixLen);
}
214
// Case-insensitive counterpart of urlMemcmp(): checks whether the "len"
// bytes at "buf" start with the NUL-terminated string "s", ignoring case.
// Returns zero on a match. If the buffer is shorter than "s", the first
// character of "s" that lies beyond the buffer is returned; otherwise the
// result is that of strncasecmp() over the length of "s".
static int urlMemcasecmp(const char *buf, int len, const char *s) {
  const int prefixLen = strlen(s);
  if (len >= prefixLen) {
    return strncasecmp(buf, s, prefixLen);
  }
  return s[len];
}
223
urlParsePart(struct URL * url,const char * buf,int len)224 static void urlParsePart(struct URL *url, const char *buf, int len) {
225 // Most browsers seem to forget quoting data in the header fields. This
226 // means, it is quite possible for an HTML form to cause the submission of
227 // unparseable "multipart/form-data". If this happens, we just give up
228 // and ignore the malformed data.
229 // Example:
230 // <form method="POST" enctype="multipart/form-data">
231 // <input type="file" name="" X: x="">
232 // <input type="submit">
233 // </form>
234 char *name = NULL;
235 for (const char *eol; !!(eol = urlMemstr(buf, len, "\r\n")); ) {
236 if (buf == eol) {
237 buf += 2;
238 len -= 2;
239 if (name) {
240 char *value = len ? urlMakeString(buf, len) : NULL;
241 addToHashMap(&url->args, name, value);
242 name = NULL;
243 }
244 break;
245 } else {
246 if (!name && !urlMemcasecmp(buf, len, "content-disposition:")) {
247 struct HashMap fields;
248 initHashMap(&fields, urlDestroyHashMapEntry, NULL);
249 urlParseHeaderLine(&fields, buf + 20, eol - buf - 20);
250 if (getRefFromHashMap(&fields, "form-data")) {
251 // We currently don't bother to deal with binary files (e.g. files
252 // that include NUL characters). If this ever becomes necessary,
253 // we could check for the existence of a "filename" field and use
254 // that as an indicator to store the payload in something other
255 // than "url->args".
256 name = (char *)getFromHashMap(&fields, "name");
257 if (name && *name) {
258 check(name = strdup(name));
259 }
260 }
261 destroyHashMap(&fields);
262 }
263 len -= eol - buf + 2;
264 buf = eol + 2;
265 }
266 }
267 free(name);
268 }
269
urlParsePostBody(struct URL * url,const struct HttpConnection * http,const char * buf,int len)270 static void urlParsePostBody(struct URL *url,
271 const struct HttpConnection *http,
272 const char *buf, int len) {
273 struct HashMap contentType;
274 initHashMap(&contentType, urlDestroyHashMapEntry, NULL);
275 const char *ctHeader = getFromHashMap(&http->header, "content-type");
276 urlParseHeaderLine(&contentType, ctHeader, ctHeader ? strlen(ctHeader) : 0);
277 if (getRefFromHashMap(&contentType, "application/x-www-form-urlencoded")) {
278 urlParseQueryString(&url->args, buf, len);
279 } else if (getRefFromHashMap(&contentType, "multipart/form-data")) {
280 const char *boundary = getFromHashMap(&contentType, "boundary");
281 if (boundary && *boundary) {
282 const char *lastPart = NULL;
283 for (const char *part = buf; len > 0; ) {
284 const char *ptr;
285 if ((part == buf && (ptr = urlMemstr(part, len, "--")) != NULL) ||
286 (ptr = urlMemstr(part, len, "\r\n--")) != NULL) {
287 len -= ptr - part + (part == buf ? 2 : 4);
288 part = ptr + (part == buf ? 2 : 4);
289 if (!urlMemcmp(part, len, boundary)) {
290 int i = strlen(boundary);
291 len -= i;
292 part += i;
293 if (!urlMemcmp(part, len, "\r\n")) {
294 len -= 2;
295 part += 2;
296 if (lastPart) {
297 urlParsePart(url, lastPart, ptr - lastPart);
298 } else {
299 if (ptr != buf) {
300 info("[http] Ignoring prologue before \"multipart/form-data\"!");
301 }
302 }
303 lastPart = part;
304 } else if (!urlMemcmp(part, len, "--\r\n")) {
305 len -= 4;
306 part += 4;
307 urlParsePart(url, lastPart, ptr - lastPart);
308 lastPart = NULL;
309 if (len > 0) {
310 info("[http] Ignoring epilogue past end of \"multipart/"
311 "form-data\"!");
312 }
313 }
314 }
315 } else {
316 warn("[http] broken multipart/form-data!");
317 break;
318 }
319 }
320 if (lastPart) {
321 warn("[http] Missing final \"boundary\" for \"multipart/form-data\"!");
322 }
323 } else {
324 warn("[http] Missing \"boundary\" information for \"multipart/form-data\"!");
325 }
326 }
327 destroyHashMap(&contentType);
328 }
329
newURL(const struct HttpConnection * http,const char * buf,int len)330 struct URL *newURL(const struct HttpConnection *http,
331 const char *buf, int len) {
332 struct URL *url;
333 check(url = malloc(sizeof(struct URL)));
334 initURL(url, http, buf, len);
335 return url;
336 }
337
initURL(struct URL * url,const struct HttpConnection * http,const char * buf,int len)338 void initURL(struct URL *url, const struct HttpConnection *http,
339 const char *buf, int len) {
340 url->protocol = strdup(httpGetProtocol(http));
341 url->user = NULL;
342 url->password = NULL;
343 url->host = strdup(httpGetHost(http));
344 url->port = httpGetPort(http);
345 url->path = strdup(httpGetPath(http));
346 url->pathinfo = strdup(httpGetPathInfo(http));
347 url->query = strdup(httpGetQuery(http));
348 url->anchor = NULL;
349 url->url = NULL;
350 initHashMap(&url->args, urlDestroyHashMapEntry, NULL);
351 if (!strcmp(http->method, "GET")) {
352 check(url->query);
353 urlParseQueryString(&url->args, url->query, strlen(url->query));
354 } else if (!strcmp(http->method, "POST")) {
355 urlParsePostBody(url, http, buf, len);
356 }
357 }
358
destroyURL(struct URL * url)359 void destroyURL(struct URL *url) {
360 if (url) {
361 free(url->protocol);
362 free(url->user);
363 free(url->password);
364 free(url->host);
365 free(url->path);
366 free(url->pathinfo);
367 free(url->query);
368 free(url->anchor);
369 free(url->url);
370 destroyHashMap(&url->args);
371 }
372 }
373
// Destroys the contents of "url" and then frees the object itself. This is
// the counterpart of newURL(). Passing NULL is harmless.
void deleteURL(struct URL *url) {
  destroyURL(url);
  free(url);
}
378
urlGetProtocol(struct URL * url)379 const char *urlGetProtocol(struct URL *url) {
380 return url->protocol;
381 }
382
urlGetUser(struct URL * url)383 const char *urlGetUser(struct URL *url) {
384 return url->user;
385 }
386
urlGetPassword(struct URL * url)387 const char *urlGetPassword(struct URL *url) {
388 return url->password;
389 }
390
urlGetHost(struct URL * url)391 const char *urlGetHost(struct URL *url) {
392 return url->host;
393 }
394
urlGetPort(struct URL * url)395 int urlGetPort(struct URL *url) {
396 return url->port;
397 }
398
urlGetPath(struct URL * url)399 const char *urlGetPath(struct URL *url) {
400 return url->path;
401 }
402
urlGetPathInfo(struct URL * url)403 const char *urlGetPathInfo(struct URL *url) {
404 return url->pathinfo;
405 }
406
urlGetQuery(struct URL * url)407 const char *urlGetQuery(struct URL *url) {
408 return url->query;
409 }
410
urlGetAnchor(struct URL * url)411 const char *urlGetAnchor(struct URL *url) {
412 return url->anchor;
413 }
414
urlGetURL(struct URL * url)415 const char *urlGetURL(struct URL *url) {
416 if (!url->url) {
417 const char *host = urlGetHost(url);
418 int s_size = 8 + strlen(host) + 25 + strlen(url->path);
419 check(*(char **)&url->url = malloc(s_size + 1));
420 *url->url = '\000';
421 strncat(url->url, url->protocol, s_size);
422 strncat(url->url, "://", s_size);
423 strncat(url->url, host, s_size);
424 if (url->port != (strcmp(url->protocol, "http") ? 443 : 80)) {
425 snprintf(strrchr(url->url, '\000'), 25, ":%d", url->port);
426 }
427 strncat(url->url, url->path, s_size);
428 }
429 return url->url;
430 }
431
urlGetArgs(struct URL * url)432 const struct HashMap *urlGetArgs(struct URL *url) {
433 return &url->args;
434 }
435
urlParseQuery(const char * buf,int len)436 struct HashMap *urlParseQuery(const char *buf, int len) {
437 struct HashMap *hashmap = newHashMap(urlDestroyHashMapEntry, NULL);
438 urlParseQueryString(hashmap, buf, len);
439 return hashmap;
440 }
441