1 /**
2 * URL Support
3 *
4 * Copyright (C) 2013-2015
5 * Jeffrey Fulmer - <jeff@joedog.org>, et al.
6 * Copyright (C) 1999 by
7 * Jeffrey Fulmer - <jeff@joedog.org>.
8 *
9 * This file is distributed as part of Siege
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License along
22 * with this program; if not, write to the Free Software Foundation, Inc.
23 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 *--
25 */
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif/*HAVE_CONFIG_H*/
29
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <setup.h>
33 #include <url.h>
34 #include <load.h>
35 #include <perl.h>
36 #include <date.h>
37 #include <util.h>
38 #include <memory.h>
39 #include <notify.h>
40 #include <joedog/boolean.h>
41 #include <joedog/defs.h>
42
/**
 * Private representation behind the URL handle. Instances are created by
 * new_url, filled in by the __url_set_* parsers and released by url_destroy.
 */
struct URL_T
{
  int ID;             // numeric id stored by url_set_ID
  char * url;         // absolute URL text built by __url_set_absolute
  SCHEME scheme;      // protocol detected by __url_set_scheme or forced by url_set_scheme
  METHOD method;      // request method; GET unless the URL line names another one
  char * username;    // credentials portion before ':' or '@', NULL when absent
  char * password;    // credentials portion before '@', NULL when absent
  char * hostname;    // host text; a bracketed IPv6 literal keeps its brackets
  int port;           // explicit port or the scheme default from __url_default_port
  char * path;        // text up to and including the last '/'; may be the literal ""
  char * file;        // text after the last '/', up to ';', '?' or white space
  char * params;      // text after ';'; the literal "" when hasparams is FALSE
  BOOLEAN hasparams;  // TRUE when a ';' terminated the file component
  char * query;       // text after '?', up to '#' or white space
  char * frag;        // text after '#'
  char * request;     // request target with the fragment removed
  size_t postlen;     // number of payload bytes in postdata
  char * postdata;    // payload sent with POST/PUT/PATCH style requests
  char * posttemp;    // only ever reset to NULL in this file
  char * conttype;    // content type; filled lazily by url_get_conttype
  BOOLEAN cached;     // initialised to FALSE by new_url; not otherwise used in this file
  BOOLEAN redir;      // flag stored by url_set_redirect
};
67
/* Allocation size of one URL object; new_url passes it to xmalloc. */
size_t URLSIZE = sizeof(struct URL_T);

/**
 * Forward declarations of the file-local helpers. Each __url_set_* parser
 * consumes one URL component and returns a pointer to the unparsed remainder.
 */
private void __url_parse(URL this, char *url);
private void __parse_post_data(URL this, char *datap);
private char * __url_set_absolute(URL this, char *url);
private BOOLEAN __url_has_scheme (char *url);
private BOOLEAN __url_has_credentials(char *url);
private int __url_default_port(URL this);
private char * __url_set_scheme(URL this, char *url);
private char * __url_set_password(URL this, char *str);
private char * __url_set_username(URL this, char *str);
private char * __url_set_hostname(URL this, char *str);
private char * __url_set_port(URL this, char *str);
private char * __url_set_path(URL this, char *str);
private char * __url_set_file(URL this, char *str);
private char * __url_set_parameters(URL this, char *str);
private char * __url_set_query(URL this, char *str);
private char * __url_set_fragment(URL this, char *str);
private char * __url_escape(const char *s);
private METHOD __url_has_method(const char *url);
private void __url_replace(char *url, const char *needle, const char *replacement);
89
90 URL
new_url(char * str)91 new_url(char *str)
92 {
93 URL this;
94 this = xmalloc(URLSIZE);
95 this->ID = 0;
96 this->scheme = HTTP;
97 this->hasparams = FALSE;
98 this->params = NULL;
99 this->redir = FALSE;
100 this->method = GET;
101 this->username = NULL;
102 this->password = NULL;
103 this->hostname = NULL;
104 this->port = 80;
105 this->path = NULL;
106 this->file = NULL;
107 this->params = NULL;
108 this->hasparams = FALSE;
109 this->query = NULL;
110 this->frag = NULL;
111 this->request = NULL;
112 this->postlen = 0;
113 this->postdata = NULL;
114 this->posttemp = NULL;
115 this->conttype = NULL;
116 this->cached = FALSE;
117 this->redir = FALSE;
118 __url_parse(this, str);
119 return this;
120 }
121
122 URL
url_destroy(URL this)123 url_destroy(URL this)
124 {
125 if (this!=NULL) {
126 xfree(this->url);
127 xfree(this->username);
128 xfree(this->password);
129 xfree(this->hostname);
130 if (this->path != NULL && this->path[0] != '\0') {
131 xfree(this->path);
132 }
133 xfree(this->file);
134 xfree(this->query);
135 xfree(this->frag);
136 xfree(this->request);
137 xfree(this->conttype);
138 xfree(this->postdata);
139 xfree(this->posttemp);
140 if (this->hasparams==TRUE) {
141 xfree(this->params);
142 }
143 xfree(this);
144 }
145 return NULL;
146 }
147
148 /**
149 * URL setters
150 */
151 void
url_set_ID(URL this,int ID)152 url_set_ID(URL this, int ID)
153 {
154 this->ID = ID;
155 return;
156 }
157
158 /**
159 * This function is largely for RE-setting the scheme
160 */
161 void
url_set_scheme(URL this,SCHEME scheme)162 url_set_scheme(URL this, SCHEME scheme)
163 {
164 char *tmp;
165 char *str;
166 int n;
167 int len;
168
169 this->scheme = scheme;
170 str = strdup(url_get_scheme_name(this));
171
172 if (this->url != NULL) {
173 tmp = xstrdup(this->url);
174 if (!strncasecmp(tmp, "http:", 5)){
175 n = 7;
176 }
177 if (!strncasecmp(tmp, "https:", 6)){
178 n = 8;
179 }
180 if (!strncasecmp(tmp, "ftp:", 4)){
181 n = 6;
182 }
183 len = strlen(tmp);
184 memmove(tmp, tmp+n, len - n + 1);
185 xfree(this->url);
186 len = strlen(tmp)+strlen(str)+4;
187 this->url = xmalloc(len);
188 memset(this->url, '\0', len);
189 snprintf(this->url, len, "%s://%s", str, tmp);
190 xfree(tmp);
191 xfree(str);
192 }
193 return;
194 }
195
196 /**
197 * if we don't have a hostname at
198 * construction, we can use this
199 * method to add one...
200 */
201 void
url_set_hostname(URL this,char * hostname)202 url_set_hostname(URL this, char *hostname)
203 {
204 size_t len;
205
206 if (empty(hostname)) return;
207
208 xfree(this->hostname);
209 len = strlen(hostname)+1;
210 this->hostname = xmalloc(len);
211 memset(this->hostname, '\0', len);
212 strncpy(this->hostname, hostname, len);
213 return;
214 }
215
216 void
url_set_redirect(URL this,BOOLEAN redir)217 url_set_redirect(URL this, BOOLEAN redir)
218 {
219 this->redir = redir;
220 }
221
222 void
url_set_conttype(URL this,char * type)223 url_set_conttype(URL this, char *type) {
224 this->conttype = xstrdup(type);
225 return;
226 }
227
228 void
url_set_method(URL this,METHOD method)229 url_set_method(URL this, METHOD method) {
230 this->method = method;
231 }
232
233 /**
234 * invoked when post data is read from a file.
235 * see load.c
236 */
237 void
url_set_postdata(URL this,char * postdata,size_t postlen)238 url_set_postdata(URL this, char *postdata, size_t postlen)
239 {
240 this->postlen = postlen;
241 this->postdata = xmalloc(this->postlen+1);
242 memcpy(this->postdata, postdata, this->postlen);
243 this->postdata[this->postlen] = '\0';
244 return;
245 }
246
247 /**
248 * URL getters
249 */
250 public int
url_get_ID(URL this)251 url_get_ID(URL this)
252 {
253 return this->ID;
254 }
255
256 public char *
url_get_absolute(URL this)257 url_get_absolute(URL this)
258 {
259 return (this == NULL) ? "NULL" : this->url;
260 }
261
262 public SCHEME
url_get_scheme(URL this)263 url_get_scheme(URL this)
264 {
265 return this->scheme;
266 }
267
268 public char *
url_get_display(URL this)269 url_get_display(URL this)
270 {
271 if (my.fullurl)
272 return url_get_absolute(this);
273
274 if (this->method == GET)
275 return url_get_request(this);
276
277 return url_get_absolute(this);
278 }
279
280 public char *
url_get_scheme_name(URL this)281 url_get_scheme_name(URL this)
282 {
283 switch (this->scheme) {
284 case HTTP:
285 return "http";
286 case HTTPS:
287 return "https";
288 case FTP:
289 return "ftp";
290 case PROXY:
291 return "proxy";
292 case UNSUPPORTED:
293 default:
294 return "unsupported";
295 }
296 return "unsupported";
297 }
298
299 public char *
url_get_username(URL this)300 url_get_username(URL this)
301 {
302 return this->username;
303 }
304
305 public char *
url_get_password(URL this)306 url_get_password(URL this)
307 {
308 return this->password;
309 }
310
311 public char *
url_get_hostname(URL this)312 url_get_hostname(URL this)
313 {
314 return this->hostname;
315 }
316
317 public int
url_get_port(URL this)318 url_get_port(URL this)
319 {
320 return this->port;
321 }
322
323 public char *
url_get_path(URL this)324 url_get_path(URL this)
325 {
326 return this->path;
327 }
328
329 public char *
url_get_file(URL this)330 url_get_file(URL this)
331 {
332 return this->file;
333 }
334
335 public char *
url_get_request(URL this)336 url_get_request(URL this)
337 {
338 return this->request;
339 }
340
341 public char *
url_get_parameters(URL this)342 url_get_parameters(URL this)
343 {
344 return this->params;
345 }
346
347 public char *
url_get_query(URL this)348 url_get_query(URL this)
349 {
350 return this->query;
351 }
352
353 public char *
url_get_fragment(URL this)354 url_get_fragment(URL this)
355 {
356 return this->frag;
357 }
358
359 public size_t
url_get_postlen(URL this)360 url_get_postlen(URL this) {
361 return this->postlen;
362 }
363
364 public char *
url_get_postdata(URL this)365 url_get_postdata(URL this) {
366 return this->postdata;
367 }
368
369 public char *
url_get_posttemp(URL this)370 url_get_posttemp(URL this) {
371 return this->posttemp;
372 }
373
374 public char *
url_get_conttype(URL this)375 url_get_conttype(URL this) {
376
377 if (this->conttype == NULL) {
378 if (! empty(my.conttype)) {
379 this->conttype = xstrdup(my.conttype);
380 } else {
381 this->conttype = xstrdup("application/x-www-form-urlencoded");
382 }
383 }
384 return this->conttype;
385 }
386
387 public METHOD
url_get_method(URL this)388 url_get_method(URL this) {
389 return this->method;
390 }
391
392 public char *
url_get_method_name(URL this)393 url_get_method_name(URL this) {
394 switch (this->method){
395 case POST:
396 return "POST";
397 case PATCH:
398 return "PATCH";
399 case PUT:
400 return "PUT";
401 case DELETE:
402 return "DELETE";
403 case OPTIONS:
404 return "OPTIONS";
405 case HEAD:
406 return "HEAD";
407 case GET:
408 default:
409 return "GET";
410 }
411 return "GET";
412 }
413
414 BOOLEAN
url_is_redirect(URL this)415 url_is_redirect(URL this)
416 {
417 return this->redir;
418 }
419
420 void
url_set_username(URL this,char * username)421 url_set_username(URL this, char *username)
422 {
423 size_t len = strlen(username);
424
425 this->username = malloc(len+1);
426 memset(this->username, '\0', len+1);
427 memcpy(this->username, username, len);
428 return;
429 }
430
431 void
url_set_password(URL this,char * password)432 url_set_password(URL this, char *password)
433 {
434 size_t len = strlen(password);
435
436 this->password = malloc(len+1);
437 memset(this->password, '\0', len+1);
438 memcpy(this->password, password, len);
439 return;
440 }
441
442 void
url_dump(URL this)443 url_dump(URL this)
444 {
445 printf("URL ID: %d\n", this->ID);
446 printf("Abolute: %s\n", this->url);
447 printf("Scheme: %s\n", url_get_scheme_name(this));
448 printf("Method: %s\n", url_get_method_name(this));
449 printf("Username: %s\n", url_get_username(this));
450 printf("Password: %s\n", url_get_password(this));
451 printf("Hostname: %s\n", url_get_hostname(this));
452 printf("Port: %d\n", url_get_port(this));
453 printf("Path: %s\n", url_get_path(this));
454 printf("File: %s\n", url_get_file(this));
455 printf("Request: %s\n", url_get_request(this));
456 if (this->hasparams==TRUE)
457 printf("Params: %s\n", url_get_parameters(this));
458 printf("Query: %s\n", url_get_query(this));
459 printf("Fragment: %s\n", url_get_fragment(this));
460 printf("Post Len: %d\n", (int)url_get_postlen(this));
461 printf("Post Data: %s\n", url_get_postdata(this));
462 printf("Cont Type: %s\n", url_get_conttype(this));
463 return;
464 }
465
466 URL
url_normalize(URL req,char * location)467 url_normalize(URL req, char *location)
468 {
469 URL ret;
470 char * url;
471 size_t len;
472
473 /**
474 * Should we just do this for all URLs
475 * or just the ones we parse??
476 */
477 __url_replace(location, "&", "&");
478 __url_replace(location, "&", "&");
479
480 len = strlen(url_get_absolute(req)) + strlen(location) + 32;
481
482 if (stristr(location, "data:image/gif")) {
483 // stupid CSS tricks
484 return NULL;
485 }
486
487 if (stristr(location, "://")) {
488 // it's very likely normalized
489 ret = new_url(location);
490
491 // but we better test it...
492 if (strlen(url_get_hostname(ret)) > 1) {
493 return ret;
494 }
495 }
496
497 if ((location[0] != '/') && location[0] != '.' && (strchr(location, '.') != NULL && strchr(location, '/') != NULL)) {
498 /**
499 * This is probably host/path; it doesn't start with relevent path
500 * indicators and it contains the hallmarks of host/path namely at
501 * least one dot and slash
502 */
503 ret = new_url(location);
504 url_set_scheme(ret, url_get_scheme(req));
505 // so we better test it...
506 if (strchr(url_get_hostname(ret), '.') != NULL) {
507 return ret;
508 }
509 }
510
511 if (strstr(location, "localhost") != NULL) {
512 ret = new_url(location);
513 url_set_scheme(ret, url_get_scheme(req));
514 if (strlen(url_get_hostname(ret)) == 9) {
515 // we found and correctly parsed localhost
516 return ret;
517 }
518 }
519
520 /**
521 * If we got this far we better construct it...
522 */
523 url = (char*)malloc(len);
524 memset(url, '\0', len);
525
526 if (location[0] == '/') {
527 if (strlen(location) > 1 && location[1] == '/') {
528 /* starts with // so we should use base protocol */
529 snprintf(url, len, "%s:%s", url_get_scheme_name(req), location);
530 } else {
531 snprintf(url, len, "%s://%s:%d%s", url_get_scheme_name(req), url_get_hostname(req), url_get_port(req), location);
532 }
533 } else {
534 if (endswith("/", url_get_path(req)) == TRUE) {
535 char *tmp;
536 /**
537 * We're dealing with a req that ends in / and a relative
538 * URL that starts with ./ We want to increment two places
539 * to avoid this path: /haha/./mama.jpg
540 */
541 if (location[0] == '.' && strlen(location) > 1) {
542 tmp = location+2;
543 } else {
544 tmp = location;
545 }
546 snprintf ( // if the path ends with / we won't need one in the format
547 url, len,
548 "%s://%s:%d%s%s",
549 url_get_scheme_name(req), url_get_hostname(req), url_get_port(req), url_get_path(req), tmp
550 );
551 } else {
552 snprintf ( // need to add a slash to separate base path from parsed path/file
553 url, len,
554 "%s://%s:%d%s/%s",
555 url_get_scheme_name(req), url_get_hostname(req), url_get_port(req), url_get_path(req), location
556 );
557 }
558 }
559 ret = new_url(url);
560 url_set_scheme(ret, url_get_scheme(req));
561 free(url);
562 return ret;
563 }
564
565 char *
url_normalize_string(URL req,char * location)566 url_normalize_string(URL req, char *location)
567 {
568 char *t;
569 URL u;
570
571 u = url_normalize(req, location);
572 t = strdup(url_get_absolute(u));
573 u = url_destroy(u);
574 return t;
575 }
576
577 private void
__url_parse(URL this,char * url)578 __url_parse(URL this, char *url)
579 {
580 char *ptr = NULL;
581 char *esc = NULL;
582 char *post;
583
584 /**
585 * URL escaping is in its infancy so we're
586 * going to make it a configurable option.
587 * see: url-escaping in siegerc.
588 */
589 esc = __url_escape(url);
590 if (my.escape) {
591 ptr = __url_set_absolute(this, esc);
592 } else {
593 ptr = __url_set_absolute(this, url);
594 }
595
596 ptr = __url_set_scheme(this, ptr);
597
598 post = strstr(this->url, " POST");
599
600 if (! post) {
601 post = strstr(this->url, " PUT");
602 }
603
604 if (! post) {
605 post = strstr(this->url, " PATCH");
606 }
607
608 if (! post) {
609 post = strstr(this->url, " OPTIONS");
610 }
611
612 if (! post) {
613 post = strstr(this->url, " DELETE");
614 }
615
616 if (post != NULL){
617 if (!strncasecmp(post," PUT", 4)) {
618 this->method = PUT;
619 *post = '\0';
620 post += 4;
621 } else if (!strncasecmp(post," POST", 5)) {
622 this->method = POST;
623 *post = '\0';
624 post += 5;
625 } else if (!strncasecmp(post," DELETE", 7)) {
626 this->method = DELETE;
627 *post = '\0';
628 post += 7;
629 } else if (!strncasecmp(post," OPTIONS", 8)) {
630 this->method = OPTIONS;
631 *post = '\0';
632 post += 8;
633 } else {
634 this->method = PATCH;
635 *post = '\0';
636 post += 6;
637 }
638 __parse_post_data(this, post);
639 } else {
640 this->method = GET;
641 this->postdata = NULL;
642 this->posttemp = NULL;
643 this->postlen = 0;
644 }
645
646 if (__url_has_credentials(ptr)) {
647 ptr = __url_set_username(this, ptr);
648 ptr = __url_set_password(this, ptr);
649 }
650
651 ptr = __url_set_hostname(this, ptr);
652 ptr = __url_set_port(this, ptr);
653 ptr = __url_set_path(this, ptr);
654 ptr = __url_set_file(this, ptr);
655 ptr = __url_set_parameters(this, ptr);
656 ptr = __url_set_query(this, ptr);
657 ptr = __url_set_fragment(this, ptr);
658 return;
659 }
660
661 private void
__parse_post_data(URL this,char * datap)662 __parse_post_data(URL this, char *datap)
663 {
664 for (; isspace((unsigned int)*datap); datap++) {
665 /* Advance past white space */
666 }
667 if (*datap == '<') {
668 datap++;
669 load_file(this, datap);
670 datap = __url_set_path(this, datap);
671 datap = __url_set_file(this, datap);
672 return;
673 } else {
674 this->postdata = xstrdup(datap);
675 this->postlen = strlen(this->postdata);
676 if (! empty(my.conttype)) {
677 this->conttype = xstrdup(my.conttype);
678 } else {
679 this->conttype = xstrdup("application/x-www-form-urlencoded");
680 }
681 return;
682 }
683
684 return;
685 }
686
687 /**
688 * assign the full url to this->url
689 */
690 private char *
__url_set_absolute(URL this,char * url)691 __url_set_absolute(URL this, char *url)
692 {
693 int n;
694 size_t len;
695 char *slash;
696 char scheme[16];
697
698 if (empty(url)) return NULL;
699
700 memset(scheme, '\0', 16);
701
702 if (!strncasecmp(url, "http:", 5)){
703 n = 7;
704 strncpy(scheme, "http", 4);
705 }
706 if (!strncasecmp(url, "https:", 6)){
707 n = 8;
708 strncpy(scheme, "https", 5);
709 }
710 if (!strncasecmp(url, "ftp:", 4)){
711 n = 6;
712 strncpy(scheme, "ftp", 3);
713 }
714 if (strlen(scheme) < 3) {
715 // A scheme wasn't supplied; we'll use http by default.
716 n = 7;
717 strncpy(scheme, "http", 4);
718 }
719
720 len = strlen(url)+5;
721 if (!__url_has_scheme(url)) {
722 this->url = xmalloc(len+n);
723 memset(this->url, '\0', len+n);
724 slash = strstr(url, "/");
725 if (slash) {
726 snprintf(this->url, len+n, "%s://%s", scheme, url);
727 } else {
728 snprintf(this->url, len+n, "%s://%s/", scheme, url);
729 }
730 } else {
731 this->url = xmalloc(len);
732 memset(this->url, '\0', len);
733 snprintf(this->url, len, "%s", url);
734 }
735 return this->url;
736 }
737
738 #define SCHEME_CHAR(ch) (isalnum (ch) || (ch) == '-' || (ch) == '+')
739 /**
740 * stolen from wget:url.c
741 */
742 private BOOLEAN
__url_has_scheme(char * url)743 __url_has_scheme (char *url)
744 {
745 const char *p = url;
746
747 /* The first char must be a scheme char. */
748 if (!*p || !SCHEME_CHAR (*p))
749 return FALSE;
750 ++p;
751 /* Followed by 0 or more scheme chars. */
752 while (*p && SCHEME_CHAR (*p))
753 ++p;
754 /* Terminated by ':'. */
755 return *p == ':';
756 }
757
758 private BOOLEAN
__url_has_credentials(char * url)759 __url_has_credentials(char *url)
760 {
761 /**
762 * if there's an @ before /?#; then we have creds
763 */
764 const char *p = (const char *)strpbrk (url, "@/?#;");
765 if (!p || *p != '@')
766 return FALSE;
767 return TRUE;
768 }
769
770 private int
__url_default_port(URL this)771 __url_default_port(URL this)
772 {
773 switch(this->scheme){
774 case FTP:
775 return 21;
776 case HTTP:
777 return 80;
778 case HTTPS:
779 return 443;
780 case UNSUPPORTED:
781 default:
782 return 80;
783 }
784 }
785
786 /**
787 * set the scheme, i.e., http/https
788 * <SCHEME>://<username>:<password>@<hostname>:<port>/<path>;<params>?<query>#<frag>
789 */
790 private char *
__url_set_scheme(URL this,char * url)791 __url_set_scheme(URL this, char *url)
792 {
793 if(!strncasecmp(this->url, "http:", 5)){
794 this->scheme = HTTP;
795 return url+7;
796 }
797 if(!strncasecmp(this->url, "https:", 6)){
798 this->scheme = HTTPS;
799 return url+8;
800 }
801 if(!strncasecmp(this->url, "ftp:", 4)){
802 this->scheme = FTP;
803 return url+6;
804 }
805 this->scheme = UNSUPPORTED;
806 return url;
807 }
808
809 /**
810 * set the username
811 * <scheme>://<USERNAME>:<password>@<hostname>:<port>/<path>;<params>?<query>#<frag>
812 */
813 private char *
__url_set_username(URL this,char * str)814 __url_set_username(URL this, char *str)
815 {
816 int i;
817 char *a;
818 char *s;
819
820 a = strchr(str, '@');
821 s = strchr(str, '/');
822
823 if((!a) || (s && (a >= s))){
824 return str;
825 }
826
827 for(i = 0; str[i] && str[i] != ':' && str[i] != '@' && str[i] != '/'; i++);
828
829 if(str[i] != '@' && str[i] != ':'){
830 return str;
831 }
832
833 this->username = malloc(i+1);
834 memcpy(this->username, str, i + 1);
835 this->username[i] = '\0';
836 str += i + 1;
837
838 return str;
839 }
840
841 /**
842 * set the password
843 * <scheme>://<username>:<PASSWORD>@<hostname>:<port>/<path>;<params>?<query>#<frag>
844 */
845 private char *
__url_set_password(URL this,char * str)846 __url_set_password(URL this, char *str)
847 {
848 int i;
849 char *a;
850 char *s;
851 a = strchr(str, '@');
852 s = strchr(str, '/');
853
854 if((!a) || (s && (a >= s)) ){
855 return str;
856 }
857 /**
858 * XXX: as the original author (Zachary Beane <xach@xach.com>) notes:
859 * this code breaks if user has an '@' or a '/' in their password.
860 */
861 for(i = 0 ; str[i] != '@'; i++);
862 this->password = xmalloc(i+1);
863
864 memcpy(this->password, str, i);
865 this->password[i] = '\0';
866
867 str += i + 1;
868
869 return str;
870 }
871
872 /**
873 * set the hostname
874 * <scheme>://<username>:<password>@<HOSTNAME>:<port>/<path>;<params>?<query>#<frag>
875 */
876 private char *
__url_set_hostname(URL this,char * str)877 __url_set_hostname(URL this, char *str)
878 {
879 int i;
880 int n;
881 int len;
882
883 if (startswith("//", str)) {
884 n = 2;
885 len = strlen(str);
886 memmove(str, str+n, len - n + 1);
887 }
888
889 /**
890 * Check for IPv6 address. The convention here is to use square brackets
891 * around the IPv6 address in order to have a clear delimitation between
892 * address and port
893 */
894 if (startswith("[", str)) {
895 /* skip to matching square bracket */
896 for (i = 0; str[i] && str[i] != ']'; i++);
897
898 if (str[i] == ']') {
899 i++;
900 }
901 } else {
902 /* skip to end, slash, or port colon */
903 for (i = 0; str[i] && str[i] != '/' && str[i] != '#' && str[i] != ':'; i++);
904 }
905
906 this->hostname = xmalloc(i + 1);
907 memset(this->hostname, '\0', i+1);
908 memcpy(this->hostname, str, i);
909
910 /* if there's a port */
911 if (str[i] == ':') {
912 str += i + 1;
913 } else {
914 str += i;
915 }
916 return str;
917 }
918
919 /**
920 * set the port
921 * <scheme>://<username>:<password>@<hostname>:<PORT>/<path>;<params>?<query>#<frag>
922 */
923 private char *
__url_set_port(URL this,char * str)924 __url_set_port(URL this, char *str)
925 {
926 char *portstr;
927 int i;
928
929 this->port = __url_default_port(this);
930
931 for(i = 0; isdigit(str[i]); i++);
932
933 if(i == 0) return str;
934
935
936 portstr = malloc(i + 1);
937 memcpy(portstr, str, i + 1);
938 portstr[i] = '\0';
939
940 this->port = atoi(portstr);
941 xfree(portstr);
942
943 str += i;
944 return str;
945 }
946
947 /**
948 * set the path
949 * <scheme>://<username>:<password>@<hostname>:<port>/<PATH>;<params>?<query>#<frag>
950 */
951 private char *
__url_set_path(URL this,char * str)952 __url_set_path(URL this, char *str)
953 {
954 int i; // capture the lenght of the path
955 int j; // capture the length of the request (sans frag)
956 char *c;
957
958 if (str != NULL && str[0] == '#') {
959 // WTF'ery. We probably have this: www.joedog.org#haha
960 this->request = xstrdup("/");
961 return str;
962 }
963
964 this->request = xstrdup(str);
965
966 /**
967 * Does the request have a fragment?
968 * Let's whack that annoyance off...
969 */
970 c = (char *)strstr(this->request, "#");
971 if (c) {
972 *c = '\0';
973 }
974
975 for (i = strlen(str); i > 0 && str[i] != '/'; i--);
976 for (j = 0; str[j] && (str[j] != '#' && !isspace(str[j])); j++);
977
978 if (str[i] != '/') {
979 if (this->scheme == FTP) {
980 this->path = "";
981 } else {
982 this->path = xmalloc(2);
983 this->request = xmalloc(2);
984 strncpy(this->path, "/", 2);
985 strncpy(this->request, "/", 2);
986 this->path[1] = '\0';
987 this->request[1] = '\0';
988 }
989 } else {
990 this->path = xmalloc(i+2);
991 memcpy(this->path, str, i+1);
992 this->path[i] = '/';
993 this->path[i + 1] = '\0';
994 if (this->scheme == FTP && this->path[0] == '/') {
995 memmove(this->path, this->path+1, strlen(this->path));
996 }
997 }
998 trim(this->request);
999 str += i + 1;
1000 return str;
1001 }
1002
1003 /**
1004 * set the file
1005 * <scheme>://<username>:<password>@<hostname>:<port>/<FILE>;<params>?<query>#<frag>
1006 */
1007 private char *
__url_set_file(URL this,char * str)1008 __url_set_file(URL this, char *str)
1009 {
1010 int i;
1011
1012 if (str==NULL) return NULL;
1013 if (this->file != NULL && strlen(this->file) > 1) return str;
1014
1015 for(i = 0; str[i] && (str[i] != ';' && str[i] != '?' && !isspace(str[i])); i++);
1016 this->file = xmalloc(i+1);
1017 memset(this->file, '\0', i+1);
1018 memcpy(this->file, str, i);
1019 trim(this->file);
1020
1021 /* if there are params or a query string */
1022 if (str[i] == ';') {
1023 this->hasparams = TRUE;
1024 str += i + 1;
1025 } else if(str[i] == '?') {
1026 str += i + 1;
1027 } else {
1028 str += i;
1029 }
1030 return str;
1031 }
1032
1033 /**
1034 * set the parameters
1035 * <scheme>://<username>:<password>@<hostname>:<port>/<path>;<PARAMS>?<query>#<frag>
1036 */
1037 private char *
__url_set_parameters(URL this,char * str)1038 __url_set_parameters(URL this, char *str)
1039 {
1040 int i;
1041
1042 if (str==NULL) return NULL;
1043 if (this->params != NULL && strlen(this->params) > 1) {
1044 return str;
1045 }
1046
1047 if (this->hasparams == FALSE) {
1048 this->params = "";
1049 return str;
1050 }
1051
1052 for (i = 0; str[i] && (str[i] != '?' && !isspace(str[i])); i++);
1053
1054 this->params = xmalloc(i+1);
1055 memset(this->params, '\0', i+1);
1056 memcpy(this->params, str, i);
1057
1058 /* if there is a query string */
1059 if(str[i] == '?'){
1060 str += i + 1;
1061 } else {
1062 str += i;
1063 }
1064 return str;
1065 }
1066
1067 /**
1068 * set the query
1069 * <scheme>://<username>:<password>@<hostname>:<port>/<path>;<params>?<QUERY>#<frag>
1070 */
1071 private char *
__url_set_query(URL this,char * str)1072 __url_set_query(URL this, char *str)
1073 {
1074 int i;
1075
1076 if (str==NULL) {
1077 this->query = xstrcat("");
1078 return NULL;
1079 }
1080
1081 if (this->query != NULL && strlen(this->query) > 1) return str;
1082
1083 for(i = 0; str[i] && (str[i] != '#' && !isspace(str[i])); i++);
1084
1085 this->query = xmalloc(i+1);
1086 memset(this->query, '\0', i+1);
1087 memcpy(this->query, str, i);
1088
1089 /* if there are params or a query string */
1090 if(str[i] == '#'){
1091 str += i + 1;
1092 } else {
1093 str += i;
1094 }
1095 return str;
1096 }
1097
1098 /**
1099 * set the fragment (not used by siege)
1100 * <scheme>://<username>:<password>@<hostname>:<port>/<path>;<params>?<query>#<FRAG>
1101 */
1102 private char *
__url_set_fragment(URL this,char * str)1103 __url_set_fragment(URL this, char *str)
1104 {
1105 int i;
1106
1107 if (str==NULL) return NULL;
1108 if (this->frag != NULL && strlen(this->frag) > 1) return str;
1109
1110 for(i = 0; str[i] && !isspace(str[i]); i++);
1111
1112 this->frag = xmalloc(i+1);
1113 memcpy(this->frag, str, i);
1114
1115 str += i + 1;
1116 return str;
1117 }
1118
/**
 * The following functions provide url encoding. They
 * were lifted from wget:
 * Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
 * Free Software Foundation, Inc.
 */

/* Bit flags stored in urlchr_table, one entry per byte value. */
enum {
  /* rfc1738 reserved chars, preserved from encoding. */
  urlchr_reserved = 1,

  /* rfc1738 unsafe chars, plus some more. */
  urlchr_unsafe = 2
};

/* Test the table entry of byte c against a flag mask; c is cast to unsigned char for the lookup. */
#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))
#define URL_RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)
#define URL_UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe)

/* Shorthands for the table: */
#define R urlchr_reserved
#define U urlchr_unsafe
#define RU R|U

/**
 * Classification of all 256 byte values. The first sixteen rows cover
 * 0x00-0x7f, eight values per row, labelled on the right; the remaining
 * eight rows mark every value from 0x80 to 0xff as unsafe.
 */
static const unsigned char urlchr_table[256] =
{
  U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */
  U, U, U, U, U, U, U, U, /* BS HT LF VT FF CR SO SI */
  U, U, U, U, U, U, U, U, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
  U, U, U, U, U, U, U, U, /* CAN EM SUB ESC FS GS RS US */
  U, 0, U, RU, 0, U, R, 0, /* SP ! " # $ % & ' */
  0, 0, 0, R, 0, 0, 0, R, /* ( ) * + , - . / */
  0, 0, 0, 0, 0, 0, 0, 0, /* 0 1 2 3 4 5 6 7 */
  0, 0, RU, R, U, R, U, R, /* 8 9 : ; < = > ? */
  RU, 0, 0, 0, 0, 0, 0, 0, /* @ A B C D E F G */
  0, 0, 0, 0, 0, 0, 0, 0, /* H I J K L M N O */
  0, 0, 0, 0, 0, 0, 0, 0, /* P Q R S T U V W */
  0, 0, 0, RU, U, RU, U, 0, /* X Y Z [ \ ] ^ _ */
  U, 0, 0, 0, 0, 0, 0, 0, /* ` a b c d e f g */
  0, 0, 0, 0, 0, 0, 0, 0, /* h i j k l m n o */
  0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
  0, 0, 0, U, U, U, U, U, /* x y z { | } ~ DEL */

  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,

  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
  U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
};
/* the shorthands are only meant for the initialiser above */
#undef R
#undef U
#undef RU
1174
/* What __url_escape does with the character at the current position. */
enum copy_method {
  CM_DECODE,       /* replace a %xx sequence by the byte it stands for */
  CM_ENCODE,       /* replace the character by its %xx sequence */
  CM_PASSTHROUGH   /* copy the character unchanged */
};
1176
1177 /**
1178 * Decide whether to encode, decode, or pass through the char at P.
1179 * This used to be a macro, but it got a little too convoluted.
1180 */
1181 static inline enum copy_method
decide_copy_method(const char * p)1182 decide_copy_method (const char *p)
1183 {
1184 if (*p == '%') {
1185 if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2))) {
1186 /**
1187 * %xx sequence: decode it, unless it would decode to an
1188 * unsafe or a reserved char; in that case, leave it as is.
1189 */
1190 char preempt = X2DIGITS_TO_NUM (*(p + 1), *(p + 2));
1191 if (URL_UNSAFE_CHAR (preempt) || URL_RESERVED_CHAR (preempt))
1192 return CM_PASSTHROUGH;
1193 else
1194 return CM_DECODE;
1195 } else {
1196 return CM_ENCODE;
1197 }
1198 }
1199 else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p))
1200 return CM_ENCODE;
1201 else
1202 return CM_PASSTHROUGH;
1203 }
1204
1205 static METHOD
__url_has_method(const char * url)1206 __url_has_method(const char *url)
1207 {
1208 unsigned int i = 0;
1209 const char * r = NULL;
1210 static const char* const methods[] = {
1211 " GET", " HEAD", " POST", " PUT", " TRACE", " DELETE", " OPTIONS", " CONNECT", " PATCH"
1212 };
1213
1214 for (i = 0; i < sizeof(methods) / sizeof(methods[0]); i++) {
1215 r = strstr(url, methods[i]);
1216 if (r != NULL) return i;
1217 }
1218
1219 return NOMETHOD;
1220 }
1221
1222 private char *
__url_escape(const char * s)1223 __url_escape(const char *s)
1224 {
1225 const char *p1;
1226 char *newstr, *p2;
1227 int oldlen, newlen, host_len;
1228 char *path_start, *host_start;
1229
1230 int encode_count = 0;
1231 int decode_count = 0;
1232
1233 /**
1234 * FIXME: we're not going to escape siege method
1235 * URLS, i.e., things with PUT or POST but if the
1236 * path contains spaces they won't be escaped.
1237 */
1238 if (__url_has_method(s)!=NOMETHOD) {
1239 return (char *)s;
1240 }
1241
1242 /* skip directly to path */
1243 host_start = strstr(s, "//");
1244 if (host_start) {
1245 host_start += 2;
1246 } else {
1247 host_start = (char *)s;
1248 }
1249
1250 path_start = strstr(host_start, "/");
1251 if (path_start) {
1252 path_start += 1;
1253 } else { /* there is no path to escape */
1254 return (char *)s;
1255 }
1256
1257 /* First, pass through the string to see if there's anything to do,
1258 and to calculate the new length. */
1259 for (p1 = path_start; *p1; p1++) {
1260 switch (decide_copy_method (p1)) {
1261 case CM_ENCODE:
1262 ++encode_count;
1263 break;
1264 case CM_DECODE:
1265 ++decode_count;
1266 break;
1267 case CM_PASSTHROUGH:
1268 break;
1269 }
1270 }
1271
1272 if (!encode_count && !decode_count)
1273 return (char *)s; /* C const model sucks. */
1274
1275 oldlen = p1 - s;
1276 host_len = path_start - s;
1277 /* Each encoding adds two characters (hex digits), while each
1278 decoding removes two characters. */
1279 newlen = oldlen + 2 * (encode_count - decode_count);
1280 newstr = xmalloc (newlen + 1);
1281
1282 /* copy unmodified to new_str up to path_start */
1283 memcpy(newstr, s, host_len);
1284 p1 = path_start;
1285 p2 = newstr + host_len;
1286
1287 while (*p1) {
1288 switch (decide_copy_method (p1)) {
1289 case CM_ENCODE: {
1290 unsigned char c = *p1++;
1291 *p2++ = '%';
1292 *p2++ = XNUM_TO_DIGIT (c >> 4);
1293 *p2++ = XNUM_TO_DIGIT (c & 0xf);
1294 }
1295 break;
1296 case CM_DECODE:
1297 *p2++ = X2DIGITS_TO_NUM (p1[1], p1[2]);
1298 p1 += 3; /* skip %xx */
1299 break;
1300 case CM_PASSTHROUGH:
1301 *p2++ = *p1++;
1302 }
1303 }
1304 *p2 = '\0';
1305 return newstr;
1306 }
1307
1308
/**
 * Replaces every occurrence of needle in url with replacement, in place.
 *
 * The work happens inside the caller's own buffer, so there is no limit
 * on the length of url and the caller's pointer stays valid. Bytes freed
 * by the shrinking are set to '\0'. Because the buffer cannot be enlarged
 * through a by-value pointer, a replacement longer than the needle leaves
 * url untouched; so does an empty needle or a NULL argument.
 *
 * @param url          NUL-terminated, writable string to edit
 * @param needle       text to look for
 * @param replacement  text to put in its place; at most as long as needle
 */
static void
__url_replace(char *url, const char *needle, const char *replacement)
{
  char  *src;
  char  *dst;
  char  *hit;
  size_t nlen;
  size_t rlen;
  size_t before;
  size_t after;
  size_t rest;

  if (url == NULL || needle == NULL || replacement == NULL) {
    return;
  }

  nlen = strlen(needle);
  rlen = strlen(replacement);
  if (nlen == 0 || rlen > nlen) {
    return;
  }

  before = strlen(url);
  src    = url;
  dst    = url;

  /* dst never overtakes src, so the unread part of the string stays intact */
  while ((hit = strstr(src, needle)) != NULL) {
    size_t keep = (size_t)(hit - src);

    memmove(dst, src, keep);
    dst += keep;
    memcpy(dst, replacement, rlen);
    dst += rlen;
    src  = hit + nlen;
  }

  rest = strlen(src);
  memmove(dst, src, rest + 1);   /* the tail and its terminator */

  after = (size_t)(dst - url) + rest;
  if (after < before) {
    memset(url + after, '\0', before - after);
  }
}
1346