1 /**
2  * URL Support
3  *
4  * Copyright (C) 2013-2015
5  * Jeffrey Fulmer - <jeff@joedog.org>, et al.
6  * Copyright (C) 1999 by
7  * Jeffrey Fulmer - <jeff@joedog.org>.
8  *
9  * This file is distributed as part of Siege
10  *
11  * This program is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation; either version 2 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License along
22  * with this program; if not, write to the Free Software Foundation, Inc.
23  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24  *--
25  */
26 #ifdef  HAVE_CONFIG_H
27 # include <config.h>
28 #endif/*HAVE_CONFIG_H*/
29 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <setup.h>
33 #include <url.h>
34 #include <load.h>
35 #include <perl.h>
36 #include <date.h>
37 #include <util.h>
38 #include <memory.h>
39 #include <notify.h>
40 #include <joedog/boolean.h>
41 #include <joedog/defs.h>
42 
43 struct URL_T
44 {
45   int       ID;
46   char *    url;
47   SCHEME    scheme;
48   METHOD    method;
49   char *    username;
50   char *    password;
51   char *    hostname;
52   int       port;
53   char *    path;
54   char *    file;
55   char *    params;
56   BOOLEAN   hasparams;
57   char *    query;
58   char *    frag;
59   char *    request;
60   size_t    postlen;
61   char *    postdata;
62   char *    posttemp;
63   char *    conttype;
64   BOOLEAN   cached;
65   BOOLEAN   redir;
66 };
67 
68 size_t URLSIZE = sizeof(struct URL_T);
69 
70 private void    __url_parse(URL this, char *url);
71 private void    __parse_post_data(URL this, char *datap);
72 private char *  __url_set_absolute(URL this, char *url);
73 private BOOLEAN __url_has_scheme (char *url);
74 private BOOLEAN __url_has_credentials(char *url);
75 private int     __url_default_port(URL this);
76 private char *  __url_set_scheme(URL this, char *url);
77 private char *  __url_set_password(URL this, char *str);
78 private char *  __url_set_username(URL this, char *str);
79 private char *  __url_set_hostname(URL this, char *str);
80 private char *  __url_set_port(URL this, char *str);
81 private char *  __url_set_path(URL this, char *str);
82 private char *  __url_set_file(URL this, char *str);
83 private char *  __url_set_parameters(URL this, char *str);
84 private char *  __url_set_query(URL this, char *str);
85 private char *  __url_set_fragment(URL this, char *str);
86 private char *  __url_escape(const char *s);
87 private METHOD  __url_has_method(const char *url);
88 private void    __url_replace(char *url, const char *needle, const char *replacement);
89 
90 URL
new_url(char * str)91 new_url(char *str)
92 {
93   URL this;
94   this = xmalloc(URLSIZE);
95   this->ID        = 0;
96   this->scheme    = HTTP;
97   this->hasparams = FALSE;
98   this->params    = NULL;
99   this->redir     = FALSE;
100   this->method    = GET;
101   this->username  = NULL;
102   this->password  = NULL;
103   this->hostname  = NULL;
104   this->port      = 80;
105   this->path      = NULL;
106   this->file      = NULL;
107   this->params    = NULL;
108   this->hasparams = FALSE;
109   this->query     = NULL;
110   this->frag      = NULL;
111   this->request   = NULL;
112   this->postlen   = 0;
113   this->postdata  = NULL;
114   this->posttemp  = NULL;
115   this->conttype  = NULL;
116   this->cached    = FALSE;
117   this->redir     = FALSE;
118   __url_parse(this, str);
119   return this;
120 }
121 
122 URL
url_destroy(URL this)123 url_destroy(URL this)
124 {
125   if (this!=NULL) {
126     xfree(this->url);
127     xfree(this->username);
128     xfree(this->password);
129     xfree(this->hostname);
130     if (this->path != NULL && this->path[0] != '\0') {
131       xfree(this->path);
132     }
133     xfree(this->file);
134     xfree(this->query);
135     xfree(this->frag);
136     xfree(this->request);
137     xfree(this->conttype);
138     xfree(this->postdata);
139     xfree(this->posttemp);
140     if (this->hasparams==TRUE) {
141       xfree(this->params);
142     }
143     xfree(this);
144   }
145   return NULL;
146 }
147 
148 /**
149  * URL setters
150  */
151 void
url_set_ID(URL this,int ID)152 url_set_ID(URL this, int ID)
153 {
154   this->ID = ID;
155   return;
156 }
157 
158 /**
159  * This function is largely for RE-setting the scheme
160  */
161 void
url_set_scheme(URL this,SCHEME scheme)162 url_set_scheme(URL this, SCHEME scheme)
163 {
164   char *tmp;
165   char *str;
166   int   n;
167   int   len;
168 
169   this->scheme = scheme;
170   str = strdup(url_get_scheme_name(this));
171 
172   if (this->url != NULL) {
173     tmp = xstrdup(this->url);
174     if (!strncasecmp(tmp, "http:", 5)){
175       n = 7;
176     }
177     if (!strncasecmp(tmp, "https:", 6)){
178       n = 8;
179     }
180     if (!strncasecmp(tmp, "ftp:", 4)){
181       n = 6;
182     }
183     len = strlen(tmp);
184     memmove(tmp, tmp+n, len - n + 1);
185     xfree(this->url);
186     len = strlen(tmp)+strlen(str)+4;
187     this->url = xmalloc(len);
188     memset(this->url, '\0', len);
189     snprintf(this->url, len, "%s://%s", str, tmp);
190     xfree(tmp);
191     xfree(str);
192   }
193   return;
194 }
195 
196 /**
197  * if we don't have a hostname at
198  * construction, we can use this
199  * method to add one...
200  */
201 void
url_set_hostname(URL this,char * hostname)202 url_set_hostname(URL this, char *hostname)
203 {
204   size_t len;
205 
206   if (empty(hostname)) return;
207 
208   xfree(this->hostname);
209   len = strlen(hostname)+1;
210   this->hostname = xmalloc(len);
211   memset(this->hostname, '\0', len);
212   strncpy(this->hostname, hostname, len);
213   return;
214 }
215 
216 void
url_set_redirect(URL this,BOOLEAN redir)217 url_set_redirect(URL this, BOOLEAN redir)
218 {
219   this->redir = redir;
220 }
221 
222 void
url_set_conttype(URL this,char * type)223 url_set_conttype(URL this, char *type) {
224   this->conttype = xstrdup(type);
225   return;
226 }
227 
228 void
url_set_method(URL this,METHOD method)229 url_set_method(URL this, METHOD method) {
230   this->method = method;
231 }
232 
233 /**
234  * invoked when post data is read from a file.
235  * see load.c
236  */
237 void
url_set_postdata(URL this,char * postdata,size_t postlen)238 url_set_postdata(URL this, char *postdata, size_t postlen)
239 {
240   this->postlen   = postlen;
241   this->postdata = xmalloc(this->postlen+1);
242   memcpy(this->postdata, postdata, this->postlen);
243   this->postdata[this->postlen] = '\0';
244   return;
245 }
246 
247 /**
248  * URL getters
249  */
250 public int
url_get_ID(URL this)251 url_get_ID(URL this)
252 {
253   return this->ID;
254 }
255 
256 public char *
url_get_absolute(URL this)257 url_get_absolute(URL this)
258 {
259   return (this == NULL) ? "NULL" : this->url;
260 }
261 
262 public SCHEME
url_get_scheme(URL this)263 url_get_scheme(URL this)
264 {
265   return this->scheme;
266 }
267 
268 public char *
url_get_display(URL this)269 url_get_display(URL this)
270 {
271   if (my.fullurl)
272     return url_get_absolute(this);
273 
274   if (this->method == GET)
275     return url_get_request(this);
276 
277   return url_get_absolute(this);
278 }
279 
280 public char *
url_get_scheme_name(URL this)281 url_get_scheme_name(URL this)
282 {
283   switch (this->scheme) {
284     case HTTP:
285     return "http";
286     case HTTPS:
287     return "https";
288     case FTP:
289     return "ftp";
290     case PROXY:
291     return "proxy";
292     case UNSUPPORTED:
293     default:
294     return "unsupported";
295   }
296   return "unsupported";
297 }
298 
299 public char *
url_get_username(URL this)300 url_get_username(URL this)
301 {
302   return this->username;
303 }
304 
305 public char *
url_get_password(URL this)306 url_get_password(URL this)
307 {
308   return this->password;
309 }
310 
311 public char *
url_get_hostname(URL this)312 url_get_hostname(URL this)
313 {
314   return this->hostname;
315 }
316 
317 public int
url_get_port(URL this)318 url_get_port(URL this)
319 {
320   return this->port;
321 }
322 
323 public char *
url_get_path(URL this)324 url_get_path(URL this)
325 {
326   return this->path;
327 }
328 
329 public char *
url_get_file(URL this)330 url_get_file(URL this)
331 {
332   return this->file;
333 }
334 
335 public char *
url_get_request(URL this)336 url_get_request(URL this)
337 {
338   return this->request;
339 }
340 
341 public char *
url_get_parameters(URL this)342 url_get_parameters(URL this)
343 {
344   return this->params;
345 }
346 
347 public char *
url_get_query(URL this)348 url_get_query(URL this)
349 {
350   return this->query;
351 }
352 
353 public char *
url_get_fragment(URL this)354 url_get_fragment(URL this)
355 {
356   return this->frag;
357 }
358 
359 public size_t
url_get_postlen(URL this)360 url_get_postlen(URL this) {
361   return this->postlen;
362 }
363 
364 public char *
url_get_postdata(URL this)365 url_get_postdata(URL this) {
366   return this->postdata;
367 }
368 
369 public char *
url_get_posttemp(URL this)370 url_get_posttemp(URL this) {
371   return this->posttemp;
372 }
373 
374 public char *
url_get_conttype(URL this)375 url_get_conttype(URL this) {
376 
377   if (this->conttype == NULL) {
378     if (! empty(my.conttype)) {
379       this->conttype = xstrdup(my.conttype);
380     } else {
381       this->conttype = xstrdup("application/x-www-form-urlencoded");
382     }
383   }
384   return this->conttype;
385 }
386 
387 public METHOD
url_get_method(URL this)388 url_get_method(URL this) {
389   return this->method;
390 }
391 
392 public char *
url_get_method_name(URL this)393 url_get_method_name(URL this) {
394   switch (this->method){
395     case POST:
396       return "POST";
397     case PATCH:
398       return "PATCH";
399     case PUT:
400       return "PUT";
401     case DELETE:
402       return "DELETE";
403     case OPTIONS:
404       return "OPTIONS";
405     case HEAD:
406      return "HEAD";
407     case GET:
408     default:
409       return "GET";
410   }
411   return "GET";
412 }
413 
414 BOOLEAN
url_is_redirect(URL this)415 url_is_redirect(URL this)
416 {
417   return this->redir;
418 }
419 
420 void
url_set_username(URL this,char * username)421 url_set_username(URL this, char *username)
422 {
423   size_t len = strlen(username);
424 
425   this->username = malloc(len+1);
426   memset(this->username, '\0', len+1);
427   memcpy(this->username, username, len);
428   return;
429 }
430 
431 void
url_set_password(URL this,char * password)432 url_set_password(URL this, char *password)
433 {
434   size_t len = strlen(password);
435 
436   this->password = malloc(len+1);
437   memset(this->password, '\0', len+1);
438   memcpy(this->password, password, len);
439   return;
440 }
441 
442 void
url_dump(URL this)443 url_dump(URL this)
444 {
445   printf("URL ID:    %d\n", this->ID);
446   printf("Abolute:   %s\n", this->url);
447   printf("Scheme:    %s\n", url_get_scheme_name(this));
448   printf("Method:    %s\n", url_get_method_name(this));
449   printf("Username:  %s\n", url_get_username(this));
450   printf("Password:  %s\n", url_get_password(this));
451   printf("Hostname:  %s\n", url_get_hostname(this));
452   printf("Port:      %d\n", url_get_port(this));
453   printf("Path:      %s\n", url_get_path(this));
454   printf("File:      %s\n", url_get_file(this));
455   printf("Request:   %s\n", url_get_request(this));
456   if (this->hasparams==TRUE)
457     printf("Params:   %s\n", url_get_parameters(this));
458   printf("Query:     %s\n", url_get_query(this));
459   printf("Fragment:  %s\n", url_get_fragment(this));
460   printf("Post Len:  %d\n", (int)url_get_postlen(this));
461   printf("Post Data: %s\n", url_get_postdata(this));
462   printf("Cont Type: %s\n", url_get_conttype(this));
463   return;
464 }
465 
466 URL
url_normalize(URL req,char * location)467 url_normalize(URL req, char *location)
468 {
469   URL    ret;
470   char * url;
471   size_t len;
472 
473   /**
474    * Should we just do this for all URLs
475    * or just the ones we parse??
476    */
477   __url_replace(location, "&amp;",  "&");
478   __url_replace(location, "&#038;", "&");
479 
480   len = strlen(url_get_absolute(req)) + strlen(location) + 32;
481 
482   if (stristr(location, "data:image/gif")) {
483     // stupid CSS tricks
484     return NULL;
485   }
486 
487   if (stristr(location, "://")) {
488     // it's very likely normalized
489     ret = new_url(location);
490 
491     // but we better test it...
492     if (strlen(url_get_hostname(ret)) > 1) {
493       return ret;
494     }
495   }
496 
497   if ((location[0] != '/') && location[0] != '.' && (strchr(location, '.') != NULL && strchr(location, '/') != NULL)) {
498     /**
499      * This is probably host/path; it doesn't start with relevent path
500      * indicators and it contains the hallmarks of host/path namely at
501      * least one dot and slash
502      */
503     ret = new_url(location);
504     url_set_scheme(ret, url_get_scheme(req));
505     // so we better test it...
506     if (strchr(url_get_hostname(ret), '.') != NULL) {
507       return ret;
508     }
509   }
510 
511   if (strstr(location, "localhost") != NULL) {
512     ret = new_url(location);
513     url_set_scheme(ret, url_get_scheme(req));
514     if (strlen(url_get_hostname(ret)) == 9) {
515       // we found and correctly parsed localhost
516       return ret;
517     }
518   }
519 
520   /**
521    * If we got this far we better construct it...
522    */
523   url = (char*)malloc(len);
524   memset(url, '\0', len);
525 
526   if (location[0] == '/') {
527     if (strlen(location) > 1 && location[1] == '/') {
528       /* starts with // so we should use base protocol */
529       snprintf(url, len, "%s:%s", url_get_scheme_name(req), location);
530     } else {
531       snprintf(url, len, "%s://%s:%d%s", url_get_scheme_name(req), url_get_hostname(req), url_get_port(req), location);
532     }
533   } else {
534     if (endswith("/", url_get_path(req)) == TRUE) {
535       char *tmp;
536       /**
537        * We're dealing with a req that ends in / and a relative
538        * URL that starts with ./ We want to increment two places
539        * to avoid this path:  /haha/./mama.jpg
540        */
541       if (location[0] == '.' && strlen(location) > 1) {
542         tmp = location+2;
543       } else {
544         tmp = location;
545       }
546       snprintf (  // if the path ends with / we won't need one in the format
547         url, len,
548        "%s://%s:%d%s%s",
549        url_get_scheme_name(req), url_get_hostname(req), url_get_port(req), url_get_path(req), tmp
550       );
551     } else {
552       snprintf (  // need to add a slash to separate base path from parsed path/file
553         url, len,
554         "%s://%s:%d%s/%s",
555         url_get_scheme_name(req), url_get_hostname(req), url_get_port(req), url_get_path(req), location
556       );
557     }
558   }
559   ret = new_url(url);
560   url_set_scheme(ret, url_get_scheme(req));
561   free(url);
562   return ret;
563 }
564 
565 char *
url_normalize_string(URL req,char * location)566 url_normalize_string(URL req, char *location)
567 {
568   char *t;
569   URL   u;
570 
571   u = url_normalize(req, location);
572   t = strdup(url_get_absolute(u));
573   u = url_destroy(u);
574   return t;
575 }
576 
577 private void
__url_parse(URL this,char * url)578 __url_parse(URL this, char *url)
579 {
580   char   *ptr = NULL;
581   char   *esc = NULL;
582   char   *post;
583 
584   /**
585    * URL escaping is in its infancy so we're
586    * going to make it a configurable option.
587    * see: url-escaping in siegerc.
588    */
589   esc = __url_escape(url);
590   if (my.escape) {
591     ptr = __url_set_absolute(this, esc);
592   } else {
593     ptr = __url_set_absolute(this, url);
594   }
595 
596   ptr = __url_set_scheme(this, ptr);
597 
598   post = strstr(this->url, " POST");
599 
600   if (! post) {
601     post = strstr(this->url, " PUT");
602   }
603 
604   if (! post) {
605     post = strstr(this->url, " PATCH");
606   }
607 
608   if (! post) {
609     post = strstr(this->url, " OPTIONS");
610   }
611 
612   if (! post) {
613     post = strstr(this->url, " DELETE");
614   }
615 
616   if (post != NULL){
617     if (!strncasecmp(post," PUT", 4)) {
618       this->method = PUT;
619       *post = '\0';
620       post += 4;
621     } else if (!strncasecmp(post," POST", 5)) {
622       this->method = POST;
623       *post = '\0';
624       post += 5;
625     } else if (!strncasecmp(post," DELETE", 7)) {
626       this->method = DELETE;
627       *post = '\0';
628       post += 7;
629     } else if (!strncasecmp(post," OPTIONS", 8)) {
630       this->method = OPTIONS;
631       *post = '\0';
632       post += 8;
633     } else {
634       this->method = PATCH;
635       *post = '\0';
636       post += 6;
637     }
638     __parse_post_data(this, post);
639   } else {
640     this->method = GET;
641     this->postdata   = NULL;
642     this->posttemp   = NULL;
643     this->postlen    = 0;
644   }
645 
646   if (__url_has_credentials(ptr)) {
647     ptr = __url_set_username(this, ptr);
648     ptr = __url_set_password(this, ptr);
649   }
650 
651   ptr = __url_set_hostname(this, ptr);
652   ptr = __url_set_port(this, ptr);
653   ptr = __url_set_path(this, ptr);
654   ptr = __url_set_file(this, ptr);
655   ptr = __url_set_parameters(this, ptr);
656   ptr = __url_set_query(this, ptr);
657   ptr = __url_set_fragment(this, ptr);
658   return;
659 }
660 
661 private void
__parse_post_data(URL this,char * datap)662 __parse_post_data(URL this, char *datap)
663 {
664   for (; isspace((unsigned int)*datap); datap++) {
665     /* Advance past white space */
666   }
667   if (*datap == '<') {
668     datap++;
669     load_file(this, datap);
670     datap = __url_set_path(this, datap);
671     datap = __url_set_file(this, datap);
672     return;
673   } else {
674     this->postdata = xstrdup(datap);
675     this->postlen  = strlen(this->postdata);
676     if (! empty(my.conttype)) {
677       this->conttype = xstrdup(my.conttype);
678     } else {
679       this->conttype = xstrdup("application/x-www-form-urlencoded");
680     }
681     return;
682   }
683 
684   return;
685 }
686 
687 /**
688  * assign the full url to this->url
689  */
690 private char *
__url_set_absolute(URL this,char * url)691 __url_set_absolute(URL this, char *url)
692 {
693   int    n;
694   size_t len;
695   char   *slash;
696   char   scheme[16];
697 
698   if (empty(url)) return NULL;
699 
700   memset(scheme, '\0', 16);
701 
702   if (!strncasecmp(url, "http:", 5)){
703     n = 7;
704     strncpy(scheme, "http", 4);
705   }
706   if (!strncasecmp(url, "https:", 6)){
707     n = 8;
708     strncpy(scheme, "https", 5);
709   }
710   if (!strncasecmp(url, "ftp:", 4)){
711     n = 6;
712     strncpy(scheme, "ftp", 3);
713   }
714   if (strlen(scheme) < 3) {
715     // A scheme wasn't supplied; we'll use http by default.
716     n = 7;
717     strncpy(scheme, "http", 4);
718   }
719 
720   len = strlen(url)+5;
721   if (!__url_has_scheme(url)) {
722     this->url = xmalloc(len+n);
723     memset(this->url, '\0', len+n);
724     slash = strstr(url, "/");
725     if (slash) {
726       snprintf(this->url, len+n, "%s://%s", scheme, url);
727     } else {
728       snprintf(this->url, len+n, "%s://%s/", scheme, url);
729     }
730   } else {
731     this->url = xmalloc(len);
732     memset(this->url, '\0', len);
733     snprintf(this->url, len, "%s", url);
734   }
735   return this->url;
736 }
737 
738 #define SCHEME_CHAR(ch) (isalnum (ch) || (ch) == '-' || (ch) == '+')
739 /**
740  * stolen from wget:url.c
741  */
742 private BOOLEAN
__url_has_scheme(char * url)743 __url_has_scheme (char *url)
744 {
745   const char *p = url;
746 
747   /* The first char must be a scheme char. */
748   if (!*p || !SCHEME_CHAR (*p))
749     return FALSE;
750   ++p;
751   /* Followed by 0 or more scheme chars. */
752   while (*p && SCHEME_CHAR (*p))
753     ++p;
754   /* Terminated by ':'. */
755   return *p == ':';
756 }
757 
758 private BOOLEAN
__url_has_credentials(char * url)759 __url_has_credentials(char *url)
760 {
761   /**
762    * if there's an @ before /?#; then we have creds
763    */
764   const char *p = (const char *)strpbrk (url, "@/?#;");
765   if (!p || *p != '@')
766     return FALSE;
767   return TRUE;
768 }
769 
770 private int
__url_default_port(URL this)771 __url_default_port(URL this)
772 {
773   switch(this->scheme){
774     case FTP:
775      return 21;
776     case HTTP:
777       return 80;
778     case HTTPS:
779       return 443;
780     case UNSUPPORTED:
781     default:
782       return 80;
783   }
784 }
785 
786 /**
787  * set the scheme, i.e., http/https
788  * <SCHEME>://<username>:<password>@<hostname>:<port>/<path>;<params>?<query>#<frag>
789  */
790 private char *
__url_set_scheme(URL this,char * url)791 __url_set_scheme(URL this, char *url)
792 {
793   if(!strncasecmp(this->url, "http:", 5)){
794     this->scheme = HTTP;
795     return url+7;
796   }
797   if(!strncasecmp(this->url, "https:", 6)){
798     this->scheme = HTTPS;
799     return url+8;
800   }
801   if(!strncasecmp(this->url, "ftp:", 4)){
802     this->scheme = FTP;
803     return url+6;
804   }
805   this->scheme = UNSUPPORTED;
806   return url;
807 }
808 
809 /**
810  * set the username
811  * <scheme>://<USERNAME>:<password>@<hostname>:<port>/<path>;<params>?<query>#<frag>
812  */
813 private char *
__url_set_username(URL this,char * str)814 __url_set_username(URL this, char *str)
815 {
816   int i;
817   char *a;
818   char *s;
819 
820   a = strchr(str, '@');
821   s = strchr(str, '/');
822 
823   if((!a) || (s && (a >= s))){
824     return str;
825   }
826 
827   for(i = 0; str[i] && str[i] != ':' && str[i] != '@' && str[i] != '/'; i++);
828 
829   if(str[i] != '@' && str[i] != ':'){
830     return str;
831   }
832 
833   this->username = malloc(i+1);
834   memcpy(this->username, str, i + 1);
835   this->username[i] = '\0';
836   str += i + 1;
837 
838   return str;
839 }
840 
841 /**
842  * set the password
843  * <scheme>://<username>:<PASSWORD>@<hostname>:<port>/<path>;<params>?<query>#<frag>
844  */
845 private char *
__url_set_password(URL this,char * str)846 __url_set_password(URL this, char *str)
847 {
848   int i;
849   char *a;
850   char *s;
851   a = strchr(str, '@');
852   s = strchr(str, '/');
853 
854   if((!a) || (s && (a >= s)) ){
855     return str;
856   }
857   /**
858    * XXX: as the original author (Zachary Beane <xach@xach.com>) notes:
859    * this code breaks if user has an '@' or a '/' in their password.
860    */
861   for(i = 0 ; str[i] != '@'; i++);
862   this->password = xmalloc(i+1);
863 
864   memcpy(this->password, str, i);
865   this->password[i] = '\0';
866 
867   str += i + 1;
868 
869   return str;
870 }
871 
872 /**
873  * set the hostname
874  * <scheme>://<username>:<password>@<HOSTNAME>:<port>/<path>;<params>?<query>#<frag>
875  */
876 private char *
__url_set_hostname(URL this,char * str)877 __url_set_hostname(URL this, char *str)
878 {
879   int i;
880   int n;
881   int len;
882 
883   if (startswith("//", str)) {
884     n   = 2;
885     len = strlen(str);
886     memmove(str, str+n, len - n + 1);
887   }
888 
889   /**
890    * Check for IPv6 address. The convention here is to use square brackets
891    * around the IPv6 address in order to have a clear delimitation between
892    * address and port
893    */
894   if (startswith("[", str)) {
895     /* skip to matching square bracket */
896     for (i = 0; str[i] && str[i] != ']'; i++);
897 
898     if (str[i] == ']') {
899       i++;
900     }
901   } else {
902     /* skip to end, slash, or port colon */
903     for (i = 0; str[i] && str[i] != '/' && str[i] != '#' && str[i] != ':'; i++);
904   }
905 
906   this->hostname = xmalloc(i + 1);
907   memset(this->hostname, '\0', i+1);
908   memcpy(this->hostname, str, i);
909 
910   /* if there's a port */
911   if (str[i] == ':') {
912     str += i + 1;
913   } else {
914     str += i;
915   }
916   return str;
917 }
918 
919 /**
920  * set the port
921  * <scheme>://<username>:<password>@<hostname>:<PORT>/<path>;<params>?<query>#<frag>
922  */
923 private char *
__url_set_port(URL this,char * str)924 __url_set_port(URL this, char *str)
925 {
926   char *portstr;
927   int i;
928 
929   this->port = __url_default_port(this);
930 
931    for(i = 0; isdigit(str[i]); i++);
932 
933    if(i == 0) return str;
934 
935 
936    portstr = malloc(i + 1);
937    memcpy(portstr, str, i + 1);
938    portstr[i] = '\0';
939 
940    this->port = atoi(portstr);
941    xfree(portstr);
942 
943    str += i;
944    return str;
945 }
946 
947 /**
948  * set the path
949  * <scheme>://<username>:<password>@<hostname>:<port>/<PATH>;<params>?<query>#<frag>
950  */
951 private char *
__url_set_path(URL this,char * str)952 __url_set_path(URL this, char *str)
953 {
954   int   i;    // capture the lenght of the path
955   int   j;    // capture the length of the request (sans frag)
956   char *c;
957 
958   if (str != NULL && str[0] == '#') {
959     // WTF'ery. We probably have this: www.joedog.org#haha
960     this->request = xstrdup("/");
961     return str;
962   }
963 
964   this->request = xstrdup(str);
965 
966   /**
967    * Does the request have a fragment?
968    * Let's whack that annoyance off...
969    */
970   c = (char *)strstr(this->request, "#");
971   if (c) {
972    *c = '\0';
973   }
974 
975   for (i = strlen(str); i > 0 && str[i] != '/'; i--);
976   for (j = 0; str[j] && (str[j] != '#' && !isspace(str[j])); j++);
977 
978   if (str[i] != '/') {
979     if (this->scheme == FTP) {
980       this->path    = "";
981     } else {
982       this->path    = xmalloc(2);
983       this->request = xmalloc(2);
984       strncpy(this->path,    "/", 2);
985       strncpy(this->request, "/", 2);
986       this->path[1]    = '\0';
987       this->request[1] = '\0';
988     }
989   } else {
990     this->path    = xmalloc(i+2);
991     memcpy(this->path, str, i+1);
992     this->path[i] = '/';
993     this->path[i + 1]    = '\0';
994     if (this->scheme == FTP && this->path[0] == '/') {
995       memmove(this->path, this->path+1, strlen(this->path));
996     }
997   }
998   trim(this->request);
999   str += i + 1;
1000   return str;
1001 }
1002 
1003 /**
1004  * set the file
1005  * <scheme>://<username>:<password>@<hostname>:<port>/<FILE>;<params>?<query>#<frag>
1006  */
1007 private char *
__url_set_file(URL this,char * str)1008 __url_set_file(URL this, char *str)
1009 {
1010   int   i;
1011 
1012   if (str==NULL) return NULL;
1013   if (this->file != NULL && strlen(this->file) > 1) return str;
1014 
1015   for(i = 0; str[i] && (str[i] != ';' && str[i] != '?' && !isspace(str[i])); i++);
1016   this->file = xmalloc(i+1);
1017   memset(this->file, '\0', i+1);
1018   memcpy(this->file, str, i);
1019   trim(this->file);
1020 
1021   /* if there are params or a query string */
1022   if (str[i] == ';') {
1023     this->hasparams = TRUE;
1024     str += i + 1;
1025   } else if(str[i] == '?') {
1026     str += i + 1;
1027   } else {
1028     str += i;
1029   }
1030   return str;
1031 }
1032 
1033 /**
1034  * set the parameters
1035  * <scheme>://<username>:<password>@<hostname>:<port>/<path>;<PARAMS>?<query>#<frag>
1036  */
1037 private char *
__url_set_parameters(URL this,char * str)1038 __url_set_parameters(URL this, char *str)
1039 {
1040   int i;
1041 
1042   if (str==NULL) return NULL;
1043   if (this->params != NULL && strlen(this->params) > 1) {
1044     return str;
1045   }
1046 
1047   if (this->hasparams == FALSE) {
1048     this->params = "";
1049     return str;
1050   }
1051 
1052   for (i = 0; str[i] && (str[i] != '?' && !isspace(str[i])); i++);
1053 
1054   this->params = xmalloc(i+1);
1055   memset(this->params, '\0', i+1);
1056   memcpy(this->params, str, i);
1057 
1058   /* if there is a query string */
1059   if(str[i] == '?'){
1060     str += i + 1;
1061   } else {
1062     str += i;
1063   }
1064   return str;
1065 }
1066 
1067 /**
1068  * set the query
1069  * <scheme>://<username>:<password>@<hostname>:<port>/<path>;<params>?<QUERY>#<frag>
1070  */
1071 private char *
__url_set_query(URL this,char * str)1072 __url_set_query(URL this, char *str)
1073 {
1074   int   i;
1075 
1076   if (str==NULL) {
1077     this->query = xstrcat("");
1078     return NULL;
1079   }
1080 
1081   if (this->query != NULL && strlen(this->query) > 1) return str;
1082 
1083   for(i = 0; str[i] && (str[i] != '#' && !isspace(str[i])); i++);
1084 
1085   this->query = xmalloc(i+1);
1086   memset(this->query, '\0', i+1);
1087   memcpy(this->query, str, i);
1088 
1089   /* if there are params or a query string */
1090   if(str[i] == '#'){
1091     str += i + 1;
1092   } else {
1093     str += i;
1094   }
1095   return str;
1096 }
1097 
1098 /**
1099  * set the fragment (not used by siege)
1100  * <scheme>://<username>:<password>@<hostname>:<port>/<path>;<params>?<query>#<FRAG>
1101  */
1102 private char *
__url_set_fragment(URL this,char * str)1103 __url_set_fragment(URL this, char *str)
1104 {
1105   int   i;
1106 
1107   if (str==NULL) return NULL;
1108   if (this->frag != NULL && strlen(this->frag) > 1) return str;
1109 
1110   for(i = 0; str[i] && !isspace(str[i]); i++);
1111 
1112   this->frag = xmalloc(i+1);
1113   memcpy(this->frag, str, i);
1114 
1115   str += i + 1;
1116   return str;
1117 }
1118 
1119 /**
1120  * The following functions provide url encoding. They
1121  * were lifted from wget:
1122  * Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001
1123  * Free Software Foundation, Inc.
1124  */
/* Bit flags stored per character in urlchr_table. */
enum {
  urlchr_reserved = 1,  /* rfc1738 reserved chars, preserved from encoding */
  urlchr_unsafe   = 2   /* rfc1738 unsafe chars, plus some more            */
};

/* Look up the flags of character c; the cast keeps the index in 0..255. */
#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))
#define URL_RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)
#define URL_UNSAFE_CHAR(c)   urlchr_test(c, urlchr_unsafe)

/* Shorthands used only while the table is being defined: */
#define R  urlchr_reserved
#define U  urlchr_unsafe
#define RU (R|U)

/**
 * Classification of every byte value. The first 128 entries are laid out
 * eight per row with the matching ASCII characters in the trailing
 * comment; all bytes from 128 up are marked unsafe.
 */
static const unsigned char urlchr_table[256] =
{
  U,  U,  U,  U,   U,  U,  U,  U,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
  U,  U,  U,  U,   U,  U,  U,  U,   /* BS  HT  LF  VT   FF  CR  SO  SI  */
  U,  U,  U,  U,   U,  U,  U,  U,   /* DLE DC1 DC2 DC3  DC4 NAK SYN ETB */
  U,  U,  U,  U,   U,  U,  U,  U,   /* CAN EM  SUB ESC  FS  GS  RS  US  */
  U,  0,  U, RU,   0,  U,  R,  0,   /* SP  !   "   #    $   %   &   '   */
  0,  0,  0,  R,   0,  0,  0,  R,   /* (   )   *   +    ,   -   .   /   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* 0   1   2   3    4   5   6   7   */
  0,  0, RU,  R,   U,  R,  U,  R,   /* 8   9   :   ;    <   =   >   ?   */
 RU,  0,  0,  0,   0,  0,  0,  0,   /* @   A   B   C    D   E   F   G   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* H   I   J   K    L   M   N   O   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* P   Q   R   S    T   U   V   W   */
  0,  0,  0, RU,   U, RU,  U,  0,   /* X   Y   Z   [    \   ]   ^   _   */
  U,  0,  0,  0,   0,  0,  0,  0,   /* `   a   b   c    d   e   f   g   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* h   i   j   k    l   m   n   o   */
  0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
  0,  0,  0,  U,   U,  U,  U,  U,   /* x   y   z   {    |   }   ~   DEL */

  U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
  U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
  U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
  U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,

  U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
  U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
  U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
  U, U, U, U,  U, U, U, U,  U, U, U, U,  U, U, U, U,
};
#undef R
#undef U
#undef RU
1174 
1175 enum copy_method { CM_DECODE, CM_ENCODE, CM_PASSTHROUGH };
1176 
1177 /**
1178  * Decide whether to encode, decode, or pass through the char at P.
1179  *  This used to be a macro, but it got a little too convoluted.
1180  */
1181 static inline enum copy_method
decide_copy_method(const char * p)1182 decide_copy_method (const char *p)
1183 {
1184   if (*p == '%') {
1185     if (ISXDIGIT (*(p + 1)) && ISXDIGIT (*(p + 2))) {
1186       /**
1187        * %xx sequence: decode it, unless it would decode to an
1188        * unsafe or a reserved char; in that case, leave it as is.
1189        */
1190       char preempt = X2DIGITS_TO_NUM (*(p + 1), *(p + 2));
1191       if (URL_UNSAFE_CHAR (preempt) || URL_RESERVED_CHAR (preempt))
1192         return CM_PASSTHROUGH;
1193       else
1194         return CM_DECODE;
1195     } else {
1196       return CM_ENCODE;
1197     }
1198   }
1199   else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p))
1200     return CM_ENCODE;
1201   else
1202     return CM_PASSTHROUGH;
1203 }
1204 
1205 static METHOD
__url_has_method(const char * url)1206 __url_has_method(const char *url)
1207 {
1208    unsigned int i = 0;
1209    const char * r = NULL;
1210    static const char* const methods[] = {
1211      " GET", " HEAD", " POST", " PUT", " TRACE", " DELETE", " OPTIONS", " CONNECT", " PATCH"
1212    };
1213 
1214    for (i = 0; i < sizeof(methods) / sizeof(methods[0]); i++) {
1215      r = strstr(url, methods[i]);
1216      if (r != NULL) return i;
1217    }
1218 
1219    return NOMETHOD;
1220 }
1221 
1222 private char *
__url_escape(const char * s)1223 __url_escape(const char *s)
1224 {
1225   const char *p1;
1226   char *newstr, *p2;
1227   int oldlen, newlen, host_len;
1228   char *path_start, *host_start;
1229 
1230   int encode_count = 0;
1231   int decode_count = 0;
1232 
1233   /**
1234    * FIXME: we're not going to escape siege method
1235    * URLS, i.e., things with PUT or POST but if the
1236    * path contains spaces they won't be escaped.
1237    */
1238   if (__url_has_method(s)!=NOMETHOD) {
1239     return (char *)s;
1240   }
1241 
1242   /* skip directly to path */
1243   host_start = strstr(s, "//");
1244   if (host_start) {
1245     host_start += 2;
1246   } else {
1247     host_start = (char *)s;
1248   }
1249 
1250   path_start = strstr(host_start, "/");
1251   if (path_start) {
1252     path_start += 1;
1253   } else { /* there is no path to escape */
1254     return (char *)s;
1255   }
1256 
1257   /* First, pass through the string to see if there's anything to do,
1258      and to calculate the new length.  */
1259   for (p1 = path_start; *p1; p1++) {
1260     switch (decide_copy_method (p1)) {
1261       case CM_ENCODE:
1262         ++encode_count;
1263         break;
1264       case CM_DECODE:
1265         ++decode_count;
1266         break;
1267       case CM_PASSTHROUGH:
1268         break;
1269     }
1270   }
1271 
1272   if (!encode_count && !decode_count)
1273     return (char *)s; /* C const model sucks. */
1274 
1275   oldlen = p1 - s;
1276   host_len = path_start - s;
1277   /* Each encoding adds two characters (hex digits), while each
1278      decoding removes two characters.  */
1279   newlen = oldlen + 2 * (encode_count - decode_count);
1280   newstr = xmalloc (newlen + 1);
1281 
1282   /* copy unmodified to new_str up to path_start */
1283   memcpy(newstr, s, host_len);
1284   p1 = path_start;
1285   p2 = newstr + host_len;
1286 
1287   while (*p1) {
1288     switch (decide_copy_method (p1)) {
1289       case CM_ENCODE: {
1290         unsigned char c = *p1++;
1291         *p2++ = '%';
1292         *p2++ = XNUM_TO_DIGIT (c >> 4);
1293         *p2++ = XNUM_TO_DIGIT (c & 0xf);
1294       }
1295       break;
1296     case CM_DECODE:
1297       *p2++ = X2DIGITS_TO_NUM (p1[1], p1[2]);
1298       p1 += 3;              /* skip %xx */
1299       break;
1300     case CM_PASSTHROUGH:
1301       *p2++ = *p1++;
1302     }
1303   }
1304   *p2 = '\0';
1305   return newstr;
1306 }
1307 
1308 
/**
 * Replace every occurrence of NEEDLE in URL by REPLACEMENT, in place.
 *
 * The result is first assembled in a scratch buffer sized exactly for
 * it, so there is no fixed limit on the length of the URL. An empty
 * NEEDLE, or a NEEDLE that does not occur, leaves URL untouched. If
 * memory cannot be obtained, URL is also left untouched.
 *
 * @param url          NUL-terminated string to rewrite; when the result
 *                     is longer than the original it is passed to
 *                     realloc(), so it must then be a heap block
 * @param needle       text to search for
 * @param replacement  text substituted for each occurrence
 *
 * NOTE(review): when the result is longer than the original, URL is
 * realloc()ed, but this void interface cannot hand a moved block back
 * to the caller, who keeps using the original pointer. Fixing that
 * needs an interface change (return the pointer or take a char **).
 */
private void
__url_replace(char *url, const char *needle, const char *replacement)
{
  const size_t nlen   = strlen(needle);
  const size_t rlen   = strlen(replacement);
  const size_t urllen = strlen(url);
  size_t       hits   = 0;
  size_t       buflen;
  size_t       tail;
  const char  *src;
  const char  *hit;
  char        *buf;
  char        *dst;

  /* strstr() matches an empty needle at every position: never terminates */
  if (nlen == 0) {
    return;
  }

  /* count the occurrences so the scratch buffer can be sized exactly */
  for (src = url; (hit = strstr(src, needle)) != NULL; src = hit + nlen) {
    hits++;
  }
  if (hits == 0) {
    return;
  }

  /* refuse a result whose length cannot be represented */
  if (rlen > nlen && hits > ((size_t)-1 - urllen - 1) / (rlen - nlen)) {
    return;
  }
  buflen = urllen - hits * nlen + hits * rlen;

  buf = malloc(buflen + 1);
  if (buf == NULL) {
    return;
  }

  /* assemble: text before each hit, then the replacement */
  dst = buf;
  src = url;
  while ((hit = strstr(src, needle)) != NULL) {
    const size_t keep = (size_t)(hit - src);

    memcpy(dst, src, keep);
    dst += keep;
    memcpy(dst, replacement, rlen);
    dst += rlen;
    src = hit + nlen;
  }
  /* whatever follows the last hit, including the terminating NUL */
  tail = urllen - (size_t)(src - url);
  memcpy(dst, src, tail + 1);

  if (buflen > urllen) {
    char *grown = realloc(url, buflen + 1);
    if (grown == NULL) {
      free(buf);
      return;
    }
    url = grown;
  } else {
    /* clear the old text so nothing of it trails the shorter result */
    memset(url, '\0', urllen);
  }
  memcpy(url, buf, buflen + 1);
  free(buf);
}
1346