1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
9  */
10 
11 #define IN_LIBXML
12 #include "libxml.h"
13 
14 #include <limits.h>
15 #include <string.h>
16 
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/globals.h>
20 #include <libxml/xmlerror.h>
21 
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFC has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value in any expression (e.g. "x / MAX_URI_LENGTH").
 */
#define MAX_URI_LENGTH (1024 * 1024)
35 
36 static void
xmlURIErrMemory(const char * extra)37 xmlURIErrMemory(const char *extra)
38 {
39     if (extra)
40         __xmlRaiseError(NULL, NULL, NULL,
41                         NULL, NULL, XML_FROM_URI,
42                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43                         extra, NULL, NULL, 0, 0,
44                         "Memory allocation failed : %s\n", extra);
45     else
46         __xmlRaiseError(NULL, NULL, NULL,
47                         NULL, NULL, XML_FROM_URI,
48                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49                         NULL, NULL, NULL, 0, 0,
50                         "Memory allocation failed\n");
51 }
52 
53 static void xmlCleanURI(xmlURIPtr uri);
54 
/*
 * Character classes from RFC 2396, used in legacy handling code.
 *
 * Every macro below takes a character VALUE, except IS_UNWISE and NEXT
 * which take a POINTER to the character.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * Old rule from 2396 used in legacy handling code
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* Drop any IS_DIGIT inherited from an included header before redefining it */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * NOTE: unlike the other legacy macros this one takes a pointer.
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: a '%' advances the
 * pointer by 3 (the escape and its two following characters), anything
 * else by 1. The macro does not validate the two characters after '%';
 * callers are expected to have checked them beforehand.
 *
 * The argument is parenthesized so that lvalue expressions such as
 * NEXT(*pp) advance the intended pointer.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first @n bytes of @s through xmlStrndup(), as a char * */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
140 
141 /************************************************************************
142  *									*
143  *                         RFC 3986 parser				*
144  *									*
145  ************************************************************************/
146 
/*
 * Character class tests for the RFC 3986 grammar.
 * All of them take a POINTER to the character to examine.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The '&&' chain short-circuits, so the second and third characters are
 * only read once the previous one matched (and hence was not the
 * terminating 0). The argument is parenthesized before the offset is
 * added so that compound argument expressions are handled correctly.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
196 
197 /**
198  * xmlParse3986Scheme:
199  * @uri:  pointer to an URI structure
200  * @str:  pointer to the string to analyze
201  *
202  * Parse an URI scheme
203  *
204  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205  *
206  * Returns 0 or the error code
207  */
208 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)209 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210     const char *cur;
211 
212     if (str == NULL)
213 	return(-1);
214 
215     cur = *str;
216     if (!ISA_ALPHA(cur))
217 	return(2);
218     cur++;
219     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
220            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221     if (uri != NULL) {
222 	if (uri->scheme != NULL) xmlFree(uri->scheme);
223 	uri->scheme = STRNDUP(*str, cur - *str);
224     }
225     *str = cur;
226     return(0);
227 }
228 
229 /**
230  * xmlParse3986Fragment:
231  * @uri:  pointer to an URI structure
232  * @str:  pointer to the string to analyze
233  *
234  * Parse the query part of an URI
235  *
236  * fragment      = *( pchar / "/" / "?" )
237  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238  *       in the fragment identifier but this is used very broadly for
239  *       xpointer scheme selection, so we are allowing it here to not break
240  *       for example all the DocBook processing chains.
241  *
242  * Returns 0 or the error code
243  */
244 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)245 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246 {
247     const char *cur;
248 
249     if (str == NULL)
250         return (-1);
251 
252     cur = *str;
253 
254     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
255            (*cur == '[') || (*cur == ']') ||
256            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257         NEXT(cur);
258     if (uri != NULL) {
259         if (uri->fragment != NULL)
260             xmlFree(uri->fragment);
261 	if (uri->cleanup & 2)
262 	    uri->fragment = STRNDUP(*str, cur - *str);
263 	else
264 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
265     }
266     *str = cur;
267     return (0);
268 }
269 
270 /**
271  * xmlParse3986Query:
272  * @uri:  pointer to an URI structure
273  * @str:  pointer to the string to analyze
274  *
275  * Parse the query part of an URI
276  *
277  * query = *uric
278  *
279  * Returns 0 or the error code
280  */
281 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)282 xmlParse3986Query(xmlURIPtr uri, const char **str)
283 {
284     const char *cur;
285 
286     if (str == NULL)
287         return (-1);
288 
289     cur = *str;
290 
291     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
292            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293         NEXT(cur);
294     if (uri != NULL) {
295         if (uri->query != NULL)
296             xmlFree(uri->query);
297 	if (uri->cleanup & 2)
298 	    uri->query = STRNDUP(*str, cur - *str);
299 	else
300 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301 
302 	/* Save the raw bytes of the query as well.
303 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304 	 */
305 	if (uri->query_raw != NULL)
306 	    xmlFree (uri->query_raw);
307 	uri->query_raw = STRNDUP (*str, cur - *str);
308     }
309     *str = cur;
310     return (0);
311 }
312 
313 /**
314  * xmlParse3986Port:
315  * @uri:  pointer to an URI structure
316  * @str:  the string to analyze
317  *
318  * Parse a port part and fills in the appropriate fields
319  * of the @uri structure
320  *
321  * port          = *DIGIT
322  *
323  * Returns 0 or the error code
324  */
325 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)326 xmlParse3986Port(xmlURIPtr uri, const char **str)
327 {
328     const char *cur = *str;
329     int port = 0;
330 
331     if (ISA_DIGIT(cur)) {
332 	while (ISA_DIGIT(cur)) {
333             int digit = *cur - '0';
334 
335             if (port > INT_MAX / 10)
336                 return(1);
337             port *= 10;
338             if (port > INT_MAX - digit)
339                 return(1);
340 	    port += digit;
341 
342 	    cur++;
343 	}
344 	if (uri != NULL)
345 	    uri->port = port;
346 	*str = cur;
347 	return(0);
348     }
349     return(1);
350 }
351 
352 /**
353  * xmlParse3986Userinfo:
354  * @uri:  pointer to an URI structure
355  * @str:  the string to analyze
356  *
357  * Parse an user information part and fills in the appropriate fields
358  * of the @uri structure
359  *
360  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
361  *
362  * Returns 0 or the error code
363  */
364 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)365 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
366 {
367     const char *cur;
368 
369     cur = *str;
370     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
371            ISA_SUB_DELIM(cur) || (*cur == ':'))
372 	NEXT(cur);
373     if (*cur == '@') {
374 	if (uri != NULL) {
375 	    if (uri->user != NULL) xmlFree(uri->user);
376 	    if (uri->cleanup & 2)
377 		uri->user = STRNDUP(*str, cur - *str);
378 	    else
379 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
380 	}
381 	*str = cur;
382 	return(0);
383     }
384     return(1);
385 }
386 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze, advanced past the octet on success
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. Multi-digit values with a leading "0" and three-digit
 * values above 255 are rejected. At most three digits are consumed; it is
 * up to the caller to check what follows.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* single digit: 0-9 */
        len = 1;
    } else if (cur[0] == '0') {
        /* no production allows a leading zero before another digit */
        return(1);
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* two digits: 10-99 */
        len = 2;
    } else if (cur[0] == '1') {
        /* 100-199 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* 200-249 */
        len = 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the bound applies to the THIRD digit */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
424 /**
425  * xmlParse3986Host:
426  * @uri:  pointer to an URI structure
427  * @str:  the string to analyze
428  *
429  * Parse an host part and fills in the appropriate fields
430  * of the @uri structure
431  *
432  * host          = IP-literal / IPv4address / reg-name
433  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
434  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
435  * reg-name      = *( unreserved / pct-encoded / sub-delims )
436  *
437  * Returns 0 or the error code
438  */
439 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)440 xmlParse3986Host(xmlURIPtr uri, const char **str)
441 {
442     const char *cur = *str;
443     const char *host;
444 
445     host = cur;
446     /*
447      * IPv6 and future addressing scheme are enclosed between brackets
448      */
449     if (*cur == '[') {
450         cur++;
451 	while ((*cur != ']') && (*cur != 0))
452 	    cur++;
453 	if (*cur != ']')
454 	    return(1);
455 	cur++;
456 	goto found;
457     }
458     /*
459      * try to parse an IPv4
460      */
461     if (ISA_DIGIT(cur)) {
462         if (xmlParse3986DecOctet(&cur) != 0)
463 	    goto not_ipv4;
464 	if (*cur != '.')
465 	    goto not_ipv4;
466 	cur++;
467         if (xmlParse3986DecOctet(&cur) != 0)
468 	    goto not_ipv4;
469 	if (*cur != '.')
470 	    goto not_ipv4;
471         if (xmlParse3986DecOctet(&cur) != 0)
472 	    goto not_ipv4;
473 	if (*cur != '.')
474 	    goto not_ipv4;
475         if (xmlParse3986DecOctet(&cur) != 0)
476 	    goto not_ipv4;
477 	goto found;
478 not_ipv4:
479         cur = *str;
480     }
481     /*
482      * then this should be a hostname which can be empty
483      */
484     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
485         NEXT(cur);
486 found:
487     if (uri != NULL) {
488 	if (uri->authority != NULL) xmlFree(uri->authority);
489 	uri->authority = NULL;
490 	if (uri->server != NULL) xmlFree(uri->server);
491 	if (cur != host) {
492 	    if (uri->cleanup & 2)
493 		uri->server = STRNDUP(host, cur - host);
494 	    else
495 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
496 	} else
497 	    uri->server = NULL;
498     }
499     *str = cur;
500     return(0);
501 }
502 
503 /**
504  * xmlParse3986Authority:
505  * @uri:  pointer to an URI structure
506  * @str:  the string to analyze
507  *
508  * Parse an authority part and fills in the appropriate fields
509  * of the @uri structure
510  *
511  * authority     = [ userinfo "@" ] host [ ":" port ]
512  *
513  * Returns 0 or the error code
514  */
515 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)516 xmlParse3986Authority(xmlURIPtr uri, const char **str)
517 {
518     const char *cur;
519     int ret;
520 
521     cur = *str;
522     /*
523      * try to parse an userinfo and check for the trailing @
524      */
525     ret = xmlParse3986Userinfo(uri, &cur);
526     if ((ret != 0) || (*cur != '@'))
527         cur = *str;
528     else
529         cur++;
530     ret = xmlParse3986Host(uri, &cur);
531     if (ret != 0) return(ret);
532     if (*cur == ':') {
533         cur++;
534         ret = xmlParse3986Port(uri, &cur);
535 	if (ret != 0) return(ret);
536     }
537     *str = cur;
538     return(0);
539 }
540 
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze, advanced past the segment on success
 * @forbid: an optional forbidden character which stops the scan, 0 if none
 * @empty: allow an empty segment
 *
 * Skip a path segment
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the input does not start with a pchar
 *         and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    if (!ISA_PCHAR(cur))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(cur))
            break;
        if (*cur == forbid)
            break;
        NEXT(cur);
    }
    *str = cur;
    return (0);
}
573 
574 /**
575  * xmlParse3986PathAbEmpty:
576  * @uri:  pointer to an URI structure
577  * @str:  the string to analyze
578  *
579  * Parse an path absolute or empty and fills in the appropriate fields
580  * of the @uri structure
581  *
582  * path-abempty  = *( "/" segment )
583  *
584  * Returns 0 or the error code
585  */
586 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)587 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
588 {
589     const char *cur;
590     int ret;
591 
592     cur = *str;
593 
594     while (*cur == '/') {
595         cur++;
596 	ret = xmlParse3986Segment(&cur, 0, 1);
597 	if (ret != 0) return(ret);
598     }
599     if (uri != NULL) {
600 	if (uri->path != NULL) xmlFree(uri->path);
601         if (*str != cur) {
602             if (uri->cleanup & 2)
603                 uri->path = STRNDUP(*str, cur - *str);
604             else
605                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
606         } else {
607             uri->path = NULL;
608         }
609     }
610     *str = cur;
611     return (0);
612 }
613 
614 /**
615  * xmlParse3986PathAbsolute:
616  * @uri:  pointer to an URI structure
617  * @str:  the string to analyze
618  *
619  * Parse an path absolute and fills in the appropriate fields
620  * of the @uri structure
621  *
622  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
623  *
624  * Returns 0 or the error code
625  */
626 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)627 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
628 {
629     const char *cur;
630     int ret;
631 
632     cur = *str;
633 
634     if (*cur != '/')
635         return(1);
636     cur++;
637     ret = xmlParse3986Segment(&cur, 0, 0);
638     if (ret == 0) {
639 	while (*cur == '/') {
640 	    cur++;
641 	    ret = xmlParse3986Segment(&cur, 0, 1);
642 	    if (ret != 0) return(ret);
643 	}
644     }
645     if (uri != NULL) {
646 	if (uri->path != NULL) xmlFree(uri->path);
647         if (cur != *str) {
648             if (uri->cleanup & 2)
649                 uri->path = STRNDUP(*str, cur - *str);
650             else
651                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
652         } else {
653             uri->path = NULL;
654         }
655     }
656     *str = cur;
657     return (0);
658 }
659 
660 /**
661  * xmlParse3986PathRootless:
662  * @uri:  pointer to an URI structure
663  * @str:  the string to analyze
664  *
665  * Parse an path without root and fills in the appropriate fields
666  * of the @uri structure
667  *
668  * path-rootless = segment-nz *( "/" segment )
669  *
670  * Returns 0 or the error code
671  */
672 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)673 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
674 {
675     const char *cur;
676     int ret;
677 
678     cur = *str;
679 
680     ret = xmlParse3986Segment(&cur, 0, 0);
681     if (ret != 0) return(ret);
682     while (*cur == '/') {
683         cur++;
684 	ret = xmlParse3986Segment(&cur, 0, 1);
685 	if (ret != 0) return(ret);
686     }
687     if (uri != NULL) {
688 	if (uri->path != NULL) xmlFree(uri->path);
689         if (cur != *str) {
690             if (uri->cleanup & 2)
691                 uri->path = STRNDUP(*str, cur - *str);
692             else
693                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
694         } else {
695             uri->path = NULL;
696         }
697     }
698     *str = cur;
699     return (0);
700 }
701 
702 /**
703  * xmlParse3986PathNoScheme:
704  * @uri:  pointer to an URI structure
705  * @str:  the string to analyze
706  *
707  * Parse an path which is not a scheme and fills in the appropriate fields
708  * of the @uri structure
709  *
710  * path-noscheme = segment-nz-nc *( "/" segment )
711  *
712  * Returns 0 or the error code
713  */
714 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)715 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
716 {
717     const char *cur;
718     int ret;
719 
720     cur = *str;
721 
722     ret = xmlParse3986Segment(&cur, ':', 0);
723     if (ret != 0) return(ret);
724     while (*cur == '/') {
725         cur++;
726 	ret = xmlParse3986Segment(&cur, 0, 1);
727 	if (ret != 0) return(ret);
728     }
729     if (uri != NULL) {
730 	if (uri->path != NULL) xmlFree(uri->path);
731         if (cur != *str) {
732             if (uri->cleanup & 2)
733                 uri->path = STRNDUP(*str, cur - *str);
734             else
735                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
736         } else {
737             uri->path = NULL;
738         }
739     }
740     *str = cur;
741     return (0);
742 }
743 
744 /**
745  * xmlParse3986HierPart:
746  * @uri:  pointer to an URI structure
747  * @str:  the string to analyze
748  *
749  * Parse an hierarchical part and fills in the appropriate fields
750  * of the @uri structure
751  *
752  * hier-part     = "//" authority path-abempty
753  *                / path-absolute
754  *                / path-rootless
755  *                / path-empty
756  *
757  * Returns 0 or the error code
758  */
759 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)760 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
761 {
762     const char *cur;
763     int ret;
764 
765     cur = *str;
766 
767     if ((*cur == '/') && (*(cur + 1) == '/')) {
768         cur += 2;
769 	ret = xmlParse3986Authority(uri, &cur);
770 	if (ret != 0) return(ret);
771 	if (uri->server == NULL)
772 	    uri->port = -1;
773 	ret = xmlParse3986PathAbEmpty(uri, &cur);
774 	if (ret != 0) return(ret);
775 	*str = cur;
776 	return(0);
777     } else if (*cur == '/') {
778         ret = xmlParse3986PathAbsolute(uri, &cur);
779 	if (ret != 0) return(ret);
780     } else if (ISA_PCHAR(cur)) {
781         ret = xmlParse3986PathRootless(uri, &cur);
782 	if (ret != 0) return(ret);
783     } else {
784 	/* path-empty is effectively empty */
785 	if (uri != NULL) {
786 	    if (uri->path != NULL) xmlFree(uri->path);
787 	    uri->path = NULL;
788 	}
789     }
790     *str = cur;
791     return (0);
792 }
793 
794 /**
795  * xmlParse3986RelativeRef:
796  * @uri:  pointer to an URI structure
797  * @str:  the string to analyze
798  *
799  * Parse an URI string and fills in the appropriate fields
800  * of the @uri structure
801  *
802  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
803  * relative-part = "//" authority path-abempty
804  *               / path-absolute
805  *               / path-noscheme
806  *               / path-empty
807  *
808  * Returns 0 or the error code
809  */
810 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)811 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
812     int ret;
813 
814     if ((*str == '/') && (*(str + 1) == '/')) {
815         str += 2;
816 	ret = xmlParse3986Authority(uri, &str);
817 	if (ret != 0) return(ret);
818 	ret = xmlParse3986PathAbEmpty(uri, &str);
819 	if (ret != 0) return(ret);
820     } else if (*str == '/') {
821 	ret = xmlParse3986PathAbsolute(uri, &str);
822 	if (ret != 0) return(ret);
823     } else if (ISA_PCHAR(str)) {
824         ret = xmlParse3986PathNoScheme(uri, &str);
825 	if (ret != 0) return(ret);
826     } else {
827 	/* path-empty is effectively empty */
828 	if (uri != NULL) {
829 	    if (uri->path != NULL) xmlFree(uri->path);
830 	    uri->path = NULL;
831 	}
832     }
833 
834     if (*str == '?') {
835 	str++;
836 	ret = xmlParse3986Query(uri, &str);
837 	if (ret != 0) return(ret);
838     }
839     if (*str == '#') {
840 	str++;
841 	ret = xmlParse3986Fragment(uri, &str);
842 	if (ret != 0) return(ret);
843     }
844     if (*str != 0) {
845 	xmlCleanURI(uri);
846 	return(1);
847     }
848     return(0);
849 }
850 
851 
852 /**
853  * xmlParse3986URI:
854  * @uri:  pointer to an URI structure
855  * @str:  the string to analyze
856  *
857  * Parse an URI string and fills in the appropriate fields
858  * of the @uri structure
859  *
860  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
861  *
862  * Returns 0 or the error code
863  */
864 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)865 xmlParse3986URI(xmlURIPtr uri, const char *str) {
866     int ret;
867 
868     ret = xmlParse3986Scheme(uri, &str);
869     if (ret != 0) return(ret);
870     if (*str != ':') {
871 	return(1);
872     }
873     str++;
874     ret = xmlParse3986HierPart(uri, &str);
875     if (ret != 0) return(ret);
876     if (*str == '?') {
877 	str++;
878 	ret = xmlParse3986Query(uri, &str);
879 	if (ret != 0) return(ret);
880     }
881     if (*str == '#') {
882 	str++;
883 	ret = xmlParse3986Fragment(uri, &str);
884 	if (ret != 0) return(ret);
885     }
886     if (*str != 0) {
887 	xmlCleanURI(uri);
888 	return(1);
889     }
890     return(0);
891 }
892 
893 /**
894  * xmlParse3986URIReference:
895  * @uri:  pointer to an URI structure
896  * @str:  the string to analyze
897  *
898  * Parse an URI reference string and fills in the appropriate fields
899  * of the @uri structure
900  *
901  * URI-reference = URI / relative-ref
902  *
903  * Returns 0 or the error code
904  */
905 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)906 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
907     int ret;
908 
909     if (str == NULL)
910 	return(-1);
911     xmlCleanURI(uri);
912 
913     /*
914      * Try first to parse absolute refs, then fallback to relative if
915      * it fails.
916      */
917     ret = xmlParse3986URI(uri, str);
918     if (ret != 0) {
919 	xmlCleanURI(uri);
920         ret = xmlParse3986RelativeRef(uri, str);
921 	if (ret != 0) {
922 	    xmlCleanURI(uri);
923 	    return(ret);
924 	}
925     }
926     return(0);
927 }
928 
929 /**
930  * xmlParseURI:
931  * @str:  the URI string to analyze
932  *
933  * Parse an URI based on RFC 3986
934  *
935  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
936  *
937  * Returns a newly built xmlURIPtr or NULL in case of error
938  */
939 xmlURIPtr
xmlParseURI(const char * str)940 xmlParseURI(const char *str) {
941     xmlURIPtr uri;
942     int ret;
943 
944     if (str == NULL)
945 	return(NULL);
946     uri = xmlCreateURI();
947     if (uri != NULL) {
948 	ret = xmlParse3986URIReference(uri, str);
949         if (ret) {
950 	    xmlFreeURI(uri);
951 	    return(NULL);
952 	}
953     }
954     return(uri);
955 }
956 
957 /**
958  * xmlParseURIReference:
959  * @uri:  pointer to an URI structure
960  * @str:  the string to analyze
961  *
962  * Parse an URI reference string based on RFC 3986 and fills in the
963  * appropriate fields of the @uri structure
964  *
965  * URI-reference = URI / relative-ref
966  *
967  * Returns 0 or the error code
968  */
969 int
xmlParseURIReference(xmlURIPtr uri,const char * str)970 xmlParseURIReference(xmlURIPtr uri, const char *str) {
971     return(xmlParse3986URIReference(uri, str));
972 }
973 
974 /**
975  * xmlParseURIRaw:
976  * @str:  the URI string to analyze
977  * @raw:  if 1 unescaping of URI pieces are disabled
978  *
979  * Parse an URI but allows to keep intact the original fragments.
980  *
981  * URI-reference = URI / relative-ref
982  *
983  * Returns a newly built xmlURIPtr or NULL in case of error
984  */
985 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)986 xmlParseURIRaw(const char *str, int raw) {
987     xmlURIPtr uri;
988     int ret;
989 
990     if (str == NULL)
991 	return(NULL);
992     uri = xmlCreateURI();
993     if (uri != NULL) {
994         if (raw) {
995 	    uri->cleanup |= 2;
996 	}
997 	ret = xmlParseURIReference(uri, str);
998         if (ret) {
999 	    xmlFreeURI(uri);
1000 	    return(NULL);
1001 	}
1002     }
1003     return(uri);
1004 }
1005 
1006 /************************************************************************
1007  *									*
1008  *			Generic URI structure functions			*
1009  *									*
1010  ************************************************************************/
1011 
1012 /**
1013  * xmlCreateURI:
1014  *
1015  * Simply creates an empty xmlURI
1016  *
1017  * Returns the new structure or NULL in case of error
1018  */
1019 xmlURIPtr
xmlCreateURI(void)1020 xmlCreateURI(void) {
1021     xmlURIPtr ret;
1022 
1023     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1024     if (ret == NULL) {
1025         xmlURIErrMemory("creating URI structure\n");
1026 	return(NULL);
1027     }
1028     memset(ret, 0, sizeof(xmlURI));
1029     return(ret);
1030 }
1031 
1032 /**
1033  * xmlSaveUriRealloc:
1034  *
1035  * Function to handle properly a reallocation when saving an URI
1036  * Also imposes some limit on the length of an URI string output
1037  */
1038 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1039 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1040     xmlChar *temp;
1041     int tmp;
1042 
1043     if (*max > MAX_URI_LENGTH) {
1044         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1045         return(NULL);
1046     }
1047     tmp = *max * 2;
1048     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1049     if (temp == NULL) {
1050         xmlURIErrMemory("saving URI\n");
1051         return(NULL);
1052     }
1053     *max = tmp;
1054     return(temp);
1055 }
1056 
1057 /**
1058  * xmlSaveUri:
1059  * @uri:  pointer to an xmlURI
1060  *
1061  * Save the URI as an escaped string
1062  *
1063  * Returns a new string (to be deallocated by caller)
1064  */
1065 xmlChar *
xmlSaveUri(xmlURIPtr uri)1066 xmlSaveUri(xmlURIPtr uri) {
1067     xmlChar *ret = NULL;
1068     xmlChar *temp;
1069     const char *p;
1070     int len;
1071     int max;
1072 
1073     if (uri == NULL) return(NULL);
1074 
1075 
1076     max = 80;
1077     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1078     if (ret == NULL) {
1079         xmlURIErrMemory("saving URI\n");
1080 	return(NULL);
1081     }
1082     len = 0;
1083 
1084     if (uri->scheme != NULL) {
1085 	p = uri->scheme;
1086 	while (*p != 0) {
1087 	    if (len >= max) {
1088                 temp = xmlSaveUriRealloc(ret, &max);
1089                 if (temp == NULL) goto mem_error;
1090 		ret = temp;
1091 	    }
1092 	    ret[len++] = *p++;
1093 	}
1094 	if (len >= max) {
1095             temp = xmlSaveUriRealloc(ret, &max);
1096             if (temp == NULL) goto mem_error;
1097             ret = temp;
1098 	}
1099 	ret[len++] = ':';
1100     }
1101     if (uri->opaque != NULL) {
1102 	p = uri->opaque;
1103 	while (*p != 0) {
1104 	    if (len + 3 >= max) {
1105                 temp = xmlSaveUriRealloc(ret, &max);
1106                 if (temp == NULL) goto mem_error;
1107                 ret = temp;
1108 	    }
1109 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1110 		ret[len++] = *p++;
1111 	    else {
1112 		int val = *(unsigned char *)p++;
1113 		int hi = val / 0x10, lo = val % 0x10;
1114 		ret[len++] = '%';
1115 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1116 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1117 	    }
1118 	}
1119     } else {
1120 	if ((uri->server != NULL) || (uri->port == -1)) {
1121 	    if (len + 3 >= max) {
1122                 temp = xmlSaveUriRealloc(ret, &max);
1123                 if (temp == NULL) goto mem_error;
1124                 ret = temp;
1125 	    }
1126 	    ret[len++] = '/';
1127 	    ret[len++] = '/';
1128 	    if (uri->user != NULL) {
1129 		p = uri->user;
1130 		while (*p != 0) {
1131 		    if (len + 3 >= max) {
1132                         temp = xmlSaveUriRealloc(ret, &max);
1133                         if (temp == NULL) goto mem_error;
1134                         ret = temp;
1135 		    }
1136 		    if ((IS_UNRESERVED(*(p))) ||
1137 			((*(p) == ';')) || ((*(p) == ':')) ||
1138 			((*(p) == '&')) || ((*(p) == '=')) ||
1139 			((*(p) == '+')) || ((*(p) == '$')) ||
1140 			((*(p) == ',')))
1141 			ret[len++] = *p++;
1142 		    else {
1143 			int val = *(unsigned char *)p++;
1144 			int hi = val / 0x10, lo = val % 0x10;
1145 			ret[len++] = '%';
1146 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1147 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1148 		    }
1149 		}
1150 		if (len + 3 >= max) {
1151                     temp = xmlSaveUriRealloc(ret, &max);
1152                     if (temp == NULL) goto mem_error;
1153                     ret = temp;
1154 		}
1155 		ret[len++] = '@';
1156 	    }
1157 	    if (uri->server != NULL) {
1158 		p = uri->server;
1159 		while (*p != 0) {
1160 		    if (len >= max) {
1161 			temp = xmlSaveUriRealloc(ret, &max);
1162 			if (temp == NULL) goto mem_error;
1163 			ret = temp;
1164 		    }
1165 		    ret[len++] = *p++;
1166 		}
1167 		if (uri->port > 0) {
1168 		    if (len + 10 >= max) {
1169 			temp = xmlSaveUriRealloc(ret, &max);
1170 			if (temp == NULL) goto mem_error;
1171 			ret = temp;
1172 		    }
1173 		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1174 		}
1175 	    }
1176 	} else if (uri->authority != NULL) {
1177 	    if (len + 3 >= max) {
1178                 temp = xmlSaveUriRealloc(ret, &max);
1179                 if (temp == NULL) goto mem_error;
1180                 ret = temp;
1181 	    }
1182 	    ret[len++] = '/';
1183 	    ret[len++] = '/';
1184 	    p = uri->authority;
1185 	    while (*p != 0) {
1186 		if (len + 3 >= max) {
1187                     temp = xmlSaveUriRealloc(ret, &max);
1188                     if (temp == NULL) goto mem_error;
1189                     ret = temp;
1190 		}
1191 		if ((IS_UNRESERVED(*(p))) ||
1192                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1193                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1194                     ((*(p) == '=')) || ((*(p) == '+')))
1195 		    ret[len++] = *p++;
1196 		else {
1197 		    int val = *(unsigned char *)p++;
1198 		    int hi = val / 0x10, lo = val % 0x10;
1199 		    ret[len++] = '%';
1200 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1201 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1202 		}
1203 	    }
1204 	} else if (uri->scheme != NULL) {
1205 	    if (len + 3 >= max) {
1206                 temp = xmlSaveUriRealloc(ret, &max);
1207                 if (temp == NULL) goto mem_error;
1208                 ret = temp;
1209 	    }
1210 	    ret[len++] = '/';
1211 	    ret[len++] = '/';
1212 	}
1213 	if (uri->path != NULL) {
1214 	    p = uri->path;
1215 	    /*
1216 	     * the colon in file:///d: should not be escaped or
1217 	     * Windows accesses fail later.
1218 	     */
1219 	    if ((uri->scheme != NULL) &&
1220 		(p[0] == '/') &&
1221 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1222 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1223 		(p[2] == ':') &&
1224 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1225 		if (len + 3 >= max) {
1226                     temp = xmlSaveUriRealloc(ret, &max);
1227                     if (temp == NULL) goto mem_error;
1228                     ret = temp;
1229 		}
1230 		ret[len++] = *p++;
1231 		ret[len++] = *p++;
1232 		ret[len++] = *p++;
1233 	    }
1234 	    while (*p != 0) {
1235 		if (len + 3 >= max) {
1236                     temp = xmlSaveUriRealloc(ret, &max);
1237                     if (temp == NULL) goto mem_error;
1238                     ret = temp;
1239 		}
1240 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1241                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1242 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1243 	            ((*(p) == ',')))
1244 		    ret[len++] = *p++;
1245 		else {
1246 		    int val = *(unsigned char *)p++;
1247 		    int hi = val / 0x10, lo = val % 0x10;
1248 		    ret[len++] = '%';
1249 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1250 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1251 		}
1252 	    }
1253 	}
1254 	if (uri->query_raw != NULL) {
1255 	    if (len + 1 >= max) {
1256                 temp = xmlSaveUriRealloc(ret, &max);
1257                 if (temp == NULL) goto mem_error;
1258                 ret = temp;
1259 	    }
1260 	    ret[len++] = '?';
1261 	    p = uri->query_raw;
1262 	    while (*p != 0) {
1263 		if (len + 1 >= max) {
1264                     temp = xmlSaveUriRealloc(ret, &max);
1265                     if (temp == NULL) goto mem_error;
1266                     ret = temp;
1267 		}
1268 		ret[len++] = *p++;
1269 	    }
1270 	} else if (uri->query != NULL) {
1271 	    if (len + 3 >= max) {
1272                 temp = xmlSaveUriRealloc(ret, &max);
1273                 if (temp == NULL) goto mem_error;
1274                 ret = temp;
1275 	    }
1276 	    ret[len++] = '?';
1277 	    p = uri->query;
1278 	    while (*p != 0) {
1279 		if (len + 3 >= max) {
1280                     temp = xmlSaveUriRealloc(ret, &max);
1281                     if (temp == NULL) goto mem_error;
1282                     ret = temp;
1283 		}
1284 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1285 		    ret[len++] = *p++;
1286 		else {
1287 		    int val = *(unsigned char *)p++;
1288 		    int hi = val / 0x10, lo = val % 0x10;
1289 		    ret[len++] = '%';
1290 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1291 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1292 		}
1293 	    }
1294 	}
1295     }
1296     if (uri->fragment != NULL) {
1297 	if (len + 3 >= max) {
1298             temp = xmlSaveUriRealloc(ret, &max);
1299             if (temp == NULL) goto mem_error;
1300             ret = temp;
1301 	}
1302 	ret[len++] = '#';
1303 	p = uri->fragment;
1304 	while (*p != 0) {
1305 	    if (len + 3 >= max) {
1306                 temp = xmlSaveUriRealloc(ret, &max);
1307                 if (temp == NULL) goto mem_error;
1308                 ret = temp;
1309 	    }
1310 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1311 		ret[len++] = *p++;
1312 	    else {
1313 		int val = *(unsigned char *)p++;
1314 		int hi = val / 0x10, lo = val % 0x10;
1315 		ret[len++] = '%';
1316 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1317 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1318 	    }
1319 	}
1320     }
1321     if (len >= max) {
1322         temp = xmlSaveUriRealloc(ret, &max);
1323         if (temp == NULL) goto mem_error;
1324         ret = temp;
1325     }
1326     ret[len] = 0;
1327     return(ret);
1328 
1329 mem_error:
1330     xmlFree(ret);
1331     return(NULL);
1332 }
1333 
1334 /**
1335  * xmlPrintURI:
1336  * @stream:  a FILE* for the output
1337  * @uri:  pointer to an xmlURI
1338  *
1339  * Prints the URI in the stream @stream.
1340  */
1341 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1342 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1343     xmlChar *out;
1344 
1345     out = xmlSaveUri(uri);
1346     if (out != NULL) {
1347 	fprintf(stream, "%s", (char *) out);
1348 	xmlFree(out);
1349     }
1350 }
1351 
1352 /**
1353  * xmlCleanURI:
1354  * @uri:  pointer to an xmlURI
1355  *
1356  * Make sure the xmlURI struct is free of content
1357  */
1358 static void
xmlCleanURI(xmlURIPtr uri)1359 xmlCleanURI(xmlURIPtr uri) {
1360     if (uri == NULL) return;
1361 
1362     if (uri->scheme != NULL) xmlFree(uri->scheme);
1363     uri->scheme = NULL;
1364     if (uri->server != NULL) xmlFree(uri->server);
1365     uri->server = NULL;
1366     if (uri->user != NULL) xmlFree(uri->user);
1367     uri->user = NULL;
1368     if (uri->path != NULL) xmlFree(uri->path);
1369     uri->path = NULL;
1370     if (uri->fragment != NULL) xmlFree(uri->fragment);
1371     uri->fragment = NULL;
1372     if (uri->opaque != NULL) xmlFree(uri->opaque);
1373     uri->opaque = NULL;
1374     if (uri->authority != NULL) xmlFree(uri->authority);
1375     uri->authority = NULL;
1376     if (uri->query != NULL) xmlFree(uri->query);
1377     uri->query = NULL;
1378     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1379     uri->query_raw = NULL;
1380 }
1381 
1382 /**
1383  * xmlFreeURI:
1384  * @uri:  pointer to an xmlURI
1385  *
1386  * Free up the xmlURI struct
1387  */
1388 void
xmlFreeURI(xmlURIPtr uri)1389 xmlFreeURI(xmlURIPtr uri) {
1390     if (uri == NULL) return;
1391 
1392     if (uri->scheme != NULL) xmlFree(uri->scheme);
1393     if (uri->server != NULL) xmlFree(uri->server);
1394     if (uri->user != NULL) xmlFree(uri->user);
1395     if (uri->path != NULL) xmlFree(uri->path);
1396     if (uri->fragment != NULL) xmlFree(uri->fragment);
1397     if (uri->opaque != NULL) xmlFree(uri->opaque);
1398     if (uri->authority != NULL) xmlFree(uri->authority);
1399     if (uri->query != NULL) xmlFree(uri->query);
1400     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1401     xmlFree(uri);
1402 }
1403 
1404 /************************************************************************
1405  *									*
1406  *			Helper functions				*
1407  *									*
1408  ************************************************************************/
1409 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * As a side effect, runs of '/' following a segment are collapsed into a
 * single '/'; a leading run of '/' is skipped and not collapsed.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the original string, "out" writes the compacted one;
     * out never gets ahead of cur.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last '/' of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Valgrind complained, strcpy(cur, segp + 3); */
        /* string will overlap, do not use strcpy */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /*
         * Walk backwards over the '/' separating us from the previous
         * segment.  Reaching "path" is not enough to conclude that there
         * is no previous segment: a one character segment stored at
         * path[0] (as in "a/b/../..") also stops the walk there.  Only
         * keep going from "cur" when nothing but '/' precedes it.
         */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if ((segp == path) && (segp[0] == '/'))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /*
     * "out" still marks the end of the string produced by the (c)/(d)
     * pass; the string can only have shrunk since then, so this store
     * lands on or after the current terminator.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1598 
static int is_hex(char c) {
    /* 1 when c is one of 0-9, a-f or A-F, 0 otherwise */
    int digit = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return((digit || lower || upper) ? 1 : 0);
}
1606 
/*
 * xmlURIHexValue:
 * @c:  a character already accepted by is_hex()
 *
 * Returns the numeric value (0-15) of the hexadecimal digit @c
 */
static int
xmlURIHexValue(char c) {
    if ((c >= '0') && (c <= '9'))
        return(c - '0');
    if ((c >= 'a') && (c <= 'f'))
        return((c - 'a') + 10);
    return((c - 'A') + 10);
}

/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding)
 * Note that the length of the result can only be smaller or same size as
 * the input string.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated, otherwise
 * the result is written to @target. A '%' which is not followed by two
 * hexadecimal digits within the @len bytes is copied unchanged.
 *
 * Returns a copy of the string, but unescaped, will return NULL only in case
 * of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) {
        size_t full = strlen(str);

        /* len + 1 bytes are needed below, both must fit in an int */
        if (full >= (size_t) INT_MAX)
            return(NULL);
        len = (int) full;
    }

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;
    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            /* build the byte in an int, then narrow it explicitly */
            int c = xmlURIHexValue(in[1]) * 16 + xmlURIHexValue(in[2]);

            *out++ = (char) c;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1668 
1669 /**
1670  * xmlURIEscapeStr:
1671  * @str:  string to escape
1672  * @list: exception list string of chars not to escape
1673  *
1674  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1675  * and the characters in the exception list.
1676  *
1677  * Returns a new escaped string or NULL in case of error.
1678  */
1679 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1680 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1681     xmlChar *ret, ch;
1682     xmlChar *temp;
1683     const xmlChar *in;
1684     int len, out;
1685 
1686     if (str == NULL)
1687 	return(NULL);
1688     if (str[0] == 0)
1689 	return(xmlStrdup(str));
1690     len = xmlStrlen(str);
1691     if (!(len > 0)) return(NULL);
1692 
1693     len += 20;
1694     ret = (xmlChar *) xmlMallocAtomic(len);
1695     if (ret == NULL) {
1696         xmlURIErrMemory("escaping URI value\n");
1697 	return(NULL);
1698     }
1699     in = (const xmlChar *) str;
1700     out = 0;
1701     while(*in != 0) {
1702 	if (len - out <= 3) {
1703             temp = xmlSaveUriRealloc(ret, &len);
1704 	    if (temp == NULL) {
1705                 xmlURIErrMemory("escaping URI value\n");
1706 		xmlFree(ret);
1707 		return(NULL);
1708 	    }
1709 	    ret = temp;
1710 	}
1711 
1712 	ch = *in;
1713 
1714 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1715 	    unsigned char val;
1716 	    ret[out++] = '%';
1717 	    val = ch >> 4;
1718 	    if (val <= 9)
1719 		ret[out++] = '0' + val;
1720 	    else
1721 		ret[out++] = 'A' + val - 0xA;
1722 	    val = ch & 0xF;
1723 	    if (val <= 9)
1724 		ret[out++] = '0' + val;
1725 	    else
1726 		ret[out++] = 'A' + val - 0xA;
1727 	    in++;
1728 	} else {
1729 	    ret[out++] = *in++;
1730 	}
1731 
1732     }
1733     ret[out] = 0;
1734     return(ret);
1735 }
1736 
1737 /**
1738  * xmlURIEscape:
1739  * @str:  the string of the URI to escape
1740  *
1741  * Escaping routine, does not do validity checks !
1742  * It will try to escape the chars needing this, but this is heuristic
1743  * based it's impossible to be sure.
1744  *
1745  * Returns an copy of the string, but escaped
1746  *
1747  * 25 May 2001
1748  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1749  * according to RFC2396.
1750  *   - Carl Douglas
1751  */
1752 xmlChar *
xmlURIEscape(const xmlChar * str)1753 xmlURIEscape(const xmlChar * str)
1754 {
1755     xmlChar *ret, *segment = NULL;
1756     xmlURIPtr uri;
1757     int ret2;
1758 
1759     if (str == NULL)
1760         return (NULL);
1761 
1762     uri = xmlCreateURI();
1763     if (uri != NULL) {
1764 	/*
1765 	 * Allow escaping errors in the unescaped form
1766 	 */
1767         uri->cleanup = 1;
1768         ret2 = xmlParseURIReference(uri, (const char *)str);
1769         if (ret2) {
1770             xmlFreeURI(uri);
1771             return (NULL);
1772         }
1773     }
1774 
1775     if (!uri)
1776         return NULL;
1777 
1778     ret = NULL;
1779 
1780 #define NULLCHK(p) if(!p) { \
1781          xmlURIErrMemory("escaping URI value\n"); \
1782          xmlFreeURI(uri); \
1783          xmlFree(ret); \
1784          return NULL; } \
1785 
1786     if (uri->scheme) {
1787         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1788         NULLCHK(segment)
1789         ret = xmlStrcat(ret, segment);
1790         ret = xmlStrcat(ret, BAD_CAST ":");
1791         xmlFree(segment);
1792     }
1793 
1794     if (uri->authority) {
1795         segment =
1796             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1797         NULLCHK(segment)
1798         ret = xmlStrcat(ret, BAD_CAST "//");
1799         ret = xmlStrcat(ret, segment);
1800         xmlFree(segment);
1801     }
1802 
1803     if (uri->user) {
1804         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1805         NULLCHK(segment)
1806         ret = xmlStrcat(ret,BAD_CAST "//");
1807         ret = xmlStrcat(ret, segment);
1808         ret = xmlStrcat(ret, BAD_CAST "@");
1809         xmlFree(segment);
1810     }
1811 
1812     if (uri->server) {
1813         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1814         NULLCHK(segment)
1815         if (uri->user == NULL)
1816             ret = xmlStrcat(ret, BAD_CAST "//");
1817         ret = xmlStrcat(ret, segment);
1818         xmlFree(segment);
1819     }
1820 
1821     if (uri->port) {
1822         xmlChar port[10];
1823 
1824         snprintf((char *) port, 10, "%d", uri->port);
1825         ret = xmlStrcat(ret, BAD_CAST ":");
1826         ret = xmlStrcat(ret, port);
1827     }
1828 
1829     if (uri->path) {
1830         segment =
1831             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1832         NULLCHK(segment)
1833         ret = xmlStrcat(ret, segment);
1834         xmlFree(segment);
1835     }
1836 
1837     if (uri->query_raw) {
1838         ret = xmlStrcat(ret, BAD_CAST "?");
1839         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1840     }
1841     else if (uri->query) {
1842         segment =
1843             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1844         NULLCHK(segment)
1845         ret = xmlStrcat(ret, BAD_CAST "?");
1846         ret = xmlStrcat(ret, segment);
1847         xmlFree(segment);
1848     }
1849 
1850     if (uri->opaque) {
1851         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1852         NULLCHK(segment)
1853         ret = xmlStrcat(ret, segment);
1854         xmlFree(segment);
1855     }
1856 
1857     if (uri->fragment) {
1858         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1859         NULLCHK(segment)
1860         ret = xmlStrcat(ret, BAD_CAST "#");
1861         ret = xmlStrcat(ret, segment);
1862         xmlFree(segment);
1863     }
1864 
1865     xmlFreeURI(uri);
1866 #undef NULLCHK
1867 
1868     return (ret);
1869 }
1870 
1871 /************************************************************************
1872  *									*
1873  *			Public functions				*
1874  *									*
1875  ************************************************************************/
1876 
1877 /**
1878  * xmlBuildURI:
1879  * @URI:  the URI instance found in the document
1880  * @base:  the base value
1881  *
 * Computes the final URI of the reference done by checking that
1883  * the given URI is valid, and building the final URI using the
1884  * base URI. This is processed according to section 5.2 of the
1885  * RFC 2396
1886  *
1887  * 5.2. Resolving Relative References to Absolute Form
1888  *
1889  * Returns a new URI string (to be freed by the caller) or NULL in case
1890  *         of error.
1891  */
1892 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1893 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1894     xmlChar *val = NULL;
1895     int ret, len, indx, cur, out;
1896     xmlURIPtr ref = NULL;
1897     xmlURIPtr bas = NULL;
1898     xmlURIPtr res = NULL;
1899 
1900     /*
1901      * 1) The URI reference is parsed into the potential four components and
1902      *    fragment identifier, as described in Section 4.3.
1903      *
1904      *    NOTE that a completely empty URI is treated by modern browsers
1905      *    as a reference to "." rather than as a synonym for the current
1906      *    URI.  Should we do that here?
1907      */
1908     if (URI == NULL)
1909 	ret = -1;
1910     else {
1911 	if (*URI) {
1912 	    ref = xmlCreateURI();
1913 	    if (ref == NULL)
1914 		goto done;
1915 	    ret = xmlParseURIReference(ref, (const char *) URI);
1916 	}
1917 	else
1918 	    ret = 0;
1919     }
1920     if (ret != 0)
1921 	goto done;
1922     if ((ref != NULL) && (ref->scheme != NULL)) {
1923 	/*
1924 	 * The URI is absolute don't modify.
1925 	 */
1926 	val = xmlStrdup(URI);
1927 	goto done;
1928     }
1929     if (base == NULL)
1930 	ret = -1;
1931     else {
1932 	bas = xmlCreateURI();
1933 	if (bas == NULL)
1934 	    goto done;
1935 	ret = xmlParseURIReference(bas, (const char *) base);
1936     }
1937     if (ret != 0) {
1938 	if (ref)
1939 	    val = xmlSaveUri(ref);
1940 	goto done;
1941     }
1942     if (ref == NULL) {
1943 	/*
1944 	 * the base fragment must be ignored
1945 	 */
1946 	if (bas->fragment != NULL) {
1947 	    xmlFree(bas->fragment);
1948 	    bas->fragment = NULL;
1949 	}
1950 	val = xmlSaveUri(bas);
1951 	goto done;
1952     }
1953 
1954     /*
1955      * 2) If the path component is empty and the scheme, authority, and
1956      *    query components are undefined, then it is a reference to the
1957      *    current document and we are done.  Otherwise, the reference URI's
1958      *    query and fragment components are defined as found (or not found)
1959      *    within the URI reference and not inherited from the base URI.
1960      *
1961      *    NOTE that in modern browsers, the parsing differs from the above
1962      *    in the following aspect:  the query component is allowed to be
1963      *    defined while still treating this as a reference to the current
1964      *    document.
1965      */
1966     res = xmlCreateURI();
1967     if (res == NULL)
1968 	goto done;
1969     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1970 	((ref->authority == NULL) && (ref->server == NULL))) {
1971 	if (bas->scheme != NULL)
1972 	    res->scheme = xmlMemStrdup(bas->scheme);
1973 	if (bas->authority != NULL)
1974 	    res->authority = xmlMemStrdup(bas->authority);
1975 	else if ((bas->server != NULL) || (bas->port == -1)) {
1976 	    if (bas->server != NULL)
1977 		res->server = xmlMemStrdup(bas->server);
1978 	    if (bas->user != NULL)
1979 		res->user = xmlMemStrdup(bas->user);
1980 	    res->port = bas->port;
1981 	}
1982 	if (bas->path != NULL)
1983 	    res->path = xmlMemStrdup(bas->path);
1984 	if (ref->query_raw != NULL)
1985 	    res->query_raw = xmlMemStrdup (ref->query_raw);
1986 	else if (ref->query != NULL)
1987 	    res->query = xmlMemStrdup(ref->query);
1988 	else if (bas->query_raw != NULL)
1989 	    res->query_raw = xmlMemStrdup(bas->query_raw);
1990 	else if (bas->query != NULL)
1991 	    res->query = xmlMemStrdup(bas->query);
1992 	if (ref->fragment != NULL)
1993 	    res->fragment = xmlMemStrdup(ref->fragment);
1994 	goto step_7;
1995     }
1996 
1997     /*
1998      * 3) If the scheme component is defined, indicating that the reference
1999      *    starts with a scheme name, then the reference is interpreted as an
2000      *    absolute URI and we are done.  Otherwise, the reference URI's
2001      *    scheme is inherited from the base URI's scheme component.
2002      */
2003     if (ref->scheme != NULL) {
2004 	val = xmlSaveUri(ref);
2005 	goto done;
2006     }
2007     if (bas->scheme != NULL)
2008 	res->scheme = xmlMemStrdup(bas->scheme);
2009 
2010     if (ref->query_raw != NULL)
2011 	res->query_raw = xmlMemStrdup(ref->query_raw);
2012     else if (ref->query != NULL)
2013 	res->query = xmlMemStrdup(ref->query);
2014     if (ref->fragment != NULL)
2015 	res->fragment = xmlMemStrdup(ref->fragment);
2016 
2017     /*
2018      * 4) If the authority component is defined, then the reference is a
2019      *    network-path and we skip to step 7.  Otherwise, the reference
2020      *    URI's authority is inherited from the base URI's authority
2021      *    component, which will also be undefined if the URI scheme does not
2022      *    use an authority component.
2023      */
2024     if ((ref->authority != NULL) || (ref->server != NULL)) {
2025 	if (ref->authority != NULL)
2026 	    res->authority = xmlMemStrdup(ref->authority);
2027 	else {
2028 	    res->server = xmlMemStrdup(ref->server);
2029 	    if (ref->user != NULL)
2030 		res->user = xmlMemStrdup(ref->user);
2031             res->port = ref->port;
2032 	}
2033 	if (ref->path != NULL)
2034 	    res->path = xmlMemStrdup(ref->path);
2035 	goto step_7;
2036     }
2037     if (bas->authority != NULL)
2038 	res->authority = xmlMemStrdup(bas->authority);
2039     else if ((bas->server != NULL) || (bas->port == -1)) {
2040 	if (bas->server != NULL)
2041 	    res->server = xmlMemStrdup(bas->server);
2042 	if (bas->user != NULL)
2043 	    res->user = xmlMemStrdup(bas->user);
2044 	res->port = bas->port;
2045     }
2046 
2047     /*
2048      * 5) If the path component begins with a slash character ("/"), then
2049      *    the reference is an absolute-path and we skip to step 7.
2050      */
2051     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2052 	res->path = xmlMemStrdup(ref->path);
2053 	goto step_7;
2054     }
2055 
2056 
2057     /*
2058      * 6) If this step is reached, then we are resolving a relative-path
2059      *    reference.  The relative path needs to be merged with the base
2060      *    URI's path.  Although there are many ways to do this, we will
2061      *    describe a simple method using a separate string buffer.
2062      *
2063      * Allocate a buffer large enough for the result string.
2064      */
2065     len = 2; /* extra / and 0 */
2066     if (ref->path != NULL)
2067 	len += strlen(ref->path);
2068     if (bas->path != NULL)
2069 	len += strlen(bas->path);
2070     res->path = (char *) xmlMallocAtomic(len);
2071     if (res->path == NULL) {
2072         xmlURIErrMemory("resolving URI against base\n");
2073 	goto done;
2074     }
2075     res->path[0] = 0;
2076 
2077     /*
2078      * a) All but the last segment of the base URI's path component is
2079      *    copied to the buffer.  In other words, any characters after the
2080      *    last (right-most) slash character, if any, are excluded.
2081      */
2082     cur = 0;
2083     out = 0;
2084     if (bas->path != NULL) {
2085 	while (bas->path[cur] != 0) {
2086 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2087 		cur++;
2088 	    if (bas->path[cur] == 0)
2089 		break;
2090 
2091 	    cur++;
2092 	    while (out < cur) {
2093 		res->path[out] = bas->path[out];
2094 		out++;
2095 	    }
2096 	}
2097     }
2098     res->path[out] = 0;
2099 
2100     /*
2101      * b) The reference's path component is appended to the buffer
2102      *    string.
2103      */
2104     if (ref->path != NULL && ref->path[0] != 0) {
2105 	indx = 0;
2106 	/*
2107 	 * Ensure the path includes a '/'
2108 	 */
2109 	if ((out == 0) && (bas->server != NULL))
2110 	    res->path[out++] = '/';
2111 	while (ref->path[indx] != 0) {
2112 	    res->path[out++] = ref->path[indx++];
2113 	}
2114     }
2115     res->path[out] = 0;
2116 
2117     /*
2118      * Steps c) to h) are really path normalization steps
2119      */
2120     xmlNormalizeURIPath(res->path);
2121 
2122 step_7:
2123 
2124     /*
2125      * 7) The resulting URI components, including any inherited from the
2126      *    base URI, are recombined to give the absolute form of the URI
2127      *    reference.
2128      */
2129     val = xmlSaveUri(res);
2130 
2131 done:
2132     if (ref != NULL)
2133 	xmlFreeURI(ref);
2134     if (bas != NULL)
2135 	xmlFreeURI(bas);
2136     if (res != NULL)
2137 	xmlFreeURI(res);
2138     return(val);
2139 }
2140 
2141 /**
2142  * xmlBuildRelativeURI:
2143  * @URI:  the URI reference under consideration
2144  * @base:  the base value
2145  *
2146  * Expresses the URI of the reference in terms relative to the
2147  * base.  Some examples of this operation include:
2148  *     base = "http://site1.com/docs/book1.html"
2149  *        URI input                        URI returned
2150  *     docs/pic1.gif                    pic1.gif
2151  *     docs/img/pic1.gif                img/pic1.gif
2152  *     img/pic1.gif                     ../img/pic1.gif
2153  *     http://site1.com/docs/pic1.gif   pic1.gif
2154  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2155  *
2156  *     base = "docs/book1.html"
2157  *        URI input                        URI returned
2158  *     docs/pic1.gif                    pic1.gif
2159  *     docs/img/pic1.gif                img/pic1.gif
2160  *     img/pic1.gif                     ../img/pic1.gif
2161  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2162  *
2163  *
2164  * Note: if the URI reference is really weird or complicated, it may be
2165  *       worthwhile to first convert it into a "nice" one by calling
2166  *       xmlBuildURI (using 'base') before calling this routine,
2167  *       since this routine (for reasonable efficiency) assumes URI has
2168  *       already been through some validation.
2169  *
2170  * Returns a new URI string (to be freed by the caller) or NULL in case
2171  * error.
2172  */
2173 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2174 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2175 {
2176     xmlChar *val = NULL;
2177     int ret;
2178     int ix;
2179     int nbslash = 0;
2180     int len;
2181     xmlURIPtr ref = NULL;
2182     xmlURIPtr bas = NULL;
2183     xmlChar *bptr, *uptr, *vptr;
2184     int remove_path = 0;
2185 
2186     if ((URI == NULL) || (*URI == 0))
2187 	return NULL;
2188 
2189     /*
2190      * First parse URI into a standard form
2191      */
2192     ref = xmlCreateURI ();
2193     if (ref == NULL)
2194 	return NULL;
2195     /* If URI not already in "relative" form */
2196     if (URI[0] != '.') {
2197 	ret = xmlParseURIReference (ref, (const char *) URI);
2198 	if (ret != 0)
2199 	    goto done;		/* Error in URI, return NULL */
2200     } else
2201 	ref->path = (char *)xmlStrdup(URI);
2202 
2203     /*
2204      * Next parse base into the same standard form
2205      */
2206     if ((base == NULL) || (*base == 0)) {
2207 	val = xmlStrdup (URI);
2208 	goto done;
2209     }
2210     bas = xmlCreateURI ();
2211     if (bas == NULL)
2212 	goto done;
2213     if (base[0] != '.') {
2214 	ret = xmlParseURIReference (bas, (const char *) base);
2215 	if (ret != 0)
2216 	    goto done;		/* Error in base, return NULL */
2217     } else
2218 	bas->path = (char *)xmlStrdup(base);
2219 
2220     /*
2221      * If the scheme / server on the URI differs from the base,
2222      * just return the URI
2223      */
2224     if ((ref->scheme != NULL) &&
2225 	((bas->scheme == NULL) ||
2226 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2227 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2228 	val = xmlStrdup (URI);
2229 	goto done;
2230     }
2231     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2232 	val = xmlStrdup(BAD_CAST "");
2233 	goto done;
2234     }
2235     if (bas->path == NULL) {
2236 	val = xmlStrdup((xmlChar *)ref->path);
2237 	goto done;
2238     }
2239     if (ref->path == NULL) {
2240         ref->path = (char *) "/";
2241 	remove_path = 1;
2242     }
2243 
2244     /*
2245      * At this point (at last!) we can compare the two paths
2246      *
2247      * First we take care of the special case where either of the
2248      * two path components may be missing (bug 316224)
2249      */
2250     bptr = (xmlChar *)bas->path;
2251     {
2252         xmlChar *rptr = (xmlChar *) ref->path;
2253         int pos = 0;
2254 
2255         /*
2256          * Next we compare the two strings and find where they first differ
2257          */
2258 	if ((*rptr == '.') && (rptr[1] == '/'))
2259             rptr += 2;
2260 	if ((*bptr == '.') && (bptr[1] == '/'))
2261             bptr += 2;
2262 	else if ((*bptr == '/') && (*rptr != '/'))
2263 	    bptr++;
2264 	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2265 	    pos++;
2266 
2267 	if (bptr[pos] == rptr[pos]) {
2268 	    val = xmlStrdup(BAD_CAST "");
2269 	    goto done;		/* (I can't imagine why anyone would do this) */
2270 	}
2271 
2272 	/*
2273 	 * In URI, "back up" to the last '/' encountered.  This will be the
2274 	 * beginning of the "unique" suffix of URI
2275 	 */
2276 	ix = pos;
2277 	for (; ix > 0; ix--) {
2278 	    if (rptr[ix - 1] == '/')
2279 		break;
2280 	}
2281 	uptr = (xmlChar *)&rptr[ix];
2282 
2283 	/*
2284 	 * In base, count the number of '/' from the differing point
2285 	 */
2286 	for (; bptr[ix] != 0; ix++) {
2287 	    if (bptr[ix] == '/')
2288 		nbslash++;
2289 	}
2290 
2291 	/*
2292 	 * e.g: URI="foo/" base="foo/bar" -> "./"
2293 	 */
2294 	if (nbslash == 0 && !uptr[0]) {
2295 	    val = xmlStrdup(BAD_CAST "./");
2296 	    goto done;
2297 	}
2298 
2299 	len = xmlStrlen (uptr) + 1;
2300     }
2301 
2302     if (nbslash == 0) {
2303 	if (uptr != NULL)
2304 	    /* exception characters from xmlSaveUri */
2305 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2306 	goto done;
2307     }
2308 
2309     /*
2310      * Allocate just enough space for the returned string -
2311      * length of the remainder of the URI, plus enough space
2312      * for the "../" groups, plus one for the terminator
2313      */
2314     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2315     if (val == NULL) {
2316         xmlURIErrMemory("building relative URI\n");
2317 	goto done;
2318     }
2319     vptr = val;
2320     /*
2321      * Put in as many "../" as needed
2322      */
2323     for (; nbslash>0; nbslash--) {
2324 	*vptr++ = '.';
2325 	*vptr++ = '.';
2326 	*vptr++ = '/';
2327     }
2328     /*
2329      * Finish up with the end of the URI
2330      */
2331     if (uptr != NULL) {
2332         if ((vptr > val) && (len > 0) &&
2333 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2334 	    memcpy (vptr, uptr + 1, len - 1);
2335 	    vptr[len - 2] = 0;
2336 	} else {
2337 	    memcpy (vptr, uptr, len);
2338 	    vptr[len - 1] = 0;
2339 	}
2340     } else {
2341 	vptr[len - 1] = 0;
2342     }
2343 
2344     /* escape the freshly-built path */
2345     vptr = val;
2346 	/* exception characters from xmlSaveUri */
2347     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2348     xmlFree(vptr);
2349 
2350 done:
2351     /*
2352      * Free the working variables
2353      */
2354     if (remove_path != 0)
2355         ref->path = NULL;
2356     if (ref != NULL)
2357 	xmlFreeURI (ref);
2358     if (bas != NULL)
2359 	xmlFreeURI (bas);
2360 
2361     return val;
2362 }
2363 
2364 /**
2365  * xmlCanonicPath:
2366  * @path:  the resource locator in a filesystem notation
2367  *
2368  * Constructs a canonic path from the specified path.
2369  *
2370  * Returns a new canonic path, or a duplicate of the path parameter if the
2371  * construction fails. The caller is responsible for freeing the memory occupied
2372  * by the returned string. If there is insufficient memory available, or the
2373  * argument is NULL, the function returns NULL.
2374  */
2375 #define IS_WINDOWS_PATH(p)					\
2376 	((p != NULL) &&						\
2377 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2378 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2379 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2380 xmlChar *
xmlCanonicPath(const xmlChar * path)2381 xmlCanonicPath(const xmlChar *path)
2382 {
2383 /*
2384  * For Windows implementations, additional work needs to be done to
2385  * replace backslashes in pathnames with "forward slashes"
2386  */
2387 #if defined(_WIN32) && !defined(__CYGWIN__)
2388     int len = 0;
2389     char *p = NULL;
2390 #endif
2391     xmlURIPtr uri;
2392     xmlChar *ret;
2393     const xmlChar *absuri;
2394 
2395     if (path == NULL)
2396 	return(NULL);
2397 
2398 #if defined(_WIN32)
2399     /*
2400      * We must not change the backslashes to slashes if the the path
2401      * starts with \\?\
2402      * Those paths can be up to 32k characters long.
2403      * Was added specifically for OpenOffice, those paths can't be converted
2404      * to URIs anyway.
2405      */
2406     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2407         (path[3] == '\\') )
2408 	return xmlStrdup((const xmlChar *) path);
2409 #endif
2410 
2411 	/* sanitize filename starting with // so it can be used as URI */
2412     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2413         path++;
2414 
2415     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2416 	xmlFreeURI(uri);
2417 	return xmlStrdup(path);
2418     }
2419 
2420     /* Check if this is an "absolute uri" */
2421     absuri = xmlStrstr(path, BAD_CAST "://");
2422     if (absuri != NULL) {
2423         int l, j;
2424 	unsigned char c;
2425 	xmlChar *escURI;
2426 
2427         /*
2428 	 * this looks like an URI where some parts have not been
2429 	 * escaped leading to a parsing problem.  Check that the first
2430 	 * part matches a protocol.
2431 	 */
2432 	l = absuri - path;
2433 	/* Bypass if first part (part before the '://') is > 20 chars */
2434 	if ((l <= 0) || (l > 20))
2435 	    goto path_processing;
2436 	/* Bypass if any non-alpha characters are present in first part */
2437 	for (j = 0;j < l;j++) {
2438 	    c = path[j];
2439 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2440 	        goto path_processing;
2441 	}
2442 
2443 	/* Escape all except the characters specified in the supplied path */
2444         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2445 	if (escURI != NULL) {
2446 	    /* Try parsing the escaped path */
2447 	    uri = xmlParseURI((const char *) escURI);
2448 	    /* If successful, return the escaped string */
2449 	    if (uri != NULL) {
2450 	        xmlFreeURI(uri);
2451 		return escURI;
2452 	    }
2453             xmlFree(escURI);
2454 	}
2455     }
2456 
2457 path_processing:
2458 /* For Windows implementations, replace backslashes with 'forward slashes' */
2459 #if defined(_WIN32) && !defined(__CYGWIN__)
2460     /*
2461      * Create a URI structure
2462      */
2463     uri = xmlCreateURI();
2464     if (uri == NULL) {		/* Guard against 'out of memory' */
2465         return(NULL);
2466     }
2467 
2468     len = xmlStrlen(path);
2469     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2470         /* make the scheme 'file' */
2471 	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2472 	/* allocate space for leading '/' + path + string terminator */
2473 	uri->path = xmlMallocAtomic(len + 2);
2474 	if (uri->path == NULL) {
2475 	    xmlFreeURI(uri);	/* Guard against 'out of memory' */
2476 	    return(NULL);
2477 	}
2478 	/* Put in leading '/' plus path */
2479 	uri->path[0] = '/';
2480 	p = uri->path + 1;
2481 	strncpy(p, (char *) path, len + 1);
2482     } else {
2483 	uri->path = (char *) xmlStrdup(path);
2484 	if (uri->path == NULL) {
2485 	    xmlFreeURI(uri);
2486 	    return(NULL);
2487 	}
2488 	p = uri->path;
2489     }
2490     /* Now change all occurrences of '\' to '/' */
2491     while (*p != '\0') {
2492 	if (*p == '\\')
2493 	    *p = '/';
2494 	p++;
2495     }
2496 
2497     if (uri->scheme == NULL) {
2498 	ret = xmlStrdup((const xmlChar *) uri->path);
2499     } else {
2500 	ret = xmlSaveUri(uri);
2501     }
2502 
2503     xmlFreeURI(uri);
2504 #else
2505     ret = xmlStrdup((const xmlChar *) path);
2506 #endif
2507     return(ret);
2508 }
2509 
2510 /**
2511  * xmlPathToURI:
2512  * @path:  the resource locator in a filesystem notation
2513  *
2514  * Constructs an URI expressing the existing path
2515  *
2516  * Returns a new URI, or a duplicate of the path parameter if the
2517  * construction fails. The caller is responsible for freeing the memory
2518  * occupied by the returned string. If there is insufficient memory available,
2519  * or the argument is NULL, the function returns NULL.
2520  */
2521 xmlChar *
xmlPathToURI(const xmlChar * path)2522 xmlPathToURI(const xmlChar *path)
2523 {
2524     xmlURIPtr uri;
2525     xmlURI temp;
2526     xmlChar *ret, *cal;
2527 
2528     if (path == NULL)
2529         return(NULL);
2530 
2531     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2532 	xmlFreeURI(uri);
2533 	return xmlStrdup(path);
2534     }
2535     cal = xmlCanonicPath(path);
2536     if (cal == NULL)
2537         return(NULL);
2538 #if defined(_WIN32) && !defined(__CYGWIN__)
2539     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2540        If 'cal' is a valid URI already then we are done here, as continuing would make
2541        it invalid. */
2542     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2543 	xmlFreeURI(uri);
2544 	return cal;
2545     }
2546     /* 'cal' can contain a relative path with backslashes. If that is processed
2547        by xmlSaveURI, they will be escaped and the external entity loader machinery
2548        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2549     ret = cal;
2550     while (*ret != '\0') {
2551 	if (*ret == '\\')
2552 	    *ret = '/';
2553 	ret++;
2554     }
2555 #endif
2556     memset(&temp, 0, sizeof(temp));
2557     temp.path = (char *) cal;
2558     ret = xmlSaveUri(&temp);
2559     xmlFree(cal);
2560     return(ret);
2561 }
2562 #define bottom_uri
2563 #include "elfgcchack.h"
2564