1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
9  */
10 
#define IN_LIBXML
#include "libxml.h"

#include <limits.h>
#include <string.h>

#include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxml/globals.h>
#include <libxml/xmlerror.h>
20 
21 static void xmlCleanURI(xmlURIPtr uri);
22 
/*
 * Old rule from 2396 used in legacy handling code
 * alpha    = lowalpha | upalpha
 *
 * The IS_xxx(x) macros below take a character value, with the exception
 * of IS_UNWISE(p) which takes a pointer to the character to test.
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))


/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */

#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Takes a pointer to the character to test, not the character itself.
 */

#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences
 *
 * Advances the pointer lvalue @p by 3 when it points at a '%' and by 1
 * otherwise. The macro itself does not check that two more characters
 * follow the '%'. The argument is evaluated more than once, so it must
 * be free of side effects; it is fully parenthesized so that lvalue
 * expressions such as *pp are advanced correctly.
 */

#define NEXT(p) ((*(p) == '%')? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first @n bytes of @s through xmlStrndup(), as a char *. */
#define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n))
108 
109 /************************************************************************
110  *									*
111  *                         RFC 3986 parser				*
112  *									*
113  ************************************************************************/
114 
/*
 * All ISA_xxx(p) macros take a pointer to the character to test and
 * evaluate their argument several times: do not pass expressions with
 * side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '\'')) || ((*(p) == '(')) || ((*(p) == ')')) ||	\
       ((*(p) == '*')) || ((*(p) == '+')) || ((*(p) == ',')) ||		\
       ((*(p) == ';')) || ((*(p) == '=')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following characters are only read once the previous one
 * matched, so the test never reads past a terminating 0.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
164 
165 /**
166  * xmlParse3986Scheme:
167  * @uri:  pointer to an URI structure
168  * @str:  pointer to the string to analyze
169  *
170  * Parse an URI scheme
171  *
172  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
173  *
174  * Returns 0 or the error code
175  */
176 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)177 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
178     const char *cur;
179 
180     if (str == NULL)
181 	return(-1);
182 
183     cur = *str;
184     if (!ISA_ALPHA(cur))
185 	return(2);
186     cur++;
187     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
188            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
189     if (uri != NULL) {
190 	if (uri->scheme != NULL) xmlFree(uri->scheme);
191 	uri->scheme = STRNDUP(*str, cur - *str);
192     }
193     *str = cur;
194     return(0);
195 }
196 
197 /**
198  * xmlParse3986Fragment:
199  * @uri:  pointer to an URI structure
200  * @str:  pointer to the string to analyze
201  *
202  * Parse the query part of an URI
203  *
204  * fragment      = *( pchar / "/" / "?" )
205  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
206  *       in the fragment identifier but this is used very broadly for
207  *       xpointer scheme selection, so we are allowing it here to not break
208  *       for example all the DocBook processing chains.
209  *
210  * Returns 0 or the error code
211  */
212 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)213 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
214 {
215     const char *cur;
216 
217     if (str == NULL)
218         return (-1);
219 
220     cur = *str;
221 
222     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
223            (*cur == '[') || (*cur == ']') ||
224            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
225         NEXT(cur);
226     if (uri != NULL) {
227         if (uri->fragment != NULL)
228             xmlFree(uri->fragment);
229 	if (uri->cleanup & 2)
230 	    uri->fragment = STRNDUP(*str, cur - *str);
231 	else
232 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
233     }
234     *str = cur;
235     return (0);
236 }
237 
238 /**
239  * xmlParse3986Query:
240  * @uri:  pointer to an URI structure
241  * @str:  pointer to the string to analyze
242  *
243  * Parse the query part of an URI
244  *
245  * query = *uric
246  *
247  * Returns 0 or the error code
248  */
249 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)250 xmlParse3986Query(xmlURIPtr uri, const char **str)
251 {
252     const char *cur;
253 
254     if (str == NULL)
255         return (-1);
256 
257     cur = *str;
258 
259     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
260            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
261         NEXT(cur);
262     if (uri != NULL) {
263         if (uri->query != NULL)
264             xmlFree(uri->query);
265 	if (uri->cleanup & 2)
266 	    uri->query = STRNDUP(*str, cur - *str);
267 	else
268 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
269 
270 	/* Save the raw bytes of the query as well.
271 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
272 	 */
273 	if (uri->query_raw != NULL)
274 	    xmlFree (uri->query_raw);
275 	uri->query_raw = STRNDUP (*str, cur - *str);
276     }
277     *str = cur;
278     return (0);
279 }
280 
281 /**
282  * xmlParse3986Port:
283  * @uri:  pointer to an URI structure
284  * @str:  the string to analyze
285  *
286  * Parse a port  part and fills in the appropriate fields
287  * of the @uri structure
288  *
289  * port          = *DIGIT
290  *
291  * Returns 0 or the error code
292  */
293 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)294 xmlParse3986Port(xmlURIPtr uri, const char **str)
295 {
296     const char *cur = *str;
297 
298     if (ISA_DIGIT(cur)) {
299 	if (uri != NULL)
300 	    uri->port = 0;
301 	while (ISA_DIGIT(cur)) {
302 	    if (uri != NULL)
303 		uri->port = uri->port * 10 + (*cur - '0');
304 	    cur++;
305 	}
306 	*str = cur;
307 	return(0);
308     }
309     return(1);
310 }
311 
312 /**
313  * xmlParse3986Userinfo:
314  * @uri:  pointer to an URI structure
315  * @str:  the string to analyze
316  *
317  * Parse an user informations part and fills in the appropriate fields
318  * of the @uri structure
319  *
320  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
321  *
322  * Returns 0 or the error code
323  */
324 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)325 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
326 {
327     const char *cur;
328 
329     cur = *str;
330     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
331            ISA_SUB_DELIM(cur) || (*cur == ':'))
332 	NEXT(cur);
333     if (*cur == '@') {
334 	if (uri != NULL) {
335 	    if (uri->user != NULL) xmlFree(uri->user);
336 	    if (uri->cleanup & 2)
337 		uri->user = STRNDUP(*str, cur - *str);
338 	    else
339 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
340 	}
341 	*str = cur;
342 	return(0);
343     }
344     return(1);
345 }
346 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. A two digit value must not start with '0' and a
 * three digit value must be in the range 100-255. When three digits are
 * consumed the character following them is not examined. *@str is only
 * updated in case of success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[1] is only read after cur[0] matched a digit and cur[2] only
     * after cur[1] did, so the terminating 0 is never stepped over.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        /* DIGIT : 0-9 */
        cur += 1;
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        /* %x31-39 DIGIT : 10-99, no leading zero */
        if (cur[0] == '0')
            return(1);
        cur += 2;
    } else if (cur[0] == '1') {
        /* "1" 2DIGIT : 100-199 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        /* "2" %x30-34 DIGIT : 200-249 */
        cur += 3;
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* "25" %x30-35 : 250-255, the last digit is the bounded one */
        cur += 3;
    } else {
        return(1);
    }
    *str = cur;
    return(0);
}
384 /**
385  * xmlParse3986Host:
386  * @uri:  pointer to an URI structure
387  * @str:  the string to analyze
388  *
389  * Parse an host part and fills in the appropriate fields
390  * of the @uri structure
391  *
392  * host          = IP-literal / IPv4address / reg-name
393  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
394  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
395  * reg-name      = *( unreserved / pct-encoded / sub-delims )
396  *
397  * Returns 0 or the error code
398  */
399 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)400 xmlParse3986Host(xmlURIPtr uri, const char **str)
401 {
402     const char *cur = *str;
403     const char *host;
404 
405     host = cur;
406     /*
407      * IPv6 and future adressing scheme are enclosed between brackets
408      */
409     if (*cur == '[') {
410         cur++;
411 	while ((*cur != ']') && (*cur != 0))
412 	    cur++;
413 	if (*cur != ']')
414 	    return(1);
415 	cur++;
416 	goto found;
417     }
418     /*
419      * try to parse an IPv4
420      */
421     if (ISA_DIGIT(cur)) {
422         if (xmlParse3986DecOctet(&cur) != 0)
423 	    goto not_ipv4;
424 	if (*cur != '.')
425 	    goto not_ipv4;
426 	cur++;
427         if (xmlParse3986DecOctet(&cur) != 0)
428 	    goto not_ipv4;
429 	if (*cur != '.')
430 	    goto not_ipv4;
431         if (xmlParse3986DecOctet(&cur) != 0)
432 	    goto not_ipv4;
433 	if (*cur != '.')
434 	    goto not_ipv4;
435         if (xmlParse3986DecOctet(&cur) != 0)
436 	    goto not_ipv4;
437 	goto found;
438 not_ipv4:
439         cur = *str;
440     }
441     /*
442      * then this should be a hostname which can be empty
443      */
444     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
445         NEXT(cur);
446 found:
447     if (uri != NULL) {
448 	if (uri->authority != NULL) xmlFree(uri->authority);
449 	uri->authority = NULL;
450 	if (uri->server != NULL) xmlFree(uri->server);
451 	if (cur != host) {
452 	    if (uri->cleanup & 2)
453 		uri->server = STRNDUP(host, cur - host);
454 	    else
455 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
456 	} else
457 	    uri->server = NULL;
458     }
459     *str = cur;
460     return(0);
461 }
462 
463 /**
464  * xmlParse3986Authority:
465  * @uri:  pointer to an URI structure
466  * @str:  the string to analyze
467  *
468  * Parse an authority part and fills in the appropriate fields
469  * of the @uri structure
470  *
471  * authority     = [ userinfo "@" ] host [ ":" port ]
472  *
473  * Returns 0 or the error code
474  */
475 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)476 xmlParse3986Authority(xmlURIPtr uri, const char **str)
477 {
478     const char *cur;
479     int ret;
480 
481     cur = *str;
482     /*
483      * try to parse an userinfo and check for the trailing @
484      */
485     ret = xmlParse3986Userinfo(uri, &cur);
486     if ((ret != 0) || (*cur != '@'))
487         cur = *str;
488     else
489         cur++;
490     ret = xmlParse3986Host(uri, &cur);
491     if (ret != 0) return(ret);
492     if (*cur == ':') {
493         cur++;
494         ret = xmlParse3986Port(uri, &cur);
495 	if (ret != 0) return(ret);
496     }
497     *str = cur;
498     return(0);
499 }
500 
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 if none
 * @empty: allow an empty segment
 *
 * Skip a path segment, no URI structure is involved.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The scan stops at the first character which is not a pchar or which
 * is equal to @forbid. *@str is only updated when the first character
 * is a pchar.
 *
 * Returns 0 in case of success, 1 if the first character is not a pchar
 * and @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    if (!ISA_PCHAR(pos))
        return(empty ? 0 : 1);

    while (ISA_PCHAR(pos) && (*pos != forbid))
        NEXT(pos);
    *str = pos;
    return (0);
}
533 
534 /**
535  * xmlParse3986PathAbEmpty:
536  * @uri:  pointer to an URI structure
537  * @str:  the string to analyze
538  *
539  * Parse an path absolute or empty and fills in the appropriate fields
540  * of the @uri structure
541  *
542  * path-abempty  = *( "/" segment )
543  *
544  * Returns 0 or the error code
545  */
546 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)547 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
548 {
549     const char *cur;
550     int ret;
551 
552     cur = *str;
553 
554     while (*cur == '/') {
555         cur++;
556 	ret = xmlParse3986Segment(&cur, 0, 1);
557 	if (ret != 0) return(ret);
558     }
559     if (uri != NULL) {
560 	if (uri->path != NULL) xmlFree(uri->path);
561         if (*str != cur) {
562             if (uri->cleanup & 2)
563                 uri->path = STRNDUP(*str, cur - *str);
564             else
565                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
566         } else {
567             uri->path = NULL;
568         }
569     }
570     *str = cur;
571     return (0);
572 }
573 
574 /**
575  * xmlParse3986PathAbsolute:
576  * @uri:  pointer to an URI structure
577  * @str:  the string to analyze
578  *
579  * Parse an path absolute and fills in the appropriate fields
580  * of the @uri structure
581  *
582  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
583  *
584  * Returns 0 or the error code
585  */
586 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)587 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
588 {
589     const char *cur;
590     int ret;
591 
592     cur = *str;
593 
594     if (*cur != '/')
595         return(1);
596     cur++;
597     ret = xmlParse3986Segment(&cur, 0, 0);
598     if (ret == 0) {
599 	while (*cur == '/') {
600 	    cur++;
601 	    ret = xmlParse3986Segment(&cur, 0, 1);
602 	    if (ret != 0) return(ret);
603 	}
604     }
605     if (uri != NULL) {
606 	if (uri->path != NULL) xmlFree(uri->path);
607         if (cur != *str) {
608             if (uri->cleanup & 2)
609                 uri->path = STRNDUP(*str, cur - *str);
610             else
611                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
612         } else {
613             uri->path = NULL;
614         }
615     }
616     *str = cur;
617     return (0);
618 }
619 
620 /**
621  * xmlParse3986PathRootless:
622  * @uri:  pointer to an URI structure
623  * @str:  the string to analyze
624  *
625  * Parse an path without root and fills in the appropriate fields
626  * of the @uri structure
627  *
628  * path-rootless = segment-nz *( "/" segment )
629  *
630  * Returns 0 or the error code
631  */
632 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)633 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
634 {
635     const char *cur;
636     int ret;
637 
638     cur = *str;
639 
640     ret = xmlParse3986Segment(&cur, 0, 0);
641     if (ret != 0) return(ret);
642     while (*cur == '/') {
643         cur++;
644 	ret = xmlParse3986Segment(&cur, 0, 1);
645 	if (ret != 0) return(ret);
646     }
647     if (uri != NULL) {
648 	if (uri->path != NULL) xmlFree(uri->path);
649         if (cur != *str) {
650             if (uri->cleanup & 2)
651                 uri->path = STRNDUP(*str, cur - *str);
652             else
653                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
654         } else {
655             uri->path = NULL;
656         }
657     }
658     *str = cur;
659     return (0);
660 }
661 
662 /**
663  * xmlParse3986PathNoScheme:
664  * @uri:  pointer to an URI structure
665  * @str:  the string to analyze
666  *
667  * Parse an path which is not a scheme and fills in the appropriate fields
668  * of the @uri structure
669  *
670  * path-noscheme = segment-nz-nc *( "/" segment )
671  *
672  * Returns 0 or the error code
673  */
674 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)675 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
676 {
677     const char *cur;
678     int ret;
679 
680     cur = *str;
681 
682     ret = xmlParse3986Segment(&cur, ':', 0);
683     if (ret != 0) return(ret);
684     while (*cur == '/') {
685         cur++;
686 	ret = xmlParse3986Segment(&cur, 0, 1);
687 	if (ret != 0) return(ret);
688     }
689     if (uri != NULL) {
690 	if (uri->path != NULL) xmlFree(uri->path);
691         if (cur != *str) {
692             if (uri->cleanup & 2)
693                 uri->path = STRNDUP(*str, cur - *str);
694             else
695                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
696         } else {
697             uri->path = NULL;
698         }
699     }
700     *str = cur;
701     return (0);
702 }
703 
704 /**
705  * xmlParse3986HierPart:
706  * @uri:  pointer to an URI structure
707  * @str:  the string to analyze
708  *
709  * Parse an hierarchical part and fills in the appropriate fields
710  * of the @uri structure
711  *
712  * hier-part     = "//" authority path-abempty
713  *                / path-absolute
714  *                / path-rootless
715  *                / path-empty
716  *
717  * Returns 0 or the error code
718  */
719 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)720 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
721 {
722     const char *cur;
723     int ret;
724 
725     cur = *str;
726 
727     if ((*cur == '/') && (*(cur + 1) == '/')) {
728         cur += 2;
729 	ret = xmlParse3986Authority(uri, &cur);
730 	if (ret != 0) return(ret);
731 	ret = xmlParse3986PathAbEmpty(uri, &cur);
732 	if (ret != 0) return(ret);
733 	*str = cur;
734 	return(0);
735     } else if (*cur == '/') {
736         ret = xmlParse3986PathAbsolute(uri, &cur);
737 	if (ret != 0) return(ret);
738     } else if (ISA_PCHAR(cur)) {
739         ret = xmlParse3986PathRootless(uri, &cur);
740 	if (ret != 0) return(ret);
741     } else {
742 	/* path-empty is effectively empty */
743 	if (uri != NULL) {
744 	    if (uri->path != NULL) xmlFree(uri->path);
745 	    uri->path = NULL;
746 	}
747     }
748     *str = cur;
749     return (0);
750 }
751 
752 /**
753  * xmlParse3986RelativeRef:
754  * @uri:  pointer to an URI structure
755  * @str:  the string to analyze
756  *
757  * Parse an URI string and fills in the appropriate fields
758  * of the @uri structure
759  *
760  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
761  * relative-part = "//" authority path-abempty
762  *               / path-absolute
763  *               / path-noscheme
764  *               / path-empty
765  *
766  * Returns 0 or the error code
767  */
768 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)769 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
770     int ret;
771 
772     if ((*str == '/') && (*(str + 1) == '/')) {
773         str += 2;
774 	ret = xmlParse3986Authority(uri, &str);
775 	if (ret != 0) return(ret);
776 	ret = xmlParse3986PathAbEmpty(uri, &str);
777 	if (ret != 0) return(ret);
778     } else if (*str == '/') {
779 	ret = xmlParse3986PathAbsolute(uri, &str);
780 	if (ret != 0) return(ret);
781     } else if (ISA_PCHAR(str)) {
782         ret = xmlParse3986PathNoScheme(uri, &str);
783 	if (ret != 0) return(ret);
784     } else {
785 	/* path-empty is effectively empty */
786 	if (uri != NULL) {
787 	    if (uri->path != NULL) xmlFree(uri->path);
788 	    uri->path = NULL;
789 	}
790     }
791 
792     if (*str == '?') {
793 	str++;
794 	ret = xmlParse3986Query(uri, &str);
795 	if (ret != 0) return(ret);
796     }
797     if (*str == '#') {
798 	str++;
799 	ret = xmlParse3986Fragment(uri, &str);
800 	if (ret != 0) return(ret);
801     }
802     if (*str != 0) {
803 	xmlCleanURI(uri);
804 	return(1);
805     }
806     return(0);
807 }
808 
809 
810 /**
811  * xmlParse3986URI:
812  * @uri:  pointer to an URI structure
813  * @str:  the string to analyze
814  *
815  * Parse an URI string and fills in the appropriate fields
816  * of the @uri structure
817  *
818  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
819  *
820  * Returns 0 or the error code
821  */
822 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)823 xmlParse3986URI(xmlURIPtr uri, const char *str) {
824     int ret;
825 
826     ret = xmlParse3986Scheme(uri, &str);
827     if (ret != 0) return(ret);
828     if (*str != ':') {
829 	return(1);
830     }
831     str++;
832     ret = xmlParse3986HierPart(uri, &str);
833     if (ret != 0) return(ret);
834     if (*str == '?') {
835 	str++;
836 	ret = xmlParse3986Query(uri, &str);
837 	if (ret != 0) return(ret);
838     }
839     if (*str == '#') {
840 	str++;
841 	ret = xmlParse3986Fragment(uri, &str);
842 	if (ret != 0) return(ret);
843     }
844     if (*str != 0) {
845 	xmlCleanURI(uri);
846 	return(1);
847     }
848     return(0);
849 }
850 
851 /**
852  * xmlParse3986URIReference:
853  * @uri:  pointer to an URI structure
854  * @str:  the string to analyze
855  *
856  * Parse an URI reference string and fills in the appropriate fields
857  * of the @uri structure
858  *
859  * URI-reference = URI / relative-ref
860  *
861  * Returns 0 or the error code
862  */
863 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)864 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
865     int ret;
866 
867     if (str == NULL)
868 	return(-1);
869     xmlCleanURI(uri);
870 
871     /*
872      * Try first to parse absolute refs, then fallback to relative if
873      * it fails.
874      */
875     ret = xmlParse3986URI(uri, str);
876     if (ret != 0) {
877 	xmlCleanURI(uri);
878         ret = xmlParse3986RelativeRef(uri, str);
879 	if (ret != 0) {
880 	    xmlCleanURI(uri);
881 	    return(ret);
882 	}
883     }
884     return(0);
885 }
886 
887 /**
888  * xmlParseURI:
889  * @str:  the URI string to analyze
890  *
891  * Parse an URI based on RFC 3986
892  *
893  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
894  *
895  * Returns a newly built xmlURIPtr or NULL in case of error
896  */
897 xmlURIPtr
xmlParseURI(const char * str)898 xmlParseURI(const char *str) {
899     xmlURIPtr uri;
900     int ret;
901 
902     if (str == NULL)
903 	return(NULL);
904     uri = xmlCreateURI();
905     if (uri != NULL) {
906 	ret = xmlParse3986URIReference(uri, str);
907         if (ret) {
908 	    xmlFreeURI(uri);
909 	    return(NULL);
910 	}
911     }
912     return(uri);
913 }
914 
915 /**
916  * xmlParseURIReference:
917  * @uri:  pointer to an URI structure
918  * @str:  the string to analyze
919  *
920  * Parse an URI reference string based on RFC 3986 and fills in the
921  * appropriate fields of the @uri structure
922  *
923  * URI-reference = URI / relative-ref
924  *
925  * Returns 0 or the error code
926  */
927 int
xmlParseURIReference(xmlURIPtr uri,const char * str)928 xmlParseURIReference(xmlURIPtr uri, const char *str) {
929     return(xmlParse3986URIReference(uri, str));
930 }
931 
932 /**
933  * xmlParseURIRaw:
934  * @str:  the URI string to analyze
935  * @raw:  if 1 unescaping of URI pieces are disabled
936  *
937  * Parse an URI but allows to keep intact the original fragments.
938  *
939  * URI-reference = URI / relative-ref
940  *
941  * Returns a newly built xmlURIPtr or NULL in case of error
942  */
943 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)944 xmlParseURIRaw(const char *str, int raw) {
945     xmlURIPtr uri;
946     int ret;
947 
948     if (str == NULL)
949 	return(NULL);
950     uri = xmlCreateURI();
951     if (uri != NULL) {
952         if (raw) {
953 	    uri->cleanup |= 2;
954 	}
955 	ret = xmlParseURIReference(uri, str);
956         if (ret) {
957 	    xmlFreeURI(uri);
958 	    return(NULL);
959 	}
960     }
961     return(uri);
962 }
963 
964 /************************************************************************
965  *									*
966  *			Generic URI structure functions			*
967  *									*
968  ************************************************************************/
969 
970 /**
971  * xmlCreateURI:
972  *
973  * Simply creates an empty xmlURI
974  *
975  * Returns the new structure or NULL in case of error
976  */
977 xmlURIPtr
xmlCreateURI(void)978 xmlCreateURI(void) {
979     xmlURIPtr ret;
980 
981     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
982     if (ret == NULL) {
983 	xmlGenericError(xmlGenericErrorContext,
984 		"xmlCreateURI: out of memory\n");
985 	return(NULL);
986     }
987     memset(ret, 0, sizeof(xmlURI));
988     return(ret);
989 }
990 
991 /**
992  * xmlSaveUri:
993  * @uri:  pointer to an xmlURI
994  *
995  * Save the URI as an escaped string
996  *
997  * Returns a new string (to be deallocated by caller)
998  */
999 xmlChar *
xmlSaveUri(xmlURIPtr uri)1000 xmlSaveUri(xmlURIPtr uri) {
1001     xmlChar *ret = NULL;
1002     xmlChar *temp;
1003     const char *p;
1004     int len;
1005     int max;
1006 
1007     if (uri == NULL) return(NULL);
1008 
1009 
1010     max = 80;
1011     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1012     if (ret == NULL) {
1013 	xmlGenericError(xmlGenericErrorContext,
1014 		"xmlSaveUri: out of memory\n");
1015 	return(NULL);
1016     }
1017     len = 0;
1018 
1019     if (uri->scheme != NULL) {
1020 	p = uri->scheme;
1021 	while (*p != 0) {
1022 	    if (len >= max) {
1023 		max *= 2;
1024 		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1025 		if (temp == NULL) {
1026 		    xmlGenericError(xmlGenericErrorContext,
1027 			    "xmlSaveUri: out of memory\n");
1028 		    xmlFree(ret);
1029 		    return(NULL);
1030 		}
1031 		ret = temp;
1032 	    }
1033 	    ret[len++] = *p++;
1034 	}
1035 	if (len >= max) {
1036 	    max *= 2;
1037 	    temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1038 	    if (temp == NULL) {
1039 		xmlGenericError(xmlGenericErrorContext,
1040 			"xmlSaveUri: out of memory\n");
1041 		xmlFree(ret);
1042 		return(NULL);
1043 	    }
1044 	    ret = temp;
1045 	}
1046 	ret[len++] = ':';
1047     }
1048     if (uri->opaque != NULL) {
1049 	p = uri->opaque;
1050 	while (*p != 0) {
1051 	    if (len + 3 >= max) {
1052 		max *= 2;
1053 		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1054 		if (temp == NULL) {
1055 		    xmlGenericError(xmlGenericErrorContext,
1056 			    "xmlSaveUri: out of memory\n");
1057 		    xmlFree(ret);
1058 		    return(NULL);
1059 		}
1060 		ret = temp;
1061 	    }
1062 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1063 		ret[len++] = *p++;
1064 	    else {
1065 		int val = *(unsigned char *)p++;
1066 		int hi = val / 0x10, lo = val % 0x10;
1067 		ret[len++] = '%';
1068 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1069 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1070 	    }
1071 	}
1072     } else {
1073 	if (uri->server != NULL) {
1074 	    if (len + 3 >= max) {
1075 		max *= 2;
1076 		temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1077 		if (temp == NULL) {
1078 		    xmlGenericError(xmlGenericErrorContext,
1079 			    "xmlSaveUri: out of memory\n");
1080                   xmlFree(ret);
1081 		    return(NULL);
1082 		}
1083 		ret = temp;
1084 	    }
1085 	    ret[len++] = '/';
1086 	    ret[len++] = '/';
1087 	    if (uri->user != NULL) {
1088 		p = uri->user;
1089 		while (*p != 0) {
1090 		    if (len + 3 >= max) {
1091 			max *= 2;
1092 			temp = (xmlChar *) xmlRealloc(ret,
1093 				(max + 1) * sizeof(xmlChar));
1094 			if (temp == NULL) {
1095 			    xmlGenericError(xmlGenericErrorContext,
1096 				    "xmlSaveUri: out of memory\n");
1097 			    xmlFree(ret);
1098 			    return(NULL);
1099 			}
1100 			ret = temp;
1101 		    }
1102 		    if ((IS_UNRESERVED(*(p))) ||
1103 			((*(p) == ';')) || ((*(p) == ':')) ||
1104 			((*(p) == '&')) || ((*(p) == '=')) ||
1105 			((*(p) == '+')) || ((*(p) == '$')) ||
1106 			((*(p) == ',')))
1107 			ret[len++] = *p++;
1108 		    else {
1109 			int val = *(unsigned char *)p++;
1110 			int hi = val / 0x10, lo = val % 0x10;
1111 			ret[len++] = '%';
1112 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1113 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1114 		    }
1115 		}
1116 		if (len + 3 >= max) {
1117 		    max *= 2;
1118 		    temp = (xmlChar *) xmlRealloc(ret,
1119 			    (max + 1) * sizeof(xmlChar));
1120 		    if (temp == NULL) {
1121 			xmlGenericError(xmlGenericErrorContext,
1122 				"xmlSaveUri: out of memory\n");
1123 			xmlFree(ret);
1124 			return(NULL);
1125 		    }
1126 		    ret = temp;
1127 		}
1128 		ret[len++] = '@';
1129 	    }
1130 	    p = uri->server;
1131 	    while (*p != 0) {
1132 		if (len >= max) {
1133 		    max *= 2;
1134 		    temp = (xmlChar *) xmlRealloc(ret,
1135 			    (max + 1) * sizeof(xmlChar));
1136 		    if (temp == NULL) {
1137 			xmlGenericError(xmlGenericErrorContext,
1138 				"xmlSaveUri: out of memory\n");
1139 			xmlFree(ret);
1140 			return(NULL);
1141 		    }
1142 		    ret = temp;
1143 		}
1144 		ret[len++] = *p++;
1145 	    }
1146 	    if (uri->port > 0) {
1147 		if (len + 10 >= max) {
1148 		    max *= 2;
1149 		    temp = (xmlChar *) xmlRealloc(ret,
1150 			    (max + 1) * sizeof(xmlChar));
1151 		    if (temp == NULL) {
1152 			xmlGenericError(xmlGenericErrorContext,
1153 				"xmlSaveUri: out of memory\n");
1154                      xmlFree(ret);
1155 			return(NULL);
1156 		    }
1157 		    ret = temp;
1158 		}
1159 		len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1160 	    }
1161 	} else if (uri->authority != NULL) {
1162 	    if (len + 3 >= max) {
1163 		max *= 2;
1164 		temp = (xmlChar *) xmlRealloc(ret,
1165 			(max + 1) * sizeof(xmlChar));
1166 		if (temp == NULL) {
1167 			xmlGenericError(xmlGenericErrorContext,
1168 				"xmlSaveUri: out of memory\n");
1169                      xmlFree(ret);
1170 			return(NULL);
1171 		    }
1172 		    ret = temp;
1173 	    }
1174 	    ret[len++] = '/';
1175 	    ret[len++] = '/';
1176 	    p = uri->authority;
1177 	    while (*p != 0) {
1178 		if (len + 3 >= max) {
1179 		    max *= 2;
1180 		    temp = (xmlChar *) xmlRealloc(ret,
1181 			    (max + 1) * sizeof(xmlChar));
1182 		    if (temp == NULL) {
1183 			xmlGenericError(xmlGenericErrorContext,
1184 				"xmlSaveUri: out of memory\n");
1185                      xmlFree(ret);
1186 			return(NULL);
1187 		    }
1188 		    ret = temp;
1189 		}
1190 		if ((IS_UNRESERVED(*(p))) ||
1191                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1192                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1193                     ((*(p) == '=')) || ((*(p) == '+')))
1194 		    ret[len++] = *p++;
1195 		else {
1196 		    int val = *(unsigned char *)p++;
1197 		    int hi = val / 0x10, lo = val % 0x10;
1198 		    ret[len++] = '%';
1199 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1200 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1201 		}
1202 	    }
1203 	} else if (uri->scheme != NULL) {
1204 	    if (len + 3 >= max) {
1205 		max *= 2;
1206 		temp = (xmlChar *) xmlRealloc(ret,
1207 			(max + 1) * sizeof(xmlChar));
1208 		if (temp == NULL) {
1209 			xmlGenericError(xmlGenericErrorContext,
1210 				"xmlSaveUri: out of memory\n");
1211                      xmlFree(ret);
1212 			return(NULL);
1213 		    }
1214 		    ret = temp;
1215 	    }
1216 	    ret[len++] = '/';
1217 	    ret[len++] = '/';
1218 	}
1219 	if (uri->path != NULL) {
1220 	    p = uri->path;
1221 	    /*
1222 	     * the colon in file:///d: should not be escaped or
1223 	     * Windows accesses fail later.
1224 	     */
1225 	    if ((uri->scheme != NULL) &&
1226 		(p[0] == '/') &&
1227 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1228 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1229 		(p[2] == ':') &&
1230 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1231 		if (len + 3 >= max) {
1232 		    max *= 2;
1233 		    ret = (xmlChar *) xmlRealloc(ret,
1234 			    (max + 1) * sizeof(xmlChar));
1235 		    if (ret == NULL) {
1236 			xmlGenericError(xmlGenericErrorContext,
1237 				"xmlSaveUri: out of memory\n");
1238 			return(NULL);
1239 		    }
1240 		}
1241 		ret[len++] = *p++;
1242 		ret[len++] = *p++;
1243 		ret[len++] = *p++;
1244 	    }
1245 	    while (*p != 0) {
1246 		if (len + 3 >= max) {
1247 		    max *= 2;
1248 		    temp = (xmlChar *) xmlRealloc(ret,
1249 			    (max + 1) * sizeof(xmlChar));
1250 		    if (temp == NULL) {
1251 			xmlGenericError(xmlGenericErrorContext,
1252 				"xmlSaveUri: out of memory\n");
1253                      xmlFree(ret);
1254 			return(NULL);
1255 		    }
1256 		    ret = temp;
1257 		}
1258 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1259                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1260 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1261 	            ((*(p) == ',')))
1262 		    ret[len++] = *p++;
1263 		else {
1264 		    int val = *(unsigned char *)p++;
1265 		    int hi = val / 0x10, lo = val % 0x10;
1266 		    ret[len++] = '%';
1267 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1268 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1269 		}
1270 	    }
1271 	}
1272 	if (uri->query_raw != NULL) {
1273 	    if (len + 1 >= max) {
1274 		max *= 2;
1275 		temp = (xmlChar *) xmlRealloc(ret,
1276 			(max + 1) * sizeof(xmlChar));
1277 		if (temp == NULL) {
1278 			xmlGenericError(xmlGenericErrorContext,
1279 				"xmlSaveUri: out of memory\n");
1280                      xmlFree(ret);
1281 			return(NULL);
1282 		    }
1283 		    ret = temp;
1284 	    }
1285 	    ret[len++] = '?';
1286 	    p = uri->query_raw;
1287 	    while (*p != 0) {
1288 		if (len + 1 >= max) {
1289 		    max *= 2;
1290 		    temp = (xmlChar *) xmlRealloc(ret,
1291 			    (max + 1) * sizeof(xmlChar));
1292 		    if (temp == NULL) {
1293 			xmlGenericError(xmlGenericErrorContext,
1294 				"xmlSaveUri: out of memory\n");
1295                      xmlFree(ret);
1296 			return(NULL);
1297 		    }
1298 		    ret = temp;
1299 		}
1300 		ret[len++] = *p++;
1301 	    }
1302 	} else if (uri->query != NULL) {
1303 	    if (len + 3 >= max) {
1304 		max *= 2;
1305 		temp = (xmlChar *) xmlRealloc(ret,
1306 			(max + 1) * sizeof(xmlChar));
1307 		if (temp == NULL) {
1308 			xmlGenericError(xmlGenericErrorContext,
1309 				"xmlSaveUri: out of memory\n");
1310                      xmlFree(ret);
1311 			return(NULL);
1312 		    }
1313 		    ret = temp;
1314 	    }
1315 	    ret[len++] = '?';
1316 	    p = uri->query;
1317 	    while (*p != 0) {
1318 		if (len + 3 >= max) {
1319 		    max *= 2;
1320 		    temp = (xmlChar *) xmlRealloc(ret,
1321 			    (max + 1) * sizeof(xmlChar));
1322 		    if (temp == NULL) {
1323 			xmlGenericError(xmlGenericErrorContext,
1324 				"xmlSaveUri: out of memory\n");
1325                      xmlFree(ret);
1326 			return(NULL);
1327 		    }
1328 		    ret = temp;
1329 		}
1330 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1331 		    ret[len++] = *p++;
1332 		else {
1333 		    int val = *(unsigned char *)p++;
1334 		    int hi = val / 0x10, lo = val % 0x10;
1335 		    ret[len++] = '%';
1336 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1337 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1338 		}
1339 	    }
1340 	}
1341     }
1342     if (uri->fragment != NULL) {
1343 	if (len + 3 >= max) {
1344 	    max *= 2;
1345 	    temp = (xmlChar *) xmlRealloc(ret,
1346 		    (max + 1) * sizeof(xmlChar));
1347 	    if (temp == NULL) {
1348 			xmlGenericError(xmlGenericErrorContext,
1349 				"xmlSaveUri: out of memory\n");
1350                      xmlFree(ret);
1351 			return(NULL);
1352 		    }
1353 		    ret = temp;
1354 	}
1355 	ret[len++] = '#';
1356 	p = uri->fragment;
1357 	while (*p != 0) {
1358 	    if (len + 3 >= max) {
1359 		max *= 2;
1360 		temp = (xmlChar *) xmlRealloc(ret,
1361 			(max + 1) * sizeof(xmlChar));
1362 		if (temp == NULL) {
1363 			xmlGenericError(xmlGenericErrorContext,
1364 				"xmlSaveUri: out of memory\n");
1365                      xmlFree(ret);
1366 			return(NULL);
1367 		    }
1368 		    ret = temp;
1369 	    }
1370 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1371 		ret[len++] = *p++;
1372 	    else {
1373 		int val = *(unsigned char *)p++;
1374 		int hi = val / 0x10, lo = val % 0x10;
1375 		ret[len++] = '%';
1376 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1377 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1378 	    }
1379 	}
1380     }
1381     if (len >= max) {
1382 	max *= 2;
1383 	temp = (xmlChar *) xmlRealloc(ret, (max + 1) * sizeof(xmlChar));
1384 	if (temp == NULL) {
1385 			xmlGenericError(xmlGenericErrorContext,
1386 				"xmlSaveUri: out of memory\n");
1387                      xmlFree(ret);
1388 			return(NULL);
1389 		    }
1390 		    ret = temp;
1391     }
1392     ret[len] = 0;
1393     return(ret);
1394 }
1395 
1396 /**
1397  * xmlPrintURI:
1398  * @stream:  a FILE* for the output
1399  * @uri:  pointer to an xmlURI
1400  *
1401  * Prints the URI in the stream @stream.
1402  */
1403 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1404 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1405     xmlChar *out;
1406 
1407     out = xmlSaveUri(uri);
1408     if (out != NULL) {
1409 	fprintf(stream, "%s", (char *) out);
1410 	xmlFree(out);
1411     }
1412 }
1413 
1414 /**
1415  * xmlCleanURI:
1416  * @uri:  pointer to an xmlURI
1417  *
1418  * Make sure the xmlURI struct is free of content
1419  */
1420 static void
xmlCleanURI(xmlURIPtr uri)1421 xmlCleanURI(xmlURIPtr uri) {
1422     if (uri == NULL) return;
1423 
1424     if (uri->scheme != NULL) xmlFree(uri->scheme);
1425     uri->scheme = NULL;
1426     if (uri->server != NULL) xmlFree(uri->server);
1427     uri->server = NULL;
1428     if (uri->user != NULL) xmlFree(uri->user);
1429     uri->user = NULL;
1430     if (uri->path != NULL) xmlFree(uri->path);
1431     uri->path = NULL;
1432     if (uri->fragment != NULL) xmlFree(uri->fragment);
1433     uri->fragment = NULL;
1434     if (uri->opaque != NULL) xmlFree(uri->opaque);
1435     uri->opaque = NULL;
1436     if (uri->authority != NULL) xmlFree(uri->authority);
1437     uri->authority = NULL;
1438     if (uri->query != NULL) xmlFree(uri->query);
1439     uri->query = NULL;
1440     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1441     uri->query_raw = NULL;
1442 }
1443 
1444 /**
1445  * xmlFreeURI:
1446  * @uri:  pointer to an xmlURI
1447  *
1448  * Free up the xmlURI struct
1449  */
1450 void
xmlFreeURI(xmlURIPtr uri)1451 xmlFreeURI(xmlURIPtr uri) {
1452     if (uri == NULL) return;
1453 
1454     if (uri->scheme != NULL) xmlFree(uri->scheme);
1455     if (uri->server != NULL) xmlFree(uri->server);
1456     if (uri->user != NULL) xmlFree(uri->user);
1457     if (uri->path != NULL) xmlFree(uri->path);
1458     if (uri->fragment != NULL) xmlFree(uri->fragment);
1459     if (uri->opaque != NULL) xmlFree(uri->opaque);
1460     if (uri->authority != NULL) xmlFree(uri->authority);
1461     if (uri->query != NULL) xmlFree(uri->query);
1462     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1463     xmlFree(uri);
1464 }
1465 
1466 /************************************************************************
1467  *									*
1468  *			Helper functions				*
1469  *									*
1470  ************************************************************************/
1471 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done;
 * the result is never longer than the input.
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * At the top of each iteration "cur" is at the start of a segment.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            *out++ = *cur++;
        }
        /* normalize // : collapse a run of slashes into a single one */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        *out++ = *cur++;
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Nothing at all precedes "cur": keep going from here.  */
        if (cur == path)
            continue;

        /* Step backwards over the separator(s) in front of "cur".  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;

        /* Only slashes precede "cur", so there is no previous segment.
         * Testing the character rather than "segp == path" matters: a
         * one-character first segment (as in "a/b/../..") also leaves
         * segp at path, and it must still be revisited.
         */
        if (segp[0] == '/')
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                *out++ = *cur++;
            out[0] = 0;
        }
    }

    return(0);
}
1659 
/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is one of '0'-'9', 'a'-'f' or 'A'-'F', 0 otherwise
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1667 
1668 /**
1669  * xmlURIUnescapeString:
1670  * @str:  the string to unescape
1671  * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
1672  * @target:  optional destination buffer
1673  *
1674  * Unescaping routine, but does not check that the string is an URI. The
1675  * output is a direct unsigned char translation of %XX values (no encoding)
1676  * Note that the length of the result can only be smaller or same size as
1677  * the input string.
1678  *
1679  * Returns a copy of the string, but unescaped, will return NULL only in case
1680  * of error
1681  */
1682 char *
xmlURIUnescapeString(const char * str,int len,char * target)1683 xmlURIUnescapeString(const char *str, int len, char *target) {
1684     char *ret, *out;
1685     const char *in;
1686 
1687     if (str == NULL)
1688 	return(NULL);
1689     if (len <= 0) len = strlen(str);
1690     if (len < 0) return(NULL);
1691 
1692     if (target == NULL) {
1693 	ret = (char *) xmlMallocAtomic(len + 1);
1694 	if (ret == NULL) {
1695 	    xmlGenericError(xmlGenericErrorContext,
1696 		    "xmlURIUnescapeString: out of memory\n");
1697 	    return(NULL);
1698 	}
1699     } else
1700 	ret = target;
1701     in = str;
1702     out = ret;
1703     while(len > 0) {
1704 	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
1705 	    in++;
1706 	    if ((*in >= '0') && (*in <= '9'))
1707 	        *out = (*in - '0');
1708 	    else if ((*in >= 'a') && (*in <= 'f'))
1709 	        *out = (*in - 'a') + 10;
1710 	    else if ((*in >= 'A') && (*in <= 'F'))
1711 	        *out = (*in - 'A') + 10;
1712 	    in++;
1713 	    if ((*in >= '0') && (*in <= '9'))
1714 	        *out = *out * 16 + (*in - '0');
1715 	    else if ((*in >= 'a') && (*in <= 'f'))
1716 	        *out = *out * 16 + (*in - 'a') + 10;
1717 	    else if ((*in >= 'A') && (*in <= 'F'))
1718 	        *out = *out * 16 + (*in - 'A') + 10;
1719 	    in++;
1720 	    len -= 3;
1721 	    out++;
1722 	} else {
1723 	    *out++ = *in++;
1724 	    len--;
1725 	}
1726     }
1727     *out = 0;
1728     return(ret);
1729 }
1730 
1731 /**
1732  * xmlURIEscapeStr:
1733  * @str:  string to escape
1734  * @list: exception list string of chars not to escape
1735  *
1736  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1737  * and the characters in the exception list.
1738  *
1739  * Returns a new escaped string or NULL in case of error.
1740  */
1741 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1742 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1743     xmlChar *ret, ch;
1744     xmlChar *temp;
1745     const xmlChar *in;
1746 
1747     unsigned int len, out;
1748 
1749     if (str == NULL)
1750 	return(NULL);
1751     if (str[0] == 0)
1752 	return(xmlStrdup(str));
1753     len = xmlStrlen(str);
1754     if (!(len > 0)) return(NULL);
1755 
1756     len += 20;
1757     ret = (xmlChar *) xmlMallocAtomic(len);
1758     if (ret == NULL) {
1759 	xmlGenericError(xmlGenericErrorContext,
1760 		"xmlURIEscapeStr: out of memory\n");
1761 	return(NULL);
1762     }
1763     in = (const xmlChar *) str;
1764     out = 0;
1765     while(*in != 0) {
1766 	if (len - out <= 3) {
1767 	    len += 20;
1768 	    temp = (xmlChar *) xmlRealloc(ret, len);
1769 	    if (temp == NULL) {
1770 		xmlGenericError(xmlGenericErrorContext,
1771 			"xmlURIEscapeStr: out of memory\n");
1772 		xmlFree(ret);
1773 		return(NULL);
1774 	    }
1775 	    ret = temp;
1776 	}
1777 
1778 	ch = *in;
1779 
1780 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1781 	    unsigned char val;
1782 	    ret[out++] = '%';
1783 	    val = ch >> 4;
1784 	    if (val <= 9)
1785 		ret[out++] = '0' + val;
1786 	    else
1787 		ret[out++] = 'A' + val - 0xA;
1788 	    val = ch & 0xF;
1789 	    if (val <= 9)
1790 		ret[out++] = '0' + val;
1791 	    else
1792 		ret[out++] = 'A' + val - 0xA;
1793 	    in++;
1794 	} else {
1795 	    ret[out++] = *in++;
1796 	}
1797 
1798     }
1799     ret[out] = 0;
1800     return(ret);
1801 }
1802 
1803 /**
1804  * xmlURIEscape:
1805  * @str:  the string of the URI to escape
1806  *
1807  * Escaping routine, does not do validity checks !
1808  * It will try to escape the chars needing this, but this is heuristic
1809  * based it's impossible to be sure.
1810  *
1811  * Returns an copy of the string, but escaped
1812  *
1813  * 25 May 2001
1814  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1815  * according to RFC2396.
1816  *   - Carl Douglas
1817  */
1818 xmlChar *
xmlURIEscape(const xmlChar * str)1819 xmlURIEscape(const xmlChar * str)
1820 {
1821     xmlChar *ret, *segment = NULL;
1822     xmlURIPtr uri;
1823     int ret2;
1824 
1825 #define NULLCHK(p) if(!p) { \
1826                    xmlGenericError(xmlGenericErrorContext, \
1827                         "xmlURIEscape: out of memory\n"); \
1828                         xmlFreeURI(uri); \
1829                         return NULL; } \
1830 
1831     if (str == NULL)
1832         return (NULL);
1833 
1834     uri = xmlCreateURI();
1835     if (uri != NULL) {
1836 	/*
1837 	 * Allow escaping errors in the unescaped form
1838 	 */
1839         uri->cleanup = 1;
1840         ret2 = xmlParseURIReference(uri, (const char *)str);
1841         if (ret2) {
1842             xmlFreeURI(uri);
1843             return (NULL);
1844         }
1845     }
1846 
1847     if (!uri)
1848         return NULL;
1849 
1850     ret = NULL;
1851 
1852     if (uri->scheme) {
1853         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1854         NULLCHK(segment)
1855         ret = xmlStrcat(ret, segment);
1856         ret = xmlStrcat(ret, BAD_CAST ":");
1857         xmlFree(segment);
1858     }
1859 
1860     if (uri->authority) {
1861         segment =
1862             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1863         NULLCHK(segment)
1864         ret = xmlStrcat(ret, BAD_CAST "//");
1865         ret = xmlStrcat(ret, segment);
1866         xmlFree(segment);
1867     }
1868 
1869     if (uri->user) {
1870         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1871         NULLCHK(segment)
1872 		ret = xmlStrcat(ret,BAD_CAST "//");
1873         ret = xmlStrcat(ret, segment);
1874         ret = xmlStrcat(ret, BAD_CAST "@");
1875         xmlFree(segment);
1876     }
1877 
1878     if (uri->server) {
1879         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1880         NULLCHK(segment)
1881 		if (uri->user == NULL)
1882 		ret = xmlStrcat(ret, BAD_CAST "//");
1883         ret = xmlStrcat(ret, segment);
1884         xmlFree(segment);
1885     }
1886 
1887     if (uri->port) {
1888         xmlChar port[10];
1889 
1890         snprintf((char *) port, 10, "%d", uri->port);
1891         ret = xmlStrcat(ret, BAD_CAST ":");
1892         ret = xmlStrcat(ret, port);
1893     }
1894 
1895     if (uri->path) {
1896         segment =
1897             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1898         NULLCHK(segment)
1899         ret = xmlStrcat(ret, segment);
1900         xmlFree(segment);
1901     }
1902 
1903     if (uri->query_raw) {
1904         ret = xmlStrcat(ret, BAD_CAST "?");
1905         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1906     }
1907     else if (uri->query) {
1908         segment =
1909             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1910         NULLCHK(segment)
1911         ret = xmlStrcat(ret, BAD_CAST "?");
1912         ret = xmlStrcat(ret, segment);
1913         xmlFree(segment);
1914     }
1915 
1916     if (uri->opaque) {
1917         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1918         NULLCHK(segment)
1919         ret = xmlStrcat(ret, segment);
1920         xmlFree(segment);
1921     }
1922 
1923     if (uri->fragment) {
1924         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1925         NULLCHK(segment)
1926         ret = xmlStrcat(ret, BAD_CAST "#");
1927         ret = xmlStrcat(ret, segment);
1928         xmlFree(segment);
1929     }
1930 
1931     xmlFreeURI(uri);
1932 #undef NULLCHK
1933 
1934     return (ret);
1935 }
1936 
1937 /************************************************************************
1938  *									*
1939  *			Public functions				*
1940  *									*
1941  ************************************************************************/
1942 
1943 /**
1944  * xmlBuildURI:
1945  * @URI:  the URI instance found in the document
1946  * @base:  the base value
1947  *
 * Computes the final URI of the reference done by checking that
1949  * the given URI is valid, and building the final URI using the
1950  * base URI. This is processed according to section 5.2 of the
1951  * RFC 2396
1952  *
1953  * 5.2. Resolving Relative References to Absolute Form
1954  *
1955  * Returns a new URI string (to be freed by the caller) or NULL in case
1956  *         of error.
1957  */
1958 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1959 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1960     xmlChar *val = NULL;
1961     int ret, len, indx, cur, out;
1962     xmlURIPtr ref = NULL;
1963     xmlURIPtr bas = NULL;
1964     xmlURIPtr res = NULL;
1965 
1966     /*
1967      * 1) The URI reference is parsed into the potential four components and
1968      *    fragment identifier, as described in Section 4.3.
1969      *
1970      *    NOTE that a completely empty URI is treated by modern browsers
1971      *    as a reference to "." rather than as a synonym for the current
1972      *    URI.  Should we do that here?
1973      */
1974     if (URI == NULL)
1975 	ret = -1;
1976     else {
1977 	if (*URI) {
1978 	    ref = xmlCreateURI();
1979 	    if (ref == NULL)
1980 		goto done;
1981 	    ret = xmlParseURIReference(ref, (const char *) URI);
1982 	}
1983 	else
1984 	    ret = 0;
1985     }
1986     if (ret != 0)
1987 	goto done;
1988     if ((ref != NULL) && (ref->scheme != NULL)) {
1989 	/*
1990 	 * The URI is absolute don't modify.
1991 	 */
1992 	val = xmlStrdup(URI);
1993 	goto done;
1994     }
1995     if (base == NULL)
1996 	ret = -1;
1997     else {
1998 	bas = xmlCreateURI();
1999 	if (bas == NULL)
2000 	    goto done;
2001 	ret = xmlParseURIReference(bas, (const char *) base);
2002     }
2003     if (ret != 0) {
2004 	if (ref)
2005 	    val = xmlSaveUri(ref);
2006 	goto done;
2007     }
2008     if (ref == NULL) {
2009 	/*
2010 	 * the base fragment must be ignored
2011 	 */
2012 	if (bas->fragment != NULL) {
2013 	    xmlFree(bas->fragment);
2014 	    bas->fragment = NULL;
2015 	}
2016 	val = xmlSaveUri(bas);
2017 	goto done;
2018     }
2019 
2020     /*
2021      * 2) If the path component is empty and the scheme, authority, and
2022      *    query components are undefined, then it is a reference to the
2023      *    current document and we are done.  Otherwise, the reference URI's
2024      *    query and fragment components are defined as found (or not found)
2025      *    within the URI reference and not inherited from the base URI.
2026      *
2027      *    NOTE that in modern browsers, the parsing differs from the above
2028      *    in the following aspect:  the query component is allowed to be
2029      *    defined while still treating this as a reference to the current
2030      *    document.
2031      */
2032     res = xmlCreateURI();
2033     if (res == NULL)
2034 	goto done;
2035     if ((ref->scheme == NULL) && (ref->path == NULL) &&
2036 	((ref->authority == NULL) && (ref->server == NULL))) {
2037 	if (bas->scheme != NULL)
2038 	    res->scheme = xmlMemStrdup(bas->scheme);
2039 	if (bas->authority != NULL)
2040 	    res->authority = xmlMemStrdup(bas->authority);
2041 	else if (bas->server != NULL) {
2042 	    res->server = xmlMemStrdup(bas->server);
2043 	    if (bas->user != NULL)
2044 		res->user = xmlMemStrdup(bas->user);
2045 	    res->port = bas->port;
2046 	}
2047 	if (bas->path != NULL)
2048 	    res->path = xmlMemStrdup(bas->path);
2049 	if (ref->query_raw != NULL)
2050 	    res->query_raw = xmlMemStrdup (ref->query_raw);
2051 	else if (ref->query != NULL)
2052 	    res->query = xmlMemStrdup(ref->query);
2053 	else if (bas->query_raw != NULL)
2054 	    res->query_raw = xmlMemStrdup(bas->query_raw);
2055 	else if (bas->query != NULL)
2056 	    res->query = xmlMemStrdup(bas->query);
2057 	if (ref->fragment != NULL)
2058 	    res->fragment = xmlMemStrdup(ref->fragment);
2059 	goto step_7;
2060     }
2061 
2062     /*
2063      * 3) If the scheme component is defined, indicating that the reference
2064      *    starts with a scheme name, then the reference is interpreted as an
2065      *    absolute URI and we are done.  Otherwise, the reference URI's
2066      *    scheme is inherited from the base URI's scheme component.
2067      */
2068     if (ref->scheme != NULL) {
2069 	val = xmlSaveUri(ref);
2070 	goto done;
2071     }
2072     if (bas->scheme != NULL)
2073 	res->scheme = xmlMemStrdup(bas->scheme);
2074 
2075     if (ref->query_raw != NULL)
2076 	res->query_raw = xmlMemStrdup(ref->query_raw);
2077     else if (ref->query != NULL)
2078 	res->query = xmlMemStrdup(ref->query);
2079     if (ref->fragment != NULL)
2080 	res->fragment = xmlMemStrdup(ref->fragment);
2081 
2082     /*
2083      * 4) If the authority component is defined, then the reference is a
2084      *    network-path and we skip to step 7.  Otherwise, the reference
2085      *    URI's authority is inherited from the base URI's authority
2086      *    component, which will also be undefined if the URI scheme does not
2087      *    use an authority component.
2088      */
2089     if ((ref->authority != NULL) || (ref->server != NULL)) {
2090 	if (ref->authority != NULL)
2091 	    res->authority = xmlMemStrdup(ref->authority);
2092 	else {
2093 	    res->server = xmlMemStrdup(ref->server);
2094 	    if (ref->user != NULL)
2095 		res->user = xmlMemStrdup(ref->user);
2096             res->port = ref->port;
2097 	}
2098 	if (ref->path != NULL)
2099 	    res->path = xmlMemStrdup(ref->path);
2100 	goto step_7;
2101     }
2102     if (bas->authority != NULL)
2103 	res->authority = xmlMemStrdup(bas->authority);
2104     else if (bas->server != NULL) {
2105 	res->server = xmlMemStrdup(bas->server);
2106 	if (bas->user != NULL)
2107 	    res->user = xmlMemStrdup(bas->user);
2108 	res->port = bas->port;
2109     }
2110 
2111     /*
2112      * 5) If the path component begins with a slash character ("/"), then
2113      *    the reference is an absolute-path and we skip to step 7.
2114      */
2115     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2116 	res->path = xmlMemStrdup(ref->path);
2117 	goto step_7;
2118     }
2119 
2120 
2121     /*
2122      * 6) If this step is reached, then we are resolving a relative-path
2123      *    reference.  The relative path needs to be merged with the base
2124      *    URI's path.  Although there are many ways to do this, we will
2125      *    describe a simple method using a separate string buffer.
2126      *
2127      * Allocate a buffer large enough for the result string.
2128      */
2129     len = 2; /* extra / and 0 */
2130     if (ref->path != NULL)
2131 	len += strlen(ref->path);
2132     if (bas->path != NULL)
2133 	len += strlen(bas->path);
2134     res->path = (char *) xmlMallocAtomic(len);
2135     if (res->path == NULL) {
2136 	xmlGenericError(xmlGenericErrorContext,
2137 		"xmlBuildURI: out of memory\n");
2138 	goto done;
2139     }
2140     res->path[0] = 0;
2141 
2142     /*
2143      * a) All but the last segment of the base URI's path component is
2144      *    copied to the buffer.  In other words, any characters after the
2145      *    last (right-most) slash character, if any, are excluded.
2146      */
2147     cur = 0;
2148     out = 0;
2149     if (bas->path != NULL) {
2150 	while (bas->path[cur] != 0) {
2151 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2152 		cur++;
2153 	    if (bas->path[cur] == 0)
2154 		break;
2155 
2156 	    cur++;
2157 	    while (out < cur) {
2158 		res->path[out] = bas->path[out];
2159 		out++;
2160 	    }
2161 	}
2162     }
2163     res->path[out] = 0;
2164 
2165     /*
2166      * b) The reference's path component is appended to the buffer
2167      *    string.
2168      */
2169     if (ref->path != NULL && ref->path[0] != 0) {
2170 	indx = 0;
2171 	/*
2172 	 * Ensure the path includes a '/'
2173 	 */
2174 	if ((out == 0) && (bas->server != NULL))
2175 	    res->path[out++] = '/';
2176 	while (ref->path[indx] != 0) {
2177 	    res->path[out++] = ref->path[indx++];
2178 	}
2179     }
2180     res->path[out] = 0;
2181 
2182     /*
2183      * Steps c) to h) are really path normalization steps
2184      */
2185     xmlNormalizeURIPath(res->path);
2186 
2187 step_7:
2188 
2189     /*
2190      * 7) The resulting URI components, including any inherited from the
2191      *    base URI, are recombined to give the absolute form of the URI
2192      *    reference.
2193      */
2194     val = xmlSaveUri(res);
2195 
2196 done:
2197     if (ref != NULL)
2198 	xmlFreeURI(ref);
2199     if (bas != NULL)
2200 	xmlFreeURI(bas);
2201     if (res != NULL)
2202 	xmlFreeURI(res);
2203     return(val);
2204 }
2205 
2206 /**
2207  * xmlBuildRelativeURI:
2208  * @URI:  the URI reference under consideration
2209  * @base:  the base value
2210  *
2211  * Expresses the URI of the reference in terms relative to the
2212  * base.  Some examples of this operation include:
2213  *     base = "http://site1.com/docs/book1.html"
2214  *        URI input                        URI returned
2215  *     docs/pic1.gif                    pic1.gif
2216  *     docs/img/pic1.gif                img/pic1.gif
2217  *     img/pic1.gif                     ../img/pic1.gif
2218  *     http://site1.com/docs/pic1.gif   pic1.gif
2219  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2220  *
2221  *     base = "docs/book1.html"
2222  *        URI input                        URI returned
2223  *     docs/pic1.gif                    pic1.gif
2224  *     docs/img/pic1.gif                img/pic1.gif
2225  *     img/pic1.gif                     ../img/pic1.gif
2226  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2227  *
2228  *
2229  * Note: if the URI reference is really wierd or complicated, it may be
2230  *       worthwhile to first convert it into a "nice" one by calling
2231  *       xmlBuildURI (using 'base') before calling this routine,
2232  *       since this routine (for reasonable efficiency) assumes URI has
2233  *       already been through some validation.
2234  *
2235  * Returns a new URI string (to be freed by the caller) or NULL in case
2236  * error.
2237  */
2238 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2239 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2240 {
2241     xmlChar *val = NULL;
2242     int ret;
2243     int ix;
2244     int pos = 0;
2245     int nbslash = 0;
2246     int len;
2247     xmlURIPtr ref = NULL;
2248     xmlURIPtr bas = NULL;
2249     xmlChar *bptr, *uptr, *vptr;
2250     int remove_path = 0;
2251 
2252     if ((URI == NULL) || (*URI == 0))
2253 	return NULL;
2254 
2255     /*
2256      * First parse URI into a standard form
2257      */
2258     ref = xmlCreateURI ();
2259     if (ref == NULL)
2260 	return NULL;
2261     /* If URI not already in "relative" form */
2262     if (URI[0] != '.') {
2263 	ret = xmlParseURIReference (ref, (const char *) URI);
2264 	if (ret != 0)
2265 	    goto done;		/* Error in URI, return NULL */
2266     } else
2267 	ref->path = (char *)xmlStrdup(URI);
2268 
2269     /*
2270      * Next parse base into the same standard form
2271      */
2272     if ((base == NULL) || (*base == 0)) {
2273 	val = xmlStrdup (URI);
2274 	goto done;
2275     }
2276     bas = xmlCreateURI ();
2277     if (bas == NULL)
2278 	goto done;
2279     if (base[0] != '.') {
2280 	ret = xmlParseURIReference (bas, (const char *) base);
2281 	if (ret != 0)
2282 	    goto done;		/* Error in base, return NULL */
2283     } else
2284 	bas->path = (char *)xmlStrdup(base);
2285 
2286     /*
2287      * If the scheme / server on the URI differs from the base,
2288      * just return the URI
2289      */
2290     if ((ref->scheme != NULL) &&
2291 	((bas->scheme == NULL) ||
2292 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2293 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2294 	val = xmlStrdup (URI);
2295 	goto done;
2296     }
2297     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2298 	val = xmlStrdup(BAD_CAST "");
2299 	goto done;
2300     }
2301     if (bas->path == NULL) {
2302 	val = xmlStrdup((xmlChar *)ref->path);
2303 	goto done;
2304     }
2305     if (ref->path == NULL) {
2306         ref->path = (char *) "/";
2307 	remove_path = 1;
2308     }
2309 
2310     /*
2311      * At this point (at last!) we can compare the two paths
2312      *
2313      * First we take care of the special case where either of the
2314      * two path components may be missing (bug 316224)
2315      */
2316     if (bas->path == NULL) {
2317 	if (ref->path != NULL) {
2318 	    uptr = (xmlChar *) ref->path;
2319 	    if (*uptr == '/')
2320 		uptr++;
2321 	    /* exception characters from xmlSaveUri */
2322 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2323 	}
2324 	goto done;
2325     }
2326     bptr = (xmlChar *)bas->path;
2327     if (ref->path == NULL) {
2328 	for (ix = 0; bptr[ix] != 0; ix++) {
2329 	    if (bptr[ix] == '/')
2330 		nbslash++;
2331 	}
2332 	uptr = NULL;
2333 	len = 1;	/* this is for a string terminator only */
2334     } else {
2335     /*
2336      * Next we compare the two strings and find where they first differ
2337      */
2338 	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
2339             pos += 2;
2340 	if ((*bptr == '.') && (bptr[1] == '/'))
2341             bptr += 2;
2342 	else if ((*bptr == '/') && (ref->path[pos] != '/'))
2343 	    bptr++;
2344 	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
2345 	    pos++;
2346 
2347 	if (bptr[pos] == ref->path[pos]) {
2348 	    val = xmlStrdup(BAD_CAST "");
2349 	    goto done;		/* (I can't imagine why anyone would do this) */
2350 	}
2351 
2352 	/*
2353 	 * In URI, "back up" to the last '/' encountered.  This will be the
2354 	 * beginning of the "unique" suffix of URI
2355 	 */
2356 	ix = pos;
2357 	if ((ref->path[ix] == '/') && (ix > 0))
2358 	    ix--;
2359 	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
2360 	    ix -= 2;
2361 	for (; ix > 0; ix--) {
2362 	    if (ref->path[ix] == '/')
2363 		break;
2364 	}
2365 	if (ix == 0) {
2366 	    uptr = (xmlChar *)ref->path;
2367 	} else {
2368 	    ix++;
2369 	    uptr = (xmlChar *)&ref->path[ix];
2370 	}
2371 
2372 	/*
2373 	 * In base, count the number of '/' from the differing point
2374 	 */
2375 	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
2376 	    for (; bptr[ix] != 0; ix++) {
2377 		if (bptr[ix] == '/')
2378 		    nbslash++;
2379 	    }
2380 	}
2381 	len = xmlStrlen (uptr) + 1;
2382     }
2383 
2384     if (nbslash == 0) {
2385 	if (uptr != NULL)
2386 	    /* exception characters from xmlSaveUri */
2387 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2388 	goto done;
2389     }
2390 
2391     /*
2392      * Allocate just enough space for the returned string -
2393      * length of the remainder of the URI, plus enough space
2394      * for the "../" groups, plus one for the terminator
2395      */
2396     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2397     if (val == NULL) {
2398 	xmlGenericError(xmlGenericErrorContext,
2399 		"xmlBuildRelativeURI: out of memory\n");
2400 	goto done;
2401     }
2402     vptr = val;
2403     /*
2404      * Put in as many "../" as needed
2405      */
2406     for (; nbslash>0; nbslash--) {
2407 	*vptr++ = '.';
2408 	*vptr++ = '.';
2409 	*vptr++ = '/';
2410     }
2411     /*
2412      * Finish up with the end of the URI
2413      */
2414     if (uptr != NULL) {
2415         if ((vptr > val) && (len > 0) &&
2416 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2417 	    memcpy (vptr, uptr + 1, len - 1);
2418 	    vptr[len - 2] = 0;
2419 	} else {
2420 	    memcpy (vptr, uptr, len);
2421 	    vptr[len - 1] = 0;
2422 	}
2423     } else {
2424 	vptr[len - 1] = 0;
2425     }
2426 
2427     /* escape the freshly-built path */
2428     vptr = val;
2429 	/* exception characters from xmlSaveUri */
2430     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2431     xmlFree(vptr);
2432 
2433 done:
2434     /*
2435      * Free the working variables
2436      */
2437     if (remove_path != 0)
2438         ref->path = NULL;
2439     if (ref != NULL)
2440 	xmlFreeURI (ref);
2441     if (bas != NULL)
2442 	xmlFreeURI (bas);
2443 
2444     return val;
2445 }
2446 
2447 /**
2448  * xmlCanonicPath:
2449  * @path:  the resource locator in a filesystem notation
2450  *
2451  * Constructs a canonic path from the specified path.
2452  *
2453  * Returns a new canonic path, or a duplicate of the path parameter if the
2454  * construction fails. The caller is responsible for freeing the memory occupied
2455  * by the returned string. If there is insufficient memory available, or the
2456  * argument is NULL, the function returns NULL.
2457  */
2458 #define IS_WINDOWS_PATH(p) 					\
2459 	((p != NULL) &&						\
2460 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2461 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2462 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2463 xmlChar *
xmlCanonicPath(const xmlChar * path)2464 xmlCanonicPath(const xmlChar *path)
2465 {
2466 /*
2467  * For Windows implementations, additional work needs to be done to
2468  * replace backslashes in pathnames with "forward slashes"
2469  */
2470 #if defined(_WIN32) && !defined(__CYGWIN__)
2471     int len = 0;
2472     int i = 0;
2473     xmlChar *p = NULL;
2474 #endif
2475     xmlURIPtr uri;
2476     xmlChar *ret;
2477     const xmlChar *absuri;
2478 
2479     if (path == NULL)
2480 	return(NULL);
2481 
2482     /* sanitize filename starting with // so it can be used as URI */
2483     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2484         path++;
2485 
2486     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2487 	xmlFreeURI(uri);
2488 	return xmlStrdup(path);
2489     }
2490 
2491     /* Check if this is an "absolute uri" */
2492     absuri = xmlStrstr(path, BAD_CAST "://");
2493     if (absuri != NULL) {
2494         int l, j;
2495 	unsigned char c;
2496 	xmlChar *escURI;
2497 
2498         /*
2499 	 * this looks like an URI where some parts have not been
2500 	 * escaped leading to a parsing problem.  Check that the first
2501 	 * part matches a protocol.
2502 	 */
2503 	l = absuri - path;
2504 	/* Bypass if first part (part before the '://') is > 20 chars */
2505 	if ((l <= 0) || (l > 20))
2506 	    goto path_processing;
2507 	/* Bypass if any non-alpha characters are present in first part */
2508 	for (j = 0;j < l;j++) {
2509 	    c = path[j];
2510 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2511 	        goto path_processing;
2512 	}
2513 
2514 	/* Escape all except the characters specified in the supplied path */
2515         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2516 	if (escURI != NULL) {
2517 	    /* Try parsing the escaped path */
2518 	    uri = xmlParseURI((const char *) escURI);
2519 	    /* If successful, return the escaped string */
2520 	    if (uri != NULL) {
2521 	        xmlFreeURI(uri);
2522 		return escURI;
2523 	    }
2524 	}
2525     }
2526 
2527 path_processing:
2528 /* For Windows implementations, replace backslashes with 'forward slashes' */
2529 #if defined(_WIN32) && !defined(__CYGWIN__)
2530     /*
2531      * Create a URI structure
2532      */
2533     uri = xmlCreateURI();
2534     if (uri == NULL) {		/* Guard against 'out of memory' */
2535         return(NULL);
2536     }
2537 
2538     len = xmlStrlen(path);
2539     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2540         /* make the scheme 'file' */
2541 	uri->scheme = xmlStrdup(BAD_CAST "file");
2542 	/* allocate space for leading '/' + path + string terminator */
2543 	uri->path = xmlMallocAtomic(len + 2);
2544 	if (uri->path == NULL) {
2545 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2546 	    return(NULL);
2547 	}
2548 	/* Put in leading '/' plus path */
2549 	uri->path[0] = '/';
2550 	p = uri->path + 1;
2551 	strncpy(p, path, len + 1);
2552     } else {
2553 	uri->path = xmlStrdup(path);
2554 	if (uri->path == NULL) {
2555 	    xmlFreeURI(uri);
2556 	    return(NULL);
2557 	}
2558 	p = uri->path;
2559     }
2560     /* Now change all occurences of '\' to '/' */
2561     while (*p != '\0') {
2562 	if (*p == '\\')
2563 	    *p = '/';
2564 	p++;
2565     }
2566 
2567     if (uri->scheme == NULL) {
2568 	ret = xmlStrdup((const xmlChar *) uri->path);
2569     } else {
2570 	ret = xmlSaveUri(uri);
2571     }
2572 
2573     xmlFreeURI(uri);
2574 #else
2575     ret = xmlStrdup((const xmlChar *) path);
2576 #endif
2577     return(ret);
2578 }
2579 
2580 /**
2581  * xmlPathToURI:
2582  * @path:  the resource locator in a filesystem notation
2583  *
2584  * Constructs an URI expressing the existing path
2585  *
2586  * Returns a new URI, or a duplicate of the path parameter if the
2587  * construction fails. The caller is responsible for freeing the memory
2588  * occupied by the returned string. If there is insufficient memory available,
2589  * or the argument is NULL, the function returns NULL.
2590  */
2591 xmlChar *
xmlPathToURI(const xmlChar * path)2592 xmlPathToURI(const xmlChar *path)
2593 {
2594     xmlURIPtr uri;
2595     xmlURI temp;
2596     xmlChar *ret, *cal;
2597 
2598     if (path == NULL)
2599         return(NULL);
2600 
2601     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2602 	xmlFreeURI(uri);
2603 	return xmlStrdup(path);
2604     }
2605     cal = xmlCanonicPath(path);
2606     if (cal == NULL)
2607         return(NULL);
2608 #if defined(_WIN32) && !defined(__CYGWIN__)
2609     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2610        If 'cal' is a valid URI allready then we are done here, as continuing would make
2611        it invalid. */
2612     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2613 	xmlFreeURI(uri);
2614 	return cal;
2615     }
2616     /* 'cal' can contain a relative path with backslashes. If that is processed
2617        by xmlSaveURI, they will be escaped and the external entity loader machinery
2618        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2619     ret = cal;
2620     while (*ret != '\0') {
2621 	if (*ret == '\\')
2622 	    *ret = '/';
2623 	ret++;
2624     }
2625 #endif
2626     memset(&temp, 0, sizeof(temp));
2627     temp.path = (char *) cal;
2628     ret = xmlSaveUri(&temp);
2629     xmlFree(cal);
2630     return(ret);
2631 }
2632 #define bottom_uri
2633 #include "elfgcchack.h"
2634