1 /* libxml2 - Library for parsing XML documents
2  * Copyright (C) 2006-2019 Free Software Foundation, Inc.
3  *
4  * This file is not part of the GNU gettext program, but is used with
5  * GNU gettext.
6  *
7  * The original copyright notice is as follows:
8  */
9 
10 /*
11  * Copyright (C) 1998-2012 Daniel Veillard.  All Rights Reserved.
12  *
13  * Permission is hereby granted, free of charge, to any person obtaining a copy
14  * of this software and associated documentation files (the "Software"), to deal
15  * in the Software without restriction, including without limitation the rights
16  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17  * copies of the Software, and to permit persons to whom the Software is fur-
18  * nished to do so, subject to the following conditions:
19  *
20  * The above copyright notice and this permission notice shall be included in
21  * all copies or substantial portions of the Software.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
25  * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
26  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29  * THE SOFTWARE.
30  *
31  * daniel@veillard.com
32  */
33 
34 /**
35  * uri.c: set of generic URI related routines
36  *
37  * Reference: RFCs 3986, 2732 and 2373
38  */
39 
40 #define IN_LIBXML
41 #include "libxml.h"
42 
43 #include <string.h>
44 
45 #include <libxml/xmlmemory.h>
46 #include <libxml/uri.h>
47 #include <libxml/globals.h>
48 #include <libxml/xmlerror.h>
49 
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar in the RFCs referenced above places no bound on the
 * length of a URI. In practice URIs are usually relatively short, except
 * for the data URI scheme defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted limit for proper operation.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The replacement list is parenthesized so the macro acts as one value
 * inside any surrounding expression (e.g. "x / MAX_URI_LENGTH").
 */
#define MAX_URI_LENGTH (1024 * 1024)
63 
64 static void
xmlURIErrMemory(const char * extra)65 xmlURIErrMemory(const char *extra)
66 {
67     if (extra)
68         __xmlRaiseError(NULL, NULL, NULL,
69                         NULL, NULL, XML_FROM_URI,
70                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
71                         extra, NULL, NULL, 0, 0,
72                         "Memory allocation failed : %s\n", extra);
73     else
74         __xmlRaiseError(NULL, NULL, NULL,
75                         NULL, NULL, XML_FROM_URI,
76                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
77                         NULL, NULL, NULL, 0, 0,
78                         "Memory allocation failed\n");
79 }
80 
81 static void xmlCleanURI(xmlURIPtr uri);
82 
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 * Unless stated otherwise each macro takes a character value; all of them
 * may evaluate their argument more than once.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 *
 * Any IS_DIGIT already defined at this point is dropped so that the
 * definition below is the one in effect.
 */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') ||    \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') || ((x) == '(') ||   \
     ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other macros of this group, IS_UNWISE takes a pointer to the
 * character to test, not the character itself.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
150 
/*
 * Advance the pointer p to the next character: by three bytes when it
 * points at '%' (an escape sequence), by one byte otherwise.
 *
 * p must be a modifiable pointer lvalue and is evaluated more than once.
 * The macro does not check that two bytes follow a '%'; the caller has to
 * validate the escape before using it (e.g. with ISA_PCT_ENCODED).
 * Every use of the parameter is parenthesized so that expressions such as
 * NEXT(*pp) update the intended object.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
156 
157 /*
158  * Productions from the spec.
159  *
160  *    authority     = server | reg_name
161  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
162  *                        ";" | ":" | "@" | "&" | "=" | "+" )
163  *
164  * path          = [ abs_path | opaque_part ]
165  */
166 
/*
 * Wrapper around xmlStrndup() taking and returning plain char pointers.
 * The whole replacement list is parenthesized so that the cast applies to
 * the result of the call wherever the macro is expanded.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
168 
169 /************************************************************************
170  *									*
171  *                         RFC 3986 parser				*
172  *									*
173  ************************************************************************/
174 
/*
 * Character tests for the RFC 3986 grammar. Every ISA_* macro takes a
 * pointer to the character to examine and may evaluate that pointer
 * several times, so the argument must be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The look-ahead reads are short-circuited: the byte at offset 2 is only
 * read when the byte at offset 1 is a hex digit, so on a NUL terminated
 * string nothing past the terminator is examined.
 * The parameter is parenthesized before the offsets are added so that any
 * pointer expression can be passed.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
224 
225 /**
226  * xmlParse3986Scheme:
227  * @uri:  pointer to an URI structure
228  * @str:  pointer to the string to analyze
229  *
230  * Parse an URI scheme
231  *
232  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
233  *
234  * Returns 0 or the error code
235  */
236 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)237 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
238     const char *cur;
239 
240     if (str == NULL)
241 	return(-1);
242 
243     cur = *str;
244     if (!ISA_ALPHA(cur))
245 	return(2);
246     cur++;
247     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
248            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
249     if (uri != NULL) {
250 	if (uri->scheme != NULL) xmlFree(uri->scheme);
251 	uri->scheme = STRNDUP(*str, cur - *str);
252     }
253     *str = cur;
254     return(0);
255 }
256 
257 /**
258  * xmlParse3986Fragment:
259  * @uri:  pointer to an URI structure
260  * @str:  pointer to the string to analyze
261  *
262  * Parse the query part of an URI
263  *
264  * fragment      = *( pchar / "/" / "?" )
265  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
266  *       in the fragment identifier but this is used very broadly for
267  *       xpointer scheme selection, so we are allowing it here to not break
268  *       for example all the DocBook processing chains.
269  *
270  * Returns 0 or the error code
271  */
272 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)273 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
274 {
275     const char *cur;
276 
277     if (str == NULL)
278         return (-1);
279 
280     cur = *str;
281 
282     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
283            (*cur == '[') || (*cur == ']') ||
284            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
285         NEXT(cur);
286     if (uri != NULL) {
287         if (uri->fragment != NULL)
288             xmlFree(uri->fragment);
289 	if (uri->cleanup & 2)
290 	    uri->fragment = STRNDUP(*str, cur - *str);
291 	else
292 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
293     }
294     *str = cur;
295     return (0);
296 }
297 
298 /**
299  * xmlParse3986Query:
300  * @uri:  pointer to an URI structure
301  * @str:  pointer to the string to analyze
302  *
303  * Parse the query part of an URI
304  *
305  * query = *uric
306  *
307  * Returns 0 or the error code
308  */
309 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)310 xmlParse3986Query(xmlURIPtr uri, const char **str)
311 {
312     const char *cur;
313 
314     if (str == NULL)
315         return (-1);
316 
317     cur = *str;
318 
319     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
320            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
321         NEXT(cur);
322     if (uri != NULL) {
323         if (uri->query != NULL)
324             xmlFree(uri->query);
325 	if (uri->cleanup & 2)
326 	    uri->query = STRNDUP(*str, cur - *str);
327 	else
328 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
329 
330 	/* Save the raw bytes of the query as well.
331 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
332 	 */
333 	if (uri->query_raw != NULL)
334 	    xmlFree (uri->query_raw);
335 	uri->query_raw = STRNDUP (*str, cur - *str);
336     }
337     *str = cur;
338     return (0);
339 }
340 
341 /**
342  * xmlParse3986Port:
343  * @uri:  pointer to an URI structure
344  * @str:  the string to analyze
345  *
346  * Parse a port part and fills in the appropriate fields
347  * of the @uri structure
348  *
349  * port          = *DIGIT
350  *
351  * Returns 0 or the error code
352  */
353 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)354 xmlParse3986Port(xmlURIPtr uri, const char **str)
355 {
356     const char *cur = *str;
357     unsigned port = 0; /* unsigned for defined overflow behavior */
358 
359     if (ISA_DIGIT(cur)) {
360 	while (ISA_DIGIT(cur)) {
361 	    port = port * 10 + (*cur - '0');
362 
363 	    cur++;
364 	}
365 	if (uri != NULL)
366 	    uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
367 	*str = cur;
368 	return(0);
369     }
370     return(1);
371 }
372 
373 /**
374  * xmlParse3986Userinfo:
375  * @uri:  pointer to an URI structure
376  * @str:  the string to analyze
377  *
378  * Parse an user informations part and fills in the appropriate fields
379  * of the @uri structure
380  *
381  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
382  *
383  * Returns 0 or the error code
384  */
385 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)386 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
387 {
388     const char *cur;
389 
390     cur = *str;
391     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
392            ISA_SUB_DELIM(cur) || (*cur == ':'))
393 	NEXT(cur);
394     if (*cur == '@') {
395 	if (uri != NULL) {
396 	    if (uri->user != NULL) xmlFree(uri->user);
397 	    if (uri->cleanup & 2)
398 		uri->user = STRNDUP(*str, cur - *str);
399 	    else
400 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
401 	}
402 	*str = cur;
403 	return(0);
404     }
405     return(1);
406 }
407 
/**
 * xmlParse3986DecOctet:
 * @str:  in/out pointer to the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. One and two digit forms are only accepted when no
 * further digit follows; *@str is left untouched when nothing matches.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if (!(ISA_DIGIT(cur)))
        return(1);
    if (!ISA_DIGIT(cur + 1)) {
        /* 0-9 */
        cur++;
    } else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) &&
               (!ISA_DIGIT(cur + 2))) {
        /* 10-99 */
        cur += 2;
    } else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) &&
               (ISA_DIGIT(cur + 2))) {
        /* 100-199 */
        cur += 3;
    } else if ((*cur == '2') && (*(cur + 1) >= '0') &&
               (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2))) {
        /* 200-249 */
        cur += 3;
    } else if ((*cur == '2') && (*(cur + 1) == '5') &&
               (*(cur + 2) >= '0') && (*(cur + 2) <= '5')) {
        /* 250-255: the range check applies to the third digit */
        cur += 3;
    } else {
        return(1);
    }
    *str = cur;
    return(0);
}
445 /**
446  * xmlParse3986Host:
447  * @uri:  pointer to an URI structure
448  * @str:  the string to analyze
449  *
450  * Parse an host part and fills in the appropriate fields
451  * of the @uri structure
452  *
453  * host          = IP-literal / IPv4address / reg-name
454  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
455  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
456  * reg-name      = *( unreserved / pct-encoded / sub-delims )
457  *
458  * Returns 0 or the error code
459  */
460 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)461 xmlParse3986Host(xmlURIPtr uri, const char **str)
462 {
463     const char *cur = *str;
464     const char *host;
465 
466     host = cur;
467     /*
468      * IPv6 and future adressing scheme are enclosed between brackets
469      */
470     if (*cur == '[') {
471         cur++;
472 	while ((*cur != ']') && (*cur != 0))
473 	    cur++;
474 	if (*cur != ']')
475 	    return(1);
476 	cur++;
477 	goto found;
478     }
479     /*
480      * try to parse an IPv4
481      */
482     if (ISA_DIGIT(cur)) {
483         if (xmlParse3986DecOctet(&cur) != 0)
484 	    goto not_ipv4;
485 	if (*cur != '.')
486 	    goto not_ipv4;
487 	cur++;
488         if (xmlParse3986DecOctet(&cur) != 0)
489 	    goto not_ipv4;
490 	if (*cur != '.')
491 	    goto not_ipv4;
492         if (xmlParse3986DecOctet(&cur) != 0)
493 	    goto not_ipv4;
494 	if (*cur != '.')
495 	    goto not_ipv4;
496         if (xmlParse3986DecOctet(&cur) != 0)
497 	    goto not_ipv4;
498 	goto found;
499 not_ipv4:
500         cur = *str;
501     }
502     /*
503      * then this should be a hostname which can be empty
504      */
505     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
506         NEXT(cur);
507 found:
508     if (uri != NULL) {
509 	if (uri->authority != NULL) xmlFree(uri->authority);
510 	uri->authority = NULL;
511 	if (uri->server != NULL) xmlFree(uri->server);
512 	if (cur != host) {
513 	    if (uri->cleanup & 2)
514 		uri->server = STRNDUP(host, cur - host);
515 	    else
516 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
517 	} else
518 	    uri->server = NULL;
519     }
520     *str = cur;
521     return(0);
522 }
523 
524 /**
525  * xmlParse3986Authority:
526  * @uri:  pointer to an URI structure
527  * @str:  the string to analyze
528  *
529  * Parse an authority part and fills in the appropriate fields
530  * of the @uri structure
531  *
532  * authority     = [ userinfo "@" ] host [ ":" port ]
533  *
534  * Returns 0 or the error code
535  */
536 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)537 xmlParse3986Authority(xmlURIPtr uri, const char **str)
538 {
539     const char *cur;
540     int ret;
541 
542     cur = *str;
543     /*
544      * try to parse an userinfo and check for the trailing @
545      */
546     ret = xmlParse3986Userinfo(uri, &cur);
547     if ((ret != 0) || (*cur != '@'))
548         cur = *str;
549     else
550         cur++;
551     ret = xmlParse3986Host(uri, &cur);
552     if (ret != 0) return(ret);
553     if (*cur == ':') {
554         cur++;
555         ret = xmlParse3986Port(uri, &cur);
556 	if (ret != 0) return(ret);
557     }
558     *str = cur;
559     return(0);
560 }
561 
/**
 * xmlParse3986Segment:
 * @str:  in/out pointer to the string to analyze
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip a path segment. Scanning stops at the first character which is not
 * a pchar or which equals @forbid; *@str is moved to that character.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the input does not start with a pchar and
 * @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    if (!ISA_PCHAR(cur))
        return(empty ? 0 : 1);

    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);

    *str = cur;
    return (0);
}
594 
595 /**
596  * xmlParse3986PathAbEmpty:
597  * @uri:  pointer to an URI structure
598  * @str:  the string to analyze
599  *
600  * Parse an path absolute or empty and fills in the appropriate fields
601  * of the @uri structure
602  *
603  * path-abempty  = *( "/" segment )
604  *
605  * Returns 0 or the error code
606  */
607 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)608 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
609 {
610     const char *cur;
611     int ret;
612 
613     cur = *str;
614 
615     while (*cur == '/') {
616         cur++;
617 	ret = xmlParse3986Segment(&cur, 0, 1);
618 	if (ret != 0) return(ret);
619     }
620     if (uri != NULL) {
621 	if (uri->path != NULL) xmlFree(uri->path);
622         if (*str != cur) {
623             if (uri->cleanup & 2)
624                 uri->path = STRNDUP(*str, cur - *str);
625             else
626                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
627         } else {
628             uri->path = NULL;
629         }
630     }
631     *str = cur;
632     return (0);
633 }
634 
635 /**
636  * xmlParse3986PathAbsolute:
637  * @uri:  pointer to an URI structure
638  * @str:  the string to analyze
639  *
640  * Parse an path absolute and fills in the appropriate fields
641  * of the @uri structure
642  *
643  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
644  *
645  * Returns 0 or the error code
646  */
647 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)648 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
649 {
650     const char *cur;
651     int ret;
652 
653     cur = *str;
654 
655     if (*cur != '/')
656         return(1);
657     cur++;
658     ret = xmlParse3986Segment(&cur, 0, 0);
659     if (ret == 0) {
660 	while (*cur == '/') {
661 	    cur++;
662 	    ret = xmlParse3986Segment(&cur, 0, 1);
663 	    if (ret != 0) return(ret);
664 	}
665     }
666     if (uri != NULL) {
667 	if (uri->path != NULL) xmlFree(uri->path);
668         if (cur != *str) {
669             if (uri->cleanup & 2)
670                 uri->path = STRNDUP(*str, cur - *str);
671             else
672                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
673         } else {
674             uri->path = NULL;
675         }
676     }
677     *str = cur;
678     return (0);
679 }
680 
681 /**
682  * xmlParse3986PathRootless:
683  * @uri:  pointer to an URI structure
684  * @str:  the string to analyze
685  *
686  * Parse an path without root and fills in the appropriate fields
687  * of the @uri structure
688  *
689  * path-rootless = segment-nz *( "/" segment )
690  *
691  * Returns 0 or the error code
692  */
693 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)694 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
695 {
696     const char *cur;
697     int ret;
698 
699     cur = *str;
700 
701     ret = xmlParse3986Segment(&cur, 0, 0);
702     if (ret != 0) return(ret);
703     while (*cur == '/') {
704         cur++;
705 	ret = xmlParse3986Segment(&cur, 0, 1);
706 	if (ret != 0) return(ret);
707     }
708     if (uri != NULL) {
709 	if (uri->path != NULL) xmlFree(uri->path);
710         if (cur != *str) {
711             if (uri->cleanup & 2)
712                 uri->path = STRNDUP(*str, cur - *str);
713             else
714                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
715         } else {
716             uri->path = NULL;
717         }
718     }
719     *str = cur;
720     return (0);
721 }
722 
723 /**
724  * xmlParse3986PathNoScheme:
725  * @uri:  pointer to an URI structure
726  * @str:  the string to analyze
727  *
728  * Parse an path which is not a scheme and fills in the appropriate fields
729  * of the @uri structure
730  *
731  * path-noscheme = segment-nz-nc *( "/" segment )
732  *
733  * Returns 0 or the error code
734  */
735 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)736 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
737 {
738     const char *cur;
739     int ret;
740 
741     cur = *str;
742 
743     ret = xmlParse3986Segment(&cur, ':', 0);
744     if (ret != 0) return(ret);
745     while (*cur == '/') {
746         cur++;
747 	ret = xmlParse3986Segment(&cur, 0, 1);
748 	if (ret != 0) return(ret);
749     }
750     if (uri != NULL) {
751 	if (uri->path != NULL) xmlFree(uri->path);
752         if (cur != *str) {
753             if (uri->cleanup & 2)
754                 uri->path = STRNDUP(*str, cur - *str);
755             else
756                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
757         } else {
758             uri->path = NULL;
759         }
760     }
761     *str = cur;
762     return (0);
763 }
764 
765 /**
766  * xmlParse3986HierPart:
767  * @uri:  pointer to an URI structure
768  * @str:  the string to analyze
769  *
770  * Parse an hierarchical part and fills in the appropriate fields
771  * of the @uri structure
772  *
773  * hier-part     = "//" authority path-abempty
774  *                / path-absolute
775  *                / path-rootless
776  *                / path-empty
777  *
778  * Returns 0 or the error code
779  */
780 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)781 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
782 {
783     const char *cur;
784     int ret;
785 
786     cur = *str;
787 
788     if ((*cur == '/') && (*(cur + 1) == '/')) {
789         cur += 2;
790 	ret = xmlParse3986Authority(uri, &cur);
791 	if (ret != 0) return(ret);
792 	if (uri->server == NULL)
793 	    uri->port = -1;
794 	ret = xmlParse3986PathAbEmpty(uri, &cur);
795 	if (ret != 0) return(ret);
796 	*str = cur;
797 	return(0);
798     } else if (*cur == '/') {
799         ret = xmlParse3986PathAbsolute(uri, &cur);
800 	if (ret != 0) return(ret);
801     } else if (ISA_PCHAR(cur)) {
802         ret = xmlParse3986PathRootless(uri, &cur);
803 	if (ret != 0) return(ret);
804     } else {
805 	/* path-empty is effectively empty */
806 	if (uri != NULL) {
807 	    if (uri->path != NULL) xmlFree(uri->path);
808 	    uri->path = NULL;
809 	}
810     }
811     *str = cur;
812     return (0);
813 }
814 
815 /**
816  * xmlParse3986RelativeRef:
817  * @uri:  pointer to an URI structure
818  * @str:  the string to analyze
819  *
820  * Parse an URI string and fills in the appropriate fields
821  * of the @uri structure
822  *
823  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
824  * relative-part = "//" authority path-abempty
825  *               / path-absolute
826  *               / path-noscheme
827  *               / path-empty
828  *
829  * Returns 0 or the error code
830  */
831 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)832 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
833     int ret;
834 
835     if ((*str == '/') && (*(str + 1) == '/')) {
836         str += 2;
837 	ret = xmlParse3986Authority(uri, &str);
838 	if (ret != 0) return(ret);
839 	ret = xmlParse3986PathAbEmpty(uri, &str);
840 	if (ret != 0) return(ret);
841     } else if (*str == '/') {
842 	ret = xmlParse3986PathAbsolute(uri, &str);
843 	if (ret != 0) return(ret);
844     } else if (ISA_PCHAR(str)) {
845         ret = xmlParse3986PathNoScheme(uri, &str);
846 	if (ret != 0) return(ret);
847     } else {
848 	/* path-empty is effectively empty */
849 	if (uri != NULL) {
850 	    if (uri->path != NULL) xmlFree(uri->path);
851 	    uri->path = NULL;
852 	}
853     }
854 
855     if (*str == '?') {
856 	str++;
857 	ret = xmlParse3986Query(uri, &str);
858 	if (ret != 0) return(ret);
859     }
860     if (*str == '#') {
861 	str++;
862 	ret = xmlParse3986Fragment(uri, &str);
863 	if (ret != 0) return(ret);
864     }
865     if (*str != 0) {
866 	xmlCleanURI(uri);
867 	return(1);
868     }
869     return(0);
870 }
871 
872 
873 /**
874  * xmlParse3986URI:
875  * @uri:  pointer to an URI structure
876  * @str:  the string to analyze
877  *
878  * Parse an URI string and fills in the appropriate fields
879  * of the @uri structure
880  *
881  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
882  *
883  * Returns 0 or the error code
884  */
885 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)886 xmlParse3986URI(xmlURIPtr uri, const char *str) {
887     int ret;
888 
889     ret = xmlParse3986Scheme(uri, &str);
890     if (ret != 0) return(ret);
891     if (*str != ':') {
892 	return(1);
893     }
894     str++;
895     ret = xmlParse3986HierPart(uri, &str);
896     if (ret != 0) return(ret);
897     if (*str == '?') {
898 	str++;
899 	ret = xmlParse3986Query(uri, &str);
900 	if (ret != 0) return(ret);
901     }
902     if (*str == '#') {
903 	str++;
904 	ret = xmlParse3986Fragment(uri, &str);
905 	if (ret != 0) return(ret);
906     }
907     if (*str != 0) {
908 	xmlCleanURI(uri);
909 	return(1);
910     }
911     return(0);
912 }
913 
914 /**
915  * xmlParse3986URIReference:
916  * @uri:  pointer to an URI structure
917  * @str:  the string to analyze
918  *
919  * Parse an URI reference string and fills in the appropriate fields
920  * of the @uri structure
921  *
922  * URI-reference = URI / relative-ref
923  *
924  * Returns 0 or the error code
925  */
926 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)927 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
928     int ret;
929 
930     if (str == NULL)
931 	return(-1);
932     xmlCleanURI(uri);
933 
934     /*
935      * Try first to parse absolute refs, then fallback to relative if
936      * it fails.
937      */
938     ret = xmlParse3986URI(uri, str);
939     if (ret != 0) {
940 	xmlCleanURI(uri);
941         ret = xmlParse3986RelativeRef(uri, str);
942 	if (ret != 0) {
943 	    xmlCleanURI(uri);
944 	    return(ret);
945 	}
946     }
947     return(0);
948 }
949 
950 /**
951  * xmlParseURI:
952  * @str:  the URI string to analyze
953  *
954  * Parse an URI based on RFC 3986
955  *
956  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
957  *
958  * Returns a newly built xmlURIPtr or NULL in case of error
959  */
960 xmlURIPtr
xmlParseURI(const char * str)961 xmlParseURI(const char *str) {
962     xmlURIPtr uri;
963     int ret;
964 
965     if (str == NULL)
966 	return(NULL);
967     uri = xmlCreateURI();
968     if (uri != NULL) {
969 	ret = xmlParse3986URIReference(uri, str);
970         if (ret) {
971 	    xmlFreeURI(uri);
972 	    return(NULL);
973 	}
974     }
975     return(uri);
976 }
977 
978 /**
979  * xmlParseURIReference:
980  * @uri:  pointer to an URI structure
981  * @str:  the string to analyze
982  *
983  * Parse an URI reference string based on RFC 3986 and fills in the
984  * appropriate fields of the @uri structure
985  *
986  * URI-reference = URI / relative-ref
987  *
988  * Returns 0 or the error code
989  */
990 int
xmlParseURIReference(xmlURIPtr uri,const char * str)991 xmlParseURIReference(xmlURIPtr uri, const char *str) {
992     return(xmlParse3986URIReference(uri, str));
993 }
994 
995 /**
996  * xmlParseURIRaw:
997  * @str:  the URI string to analyze
998  * @raw:  if 1 unescaping of URI pieces are disabled
999  *
1000  * Parse an URI but allows to keep intact the original fragments.
1001  *
1002  * URI-reference = URI / relative-ref
1003  *
1004  * Returns a newly built xmlURIPtr or NULL in case of error
1005  */
1006 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)1007 xmlParseURIRaw(const char *str, int raw) {
1008     xmlURIPtr uri;
1009     int ret;
1010 
1011     if (str == NULL)
1012 	return(NULL);
1013     uri = xmlCreateURI();
1014     if (uri != NULL) {
1015         if (raw) {
1016 	    uri->cleanup |= 2;
1017 	}
1018 	ret = xmlParseURIReference(uri, str);
1019         if (ret) {
1020 	    xmlFreeURI(uri);
1021 	    return(NULL);
1022 	}
1023     }
1024     return(uri);
1025 }
1026 
1027 /************************************************************************
1028  *									*
1029  *			Generic URI structure functions			*
1030  *									*
1031  ************************************************************************/
1032 
1033 /**
1034  * xmlCreateURI:
1035  *
1036  * Simply creates an empty xmlURI
1037  *
1038  * Returns the new structure or NULL in case of error
1039  */
1040 xmlURIPtr
xmlCreateURI(void)1041 xmlCreateURI(void) {
1042     xmlURIPtr ret;
1043 
1044     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1045     if (ret == NULL) {
1046         xmlURIErrMemory("creating URI structure\n");
1047 	return(NULL);
1048     }
1049     memset(ret, 0, sizeof(xmlURI));
1050     return(ret);
1051 }
1052 
1053 /**
1054  * xmlSaveUriRealloc:
1055  *
1056  * Function to handle properly a reallocation when saving an URI
1057  * Also imposes some limit on the length of an URI string output
1058  */
1059 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1060 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1061     xmlChar *temp;
1062     int tmp;
1063 
1064     if (*max > MAX_URI_LENGTH) {
1065         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1066         return(NULL);
1067     }
1068     tmp = *max * 2;
1069     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1070     if (temp == NULL) {
1071         xmlURIErrMemory("saving URI\n");
1072         return(NULL);
1073     }
1074     *max = tmp;
1075     return(temp);
1076 }
1077 
1078 /**
1079  * xmlSaveUri:
1080  * @uri:  pointer to an xmlURI
1081  *
1082  * Save the URI as an escaped string
1083  *
1084  * Returns a new string (to be deallocated by caller)
1085  */
1086 xmlChar *
xmlSaveUri(xmlURIPtr uri)1087 xmlSaveUri(xmlURIPtr uri) {
1088     xmlChar *ret = NULL;
1089     xmlChar *temp;
1090     const char *p;
1091     int len;
1092     int max;
1093 
1094     if (uri == NULL) return(NULL);
1095 
1096 
1097     max = 80;
1098     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1099     if (ret == NULL) {
1100         xmlURIErrMemory("saving URI\n");
1101 	return(NULL);
1102     }
1103     len = 0;
1104 
1105     if (uri->scheme != NULL) {
1106 	p = uri->scheme;
1107 	while (*p != 0) {
1108 	    if (len >= max) {
1109                 temp = xmlSaveUriRealloc(ret, &max);
1110                 if (temp == NULL) goto mem_error;
1111 		ret = temp;
1112 	    }
1113 	    ret[len++] = *p++;
1114 	}
1115 	if (len >= max) {
1116             temp = xmlSaveUriRealloc(ret, &max);
1117             if (temp == NULL) goto mem_error;
1118             ret = temp;
1119 	}
1120 	ret[len++] = ':';
1121     }
1122     if (uri->opaque != NULL) {
1123 	p = uri->opaque;
1124 	while (*p != 0) {
1125 	    if (len + 3 >= max) {
1126                 temp = xmlSaveUriRealloc(ret, &max);
1127                 if (temp == NULL) goto mem_error;
1128                 ret = temp;
1129 	    }
1130 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1131 		ret[len++] = *p++;
1132 	    else {
1133 		int val = *(unsigned char *)p++;
1134 		int hi = val / 0x10, lo = val % 0x10;
1135 		ret[len++] = '%';
1136 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1137 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1138 	    }
1139 	}
1140     } else {
1141 	if ((uri->server != NULL) || (uri->port == -1)) {
1142 	    if (len + 3 >= max) {
1143                 temp = xmlSaveUriRealloc(ret, &max);
1144                 if (temp == NULL) goto mem_error;
1145                 ret = temp;
1146 	    }
1147 	    ret[len++] = '/';
1148 	    ret[len++] = '/';
1149 	    if (uri->user != NULL) {
1150 		p = uri->user;
1151 		while (*p != 0) {
1152 		    if (len + 3 >= max) {
1153                         temp = xmlSaveUriRealloc(ret, &max);
1154                         if (temp == NULL) goto mem_error;
1155                         ret = temp;
1156 		    }
1157 		    if ((IS_UNRESERVED(*(p))) ||
1158 			((*(p) == ';')) || ((*(p) == ':')) ||
1159 			((*(p) == '&')) || ((*(p) == '=')) ||
1160 			((*(p) == '+')) || ((*(p) == '$')) ||
1161 			((*(p) == ',')))
1162 			ret[len++] = *p++;
1163 		    else {
1164 			int val = *(unsigned char *)p++;
1165 			int hi = val / 0x10, lo = val % 0x10;
1166 			ret[len++] = '%';
1167 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1168 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1169 		    }
1170 		}
1171 		if (len + 3 >= max) {
1172                     temp = xmlSaveUriRealloc(ret, &max);
1173                     if (temp == NULL) goto mem_error;
1174                     ret = temp;
1175 		}
1176 		ret[len++] = '@';
1177 	    }
1178 	    if (uri->server != NULL) {
1179 		p = uri->server;
1180 		while (*p != 0) {
1181 		    if (len >= max) {
1182 			temp = xmlSaveUriRealloc(ret, &max);
1183 			if (temp == NULL) goto mem_error;
1184 			ret = temp;
1185 		    }
1186 		    ret[len++] = *p++;
1187 		}
1188 		if (uri->port > 0) {
1189 		    if (len + 10 >= max) {
1190 			temp = xmlSaveUriRealloc(ret, &max);
1191 			if (temp == NULL) goto mem_error;
1192 			ret = temp;
1193 		    }
1194 		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1195 		}
1196 	    }
1197 	} else if (uri->authority != NULL) {
1198 	    if (len + 3 >= max) {
1199                 temp = xmlSaveUriRealloc(ret, &max);
1200                 if (temp == NULL) goto mem_error;
1201                 ret = temp;
1202 	    }
1203 	    ret[len++] = '/';
1204 	    ret[len++] = '/';
1205 	    p = uri->authority;
1206 	    while (*p != 0) {
1207 		if (len + 3 >= max) {
1208                     temp = xmlSaveUriRealloc(ret, &max);
1209                     if (temp == NULL) goto mem_error;
1210                     ret = temp;
1211 		}
1212 		if ((IS_UNRESERVED(*(p))) ||
1213                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1214                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1215                     ((*(p) == '=')) || ((*(p) == '+')))
1216 		    ret[len++] = *p++;
1217 		else {
1218 		    int val = *(unsigned char *)p++;
1219 		    int hi = val / 0x10, lo = val % 0x10;
1220 		    ret[len++] = '%';
1221 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1222 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1223 		}
1224 	    }
1225 	} else if (uri->scheme != NULL) {
1226 	    if (len + 3 >= max) {
1227                 temp = xmlSaveUriRealloc(ret, &max);
1228                 if (temp == NULL) goto mem_error;
1229                 ret = temp;
1230 	    }
1231 	}
1232 	if (uri->path != NULL) {
1233 	    p = uri->path;
1234 	    /*
1235 	     * the colon in file:///d: should not be escaped or
1236 	     * Windows accesses fail later.
1237 	     */
1238 	    if ((uri->scheme != NULL) &&
1239 		(p[0] == '/') &&
1240 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1241 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1242 		(p[2] == ':') &&
1243 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1244 		if (len + 3 >= max) {
1245                     temp = xmlSaveUriRealloc(ret, &max);
1246                     if (temp == NULL) goto mem_error;
1247                     ret = temp;
1248 		}
1249 		ret[len++] = *p++;
1250 		ret[len++] = *p++;
1251 		ret[len++] = *p++;
1252 	    }
1253 	    while (*p != 0) {
1254 		if (len + 3 >= max) {
1255                     temp = xmlSaveUriRealloc(ret, &max);
1256                     if (temp == NULL) goto mem_error;
1257                     ret = temp;
1258 		}
1259 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1260                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1261 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1262 	            ((*(p) == ',')))
1263 		    ret[len++] = *p++;
1264 		else {
1265 		    int val = *(unsigned char *)p++;
1266 		    int hi = val / 0x10, lo = val % 0x10;
1267 		    ret[len++] = '%';
1268 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1269 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1270 		}
1271 	    }
1272 	}
1273 	if (uri->query_raw != NULL) {
1274 	    if (len + 1 >= max) {
1275                 temp = xmlSaveUriRealloc(ret, &max);
1276                 if (temp == NULL) goto mem_error;
1277                 ret = temp;
1278 	    }
1279 	    ret[len++] = '?';
1280 	    p = uri->query_raw;
1281 	    while (*p != 0) {
1282 		if (len + 1 >= max) {
1283                     temp = xmlSaveUriRealloc(ret, &max);
1284                     if (temp == NULL) goto mem_error;
1285                     ret = temp;
1286 		}
1287 		ret[len++] = *p++;
1288 	    }
1289 	} else if (uri->query != NULL) {
1290 	    if (len + 3 >= max) {
1291                 temp = xmlSaveUriRealloc(ret, &max);
1292                 if (temp == NULL) goto mem_error;
1293                 ret = temp;
1294 	    }
1295 	    ret[len++] = '?';
1296 	    p = uri->query;
1297 	    while (*p != 0) {
1298 		if (len + 3 >= max) {
1299                     temp = xmlSaveUriRealloc(ret, &max);
1300                     if (temp == NULL) goto mem_error;
1301                     ret = temp;
1302 		}
1303 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1304 		    ret[len++] = *p++;
1305 		else {
1306 		    int val = *(unsigned char *)p++;
1307 		    int hi = val / 0x10, lo = val % 0x10;
1308 		    ret[len++] = '%';
1309 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1310 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1311 		}
1312 	    }
1313 	}
1314     }
1315     if (uri->fragment != NULL) {
1316 	if (len + 3 >= max) {
1317             temp = xmlSaveUriRealloc(ret, &max);
1318             if (temp == NULL) goto mem_error;
1319             ret = temp;
1320 	}
1321 	ret[len++] = '#';
1322 	p = uri->fragment;
1323 	while (*p != 0) {
1324 	    if (len + 3 >= max) {
1325                 temp = xmlSaveUriRealloc(ret, &max);
1326                 if (temp == NULL) goto mem_error;
1327                 ret = temp;
1328 	    }
1329 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1330 		ret[len++] = *p++;
1331 	    else {
1332 		int val = *(unsigned char *)p++;
1333 		int hi = val / 0x10, lo = val % 0x10;
1334 		ret[len++] = '%';
1335 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1336 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1337 	    }
1338 	}
1339     }
1340     if (len >= max) {
1341         temp = xmlSaveUriRealloc(ret, &max);
1342         if (temp == NULL) goto mem_error;
1343         ret = temp;
1344     }
1345     ret[len] = 0;
1346     return(ret);
1347 
1348 mem_error:
1349     xmlFree(ret);
1350     return(NULL);
1351 }
1352 
1353 /**
1354  * xmlPrintURI:
1355  * @stream:  a FILE* for the output
1356  * @uri:  pointer to an xmlURI
1357  *
1358  * Prints the URI in the stream @stream.
1359  */
1360 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1361 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1362     xmlChar *out;
1363 
1364     out = xmlSaveUri(uri);
1365     if (out != NULL) {
1366 	fprintf(stream, "%s", (char *) out);
1367 	xmlFree(out);
1368     }
1369 }
1370 
1371 /**
1372  * xmlCleanURI:
1373  * @uri:  pointer to an xmlURI
1374  *
1375  * Make sure the xmlURI struct is free of content
1376  */
1377 static void
xmlCleanURI(xmlURIPtr uri)1378 xmlCleanURI(xmlURIPtr uri) {
1379     if (uri == NULL) return;
1380 
1381     if (uri->scheme != NULL) xmlFree(uri->scheme);
1382     uri->scheme = NULL;
1383     if (uri->server != NULL) xmlFree(uri->server);
1384     uri->server = NULL;
1385     if (uri->user != NULL) xmlFree(uri->user);
1386     uri->user = NULL;
1387     if (uri->path != NULL) xmlFree(uri->path);
1388     uri->path = NULL;
1389     if (uri->fragment != NULL) xmlFree(uri->fragment);
1390     uri->fragment = NULL;
1391     if (uri->opaque != NULL) xmlFree(uri->opaque);
1392     uri->opaque = NULL;
1393     if (uri->authority != NULL) xmlFree(uri->authority);
1394     uri->authority = NULL;
1395     if (uri->query != NULL) xmlFree(uri->query);
1396     uri->query = NULL;
1397     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1398     uri->query_raw = NULL;
1399 }
1400 
1401 /**
1402  * xmlFreeURI:
1403  * @uri:  pointer to an xmlURI
1404  *
1405  * Free up the xmlURI struct
1406  */
1407 void
xmlFreeURI(xmlURIPtr uri)1408 xmlFreeURI(xmlURIPtr uri) {
1409     if (uri == NULL) return;
1410 
1411     if (uri->scheme != NULL) xmlFree(uri->scheme);
1412     if (uri->server != NULL) xmlFree(uri->server);
1413     if (uri->user != NULL) xmlFree(uri->user);
1414     if (uri->path != NULL) xmlFree(uri->path);
1415     if (uri->fragment != NULL) xmlFree(uri->fragment);
1416     if (uri->opaque != NULL) xmlFree(uri->opaque);
1417     if (uri->authority != NULL) xmlFree(uri->authority);
1418     if (uri->query != NULL) xmlFree(uri->query);
1419     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1420     xmlFree(uri);
1421 }
1422 
1423 /************************************************************************
1424  *									*
1425  *			Helper functions				*
1426  *									*
1427  ************************************************************************/
1428 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of "/" between segments are
 * collapsed to a single "/" along the way; leading "/" characters are
 * left as they are.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0 on success, -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *rd;	/* read position */
    char *wr;	/* write position of the first pass */

    if (path == NULL)
        return(-1);

    /* Move past the leading "/" chars to the first non-empty segment. */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /*
     * First pass, steps (c) and (d): drop "." segments while copying the
     * remaining segments down in place.
     */
    wr = rd;
    while (*rd != '\0') {
        if (*rd == '.') {
            /* c) "./" where "." is a complete segment is removed,
             *    together with any extra "/" following it. */
            if (rd[1] == '/') {
                rd += 2;
                while (*rd == '/')
                    rd++;
                continue;
            }
            /* d) a trailing "." segment is removed. */
            if (rd[1] == '\0')
                break;
        }

        /* Any other segment is kept. */
        while ((*rd != '/') && (*rd != '\0'))
            *wr++ = *rd++;
        if (*rd == '\0')
            break;

        /* normalize "//": keep only the last "/" of a run */
        while ((rd[0] == '/') && (rd[1] == '/'))
            rd++;
        *wr++ = *rd++;
    }
    *wr = '\0';

    /* Start again at the first segment for the second pass. */
    for (rd = path; *rd == '/'; rd++)
        ;
    if (*rd == '\0')
        return(0);

    /*
     * Second pass, steps (e) and (f):
     *
     * e) every "<segment>/../", where <segment> is a complete segment
     *    other than "..", is removed, leftmost match first, until no
     *    match remains.
     *
     * f) a trailing "<segment>/..", where <segment> is a complete
     *    segment other than "..", is removed.
     *
     * The string is compacted each time something is removed, so a single
     * "current segment" pointer is enough.
     */
    for (;;) {
        char *next;	/* start of the segment after the current one */
        char *back;
        int curIsDotDot;

        /* rd points to the first character of the segment to examine. */
        next = rd;
        while ((*next != '/') && (*next != '\0'))
            next++;

        /* A last segment cannot match (e) or (f). */
        if (*next == '\0')
            break;
        next++;

        /* Keep this segment when it is ".." itself or when the segment
         * after it is not "..". */
        curIsDotDot = (rd[0] == '.') && (rd[1] == '.') && (next == rd + 3);
        if (curIsDotDot ||
            !((next[0] == '.') && (next[1] == '.') &&
              ((next[2] == '/') || (next[2] == '\0')))) {
            rd = next;
            continue;
        }

        /* f) "<segment>/.." ends the buffer: cut at the segment start. */
        if (next[2] == '\0') {
            *rd = '\0';
            break;
        }

        /* e) close the gap; the two regions overlap, hence memmove. */
        memmove(rd, next + 3, strlen(next + 3) + 1);

        /* Step back over the "/" chars in front of the current position;
         * if that reaches the start of the buffer there is no previous
         * segment and the scan simply goes on from here. */
        back = rd;
        while (back > path) {
            --back;
            if (*back != '/')
                break;
        }
        if (back == path)
            continue;

        /* Otherwise rewind to the start of the previous segment, so that
         * "foo/bar/../.." also gets its "foo/.." removed once "bar/.."
         * is gone. */
        rd = back;
        while ((rd > path) && (rd[-1] != '/'))
            --rd;
    }
    /* wr still marks where the first pass ended; terminate there too. */
    *wr = '\0';

    /*
     * g) Leading ".." segments that are still present in a path starting
     *    with "/" would climb above the root; RFC 2396 treats them as an
     *    error and lets the implementation choose how to handle it.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        rd = path;
        while ((rd[0] == '/') && (rd[1] == '.') && (rd[2] == '.') &&
               ((rd[3] == '/') || (rd[3] == '\0')))
            rd += 3;

        if (rd != path)
            memmove(path, rd, strlen(rd) + 1);
    }

    return(0);
}
1617 
/* Returns 1 when c is one of 0-9, a-f or A-F, and 0 otherwise. */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1625 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" sequence with two hex digits is replaced by the single byte of
 * that value (no encoding is applied); every other byte, including a '%'
 * that is not followed by two hex digits inside the given length, is
 * copied as is. The result is never longer than the input.
 *
 * When @target is not NULL the result is written there, so it needs room
 * for the input length plus the terminating 0; otherwise a new buffer is
 * allocated.
 *
 * Returns the unescaped string (@target or the new buffer), NULL only in
 * case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *result, *dst;
    const char *src;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    if (target != NULL) {
        result = target;
    } else {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (len > 0) {
        if ((len > 2) && (*src == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* fold the two hex digits after '%' into one byte value */
            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                if ((digit >= '0') && (digit <= '9'))
                    value = value * 16 + (digit - '0');
                else if ((digit >= 'a') && (digit <= 'f'))
                    value = value * 16 + (digit - 'a') + 10;
                else
                    value = value * 16 + (digit - 'A') + 10;
            }
            *dst++ = value;
            src += 3;
            len -= 3;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(result);
}
1687 
1688 /**
1689  * xmlURIEscapeStr:
1690  * @str:  string to escape
1691  * @list: exception list string of chars not to escape
1692  *
1693  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1694  * and the characters in the exception list.
1695  *
1696  * Returns a new escaped string or NULL in case of error.
1697  */
1698 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1699 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1700     xmlChar *ret, ch;
1701     xmlChar *temp;
1702     const xmlChar *in;
1703     int len, out;
1704 
1705     if (str == NULL)
1706 	return(NULL);
1707     if (str[0] == 0)
1708 	return(xmlStrdup(str));
1709     len = xmlStrlen(str);
1710     if (!(len > 0)) return(NULL);
1711 
1712     len += 20;
1713     ret = (xmlChar *) xmlMallocAtomic(len);
1714     if (ret == NULL) {
1715         xmlURIErrMemory("escaping URI value\n");
1716 	return(NULL);
1717     }
1718     in = (const xmlChar *) str;
1719     out = 0;
1720     while(*in != 0) {
1721 	if (len - out <= 3) {
1722             temp = xmlSaveUriRealloc(ret, &len);
1723 	    if (temp == NULL) {
1724                 xmlURIErrMemory("escaping URI value\n");
1725 		xmlFree(ret);
1726 		return(NULL);
1727 	    }
1728 	    ret = temp;
1729 	}
1730 
1731 	ch = *in;
1732 
1733 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1734 	    unsigned char val;
1735 	    ret[out++] = '%';
1736 	    val = ch >> 4;
1737 	    if (val <= 9)
1738 		ret[out++] = '0' + val;
1739 	    else
1740 		ret[out++] = 'A' + val - 0xA;
1741 	    val = ch & 0xF;
1742 	    if (val <= 9)
1743 		ret[out++] = '0' + val;
1744 	    else
1745 		ret[out++] = 'A' + val - 0xA;
1746 	    in++;
1747 	} else {
1748 	    ret[out++] = *in++;
1749 	}
1750 
1751     }
1752     ret[out] = 0;
1753     return(ret);
1754 }
1755 
1756 /**
1757  * xmlURIEscape:
1758  * @str:  the string of the URI to escape
1759  *
1760  * Escaping routine, does not do validity checks !
1761  * It will try to escape the chars needing this, but this is heuristic
1762  * based it's impossible to be sure.
1763  *
1764  * Returns an copy of the string, but escaped
1765  *
1766  * 25 May 2001
1767  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1768  * according to RFC2396.
1769  *   - Carl Douglas
1770  */
1771 xmlChar *
xmlURIEscape(const xmlChar * str)1772 xmlURIEscape(const xmlChar * str)
1773 {
1774     xmlChar *ret, *segment = NULL;
1775     xmlURIPtr uri;
1776     int ret2;
1777 
1778 #define NULLCHK(p) if(!p) { \
1779          xmlURIErrMemory("escaping URI value\n"); \
1780          xmlFreeURI(uri); \
1781          return NULL; } \
1782 
1783     if (str == NULL)
1784         return (NULL);
1785 
1786     uri = xmlCreateURI();
1787     if (uri != NULL) {
1788 	/*
1789 	 * Allow escaping errors in the unescaped form
1790 	 */
1791         uri->cleanup = 1;
1792         ret2 = xmlParseURIReference(uri, (const char *)str);
1793         if (ret2) {
1794             xmlFreeURI(uri);
1795             return (NULL);
1796         }
1797     }
1798 
1799     if (!uri)
1800         return NULL;
1801 
1802     ret = NULL;
1803 
1804     if (uri->scheme) {
1805         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1806         NULLCHK(segment)
1807         ret = xmlStrcat(ret, segment);
1808         ret = xmlStrcat(ret, BAD_CAST ":");
1809         xmlFree(segment);
1810     }
1811 
1812     if (uri->authority) {
1813         segment =
1814             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1815         NULLCHK(segment)
1816         ret = xmlStrcat(ret, BAD_CAST "//");
1817         ret = xmlStrcat(ret, segment);
1818         xmlFree(segment);
1819     }
1820 
1821     if (uri->user) {
1822         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1823         NULLCHK(segment)
1824 		ret = xmlStrcat(ret,BAD_CAST "//");
1825         ret = xmlStrcat(ret, segment);
1826         ret = xmlStrcat(ret, BAD_CAST "@");
1827         xmlFree(segment);
1828     }
1829 
1830     if (uri->server) {
1831         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1832         NULLCHK(segment)
1833 		if (uri->user == NULL)
1834 		ret = xmlStrcat(ret, BAD_CAST "//");
1835         ret = xmlStrcat(ret, segment);
1836         xmlFree(segment);
1837     }
1838 
1839     if (uri->port) {
1840         xmlChar port[10];
1841 
1842         snprintf((char *) port, 10, "%d", uri->port);
1843         ret = xmlStrcat(ret, BAD_CAST ":");
1844         ret = xmlStrcat(ret, port);
1845     }
1846 
1847     if (uri->path) {
1848         segment =
1849             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1850         NULLCHK(segment)
1851         ret = xmlStrcat(ret, segment);
1852         xmlFree(segment);
1853     }
1854 
1855     if (uri->query_raw) {
1856         ret = xmlStrcat(ret, BAD_CAST "?");
1857         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1858     }
1859     else if (uri->query) {
1860         segment =
1861             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1862         NULLCHK(segment)
1863         ret = xmlStrcat(ret, BAD_CAST "?");
1864         ret = xmlStrcat(ret, segment);
1865         xmlFree(segment);
1866     }
1867 
1868     if (uri->opaque) {
1869         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1870         NULLCHK(segment)
1871         ret = xmlStrcat(ret, segment);
1872         xmlFree(segment);
1873     }
1874 
1875     if (uri->fragment) {
1876         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1877         NULLCHK(segment)
1878         ret = xmlStrcat(ret, BAD_CAST "#");
1879         ret = xmlStrcat(ret, segment);
1880         xmlFree(segment);
1881     }
1882 
1883     xmlFreeURI(uri);
1884 #undef NULLCHK
1885 
1886     return (ret);
1887 }
1888 
1889 /************************************************************************
1890  *									*
1891  *			Public functions				*
1892  *									*
1893  ************************************************************************/
1894 
1895 /**
1896  * xmlBuildURI:
1897  * @URI:  the URI instance found in the document
1898  * @base:  the base value
1899  *
 * Computes the final URI of the reference done by checking that
1901  * the given URI is valid, and building the final URI using the
1902  * base URI. This is processed according to section 5.2 of the
1903  * RFC 2396
1904  *
1905  * 5.2. Resolving Relative References to Absolute Form
1906  *
1907  * Returns a new URI string (to be freed by the caller) or NULL in case
1908  *         of error.
1909  */
1910 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1911 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1912     xmlChar *val = NULL;
1913     int ret, len, indx, cur, out;
1914     xmlURIPtr ref = NULL;
1915     xmlURIPtr bas = NULL;
1916     xmlURIPtr res = NULL;
1917 
1918     /*
1919      * 1) The URI reference is parsed into the potential four components and
1920      *    fragment identifier, as described in Section 4.3.
1921      *
1922      *    NOTE that a completely empty URI is treated by modern browsers
1923      *    as a reference to "." rather than as a synonym for the current
1924      *    URI.  Should we do that here?
1925      */
1926     if (URI == NULL)
1927 	ret = -1;
1928     else {
1929 	if (*URI) {
1930 	    ref = xmlCreateURI();
1931 	    if (ref == NULL)
1932 		goto done;
1933 	    ret = xmlParseURIReference(ref, (const char *) URI);
1934 	}
1935 	else
1936 	    ret = 0;
1937     }
1938     if (ret != 0)
1939 	goto done;
1940     if ((ref != NULL) && (ref->scheme != NULL)) {
1941 	/*
1942 	 * The URI is absolute don't modify.
1943 	 */
1944 	val = xmlStrdup(URI);
1945 	goto done;
1946     }
1947     if (base == NULL)
1948 	ret = -1;
1949     else {
1950 	bas = xmlCreateURI();
1951 	if (bas == NULL)
1952 	    goto done;
1953 	ret = xmlParseURIReference(bas, (const char *) base);
1954     }
1955     if (ret != 0) {
1956 	if (ref)
1957 	    val = xmlSaveUri(ref);
1958 	goto done;
1959     }
1960     if (ref == NULL) {
1961 	/*
1962 	 * the base fragment must be ignored
1963 	 */
1964 	if (bas->fragment != NULL) {
1965 	    xmlFree(bas->fragment);
1966 	    bas->fragment = NULL;
1967 	}
1968 	val = xmlSaveUri(bas);
1969 	goto done;
1970     }
1971 
1972     /*
1973      * 2) If the path component is empty and the scheme, authority, and
1974      *    query components are undefined, then it is a reference to the
1975      *    current document and we are done.  Otherwise, the reference URI's
1976      *    query and fragment components are defined as found (or not found)
1977      *    within the URI reference and not inherited from the base URI.
1978      *
1979      *    NOTE that in modern browsers, the parsing differs from the above
1980      *    in the following aspect:  the query component is allowed to be
1981      *    defined while still treating this as a reference to the current
1982      *    document.
1983      */
1984     res = xmlCreateURI();
1985     if (res == NULL)
1986 	goto done;
1987     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1988 	((ref->authority == NULL) && (ref->server == NULL))) {
1989 	if (bas->scheme != NULL)
1990 	    res->scheme = xmlMemStrdup(bas->scheme);
1991 	if (bas->authority != NULL)
1992 	    res->authority = xmlMemStrdup(bas->authority);
1993 	else if ((bas->server != NULL) || (bas->port == -1)) {
1994 	    if (bas->server != NULL)
1995 		res->server = xmlMemStrdup(bas->server);
1996 	    if (bas->user != NULL)
1997 		res->user = xmlMemStrdup(bas->user);
1998 	    res->port = bas->port;
1999 	}
2000 	if (bas->path != NULL)
2001 	    res->path = xmlMemStrdup(bas->path);
2002 	if (ref->query_raw != NULL)
2003 	    res->query_raw = xmlMemStrdup (ref->query_raw);
2004 	else if (ref->query != NULL)
2005 	    res->query = xmlMemStrdup(ref->query);
2006 	else if (bas->query_raw != NULL)
2007 	    res->query_raw = xmlMemStrdup(bas->query_raw);
2008 	else if (bas->query != NULL)
2009 	    res->query = xmlMemStrdup(bas->query);
2010 	if (ref->fragment != NULL)
2011 	    res->fragment = xmlMemStrdup(ref->fragment);
2012 	goto step_7;
2013     }
2014 
2015     /*
2016      * 3) If the scheme component is defined, indicating that the reference
2017      *    starts with a scheme name, then the reference is interpreted as an
2018      *    absolute URI and we are done.  Otherwise, the reference URI's
2019      *    scheme is inherited from the base URI's scheme component.
2020      */
2021     if (ref->scheme != NULL) {
2022 	val = xmlSaveUri(ref);
2023 	goto done;
2024     }
2025     if (bas->scheme != NULL)
2026 	res->scheme = xmlMemStrdup(bas->scheme);
2027 
2028     if (ref->query_raw != NULL)
2029 	res->query_raw = xmlMemStrdup(ref->query_raw);
2030     else if (ref->query != NULL)
2031 	res->query = xmlMemStrdup(ref->query);
2032     if (ref->fragment != NULL)
2033 	res->fragment = xmlMemStrdup(ref->fragment);
2034 
2035     /*
2036      * 4) If the authority component is defined, then the reference is a
2037      *    network-path and we skip to step 7.  Otherwise, the reference
2038      *    URI's authority is inherited from the base URI's authority
2039      *    component, which will also be undefined if the URI scheme does not
2040      *    use an authority component.
2041      */
2042     if ((ref->authority != NULL) || (ref->server != NULL)) {
2043 	if (ref->authority != NULL)
2044 	    res->authority = xmlMemStrdup(ref->authority);
2045 	else {
2046 	    res->server = xmlMemStrdup(ref->server);
2047 	    if (ref->user != NULL)
2048 		res->user = xmlMemStrdup(ref->user);
2049             res->port = ref->port;
2050 	}
2051 	if (ref->path != NULL)
2052 	    res->path = xmlMemStrdup(ref->path);
2053 	goto step_7;
2054     }
2055     if (bas->authority != NULL)
2056 	res->authority = xmlMemStrdup(bas->authority);
2057     else if ((bas->server != NULL) || (bas->port == -1)) {
2058 	if (bas->server != NULL)
2059 	    res->server = xmlMemStrdup(bas->server);
2060 	if (bas->user != NULL)
2061 	    res->user = xmlMemStrdup(bas->user);
2062 	res->port = bas->port;
2063     }
2064 
2065     /*
2066      * 5) If the path component begins with a slash character ("/"), then
2067      *    the reference is an absolute-path and we skip to step 7.
2068      */
2069     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2070 	res->path = xmlMemStrdup(ref->path);
2071 	goto step_7;
2072     }
2073 
2074 
2075     /*
2076      * 6) If this step is reached, then we are resolving a relative-path
2077      *    reference.  The relative path needs to be merged with the base
2078      *    URI's path.  Although there are many ways to do this, we will
2079      *    describe a simple method using a separate string buffer.
2080      *
2081      * Allocate a buffer large enough for the result string.
2082      */
2083     len = 2; /* extra / and 0 */
2084     if (ref->path != NULL)
2085 	len += strlen(ref->path);
2086     if (bas->path != NULL)
2087 	len += strlen(bas->path);
2088     res->path = (char *) xmlMallocAtomic(len);
2089     if (res->path == NULL) {
2090         xmlURIErrMemory("resolving URI against base\n");
2091 	goto done;
2092     }
2093     res->path[0] = 0;
2094 
2095     /*
2096      * a) All but the last segment of the base URI's path component is
2097      *    copied to the buffer.  In other words, any characters after the
2098      *    last (right-most) slash character, if any, are excluded.
2099      */
2100     cur = 0;
2101     out = 0;
2102     if (bas->path != NULL) {
2103 	while (bas->path[cur] != 0) {
2104 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2105 		cur++;
2106 	    if (bas->path[cur] == 0)
2107 		break;
2108 
2109 	    cur++;
2110 	    while (out < cur) {
2111 		res->path[out] = bas->path[out];
2112 		out++;
2113 	    }
2114 	}
2115     }
2116     res->path[out] = 0;
2117 
2118     /*
2119      * b) The reference's path component is appended to the buffer
2120      *    string.
2121      */
2122     if (ref->path != NULL && ref->path[0] != 0) {
2123 	indx = 0;
2124 	/*
2125 	 * Ensure the path includes a '/'
2126 	 */
2127 	if ((out == 0) && (bas->server != NULL))
2128 	    res->path[out++] = '/';
2129 	while (ref->path[indx] != 0) {
2130 	    res->path[out++] = ref->path[indx++];
2131 	}
2132     }
2133     res->path[out] = 0;
2134 
2135     /*
2136      * Steps c) to h) are really path normalization steps
2137      */
2138     xmlNormalizeURIPath(res->path);
2139 
2140 step_7:
2141 
2142     /*
2143      * 7) The resulting URI components, including any inherited from the
2144      *    base URI, are recombined to give the absolute form of the URI
2145      *    reference.
2146      */
2147     val = xmlSaveUri(res);
2148 
2149 done:
2150     if (ref != NULL)
2151 	xmlFreeURI(ref);
2152     if (bas != NULL)
2153 	xmlFreeURI(bas);
2154     if (res != NULL)
2155 	xmlFreeURI(res);
2156     return(val);
2157 }
2158 
2159 /**
2160  * xmlBuildRelativeURI:
2161  * @URI:  the URI reference under consideration
2162  * @base:  the base value
2163  *
2164  * Expresses the URI of the reference in terms relative to the
2165  * base.  Some examples of this operation include:
2166  *     base = "http://site1.com/docs/book1.html"
2167  *        URI input                        URI returned
2168  *     docs/pic1.gif                    pic1.gif
2169  *     docs/img/pic1.gif                img/pic1.gif
2170  *     img/pic1.gif                     ../img/pic1.gif
2171  *     http://site1.com/docs/pic1.gif   pic1.gif
2172  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2173  *
2174  *     base = "docs/book1.html"
2175  *        URI input                        URI returned
2176  *     docs/pic1.gif                    pic1.gif
2177  *     docs/img/pic1.gif                img/pic1.gif
2178  *     img/pic1.gif                     ../img/pic1.gif
2179  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2180  *
2181  *
2182  * Note: if the URI reference is really wierd or complicated, it may be
2183  *       worthwhile to first convert it into a "nice" one by calling
2184  *       xmlBuildURI (using 'base') before calling this routine,
2185  *       since this routine (for reasonable efficiency) assumes URI has
2186  *       already been through some validation.
2187  *
2188  * Returns a new URI string (to be freed by the caller) or NULL in case
2189  * error.
2190  */
2191 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2192 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2193 {
2194     xmlChar *val = NULL;
2195     int ret;
2196     int ix;
2197     int nbslash = 0;
2198     int len;
2199     xmlURIPtr ref = NULL;
2200     xmlURIPtr bas = NULL;
2201     xmlChar *bptr, *uptr, *vptr;
2202     int remove_path = 0;
2203 
2204     if ((URI == NULL) || (*URI == 0))
2205 	return NULL;
2206 
2207     /*
2208      * First parse URI into a standard form
2209      */
2210     ref = xmlCreateURI ();
2211     if (ref == NULL)
2212 	return NULL;
2213     /* If URI not already in "relative" form */
2214     if (URI[0] != '.') {
2215 	ret = xmlParseURIReference (ref, (const char *) URI);
2216 	if (ret != 0)
2217 	    goto done;		/* Error in URI, return NULL */
2218     } else
2219 	ref->path = (char *)xmlStrdup(URI);
2220 
2221     /*
2222      * Next parse base into the same standard form
2223      */
2224     if ((base == NULL) || (*base == 0)) {
2225 	val = xmlStrdup (URI);
2226 	goto done;
2227     }
2228     bas = xmlCreateURI ();
2229     if (bas == NULL)
2230 	goto done;
2231     if (base[0] != '.') {
2232 	ret = xmlParseURIReference (bas, (const char *) base);
2233 	if (ret != 0)
2234 	    goto done;		/* Error in base, return NULL */
2235     } else
2236 	bas->path = (char *)xmlStrdup(base);
2237 
2238     /*
2239      * If the scheme / server on the URI differs from the base,
2240      * just return the URI
2241      */
2242     if ((ref->scheme != NULL) &&
2243 	((bas->scheme == NULL) ||
2244 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2245 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2246 	val = xmlStrdup (URI);
2247 	goto done;
2248     }
2249     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2250 	val = xmlStrdup(BAD_CAST "");
2251 	goto done;
2252     }
2253     if (bas->path == NULL) {
2254 	val = xmlStrdup((xmlChar *)ref->path);
2255 	goto done;
2256     }
2257     if (ref->path == NULL) {
2258         ref->path = (char *) "/";
2259 	remove_path = 1;
2260     }
2261 
2262     /*
2263      * At this point (at last!) we can compare the two paths
2264      *
2265      * First we take care of the special case where either of the
2266      * two path components may be missing (bug 316224)
2267      */
2268     bptr = (xmlChar *)bas->path;
2269     {
2270         xmlChar *rptr = (xmlChar *) ref->path;
2271         int pos = 0;
2272 
2273         /*
2274          * Next we compare the two strings and find where they first differ
2275          */
2276 	if ((*rptr == '.') && (rptr[1] == '/'))
2277             rptr += 2;
2278 	if ((*bptr == '.') && (bptr[1] == '/'))
2279             bptr += 2;
2280 	else if ((*bptr == '/') && (*rptr != '/'))
2281 	    bptr++;
2282 	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2283 	    pos++;
2284 
2285 	if (bptr[pos] == rptr[pos]) {
2286 	    val = xmlStrdup(BAD_CAST "");
2287 	    goto done;		/* (I can't imagine why anyone would do this) */
2288 	}
2289 
2290 	/*
2291 	 * In URI, "back up" to the last '/' encountered.  This will be the
2292 	 * beginning of the "unique" suffix of URI
2293 	 */
2294 	ix = pos;
2295 	for (; ix > 0; ix--) {
2296 	    if (rptr[ix - 1] == '/')
2297 		break;
2298 	}
2299 	uptr = (xmlChar *)&rptr[ix];
2300 
2301 	/*
2302 	 * In base, count the number of '/' from the differing point
2303 	 */
2304 	for (; bptr[ix] != 0; ix++) {
2305 	    if (bptr[ix] == '/')
2306 		nbslash++;
2307 	}
2308 
2309 	/*
2310 	 * e.g: URI="foo/" base="foo/bar" -> "./"
2311 	 */
2312 	if (nbslash == 0 && !uptr[0]) {
2313 	    val = xmlStrdup(BAD_CAST "./");
2314 	    goto done;
2315 	}
2316 
2317 	len = xmlStrlen (uptr) + 1;
2318     }
2319 
2320     if (nbslash == 0) {
2321 	if (uptr != NULL)
2322 	    /* exception characters from xmlSaveUri */
2323 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2324 	goto done;
2325     }
2326 
2327     /*
2328      * Allocate just enough space for the returned string -
2329      * length of the remainder of the URI, plus enough space
2330      * for the "../" groups, plus one for the terminator
2331      */
2332     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2333     if (val == NULL) {
2334         xmlURIErrMemory("building relative URI\n");
2335 	goto done;
2336     }
2337     vptr = val;
2338     /*
2339      * Put in as many "../" as needed
2340      */
2341     for (; nbslash>0; nbslash--) {
2342 	*vptr++ = '.';
2343 	*vptr++ = '.';
2344 	*vptr++ = '/';
2345     }
2346     /*
2347      * Finish up with the end of the URI
2348      */
2349     if (uptr != NULL) {
2350         if ((vptr > val) && (len > 0) &&
2351 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2352 	    memcpy (vptr, uptr + 1, len - 1);
2353 	    vptr[len - 2] = 0;
2354 	} else {
2355 	    memcpy (vptr, uptr, len);
2356 	    vptr[len - 1] = 0;
2357 	}
2358     } else {
2359 	vptr[len - 1] = 0;
2360     }
2361 
2362     /* escape the freshly-built path */
2363     vptr = val;
2364 	/* exception characters from xmlSaveUri */
2365     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2366     xmlFree(vptr);
2367 
2368 done:
2369     /*
2370      * Free the working variables
2371      */
2372     if (remove_path != 0)
2373         ref->path = NULL;
2374     if (ref != NULL)
2375 	xmlFreeURI (ref);
2376     if (bas != NULL)
2377 	xmlFreeURI (bas);
2378 
2379     return val;
2380 }
2381 
2382 /**
2383  * xmlCanonicPath:
2384  * @path:  the resource locator in a filesystem notation
2385  *
2386  * Constructs a canonic path from the specified path.
2387  *
2388  * Returns a new canonic path, or a duplicate of the path parameter if the
2389  * construction fails. The caller is responsible for freeing the memory occupied
2390  * by the returned string. If there is insufficient memory available, or the
2391  * argument is NULL, the function returns NULL.
2392  */
2393 #define IS_WINDOWS_PATH(p)					\
2394 	((p != NULL) &&						\
2395 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2396 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2397 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2398 xmlChar *
xmlCanonicPath(const xmlChar * path)2399 xmlCanonicPath(const xmlChar *path)
2400 {
2401 /*
2402  * For Windows implementations, additional work needs to be done to
2403  * replace backslashes in pathnames with "forward slashes"
2404  */
2405 #if defined(_WIN32) && !defined(__CYGWIN__)
2406     int len = 0;
2407     char *p = NULL;
2408 #endif
2409     xmlURIPtr uri;
2410     xmlChar *ret;
2411     const xmlChar *absuri;
2412 
2413     if (path == NULL)
2414 	return(NULL);
2415 
2416 #if defined(_WIN32)
2417     /*
2418      * We must not change the backslashes to slashes if the the path
2419      * starts with \\?\
2420      * Those paths can be up to 32k characters long.
2421      * Was added specifically for OpenOffice, those paths can't be converted
2422      * to URIs anyway.
2423      */
2424     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2425         (path[3] == '\\') )
2426 	return xmlStrdup((const xmlChar *) path);
2427 #endif
2428 
2429 	/* sanitize filename starting with // so it can be used as URI */
2430     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2431         path++;
2432 
2433     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2434 	xmlFreeURI(uri);
2435 	return xmlStrdup(path);
2436     }
2437 
2438     /* Check if this is an "absolute uri" */
2439     absuri = xmlStrstr(path, BAD_CAST "://");
2440     if (absuri != NULL) {
2441         int l, j;
2442 	unsigned char c;
2443 	xmlChar *escURI;
2444 
2445         /*
2446 	 * this looks like an URI where some parts have not been
2447 	 * escaped leading to a parsing problem.  Check that the first
2448 	 * part matches a protocol.
2449 	 */
2450 	l = absuri - path;
2451 	/* Bypass if first part (part before the '://') is > 20 chars */
2452 	if ((l <= 0) || (l > 20))
2453 	    goto path_processing;
2454 	/* Bypass if any non-alpha characters are present in first part */
2455 	for (j = 0;j < l;j++) {
2456 	    c = path[j];
2457 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2458 	        goto path_processing;
2459 	}
2460 
2461 	/* Escape all except the characters specified in the supplied path */
2462         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2463 	if (escURI != NULL) {
2464 	    /* Try parsing the escaped path */
2465 	    uri = xmlParseURI((const char *) escURI);
2466 	    /* If successful, return the escaped string */
2467 	    if (uri != NULL) {
2468 	        xmlFreeURI(uri);
2469 		return escURI;
2470 	    }
2471             xmlFree(escURI);
2472 	}
2473     }
2474 
2475 path_processing:
2476 /* For Windows implementations, replace backslashes with 'forward slashes' */
2477 #if defined(_WIN32) && !defined(__CYGWIN__)
2478     /*
2479      * Create a URI structure
2480      */
2481     uri = xmlCreateURI();
2482     if (uri == NULL) {		/* Guard against 'out of memory' */
2483         return(NULL);
2484     }
2485 
2486     len = xmlStrlen(path);
2487     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2488         /* make the scheme 'file' */
2489 	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2490 	/* allocate space for leading '/' + path + string terminator */
2491 	uri->path = xmlMallocAtomic(len + 2);
2492 	if (uri->path == NULL) {
2493 	    xmlFreeURI(uri);	/* Guard agains 'out of memory' */
2494 	    return(NULL);
2495 	}
2496 	/* Put in leading '/' plus path */
2497 	uri->path[0] = '/';
2498 	p = uri->path + 1;
2499 	strncpy(p, (char *) path, len + 1);
2500     } else {
2501 	uri->path = (char *) xmlStrdup(path);
2502 	if (uri->path == NULL) {
2503 	    xmlFreeURI(uri);
2504 	    return(NULL);
2505 	}
2506 	p = uri->path;
2507     }
2508     /* Now change all occurences of '\' to '/' */
2509     while (*p != '\0') {
2510 	if (*p == '\\')
2511 	    *p = '/';
2512 	p++;
2513     }
2514 
2515     if (uri->scheme == NULL) {
2516 	ret = xmlStrdup((const xmlChar *) uri->path);
2517     } else {
2518 	ret = xmlSaveUri(uri);
2519     }
2520 
2521     xmlFreeURI(uri);
2522 #else
2523     ret = xmlStrdup((const xmlChar *) path);
2524 #endif
2525     return(ret);
2526 }
2527 
2528 /**
2529  * xmlPathToURI:
2530  * @path:  the resource locator in a filesystem notation
2531  *
2532  * Constructs an URI expressing the existing path
2533  *
2534  * Returns a new URI, or a duplicate of the path parameter if the
2535  * construction fails. The caller is responsible for freeing the memory
2536  * occupied by the returned string. If there is insufficient memory available,
2537  * or the argument is NULL, the function returns NULL.
2538  */
2539 xmlChar *
xmlPathToURI(const xmlChar * path)2540 xmlPathToURI(const xmlChar *path)
2541 {
2542     xmlURIPtr uri;
2543     xmlURI temp;
2544     xmlChar *ret, *cal;
2545 
2546     if (path == NULL)
2547         return(NULL);
2548 
2549     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2550 	xmlFreeURI(uri);
2551 	return xmlStrdup(path);
2552     }
2553     cal = xmlCanonicPath(path);
2554     if (cal == NULL)
2555         return(NULL);
2556 #if defined(_WIN32) && !defined(__CYGWIN__)
2557     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2558        If 'cal' is a valid URI allready then we are done here, as continuing would make
2559        it invalid. */
2560     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2561 	xmlFreeURI(uri);
2562 	return cal;
2563     }
2564     /* 'cal' can contain a relative path with backslashes. If that is processed
2565        by xmlSaveURI, they will be escaped and the external entity loader machinery
2566        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2567     ret = cal;
2568     while (*ret != '\0') {
2569 	if (*ret == '\\')
2570 	    *ret = '/';
2571 	ret++;
2572     }
2573 #endif
2574     memset(&temp, 0, sizeof(temp));
2575     temp.path = (char *) cal;
2576     ret = xmlSaveUri(&temp);
2577     xmlFree(cal);
2578     return(ret);
2579 }
2580 #define bottom_uri
2581 #include "elfgcchack.h"
2582