xref: /reactos/sdk/lib/3rdparty/libxml2/uri.c (revision 139a3d66)
1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * See Copyright for the status of this software.
7  *
8  * daniel@veillard.com
9  */
10 
11 #define IN_LIBXML
12 #include "libxml.h"
13 
14 #include <string.h>
15 
16 #include <libxml/xmlmemory.h>
17 #include <libxml/uri.h>
18 #include <libxml/globals.h>
19 #include <libxml/xmlerror.h>
20 
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar in the RFCs referenced above places no limit on the
 * size of a URI. In practice URIs are usually relatively short, except
 * for the data URI scheme defined in RFC 2397. Even for data URIs the
 * usual maximum size before hitting random practical limits is around
 * 64 KB, and 4 KB is usually the maximum admitted for proper operation.
 * The value below is more a security limit than anything else and
 * should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * operand in whatever expression it is used in.
 */
#define MAX_URI_LENGTH (1024 * 1024)
34 
35 static void
36 xmlURIErrMemory(const char *extra)
37 {
38     if (extra)
39         __xmlRaiseError(NULL, NULL, NULL,
40                         NULL, NULL, XML_FROM_URI,
41                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
42                         extra, NULL, NULL, 0, 0,
43                         "Memory allocation failed : %s\n", extra);
44     else
45         __xmlRaiseError(NULL, NULL, NULL,
46                         NULL, NULL, XML_FROM_URI,
47                         XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
48                         NULL, NULL, NULL, 0, 0,
49                         "Memory allocation failed\n");
50 }
51 
52 static void xmlCleanURI(xmlURIPtr uri);
53 
/*
 * Character classes from the older RFC 2396 grammar, kept for the
 * legacy handling code. Every IS_xxx(x) macro takes a character value,
 * except IS_UNWISE(p) which takes a pointer to the character to test.
 * Arguments may be evaluated several times: never pass an expression
 * with side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* Drop any IS_DIGIT inherited from an earlier header before redefining. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the macros above, this one takes a pointer to the character.
 */
#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Advance the pointer lvalue p to the next character, stepping over a
 * whole "%XX" escape (3 bytes) at once. The macro itself does not check
 * that two more bytes follow the '%'; callers only use it on input
 * already accepted by a character-class test.
 */
#define NEXT(p) ((*(p) == '%')? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/* Duplicate the first n bytes of s through xmlStrndup, as a char *. */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
139 
140 /************************************************************************
141  *									*
142  *                         RFC 3986 parser				*
143  *									*
144  ************************************************************************/
145 
/*
 * RFC 3986 character classes. Every ISA_xxx(p) macro takes a POINTER to
 * the character to classify and may evaluate it several times, so the
 * argument must be free of side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following bytes are only read once a '%' has been seen, and
 * the second one only if the first is a hex digit, so a NUL-terminated
 * string is never read past its terminator.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
195 
196 /**
197  * xmlParse3986Scheme:
198  * @uri:  pointer to an URI structure
199  * @str:  pointer to the string to analyze
200  *
201  * Parse an URI scheme
202  *
203  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
204  *
205  * Returns 0 or the error code
206  */
207 static int
208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
209     const char *cur;
210 
211     if (str == NULL)
212 	return(-1);
213 
214     cur = *str;
215     if (!ISA_ALPHA(cur))
216 	return(2);
217     cur++;
218     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
219            (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
220     if (uri != NULL) {
221 	if (uri->scheme != NULL) xmlFree(uri->scheme);
222 	uri->scheme = STRNDUP(*str, cur - *str);
223     }
224     *str = cur;
225     return(0);
226 }
227 
228 /**
229  * xmlParse3986Fragment:
230  * @uri:  pointer to an URI structure
231  * @str:  pointer to the string to analyze
232  *
233  * Parse the query part of an URI
234  *
235  * fragment      = *( pchar / "/" / "?" )
236  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
237  *       in the fragment identifier but this is used very broadly for
238  *       xpointer scheme selection, so we are allowing it here to not break
239  *       for example all the DocBook processing chains.
240  *
241  * Returns 0 or the error code
242  */
243 static int
244 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
245 {
246     const char *cur;
247 
248     if (str == NULL)
249         return (-1);
250 
251     cur = *str;
252 
253     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
254            (*cur == '[') || (*cur == ']') ||
255            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
256         NEXT(cur);
257     if (uri != NULL) {
258         if (uri->fragment != NULL)
259             xmlFree(uri->fragment);
260 	if (uri->cleanup & 2)
261 	    uri->fragment = STRNDUP(*str, cur - *str);
262 	else
263 	    uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
264     }
265     *str = cur;
266     return (0);
267 }
268 
269 /**
270  * xmlParse3986Query:
271  * @uri:  pointer to an URI structure
272  * @str:  pointer to the string to analyze
273  *
274  * Parse the query part of an URI
275  *
276  * query = *uric
277  *
278  * Returns 0 or the error code
279  */
280 static int
281 xmlParse3986Query(xmlURIPtr uri, const char **str)
282 {
283     const char *cur;
284 
285     if (str == NULL)
286         return (-1);
287 
288     cur = *str;
289 
290     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
291            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
292         NEXT(cur);
293     if (uri != NULL) {
294         if (uri->query != NULL)
295             xmlFree(uri->query);
296 	if (uri->cleanup & 2)
297 	    uri->query = STRNDUP(*str, cur - *str);
298 	else
299 	    uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
300 
301 	/* Save the raw bytes of the query as well.
302 	 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
303 	 */
304 	if (uri->query_raw != NULL)
305 	    xmlFree (uri->query_raw);
306 	uri->query_raw = STRNDUP (*str, cur - *str);
307     }
308     *str = cur;
309     return (0);
310 }
311 
312 /**
313  * xmlParse3986Port:
314  * @uri:  pointer to an URI structure
315  * @str:  the string to analyze
316  *
317  * Parse a port part and fills in the appropriate fields
318  * of the @uri structure
319  *
320  * port          = *DIGIT
321  *
322  * Returns 0 or the error code
323  */
324 static int
325 xmlParse3986Port(xmlURIPtr uri, const char **str)
326 {
327     const char *cur = *str;
328     int port = 0;
329 
330     if (ISA_DIGIT(cur)) {
331 	while (ISA_DIGIT(cur)) {
332 	    port = port * 10 + (*cur - '0');
333             if (port > 99999999)
334                 port = 99999999;
335 
336 	    cur++;
337 	}
338 	if (uri != NULL)
339 	    uri->port = port;
340 	*str = cur;
341 	return(0);
342     }
343     return(1);
344 }
345 
346 /**
347  * xmlParse3986Userinfo:
348  * @uri:  pointer to an URI structure
349  * @str:  the string to analyze
350  *
351  * Parse an user informations part and fills in the appropriate fields
352  * of the @uri structure
353  *
354  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
355  *
356  * Returns 0 or the error code
357  */
358 static int
359 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
360 {
361     const char *cur;
362 
363     cur = *str;
364     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
365            ISA_SUB_DELIM(cur) || (*cur == ':'))
366 	NEXT(cur);
367     if (*cur == '@') {
368 	if (uri != NULL) {
369 	    if (uri->user != NULL) xmlFree(uri->user);
370 	    if (uri->cleanup & 2)
371 		uri->user = STRNDUP(*str, cur - *str);
372 	    else
373 		uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
374 	}
375 	*str = cur;
376 	return(0);
377     }
378     return(1);
379 }
380 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. Multi-digit values with a leading zero and
 * three-digit values above 255 are rejected. At most three digits are
 * consumed; it is up to the caller to check what follows. *@str is only
 * modified on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    /*
     * cur[1] is only read once cur[0] is known to be a digit, and cur[2]
     * only once cur[1] is one too, so the terminating NUL of the string
     * is never stepped over.
     */
    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;                                /* 0-9 */
    } else if (cur[0] == '0') {
        return(1);                              /* no leading zero */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        len = 2;                                /* 10-99 */
    } else if (cur[0] == '1') {
        len = 3;                                /* 100-199 */
    } else if ((cur[0] == '2') &&
               ((cur[1] < '5') ||
                ((cur[1] == '5') && (cur[2] <= '5')))) {
        len = 3;                                /* 200-255 */
    } else {
        return(1);                              /* 256 and above */
    }

    *str = cur + len;
    return(0);
}
418 /**
419  * xmlParse3986Host:
420  * @uri:  pointer to an URI structure
421  * @str:  the string to analyze
422  *
423  * Parse an host part and fills in the appropriate fields
424  * of the @uri structure
425  *
426  * host          = IP-literal / IPv4address / reg-name
427  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
428  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
429  * reg-name      = *( unreserved / pct-encoded / sub-delims )
430  *
431  * Returns 0 or the error code
432  */
433 static int
434 xmlParse3986Host(xmlURIPtr uri, const char **str)
435 {
436     const char *cur = *str;
437     const char *host;
438 
439     host = cur;
440     /*
441      * IPv6 and future addressing scheme are enclosed between brackets
442      */
443     if (*cur == '[') {
444         cur++;
445 	while ((*cur != ']') && (*cur != 0))
446 	    cur++;
447 	if (*cur != ']')
448 	    return(1);
449 	cur++;
450 	goto found;
451     }
452     /*
453      * try to parse an IPv4
454      */
455     if (ISA_DIGIT(cur)) {
456         if (xmlParse3986DecOctet(&cur) != 0)
457 	    goto not_ipv4;
458 	if (*cur != '.')
459 	    goto not_ipv4;
460 	cur++;
461         if (xmlParse3986DecOctet(&cur) != 0)
462 	    goto not_ipv4;
463 	if (*cur != '.')
464 	    goto not_ipv4;
465         if (xmlParse3986DecOctet(&cur) != 0)
466 	    goto not_ipv4;
467 	if (*cur != '.')
468 	    goto not_ipv4;
469         if (xmlParse3986DecOctet(&cur) != 0)
470 	    goto not_ipv4;
471 	goto found;
472 not_ipv4:
473         cur = *str;
474     }
475     /*
476      * then this should be a hostname which can be empty
477      */
478     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
479         NEXT(cur);
480 found:
481     if (uri != NULL) {
482 	if (uri->authority != NULL) xmlFree(uri->authority);
483 	uri->authority = NULL;
484 	if (uri->server != NULL) xmlFree(uri->server);
485 	if (cur != host) {
486 	    if (uri->cleanup & 2)
487 		uri->server = STRNDUP(host, cur - host);
488 	    else
489 		uri->server = xmlURIUnescapeString(host, cur - host, NULL);
490 	} else
491 	    uri->server = NULL;
492     }
493     *str = cur;
494     return(0);
495 }
496 
497 /**
498  * xmlParse3986Authority:
499  * @uri:  pointer to an URI structure
500  * @str:  the string to analyze
501  *
502  * Parse an authority part and fills in the appropriate fields
503  * of the @uri structure
504  *
505  * authority     = [ userinfo "@" ] host [ ":" port ]
506  *
507  * Returns 0 or the error code
508  */
509 static int
510 xmlParse3986Authority(xmlURIPtr uri, const char **str)
511 {
512     const char *cur;
513     int ret;
514 
515     cur = *str;
516     /*
517      * try to parse an userinfo and check for the trailing @
518      */
519     ret = xmlParse3986Userinfo(uri, &cur);
520     if ((ret != 0) || (*cur != '@'))
521         cur = *str;
522     else
523         cur++;
524     ret = xmlParse3986Host(uri, &cur);
525     if (ret != 0) return(ret);
526     if (*cur == ':') {
527         cur++;
528         ret = xmlParse3986Port(uri, &cur);
529 	if (ret != 0) return(ret);
530     }
531     *str = cur;
532     return(0);
533 }
534 
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character (0 for none)
 * @empty: allow an empty segment
 *
 * Skip one path segment and advance *@str past it.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * The segment stops at the first character which is not a pchar or
 * which equals @forbid. A segment starting with @forbid is a
 * zero-length segment and is therefore only accepted when @empty is
 * set.
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    /* nothing can be consumed: fine only if an empty segment is allowed */
    if (!ISA_PCHAR(cur) || (*cur == forbid)) {
        if (empty)
            return(0);
        return(1);
    }
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);
    *str = cur;
    return (0);
}
567 
568 /**
569  * xmlParse3986PathAbEmpty:
570  * @uri:  pointer to an URI structure
571  * @str:  the string to analyze
572  *
573  * Parse an path absolute or empty and fills in the appropriate fields
574  * of the @uri structure
575  *
576  * path-abempty  = *( "/" segment )
577  *
578  * Returns 0 or the error code
579  */
580 static int
581 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
582 {
583     const char *cur;
584     int ret;
585 
586     cur = *str;
587 
588     while (*cur == '/') {
589         cur++;
590 	ret = xmlParse3986Segment(&cur, 0, 1);
591 	if (ret != 0) return(ret);
592     }
593     if (uri != NULL) {
594 	if (uri->path != NULL) xmlFree(uri->path);
595         if (*str != cur) {
596             if (uri->cleanup & 2)
597                 uri->path = STRNDUP(*str, cur - *str);
598             else
599                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
600         } else {
601             uri->path = NULL;
602         }
603     }
604     *str = cur;
605     return (0);
606 }
607 
608 /**
609  * xmlParse3986PathAbsolute:
610  * @uri:  pointer to an URI structure
611  * @str:  the string to analyze
612  *
613  * Parse an path absolute and fills in the appropriate fields
614  * of the @uri structure
615  *
616  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
617  *
618  * Returns 0 or the error code
619  */
620 static int
621 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
622 {
623     const char *cur;
624     int ret;
625 
626     cur = *str;
627 
628     if (*cur != '/')
629         return(1);
630     cur++;
631     ret = xmlParse3986Segment(&cur, 0, 0);
632     if (ret == 0) {
633 	while (*cur == '/') {
634 	    cur++;
635 	    ret = xmlParse3986Segment(&cur, 0, 1);
636 	    if (ret != 0) return(ret);
637 	}
638     }
639     if (uri != NULL) {
640 	if (uri->path != NULL) xmlFree(uri->path);
641         if (cur != *str) {
642             if (uri->cleanup & 2)
643                 uri->path = STRNDUP(*str, cur - *str);
644             else
645                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
646         } else {
647             uri->path = NULL;
648         }
649     }
650     *str = cur;
651     return (0);
652 }
653 
654 /**
655  * xmlParse3986PathRootless:
656  * @uri:  pointer to an URI structure
657  * @str:  the string to analyze
658  *
659  * Parse an path without root and fills in the appropriate fields
660  * of the @uri structure
661  *
662  * path-rootless = segment-nz *( "/" segment )
663  *
664  * Returns 0 or the error code
665  */
666 static int
667 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
668 {
669     const char *cur;
670     int ret;
671 
672     cur = *str;
673 
674     ret = xmlParse3986Segment(&cur, 0, 0);
675     if (ret != 0) return(ret);
676     while (*cur == '/') {
677         cur++;
678 	ret = xmlParse3986Segment(&cur, 0, 1);
679 	if (ret != 0) return(ret);
680     }
681     if (uri != NULL) {
682 	if (uri->path != NULL) xmlFree(uri->path);
683         if (cur != *str) {
684             if (uri->cleanup & 2)
685                 uri->path = STRNDUP(*str, cur - *str);
686             else
687                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
688         } else {
689             uri->path = NULL;
690         }
691     }
692     *str = cur;
693     return (0);
694 }
695 
696 /**
697  * xmlParse3986PathNoScheme:
698  * @uri:  pointer to an URI structure
699  * @str:  the string to analyze
700  *
701  * Parse an path which is not a scheme and fills in the appropriate fields
702  * of the @uri structure
703  *
704  * path-noscheme = segment-nz-nc *( "/" segment )
705  *
706  * Returns 0 or the error code
707  */
708 static int
709 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
710 {
711     const char *cur;
712     int ret;
713 
714     cur = *str;
715 
716     ret = xmlParse3986Segment(&cur, ':', 0);
717     if (ret != 0) return(ret);
718     while (*cur == '/') {
719         cur++;
720 	ret = xmlParse3986Segment(&cur, 0, 1);
721 	if (ret != 0) return(ret);
722     }
723     if (uri != NULL) {
724 	if (uri->path != NULL) xmlFree(uri->path);
725         if (cur != *str) {
726             if (uri->cleanup & 2)
727                 uri->path = STRNDUP(*str, cur - *str);
728             else
729                 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
730         } else {
731             uri->path = NULL;
732         }
733     }
734     *str = cur;
735     return (0);
736 }
737 
738 /**
739  * xmlParse3986HierPart:
740  * @uri:  pointer to an URI structure
741  * @str:  the string to analyze
742  *
743  * Parse an hierarchical part and fills in the appropriate fields
744  * of the @uri structure
745  *
746  * hier-part     = "//" authority path-abempty
747  *                / path-absolute
748  *                / path-rootless
749  *                / path-empty
750  *
751  * Returns 0 or the error code
752  */
753 static int
754 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
755 {
756     const char *cur;
757     int ret;
758 
759     cur = *str;
760 
761     if ((*cur == '/') && (*(cur + 1) == '/')) {
762         cur += 2;
763 	ret = xmlParse3986Authority(uri, &cur);
764 	if (ret != 0) return(ret);
765 	if (uri->server == NULL)
766 	    uri->port = -1;
767 	ret = xmlParse3986PathAbEmpty(uri, &cur);
768 	if (ret != 0) return(ret);
769 	*str = cur;
770 	return(0);
771     } else if (*cur == '/') {
772         ret = xmlParse3986PathAbsolute(uri, &cur);
773 	if (ret != 0) return(ret);
774     } else if (ISA_PCHAR(cur)) {
775         ret = xmlParse3986PathRootless(uri, &cur);
776 	if (ret != 0) return(ret);
777     } else {
778 	/* path-empty is effectively empty */
779 	if (uri != NULL) {
780 	    if (uri->path != NULL) xmlFree(uri->path);
781 	    uri->path = NULL;
782 	}
783     }
784     *str = cur;
785     return (0);
786 }
787 
788 /**
789  * xmlParse3986RelativeRef:
790  * @uri:  pointer to an URI structure
791  * @str:  the string to analyze
792  *
793  * Parse an URI string and fills in the appropriate fields
794  * of the @uri structure
795  *
796  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
797  * relative-part = "//" authority path-abempty
798  *               / path-absolute
799  *               / path-noscheme
800  *               / path-empty
801  *
802  * Returns 0 or the error code
803  */
804 static int
805 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
806     int ret;
807 
808     if ((*str == '/') && (*(str + 1) == '/')) {
809         str += 2;
810 	ret = xmlParse3986Authority(uri, &str);
811 	if (ret != 0) return(ret);
812 	ret = xmlParse3986PathAbEmpty(uri, &str);
813 	if (ret != 0) return(ret);
814     } else if (*str == '/') {
815 	ret = xmlParse3986PathAbsolute(uri, &str);
816 	if (ret != 0) return(ret);
817     } else if (ISA_PCHAR(str)) {
818         ret = xmlParse3986PathNoScheme(uri, &str);
819 	if (ret != 0) return(ret);
820     } else {
821 	/* path-empty is effectively empty */
822 	if (uri != NULL) {
823 	    if (uri->path != NULL) xmlFree(uri->path);
824 	    uri->path = NULL;
825 	}
826     }
827 
828     if (*str == '?') {
829 	str++;
830 	ret = xmlParse3986Query(uri, &str);
831 	if (ret != 0) return(ret);
832     }
833     if (*str == '#') {
834 	str++;
835 	ret = xmlParse3986Fragment(uri, &str);
836 	if (ret != 0) return(ret);
837     }
838     if (*str != 0) {
839 	xmlCleanURI(uri);
840 	return(1);
841     }
842     return(0);
843 }
844 
845 
846 /**
847  * xmlParse3986URI:
848  * @uri:  pointer to an URI structure
849  * @str:  the string to analyze
850  *
851  * Parse an URI string and fills in the appropriate fields
852  * of the @uri structure
853  *
854  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
855  *
856  * Returns 0 or the error code
857  */
858 static int
859 xmlParse3986URI(xmlURIPtr uri, const char *str) {
860     int ret;
861 
862     ret = xmlParse3986Scheme(uri, &str);
863     if (ret != 0) return(ret);
864     if (*str != ':') {
865 	return(1);
866     }
867     str++;
868     ret = xmlParse3986HierPart(uri, &str);
869     if (ret != 0) return(ret);
870     if (*str == '?') {
871 	str++;
872 	ret = xmlParse3986Query(uri, &str);
873 	if (ret != 0) return(ret);
874     }
875     if (*str == '#') {
876 	str++;
877 	ret = xmlParse3986Fragment(uri, &str);
878 	if (ret != 0) return(ret);
879     }
880     if (*str != 0) {
881 	xmlCleanURI(uri);
882 	return(1);
883     }
884     return(0);
885 }
886 
887 /**
888  * xmlParse3986URIReference:
889  * @uri:  pointer to an URI structure
890  * @str:  the string to analyze
891  *
892  * Parse an URI reference string and fills in the appropriate fields
893  * of the @uri structure
894  *
895  * URI-reference = URI / relative-ref
896  *
897  * Returns 0 or the error code
898  */
899 static int
900 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
901     int ret;
902 
903     if (str == NULL)
904 	return(-1);
905     xmlCleanURI(uri);
906 
907     /*
908      * Try first to parse absolute refs, then fallback to relative if
909      * it fails.
910      */
911     ret = xmlParse3986URI(uri, str);
912     if (ret != 0) {
913 	xmlCleanURI(uri);
914         ret = xmlParse3986RelativeRef(uri, str);
915 	if (ret != 0) {
916 	    xmlCleanURI(uri);
917 	    return(ret);
918 	}
919     }
920     return(0);
921 }
922 
923 /**
924  * xmlParseURI:
925  * @str:  the URI string to analyze
926  *
927  * Parse an URI based on RFC 3986
928  *
929  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
930  *
931  * Returns a newly built xmlURIPtr or NULL in case of error
932  */
933 xmlURIPtr
934 xmlParseURI(const char *str) {
935     xmlURIPtr uri;
936     int ret;
937 
938     if (str == NULL)
939 	return(NULL);
940     uri = xmlCreateURI();
941     if (uri != NULL) {
942 	ret = xmlParse3986URIReference(uri, str);
943         if (ret) {
944 	    xmlFreeURI(uri);
945 	    return(NULL);
946 	}
947     }
948     return(uri);
949 }
950 
951 /**
952  * xmlParseURIReference:
953  * @uri:  pointer to an URI structure
954  * @str:  the string to analyze
955  *
956  * Parse an URI reference string based on RFC 3986 and fills in the
957  * appropriate fields of the @uri structure
958  *
959  * URI-reference = URI / relative-ref
960  *
961  * Returns 0 or the error code
962  */
963 int
964 xmlParseURIReference(xmlURIPtr uri, const char *str) {
965     return(xmlParse3986URIReference(uri, str));
966 }
967 
968 /**
969  * xmlParseURIRaw:
970  * @str:  the URI string to analyze
971  * @raw:  if 1 unescaping of URI pieces are disabled
972  *
973  * Parse an URI but allows to keep intact the original fragments.
974  *
975  * URI-reference = URI / relative-ref
976  *
977  * Returns a newly built xmlURIPtr or NULL in case of error
978  */
979 xmlURIPtr
980 xmlParseURIRaw(const char *str, int raw) {
981     xmlURIPtr uri;
982     int ret;
983 
984     if (str == NULL)
985 	return(NULL);
986     uri = xmlCreateURI();
987     if (uri != NULL) {
988         if (raw) {
989 	    uri->cleanup |= 2;
990 	}
991 	ret = xmlParseURIReference(uri, str);
992         if (ret) {
993 	    xmlFreeURI(uri);
994 	    return(NULL);
995 	}
996     }
997     return(uri);
998 }
999 
1000 /************************************************************************
1001  *									*
1002  *			Generic URI structure functions			*
1003  *									*
1004  ************************************************************************/
1005 
1006 /**
1007  * xmlCreateURI:
1008  *
1009  * Simply creates an empty xmlURI
1010  *
1011  * Returns the new structure or NULL in case of error
1012  */
1013 xmlURIPtr
1014 xmlCreateURI(void) {
1015     xmlURIPtr ret;
1016 
1017     ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1018     if (ret == NULL) {
1019         xmlURIErrMemory("creating URI structure\n");
1020 	return(NULL);
1021     }
1022     memset(ret, 0, sizeof(xmlURI));
1023     return(ret);
1024 }
1025 
1026 /**
1027  * xmlSaveUriRealloc:
1028  *
1029  * Function to handle properly a reallocation when saving an URI
1030  * Also imposes some limit on the length of an URI string output
1031  */
1032 static xmlChar *
1033 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1034     xmlChar *temp;
1035     int tmp;
1036 
1037     if (*max > MAX_URI_LENGTH) {
1038         xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1039         return(NULL);
1040     }
1041     tmp = *max * 2;
1042     temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1043     if (temp == NULL) {
1044         xmlURIErrMemory("saving URI\n");
1045         return(NULL);
1046     }
1047     *max = tmp;
1048     return(temp);
1049 }
1050 
1051 /**
1052  * xmlSaveUri:
1053  * @uri:  pointer to an xmlURI
1054  *
1055  * Save the URI as an escaped string
1056  *
1057  * Returns a new string (to be deallocated by caller)
1058  */
1059 xmlChar *
1060 xmlSaveUri(xmlURIPtr uri) {
1061     xmlChar *ret = NULL;
1062     xmlChar *temp;
1063     const char *p;
1064     int len;
1065     int max;
1066 
1067     if (uri == NULL) return(NULL);
1068 
1069 
1070     max = 80;
1071     ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1072     if (ret == NULL) {
1073         xmlURIErrMemory("saving URI\n");
1074 	return(NULL);
1075     }
1076     len = 0;
1077 
1078     if (uri->scheme != NULL) {
1079 	p = uri->scheme;
1080 	while (*p != 0) {
1081 	    if (len >= max) {
1082                 temp = xmlSaveUriRealloc(ret, &max);
1083                 if (temp == NULL) goto mem_error;
1084 		ret = temp;
1085 	    }
1086 	    ret[len++] = *p++;
1087 	}
1088 	if (len >= max) {
1089             temp = xmlSaveUriRealloc(ret, &max);
1090             if (temp == NULL) goto mem_error;
1091             ret = temp;
1092 	}
1093 	ret[len++] = ':';
1094     }
1095     if (uri->opaque != NULL) {
1096 	p = uri->opaque;
1097 	while (*p != 0) {
1098 	    if (len + 3 >= max) {
1099                 temp = xmlSaveUriRealloc(ret, &max);
1100                 if (temp == NULL) goto mem_error;
1101                 ret = temp;
1102 	    }
1103 	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1104 		ret[len++] = *p++;
1105 	    else {
1106 		int val = *(unsigned char *)p++;
1107 		int hi = val / 0x10, lo = val % 0x10;
1108 		ret[len++] = '%';
1109 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1110 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1111 	    }
1112 	}
1113     } else {
1114 	if ((uri->server != NULL) || (uri->port == -1)) {
1115 	    if (len + 3 >= max) {
1116                 temp = xmlSaveUriRealloc(ret, &max);
1117                 if (temp == NULL) goto mem_error;
1118                 ret = temp;
1119 	    }
1120 	    ret[len++] = '/';
1121 	    ret[len++] = '/';
1122 	    if (uri->user != NULL) {
1123 		p = uri->user;
1124 		while (*p != 0) {
1125 		    if (len + 3 >= max) {
1126                         temp = xmlSaveUriRealloc(ret, &max);
1127                         if (temp == NULL) goto mem_error;
1128                         ret = temp;
1129 		    }
1130 		    if ((IS_UNRESERVED(*(p))) ||
1131 			((*(p) == ';')) || ((*(p) == ':')) ||
1132 			((*(p) == '&')) || ((*(p) == '=')) ||
1133 			((*(p) == '+')) || ((*(p) == '$')) ||
1134 			((*(p) == ',')))
1135 			ret[len++] = *p++;
1136 		    else {
1137 			int val = *(unsigned char *)p++;
1138 			int hi = val / 0x10, lo = val % 0x10;
1139 			ret[len++] = '%';
1140 			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1141 			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1142 		    }
1143 		}
1144 		if (len + 3 >= max) {
1145                     temp = xmlSaveUriRealloc(ret, &max);
1146                     if (temp == NULL) goto mem_error;
1147                     ret = temp;
1148 		}
1149 		ret[len++] = '@';
1150 	    }
1151 	    if (uri->server != NULL) {
1152 		p = uri->server;
1153 		while (*p != 0) {
1154 		    if (len >= max) {
1155 			temp = xmlSaveUriRealloc(ret, &max);
1156 			if (temp == NULL) goto mem_error;
1157 			ret = temp;
1158 		    }
1159 		    ret[len++] = *p++;
1160 		}
1161 		if (uri->port > 0) {
1162 		    if (len + 10 >= max) {
1163 			temp = xmlSaveUriRealloc(ret, &max);
1164 			if (temp == NULL) goto mem_error;
1165 			ret = temp;
1166 		    }
1167 		    len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1168 		}
1169 	    }
1170 	} else if (uri->authority != NULL) {
1171 	    if (len + 3 >= max) {
1172                 temp = xmlSaveUriRealloc(ret, &max);
1173                 if (temp == NULL) goto mem_error;
1174                 ret = temp;
1175 	    }
1176 	    ret[len++] = '/';
1177 	    ret[len++] = '/';
1178 	    p = uri->authority;
1179 	    while (*p != 0) {
1180 		if (len + 3 >= max) {
1181                     temp = xmlSaveUriRealloc(ret, &max);
1182                     if (temp == NULL) goto mem_error;
1183                     ret = temp;
1184 		}
1185 		if ((IS_UNRESERVED(*(p))) ||
1186                     ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1187                     ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1188                     ((*(p) == '=')) || ((*(p) == '+')))
1189 		    ret[len++] = *p++;
1190 		else {
1191 		    int val = *(unsigned char *)p++;
1192 		    int hi = val / 0x10, lo = val % 0x10;
1193 		    ret[len++] = '%';
1194 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1195 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1196 		}
1197 	    }
1198 	} else if (uri->scheme != NULL) {
1199 	    if (len + 3 >= max) {
1200                 temp = xmlSaveUriRealloc(ret, &max);
1201                 if (temp == NULL) goto mem_error;
1202                 ret = temp;
1203 	    }
1204 	}
1205 	if (uri->path != NULL) {
1206 	    p = uri->path;
1207 	    /*
1208 	     * the colon in file:///d: should not be escaped or
1209 	     * Windows accesses fail later.
1210 	     */
1211 	    if ((uri->scheme != NULL) &&
1212 		(p[0] == '/') &&
1213 		(((p[1] >= 'a') && (p[1] <= 'z')) ||
1214 		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1215 		(p[2] == ':') &&
1216 	        (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1217 		if (len + 3 >= max) {
1218                     temp = xmlSaveUriRealloc(ret, &max);
1219                     if (temp == NULL) goto mem_error;
1220                     ret = temp;
1221 		}
1222 		ret[len++] = *p++;
1223 		ret[len++] = *p++;
1224 		ret[len++] = *p++;
1225 	    }
1226 	    while (*p != 0) {
1227 		if (len + 3 >= max) {
1228                     temp = xmlSaveUriRealloc(ret, &max);
1229                     if (temp == NULL) goto mem_error;
1230                     ret = temp;
1231 		}
1232 		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1233                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1234 	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1235 	            ((*(p) == ',')))
1236 		    ret[len++] = *p++;
1237 		else {
1238 		    int val = *(unsigned char *)p++;
1239 		    int hi = val / 0x10, lo = val % 0x10;
1240 		    ret[len++] = '%';
1241 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1242 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1243 		}
1244 	    }
1245 	}
1246 	if (uri->query_raw != NULL) {
1247 	    if (len + 1 >= max) {
1248                 temp = xmlSaveUriRealloc(ret, &max);
1249                 if (temp == NULL) goto mem_error;
1250                 ret = temp;
1251 	    }
1252 	    ret[len++] = '?';
1253 	    p = uri->query_raw;
1254 	    while (*p != 0) {
1255 		if (len + 1 >= max) {
1256                     temp = xmlSaveUriRealloc(ret, &max);
1257                     if (temp == NULL) goto mem_error;
1258                     ret = temp;
1259 		}
1260 		ret[len++] = *p++;
1261 	    }
1262 	} else if (uri->query != NULL) {
1263 	    if (len + 3 >= max) {
1264                 temp = xmlSaveUriRealloc(ret, &max);
1265                 if (temp == NULL) goto mem_error;
1266                 ret = temp;
1267 	    }
1268 	    ret[len++] = '?';
1269 	    p = uri->query;
1270 	    while (*p != 0) {
1271 		if (len + 3 >= max) {
1272                     temp = xmlSaveUriRealloc(ret, &max);
1273                     if (temp == NULL) goto mem_error;
1274                     ret = temp;
1275 		}
1276 		if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1277 		    ret[len++] = *p++;
1278 		else {
1279 		    int val = *(unsigned char *)p++;
1280 		    int hi = val / 0x10, lo = val % 0x10;
1281 		    ret[len++] = '%';
1282 		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1283 		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1284 		}
1285 	    }
1286 	}
1287     }
1288     if (uri->fragment != NULL) {
1289 	if (len + 3 >= max) {
1290             temp = xmlSaveUriRealloc(ret, &max);
1291             if (temp == NULL) goto mem_error;
1292             ret = temp;
1293 	}
1294 	ret[len++] = '#';
1295 	p = uri->fragment;
1296 	while (*p != 0) {
1297 	    if (len + 3 >= max) {
1298                 temp = xmlSaveUriRealloc(ret, &max);
1299                 if (temp == NULL) goto mem_error;
1300                 ret = temp;
1301 	    }
1302 	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1303 		ret[len++] = *p++;
1304 	    else {
1305 		int val = *(unsigned char *)p++;
1306 		int hi = val / 0x10, lo = val % 0x10;
1307 		ret[len++] = '%';
1308 		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1309 		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1310 	    }
1311 	}
1312     }
1313     if (len >= max) {
1314         temp = xmlSaveUriRealloc(ret, &max);
1315         if (temp == NULL) goto mem_error;
1316         ret = temp;
1317     }
1318     ret[len] = 0;
1319     return(ret);
1320 
1321 mem_error:
1322     xmlFree(ret);
1323     return(NULL);
1324 }
1325 
1326 /**
1327  * xmlPrintURI:
1328  * @stream:  a FILE* for the output
1329  * @uri:  pointer to an xmlURI
1330  *
1331  * Prints the URI in the stream @stream.
1332  */
1333 void
1334 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1335     xmlChar *out;
1336 
1337     out = xmlSaveUri(uri);
1338     if (out != NULL) {
1339 	fprintf(stream, "%s", (char *) out);
1340 	xmlFree(out);
1341     }
1342 }
1343 
1344 /**
1345  * xmlCleanURI:
1346  * @uri:  pointer to an xmlURI
1347  *
1348  * Make sure the xmlURI struct is free of content
1349  */
1350 static void
1351 xmlCleanURI(xmlURIPtr uri) {
1352     if (uri == NULL) return;
1353 
1354     if (uri->scheme != NULL) xmlFree(uri->scheme);
1355     uri->scheme = NULL;
1356     if (uri->server != NULL) xmlFree(uri->server);
1357     uri->server = NULL;
1358     if (uri->user != NULL) xmlFree(uri->user);
1359     uri->user = NULL;
1360     if (uri->path != NULL) xmlFree(uri->path);
1361     uri->path = NULL;
1362     if (uri->fragment != NULL) xmlFree(uri->fragment);
1363     uri->fragment = NULL;
1364     if (uri->opaque != NULL) xmlFree(uri->opaque);
1365     uri->opaque = NULL;
1366     if (uri->authority != NULL) xmlFree(uri->authority);
1367     uri->authority = NULL;
1368     if (uri->query != NULL) xmlFree(uri->query);
1369     uri->query = NULL;
1370     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1371     uri->query_raw = NULL;
1372 }
1373 
1374 /**
1375  * xmlFreeURI:
1376  * @uri:  pointer to an xmlURI
1377  *
1378  * Free up the xmlURI struct
1379  */
1380 void
1381 xmlFreeURI(xmlURIPtr uri) {
1382     if (uri == NULL) return;
1383 
1384     if (uri->scheme != NULL) xmlFree(uri->scheme);
1385     if (uri->server != NULL) xmlFree(uri->server);
1386     if (uri->user != NULL) xmlFree(uri->user);
1387     if (uri->path != NULL) xmlFree(uri->path);
1388     if (uri->fragment != NULL) xmlFree(uri->fragment);
1389     if (uri->opaque != NULL) xmlFree(uri->opaque);
1390     if (uri->authority != NULL) xmlFree(uri->authority);
1391     if (uri->query != NULL) xmlFree(uri->query);
1392     if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1393     xmlFree(uri);
1394 }
1395 
1396 /************************************************************************
1397  *									*
1398  *			Helper functions				*
1399  *									*
1400  ************************************************************************/
1401 
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * Runs of "/" inside the path are collapsed to a single "/"; leading
 * slashes are kept as they are.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining separator */
        (out++)[0] = (cur++)[0];
    }
done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The regions overlap, so copy by hand instead of using strcpy. */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If there are no previous segments, then keep going from here.
         * That is the case when "cur" is the very start of the buffer, or
         * when only slashes precede it.  A previous segment whose last
         * character sits at path[0] (e.g. the "a" of "a/b/../..") is a
         * real segment and must be revisited, so the test looks at the
         * character found rather than at the pointer value.
         */
        if (cur == path)
            continue;
        segp = cur - 1;
        while ((segp > path) && (segp[0] == '/'))
            --segp;
        if (segp[0] == '/')
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1590 
/* Returns 1 when @c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0. */
static int is_hex(char c) {
    int is_digit = (c >= '0') && (c <= '9');
    int is_lower = (c >= 'a') && (c <= 'f');
    int is_upper = (c >= 'A') && (c <= 'F');

    return(is_digit || is_lower || is_upper);
}
1598 
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" sequence made of two hexadecimal digits is replaced by the single
 * byte it encodes (no character encoding is applied); every other byte is
 * copied unchanged. The result is therefore never longer than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated for the
 * result, otherwise the result is written to @target.
 *
 * Returns the unescaped, zero terminated string (@target when it was
 * provided), or NULL in case of error
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *result, *dst;

    if (str == NULL)
        return(NULL);
    if (len <= 0)
        len = strlen(str);
    if (len < 0)
        return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (len > 0) {
        if ((len > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* Fold the two hex digits following '%' into one byte value. */
            for (i = 1; i <= 2; i++) {
                char c = src[i];

                value *= 16;
                if ((c >= '0') && (c <= '9'))
                    value += (c - '0');
                else if ((c >= 'a') && (c <= 'f'))
                    value += (c - 'a') + 10;
                else if ((c >= 'A') && (c <= 'F'))
                    value += (c - 'A') + 10;
            }
            *dst++ = value;
            src += 3;
            len -= 3;
        } else {
            *dst++ = *src++;
            len--;
        }
    }
    *dst = 0;
    return(result);
}
1660 
1661 /**
1662  * xmlURIEscapeStr:
1663  * @str:  string to escape
1664  * @list: exception list string of chars not to escape
1665  *
1666  * This routine escapes a string to hex, ignoring reserved characters (a-z)
1667  * and the characters in the exception list.
1668  *
1669  * Returns a new escaped string or NULL in case of error.
1670  */
1671 xmlChar *
1672 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1673     xmlChar *ret, ch;
1674     xmlChar *temp;
1675     const xmlChar *in;
1676     int len, out;
1677 
1678     if (str == NULL)
1679 	return(NULL);
1680     if (str[0] == 0)
1681 	return(xmlStrdup(str));
1682     len = xmlStrlen(str);
1683     if (!(len > 0)) return(NULL);
1684 
1685     len += 20;
1686     ret = (xmlChar *) xmlMallocAtomic(len);
1687     if (ret == NULL) {
1688         xmlURIErrMemory("escaping URI value\n");
1689 	return(NULL);
1690     }
1691     in = (const xmlChar *) str;
1692     out = 0;
1693     while(*in != 0) {
1694 	if (len - out <= 3) {
1695             temp = xmlSaveUriRealloc(ret, &len);
1696 	    if (temp == NULL) {
1697                 xmlURIErrMemory("escaping URI value\n");
1698 		xmlFree(ret);
1699 		return(NULL);
1700 	    }
1701 	    ret = temp;
1702 	}
1703 
1704 	ch = *in;
1705 
1706 	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1707 	    unsigned char val;
1708 	    ret[out++] = '%';
1709 	    val = ch >> 4;
1710 	    if (val <= 9)
1711 		ret[out++] = '0' + val;
1712 	    else
1713 		ret[out++] = 'A' + val - 0xA;
1714 	    val = ch & 0xF;
1715 	    if (val <= 9)
1716 		ret[out++] = '0' + val;
1717 	    else
1718 		ret[out++] = 'A' + val - 0xA;
1719 	    in++;
1720 	} else {
1721 	    ret[out++] = *in++;
1722 	}
1723 
1724     }
1725     ret[out] = 0;
1726     return(ret);
1727 }
1728 
1729 /**
1730  * xmlURIEscape:
1731  * @str:  the string of the URI to escape
1732  *
1733  * Escaping routine, does not do validity checks !
1734  * It will try to escape the chars needing this, but this is heuristic
1735  * based it's impossible to be sure.
1736  *
1737  * Returns an copy of the string, but escaped
1738  *
1739  * 25 May 2001
1740  * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1741  * according to RFC2396.
1742  *   - Carl Douglas
1743  */
1744 xmlChar *
1745 xmlURIEscape(const xmlChar * str)
1746 {
1747     xmlChar *ret, *segment = NULL;
1748     xmlURIPtr uri;
1749     int ret2;
1750 
1751 #define NULLCHK(p) if(!p) { \
1752          xmlURIErrMemory("escaping URI value\n"); \
1753          xmlFreeURI(uri); \
1754          return NULL; } \
1755 
1756     if (str == NULL)
1757         return (NULL);
1758 
1759     uri = xmlCreateURI();
1760     if (uri != NULL) {
1761 	/*
1762 	 * Allow escaping errors in the unescaped form
1763 	 */
1764         uri->cleanup = 1;
1765         ret2 = xmlParseURIReference(uri, (const char *)str);
1766         if (ret2) {
1767             xmlFreeURI(uri);
1768             return (NULL);
1769         }
1770     }
1771 
1772     if (!uri)
1773         return NULL;
1774 
1775     ret = NULL;
1776 
1777     if (uri->scheme) {
1778         segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1779         NULLCHK(segment)
1780         ret = xmlStrcat(ret, segment);
1781         ret = xmlStrcat(ret, BAD_CAST ":");
1782         xmlFree(segment);
1783     }
1784 
1785     if (uri->authority) {
1786         segment =
1787             xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1788         NULLCHK(segment)
1789         ret = xmlStrcat(ret, BAD_CAST "//");
1790         ret = xmlStrcat(ret, segment);
1791         xmlFree(segment);
1792     }
1793 
1794     if (uri->user) {
1795         segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1796         NULLCHK(segment)
1797 		ret = xmlStrcat(ret,BAD_CAST "//");
1798         ret = xmlStrcat(ret, segment);
1799         ret = xmlStrcat(ret, BAD_CAST "@");
1800         xmlFree(segment);
1801     }
1802 
1803     if (uri->server) {
1804         segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1805         NULLCHK(segment)
1806 		if (uri->user == NULL)
1807 		ret = xmlStrcat(ret, BAD_CAST "//");
1808         ret = xmlStrcat(ret, segment);
1809         xmlFree(segment);
1810     }
1811 
1812     if (uri->port) {
1813         xmlChar port[10];
1814 
1815         snprintf((char *) port, 10, "%d", uri->port);
1816         ret = xmlStrcat(ret, BAD_CAST ":");
1817         ret = xmlStrcat(ret, port);
1818     }
1819 
1820     if (uri->path) {
1821         segment =
1822             xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1823         NULLCHK(segment)
1824         ret = xmlStrcat(ret, segment);
1825         xmlFree(segment);
1826     }
1827 
1828     if (uri->query_raw) {
1829         ret = xmlStrcat(ret, BAD_CAST "?");
1830         ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1831     }
1832     else if (uri->query) {
1833         segment =
1834             xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1835         NULLCHK(segment)
1836         ret = xmlStrcat(ret, BAD_CAST "?");
1837         ret = xmlStrcat(ret, segment);
1838         xmlFree(segment);
1839     }
1840 
1841     if (uri->opaque) {
1842         segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1843         NULLCHK(segment)
1844         ret = xmlStrcat(ret, segment);
1845         xmlFree(segment);
1846     }
1847 
1848     if (uri->fragment) {
1849         segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1850         NULLCHK(segment)
1851         ret = xmlStrcat(ret, BAD_CAST "#");
1852         ret = xmlStrcat(ret, segment);
1853         xmlFree(segment);
1854     }
1855 
1856     xmlFreeURI(uri);
1857 #undef NULLCHK
1858 
1859     return (ret);
1860 }
1861 
1862 /************************************************************************
1863  *									*
1864  *			Public functions				*
1865  *									*
1866  ************************************************************************/
1867 
1868 /**
1869  * xmlBuildURI:
1870  * @URI:  the URI instance found in the document
1871  * @base:  the base value
1872  *
1873  * Computes he final URI of the reference done by checking that
1874  * the given URI is valid, and building the final URI using the
1875  * base URI. This is processed according to section 5.2 of the
1876  * RFC 2396
1877  *
1878  * 5.2. Resolving Relative References to Absolute Form
1879  *
1880  * Returns a new URI string (to be freed by the caller) or NULL in case
1881  *         of error.
1882  */
1883 xmlChar *
1884 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1885     xmlChar *val = NULL;
1886     int ret, len, indx, cur, out;
1887     xmlURIPtr ref = NULL;
1888     xmlURIPtr bas = NULL;
1889     xmlURIPtr res = NULL;
1890 
1891     /*
1892      * 1) The URI reference is parsed into the potential four components and
1893      *    fragment identifier, as described in Section 4.3.
1894      *
1895      *    NOTE that a completely empty URI is treated by modern browsers
1896      *    as a reference to "." rather than as a synonym for the current
1897      *    URI.  Should we do that here?
1898      */
1899     if (URI == NULL)
1900 	ret = -1;
1901     else {
1902 	if (*URI) {
1903 	    ref = xmlCreateURI();
1904 	    if (ref == NULL)
1905 		goto done;
1906 	    ret = xmlParseURIReference(ref, (const char *) URI);
1907 	}
1908 	else
1909 	    ret = 0;
1910     }
1911     if (ret != 0)
1912 	goto done;
1913     if ((ref != NULL) && (ref->scheme != NULL)) {
1914 	/*
1915 	 * The URI is absolute don't modify.
1916 	 */
1917 	val = xmlStrdup(URI);
1918 	goto done;
1919     }
1920     if (base == NULL)
1921 	ret = -1;
1922     else {
1923 	bas = xmlCreateURI();
1924 	if (bas == NULL)
1925 	    goto done;
1926 	ret = xmlParseURIReference(bas, (const char *) base);
1927     }
1928     if (ret != 0) {
1929 	if (ref)
1930 	    val = xmlSaveUri(ref);
1931 	goto done;
1932     }
1933     if (ref == NULL) {
1934 	/*
1935 	 * the base fragment must be ignored
1936 	 */
1937 	if (bas->fragment != NULL) {
1938 	    xmlFree(bas->fragment);
1939 	    bas->fragment = NULL;
1940 	}
1941 	val = xmlSaveUri(bas);
1942 	goto done;
1943     }
1944 
1945     /*
1946      * 2) If the path component is empty and the scheme, authority, and
1947      *    query components are undefined, then it is a reference to the
1948      *    current document and we are done.  Otherwise, the reference URI's
1949      *    query and fragment components are defined as found (or not found)
1950      *    within the URI reference and not inherited from the base URI.
1951      *
1952      *    NOTE that in modern browsers, the parsing differs from the above
1953      *    in the following aspect:  the query component is allowed to be
1954      *    defined while still treating this as a reference to the current
1955      *    document.
1956      */
1957     res = xmlCreateURI();
1958     if (res == NULL)
1959 	goto done;
1960     if ((ref->scheme == NULL) && (ref->path == NULL) &&
1961 	((ref->authority == NULL) && (ref->server == NULL))) {
1962 	if (bas->scheme != NULL)
1963 	    res->scheme = xmlMemStrdup(bas->scheme);
1964 	if (bas->authority != NULL)
1965 	    res->authority = xmlMemStrdup(bas->authority);
1966 	else if ((bas->server != NULL) || (bas->port == -1)) {
1967 	    if (bas->server != NULL)
1968 		res->server = xmlMemStrdup(bas->server);
1969 	    if (bas->user != NULL)
1970 		res->user = xmlMemStrdup(bas->user);
1971 	    res->port = bas->port;
1972 	}
1973 	if (bas->path != NULL)
1974 	    res->path = xmlMemStrdup(bas->path);
1975 	if (ref->query_raw != NULL)
1976 	    res->query_raw = xmlMemStrdup (ref->query_raw);
1977 	else if (ref->query != NULL)
1978 	    res->query = xmlMemStrdup(ref->query);
1979 	else if (bas->query_raw != NULL)
1980 	    res->query_raw = xmlMemStrdup(bas->query_raw);
1981 	else if (bas->query != NULL)
1982 	    res->query = xmlMemStrdup(bas->query);
1983 	if (ref->fragment != NULL)
1984 	    res->fragment = xmlMemStrdup(ref->fragment);
1985 	goto step_7;
1986     }
1987 
1988     /*
1989      * 3) If the scheme component is defined, indicating that the reference
1990      *    starts with a scheme name, then the reference is interpreted as an
1991      *    absolute URI and we are done.  Otherwise, the reference URI's
1992      *    scheme is inherited from the base URI's scheme component.
1993      */
1994     if (ref->scheme != NULL) {
1995 	val = xmlSaveUri(ref);
1996 	goto done;
1997     }
1998     if (bas->scheme != NULL)
1999 	res->scheme = xmlMemStrdup(bas->scheme);
2000 
2001     if (ref->query_raw != NULL)
2002 	res->query_raw = xmlMemStrdup(ref->query_raw);
2003     else if (ref->query != NULL)
2004 	res->query = xmlMemStrdup(ref->query);
2005     if (ref->fragment != NULL)
2006 	res->fragment = xmlMemStrdup(ref->fragment);
2007 
2008     /*
2009      * 4) If the authority component is defined, then the reference is a
2010      *    network-path and we skip to step 7.  Otherwise, the reference
2011      *    URI's authority is inherited from the base URI's authority
2012      *    component, which will also be undefined if the URI scheme does not
2013      *    use an authority component.
2014      */
2015     if ((ref->authority != NULL) || (ref->server != NULL)) {
2016 	if (ref->authority != NULL)
2017 	    res->authority = xmlMemStrdup(ref->authority);
2018 	else {
2019 	    res->server = xmlMemStrdup(ref->server);
2020 	    if (ref->user != NULL)
2021 		res->user = xmlMemStrdup(ref->user);
2022             res->port = ref->port;
2023 	}
2024 	if (ref->path != NULL)
2025 	    res->path = xmlMemStrdup(ref->path);
2026 	goto step_7;
2027     }
2028     if (bas->authority != NULL)
2029 	res->authority = xmlMemStrdup(bas->authority);
2030     else if ((bas->server != NULL) || (bas->port == -1)) {
2031 	if (bas->server != NULL)
2032 	    res->server = xmlMemStrdup(bas->server);
2033 	if (bas->user != NULL)
2034 	    res->user = xmlMemStrdup(bas->user);
2035 	res->port = bas->port;
2036     }
2037 
2038     /*
2039      * 5) If the path component begins with a slash character ("/"), then
2040      *    the reference is an absolute-path and we skip to step 7.
2041      */
2042     if ((ref->path != NULL) && (ref->path[0] == '/')) {
2043 	res->path = xmlMemStrdup(ref->path);
2044 	goto step_7;
2045     }
2046 
2047 
2048     /*
2049      * 6) If this step is reached, then we are resolving a relative-path
2050      *    reference.  The relative path needs to be merged with the base
2051      *    URI's path.  Although there are many ways to do this, we will
2052      *    describe a simple method using a separate string buffer.
2053      *
2054      * Allocate a buffer large enough for the result string.
2055      */
2056     len = 2; /* extra / and 0 */
2057     if (ref->path != NULL)
2058 	len += strlen(ref->path);
2059     if (bas->path != NULL)
2060 	len += strlen(bas->path);
2061     res->path = (char *) xmlMallocAtomic(len);
2062     if (res->path == NULL) {
2063         xmlURIErrMemory("resolving URI against base\n");
2064 	goto done;
2065     }
2066     res->path[0] = 0;
2067 
2068     /*
2069      * a) All but the last segment of the base URI's path component is
2070      *    copied to the buffer.  In other words, any characters after the
2071      *    last (right-most) slash character, if any, are excluded.
2072      */
2073     cur = 0;
2074     out = 0;
2075     if (bas->path != NULL) {
2076 	while (bas->path[cur] != 0) {
2077 	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2078 		cur++;
2079 	    if (bas->path[cur] == 0)
2080 		break;
2081 
2082 	    cur++;
2083 	    while (out < cur) {
2084 		res->path[out] = bas->path[out];
2085 		out++;
2086 	    }
2087 	}
2088     }
2089     res->path[out] = 0;
2090 
2091     /*
2092      * b) The reference's path component is appended to the buffer
2093      *    string.
2094      */
2095     if (ref->path != NULL && ref->path[0] != 0) {
2096 	indx = 0;
2097 	/*
2098 	 * Ensure the path includes a '/'
2099 	 */
2100 	if ((out == 0) && (bas->server != NULL))
2101 	    res->path[out++] = '/';
2102 	while (ref->path[indx] != 0) {
2103 	    res->path[out++] = ref->path[indx++];
2104 	}
2105     }
2106     res->path[out] = 0;
2107 
2108     /*
2109      * Steps c) to h) are really path normalization steps
2110      */
2111     xmlNormalizeURIPath(res->path);
2112 
2113 step_7:
2114 
2115     /*
2116      * 7) The resulting URI components, including any inherited from the
2117      *    base URI, are recombined to give the absolute form of the URI
2118      *    reference.
2119      */
2120     val = xmlSaveUri(res);
2121 
2122 done:
2123     if (ref != NULL)
2124 	xmlFreeURI(ref);
2125     if (bas != NULL)
2126 	xmlFreeURI(bas);
2127     if (res != NULL)
2128 	xmlFreeURI(res);
2129     return(val);
2130 }
2131 
2132 /**
2133  * xmlBuildRelativeURI:
2134  * @URI:  the URI reference under consideration
2135  * @base:  the base value
2136  *
2137  * Expresses the URI of the reference in terms relative to the
2138  * base.  Some examples of this operation include:
2139  *     base = "http://site1.com/docs/book1.html"
2140  *        URI input                        URI returned
2141  *     docs/pic1.gif                    pic1.gif
2142  *     docs/img/pic1.gif                img/pic1.gif
2143  *     img/pic1.gif                     ../img/pic1.gif
2144  *     http://site1.com/docs/pic1.gif   pic1.gif
2145  *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
2146  *
2147  *     base = "docs/book1.html"
2148  *        URI input                        URI returned
2149  *     docs/pic1.gif                    pic1.gif
2150  *     docs/img/pic1.gif                img/pic1.gif
2151  *     img/pic1.gif                     ../img/pic1.gif
2152  *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
2153  *
2154  *
2155  * Note: if the URI reference is really weird or complicated, it may be
2156  *       worthwhile to first convert it into a "nice" one by calling
2157  *       xmlBuildURI (using 'base') before calling this routine,
2158  *       since this routine (for reasonable efficiency) assumes URI has
2159  *       already been through some validation.
2160  *
2161  * Returns a new URI string (to be freed by the caller) or NULL in case
2162  * error.
2163  */
2164 xmlChar *
2165 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2166 {
2167     xmlChar *val = NULL;
2168     int ret;
2169     int ix;
2170     int nbslash = 0;
2171     int len;
2172     xmlURIPtr ref = NULL;
2173     xmlURIPtr bas = NULL;
2174     xmlChar *bptr, *uptr, *vptr;
2175     int remove_path = 0;
2176 
2177     if ((URI == NULL) || (*URI == 0))
2178 	return NULL;
2179 
2180     /*
2181      * First parse URI into a standard form
2182      */
2183     ref = xmlCreateURI ();
2184     if (ref == NULL)
2185 	return NULL;
2186     /* If URI not already in "relative" form */
2187     if (URI[0] != '.') {
2188 	ret = xmlParseURIReference (ref, (const char *) URI);
2189 	if (ret != 0)
2190 	    goto done;		/* Error in URI, return NULL */
2191     } else
2192 	ref->path = (char *)xmlStrdup(URI);
2193 
2194     /*
2195      * Next parse base into the same standard form
2196      */
2197     if ((base == NULL) || (*base == 0)) {
2198 	val = xmlStrdup (URI);
2199 	goto done;
2200     }
2201     bas = xmlCreateURI ();
2202     if (bas == NULL)
2203 	goto done;
2204     if (base[0] != '.') {
2205 	ret = xmlParseURIReference (bas, (const char *) base);
2206 	if (ret != 0)
2207 	    goto done;		/* Error in base, return NULL */
2208     } else
2209 	bas->path = (char *)xmlStrdup(base);
2210 
2211     /*
2212      * If the scheme / server on the URI differs from the base,
2213      * just return the URI
2214      */
2215     if ((ref->scheme != NULL) &&
2216 	((bas->scheme == NULL) ||
2217 	 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2218 	 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2219 	val = xmlStrdup (URI);
2220 	goto done;
2221     }
2222     if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2223 	val = xmlStrdup(BAD_CAST "");
2224 	goto done;
2225     }
2226     if (bas->path == NULL) {
2227 	val = xmlStrdup((xmlChar *)ref->path);
2228 	goto done;
2229     }
2230     if (ref->path == NULL) {
2231         ref->path = (char *) "/";
2232 	remove_path = 1;
2233     }
2234 
2235     /*
2236      * At this point (at last!) we can compare the two paths
2237      *
2238      * First we take care of the special case where either of the
2239      * two path components may be missing (bug 316224)
2240      */
2241     bptr = (xmlChar *)bas->path;
2242     {
2243         xmlChar *rptr = (xmlChar *) ref->path;
2244         int pos = 0;
2245 
2246         /*
2247          * Next we compare the two strings and find where they first differ
2248          */
2249 	if ((*rptr == '.') && (rptr[1] == '/'))
2250             rptr += 2;
2251 	if ((*bptr == '.') && (bptr[1] == '/'))
2252             bptr += 2;
2253 	else if ((*bptr == '/') && (*rptr != '/'))
2254 	    bptr++;
2255 	while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2256 	    pos++;
2257 
2258 	if (bptr[pos] == rptr[pos]) {
2259 	    val = xmlStrdup(BAD_CAST "");
2260 	    goto done;		/* (I can't imagine why anyone would do this) */
2261 	}
2262 
2263 	/*
2264 	 * In URI, "back up" to the last '/' encountered.  This will be the
2265 	 * beginning of the "unique" suffix of URI
2266 	 */
2267 	ix = pos;
2268 	for (; ix > 0; ix--) {
2269 	    if (rptr[ix - 1] == '/')
2270 		break;
2271 	}
2272 	uptr = (xmlChar *)&rptr[ix];
2273 
2274 	/*
2275 	 * In base, count the number of '/' from the differing point
2276 	 */
2277 	for (; bptr[ix] != 0; ix++) {
2278 	    if (bptr[ix] == '/')
2279 		nbslash++;
2280 	}
2281 
2282 	/*
2283 	 * e.g: URI="foo/" base="foo/bar" -> "./"
2284 	 */
2285 	if (nbslash == 0 && !uptr[0]) {
2286 	    val = xmlStrdup(BAD_CAST "./");
2287 	    goto done;
2288 	}
2289 
2290 	len = xmlStrlen (uptr) + 1;
2291     }
2292 
2293     if (nbslash == 0) {
2294 	if (uptr != NULL)
2295 	    /* exception characters from xmlSaveUri */
2296 	    val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2297 	goto done;
2298     }
2299 
2300     /*
2301      * Allocate just enough space for the returned string -
2302      * length of the remainder of the URI, plus enough space
2303      * for the "../" groups, plus one for the terminator
2304      */
2305     val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2306     if (val == NULL) {
2307         xmlURIErrMemory("building relative URI\n");
2308 	goto done;
2309     }
2310     vptr = val;
2311     /*
2312      * Put in as many "../" as needed
2313      */
2314     for (; nbslash>0; nbslash--) {
2315 	*vptr++ = '.';
2316 	*vptr++ = '.';
2317 	*vptr++ = '/';
2318     }
2319     /*
2320      * Finish up with the end of the URI
2321      */
2322     if (uptr != NULL) {
2323         if ((vptr > val) && (len > 0) &&
2324 	    (uptr[0] == '/') && (vptr[-1] == '/')) {
2325 	    memcpy (vptr, uptr + 1, len - 1);
2326 	    vptr[len - 2] = 0;
2327 	} else {
2328 	    memcpy (vptr, uptr, len);
2329 	    vptr[len - 1] = 0;
2330 	}
2331     } else {
2332 	vptr[len - 1] = 0;
2333     }
2334 
2335     /* escape the freshly-built path */
2336     vptr = val;
2337 	/* exception characters from xmlSaveUri */
2338     val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2339     xmlFree(vptr);
2340 
2341 done:
2342     /*
2343      * Free the working variables
2344      */
2345     if (remove_path != 0)
2346         ref->path = NULL;
2347     if (ref != NULL)
2348 	xmlFreeURI (ref);
2349     if (bas != NULL)
2350 	xmlFreeURI (bas);
2351 
2352     return val;
2353 }
2354 
2355 /**
2356  * xmlCanonicPath:
2357  * @path:  the resource locator in a filesystem notation
2358  *
2359  * Constructs a canonic path from the specified path.
2360  *
2361  * Returns a new canonic path, or a duplicate of the path parameter if the
2362  * construction fails. The caller is responsible for freeing the memory occupied
2363  * by the returned string. If there is insufficient memory available, or the
2364  * argument is NULL, the function returns NULL.
2365  */
2366 #define IS_WINDOWS_PATH(p)					\
2367 	((p != NULL) &&						\
2368 	 (((p[0] >= 'a') && (p[0] <= 'z')) ||			\
2369 	  ((p[0] >= 'A') && (p[0] <= 'Z'))) &&			\
2370 	 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2371 xmlChar *
2372 xmlCanonicPath(const xmlChar *path)
2373 {
2374 /*
2375  * For Windows implementations, additional work needs to be done to
2376  * replace backslashes in pathnames with "forward slashes"
2377  */
2378 #if defined(_WIN32) && !defined(__CYGWIN__)
2379     int len = 0;
2380     char *p = NULL;
2381 #endif
2382     xmlURIPtr uri;
2383     xmlChar *ret;
2384     const xmlChar *absuri;
2385 
2386     if (path == NULL)
2387 	return(NULL);
2388 
2389 #if defined(_WIN32)
2390     /*
2391      * We must not change the backslashes to slashes if the the path
2392      * starts with \\?\
2393      * Those paths can be up to 32k characters long.
2394      * Was added specifically for OpenOffice, those paths can't be converted
2395      * to URIs anyway.
2396      */
2397     if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2398         (path[3] == '\\') )
2399 	return xmlStrdup((const xmlChar *) path);
2400 #endif
2401 
2402 	/* sanitize filename starting with // so it can be used as URI */
2403     if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2404         path++;
2405 
2406     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2407 	xmlFreeURI(uri);
2408 	return xmlStrdup(path);
2409     }
2410 
2411     /* Check if this is an "absolute uri" */
2412     absuri = xmlStrstr(path, BAD_CAST "://");
2413     if (absuri != NULL) {
2414         int l, j;
2415 	unsigned char c;
2416 	xmlChar *escURI;
2417 
2418         /*
2419 	 * this looks like an URI where some parts have not been
2420 	 * escaped leading to a parsing problem.  Check that the first
2421 	 * part matches a protocol.
2422 	 */
2423 	l = absuri - path;
2424 	/* Bypass if first part (part before the '://') is > 20 chars */
2425 	if ((l <= 0) || (l > 20))
2426 	    goto path_processing;
2427 	/* Bypass if any non-alpha characters are present in first part */
2428 	for (j = 0;j < l;j++) {
2429 	    c = path[j];
2430 	    if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2431 	        goto path_processing;
2432 	}
2433 
2434 	/* Escape all except the characters specified in the supplied path */
2435         escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2436 	if (escURI != NULL) {
2437 	    /* Try parsing the escaped path */
2438 	    uri = xmlParseURI((const char *) escURI);
2439 	    /* If successful, return the escaped string */
2440 	    if (uri != NULL) {
2441 	        xmlFreeURI(uri);
2442 		return escURI;
2443 	    }
2444             xmlFree(escURI);
2445 	}
2446     }
2447 
2448 path_processing:
2449 /* For Windows implementations, replace backslashes with 'forward slashes' */
2450 #if defined(_WIN32) && !defined(__CYGWIN__)
2451     /*
2452      * Create a URI structure
2453      */
2454     uri = xmlCreateURI();
2455     if (uri == NULL) {		/* Guard against 'out of memory' */
2456         return(NULL);
2457     }
2458 
2459     len = xmlStrlen(path);
2460     if ((len > 2) && IS_WINDOWS_PATH(path)) {
2461         /* make the scheme 'file' */
2462 	uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2463 	/* allocate space for leading '/' + path + string terminator */
2464 	uri->path = xmlMallocAtomic(len + 2);
2465 	if (uri->path == NULL) {
2466 	    xmlFreeURI(uri);	/* Guard against 'out of memory' */
2467 	    return(NULL);
2468 	}
2469 	/* Put in leading '/' plus path */
2470 	uri->path[0] = '/';
2471 	p = uri->path + 1;
2472 	strncpy(p, (char *) path, len + 1);
2473     } else {
2474 	uri->path = (char *) xmlStrdup(path);
2475 	if (uri->path == NULL) {
2476 	    xmlFreeURI(uri);
2477 	    return(NULL);
2478 	}
2479 	p = uri->path;
2480     }
2481     /* Now change all occurrences of '\' to '/' */
2482     while (*p != '\0') {
2483 	if (*p == '\\')
2484 	    *p = '/';
2485 	p++;
2486     }
2487 
2488     if (uri->scheme == NULL) {
2489 	ret = xmlStrdup((const xmlChar *) uri->path);
2490     } else {
2491 	ret = xmlSaveUri(uri);
2492     }
2493 
2494     xmlFreeURI(uri);
2495 #else
2496     ret = xmlStrdup((const xmlChar *) path);
2497 #endif
2498     return(ret);
2499 }
2500 
2501 /**
2502  * xmlPathToURI:
2503  * @path:  the resource locator in a filesystem notation
2504  *
2505  * Constructs an URI expressing the existing path
2506  *
2507  * Returns a new URI, or a duplicate of the path parameter if the
2508  * construction fails. The caller is responsible for freeing the memory
2509  * occupied by the returned string. If there is insufficient memory available,
2510  * or the argument is NULL, the function returns NULL.
2511  */
2512 xmlChar *
2513 xmlPathToURI(const xmlChar *path)
2514 {
2515     xmlURIPtr uri;
2516     xmlURI temp;
2517     xmlChar *ret, *cal;
2518 
2519     if (path == NULL)
2520         return(NULL);
2521 
2522     if ((uri = xmlParseURI((const char *) path)) != NULL) {
2523 	xmlFreeURI(uri);
2524 	return xmlStrdup(path);
2525     }
2526     cal = xmlCanonicPath(path);
2527     if (cal == NULL)
2528         return(NULL);
2529 #if defined(_WIN32) && !defined(__CYGWIN__)
2530     /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2531        If 'cal' is a valid URI already then we are done here, as continuing would make
2532        it invalid. */
2533     if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2534 	xmlFreeURI(uri);
2535 	return cal;
2536     }
2537     /* 'cal' can contain a relative path with backslashes. If that is processed
2538        by xmlSaveURI, they will be escaped and the external entity loader machinery
2539        will fail. So convert them to slashes. Misuse 'ret' for walking. */
2540     ret = cal;
2541     while (*ret != '\0') {
2542 	if (*ret == '\\')
2543 	    *ret = '/';
2544 	ret++;
2545     }
2546 #endif
2547     memset(&temp, 0, sizeof(temp));
2548     temp.path = (char *) cal;
2549     ret = xmlSaveUri(&temp);
2550     xmlFree(cal);
2551     return(ret);
2552 }
2553 #define bottom_uri
2554 #include "elfgcchack.h"
2555