1 /**
2 * uri.c: set of generic URI related routines
3 *
4 * Reference: RFCs 3986, 2732 and 2373
5 *
6 * See Copyright for the status of this software.
7 *
8 * daniel@veillard.com
9 */
10
11 #define IN_LIBXML
12 #include "libxml.h"
13
14 #include <limits.h>
15 #include <string.h>
16
17 #include <libxml/xmlmemory.h>
18 #include <libxml/uri.h>
19 #include <libxml/globals.h>
20 #include <libxml/xmlerror.h>
21
/**
 * MAX_URI_LENGTH:
 *
 * The URI grammar in the RFCs referenced above has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually the maximum admitted for proper operation.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012; it is only enforced on output.
 *
 * The expansion is parenthesized so that the macro behaves as a single
 * value inside any expression (for example x / MAX_URI_LENGTH).
 */
#define MAX_URI_LENGTH (1024 * 1024)
35
36 static void
xmlURIErrMemory(const char * extra)37 xmlURIErrMemory(const char *extra)
38 {
39 if (extra)
40 __xmlRaiseError(NULL, NULL, NULL,
41 NULL, NULL, XML_FROM_URI,
42 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
43 extra, NULL, NULL, 0, 0,
44 "Memory allocation failed : %s\n", extra);
45 else
46 __xmlRaiseError(NULL, NULL, NULL,
47 NULL, NULL, XML_FROM_URI,
48 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
49 NULL, NULL, NULL, 0, 0,
50 "Memory allocation failed\n");
51 }
52
53 static void xmlCleanURI(xmlURIPtr uri);
54
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 * Every macro below takes a character value, except IS_UNWISE() and
 * NEXT() which take a pointer to the character.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/* Drop any IS_DIGIT inherited from an included header. */
#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||	\
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||	\
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other legacy macros this one takes a pointer.
 */
#define IS_UNWISE(p)							\
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||		\
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||		\
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||	\
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||	\
    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||	\
    ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Advance the pointer lvalue @p to the next character: by three bytes
 * when it points at '%' (an escape sequence), by one byte otherwise.
 * The macro does not check that two bytes follow the '%'; callers are
 * expected to have validated the escape beforehand.
 * The argument is parenthesized so that expressions such as *pp can be
 * passed safely; note that it is still evaluated more than once.
 */
#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : ((p)++))
128
129 /*
130 * Productions from the spec.
131 *
132 * authority = server | reg_name
133 * reg_name = 1*( unreserved | escaped | "$" | "," |
134 * ";" | ":" | "@" | "&" | "=" | "+" )
135 *
136 * path = [ abs_path | opaque_part ]
137 */
138
/*
 * Duplicate the first @n bytes of @s with xmlStrndup() and hand the result
 * back as a plain char pointer. The whole expansion is parenthesized so the
 * cast cannot bind to surrounding operators at the call site.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
140
141 /************************************************************************
142 * *
143 * RFC 3986 parser *
144 * *
145 ************************************************************************/
146
/*
 * Character classes from RFC 3986. Every ISA_* macro takes a pointer to
 * the character to test and evaluates its argument more than once.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)							\
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)						\
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)						\
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)						\
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The two following bytes are only read once the previous test passed,
 * so the macro never reads past a terminating 0. The argument is
 * parenthesized before the offset is added so that any pointer
 * expression can be passed.
 */
#define ISA_PCT_ENCODED(p)						\
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)							\
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
      ((*(p) == ':')) || ((*(p) == '@')))
196
197 /**
198 * xmlParse3986Scheme:
199 * @uri: pointer to an URI structure
200 * @str: pointer to the string to analyze
201 *
202 * Parse an URI scheme
203 *
204 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
205 *
206 * Returns 0 or the error code
207 */
208 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)209 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
210 const char *cur;
211
212 if (str == NULL)
213 return(-1);
214
215 cur = *str;
216 if (!ISA_ALPHA(cur))
217 return(2);
218 cur++;
219 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
220 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
221 if (uri != NULL) {
222 if (uri->scheme != NULL) xmlFree(uri->scheme);
223 uri->scheme = STRNDUP(*str, cur - *str);
224 }
225 *str = cur;
226 return(0);
227 }
228
229 /**
230 * xmlParse3986Fragment:
231 * @uri: pointer to an URI structure
232 * @str: pointer to the string to analyze
233 *
234 * Parse the query part of an URI
235 *
236 * fragment = *( pchar / "/" / "?" )
237 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
238 * in the fragment identifier but this is used very broadly for
239 * xpointer scheme selection, so we are allowing it here to not break
240 * for example all the DocBook processing chains.
241 *
242 * Returns 0 or the error code
243 */
244 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)245 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
246 {
247 const char *cur;
248
249 if (str == NULL)
250 return (-1);
251
252 cur = *str;
253
254 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
255 (*cur == '[') || (*cur == ']') ||
256 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
257 NEXT(cur);
258 if (uri != NULL) {
259 if (uri->fragment != NULL)
260 xmlFree(uri->fragment);
261 if (uri->cleanup & 2)
262 uri->fragment = STRNDUP(*str, cur - *str);
263 else
264 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
265 }
266 *str = cur;
267 return (0);
268 }
269
270 /**
271 * xmlParse3986Query:
272 * @uri: pointer to an URI structure
273 * @str: pointer to the string to analyze
274 *
275 * Parse the query part of an URI
276 *
277 * query = *uric
278 *
279 * Returns 0 or the error code
280 */
281 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)282 xmlParse3986Query(xmlURIPtr uri, const char **str)
283 {
284 const char *cur;
285
286 if (str == NULL)
287 return (-1);
288
289 cur = *str;
290
291 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
292 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
293 NEXT(cur);
294 if (uri != NULL) {
295 if (uri->query != NULL)
296 xmlFree(uri->query);
297 if (uri->cleanup & 2)
298 uri->query = STRNDUP(*str, cur - *str);
299 else
300 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
301
302 /* Save the raw bytes of the query as well.
303 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
304 */
305 if (uri->query_raw != NULL)
306 xmlFree (uri->query_raw);
307 uri->query_raw = STRNDUP (*str, cur - *str);
308 }
309 *str = cur;
310 return (0);
311 }
312
313 /**
314 * xmlParse3986Port:
315 * @uri: pointer to an URI structure
316 * @str: the string to analyze
317 *
318 * Parse a port part and fills in the appropriate fields
319 * of the @uri structure
320 *
321 * port = *DIGIT
322 *
323 * Returns 0 or the error code
324 */
325 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)326 xmlParse3986Port(xmlURIPtr uri, const char **str)
327 {
328 const char *cur = *str;
329 int port = 0;
330
331 if (ISA_DIGIT(cur)) {
332 while (ISA_DIGIT(cur)) {
333 int digit = *cur - '0';
334
335 if (port > INT_MAX / 10)
336 return(1);
337 port *= 10;
338 if (port > INT_MAX - digit)
339 return(1);
340 port += digit;
341
342 cur++;
343 }
344 if (uri != NULL)
345 uri->port = port;
346 *str = cur;
347 return(0);
348 }
349 return(1);
350 }
351
352 /**
353 * xmlParse3986Userinfo:
354 * @uri: pointer to an URI structure
355 * @str: the string to analyze
356 *
357 * Parse an user information part and fills in the appropriate fields
358 * of the @uri structure
359 *
360 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
361 *
362 * Returns 0 or the error code
363 */
364 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)365 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
366 {
367 const char *cur;
368
369 cur = *str;
370 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
371 ISA_SUB_DELIM(cur) || (*cur == ':'))
372 NEXT(cur);
373 if (*cur == '@') {
374 if (uri != NULL) {
375 if (uri->user != NULL) xmlFree(uri->user);
376 if (uri->cleanup & 2)
377 uri->user = STRNDUP(*str, cur - *str);
378 else
379 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
380 }
381 *str = cur;
382 return(0);
383 }
384 return(1);
385 }
386
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed, so a longer
 * digit run such as "1234" is accepted as "123" with *@str left on the
 * fourth digit. *@str is not modified on failure.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    /*
     * cur[1] is read only once cur[0] is known to be a digit, and cur[2]
     * only once cur[1] is a digit, so the terminating 0 is never passed.
     */
    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;			/* 0-9 */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        if (cur[0] == '0')		/* no leading zero in 10-99 */
            return(1);
        len = 2;
    } else if (cur[0] == '1') {
        len = 3;			/* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        len = 3;			/* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        /* 250-255: the upper bound applies to the third digit */
        len = 3;
    } else {
        return(1);
    }

    *str = cur + len;
    return(0);
}
424 /**
425 * xmlParse3986Host:
426 * @uri: pointer to an URI structure
427 * @str: the string to analyze
428 *
429 * Parse an host part and fills in the appropriate fields
430 * of the @uri structure
431 *
432 * host = IP-literal / IPv4address / reg-name
433 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
434 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
435 * reg-name = *( unreserved / pct-encoded / sub-delims )
436 *
437 * Returns 0 or the error code
438 */
439 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)440 xmlParse3986Host(xmlURIPtr uri, const char **str)
441 {
442 const char *cur = *str;
443 const char *host;
444
445 host = cur;
446 /*
447 * IPv6 and future addressing scheme are enclosed between brackets
448 */
449 if (*cur == '[') {
450 cur++;
451 while ((*cur != ']') && (*cur != 0))
452 cur++;
453 if (*cur != ']')
454 return(1);
455 cur++;
456 goto found;
457 }
458 /*
459 * try to parse an IPv4
460 */
461 if (ISA_DIGIT(cur)) {
462 if (xmlParse3986DecOctet(&cur) != 0)
463 goto not_ipv4;
464 if (*cur != '.')
465 goto not_ipv4;
466 cur++;
467 if (xmlParse3986DecOctet(&cur) != 0)
468 goto not_ipv4;
469 if (*cur != '.')
470 goto not_ipv4;
471 if (xmlParse3986DecOctet(&cur) != 0)
472 goto not_ipv4;
473 if (*cur != '.')
474 goto not_ipv4;
475 if (xmlParse3986DecOctet(&cur) != 0)
476 goto not_ipv4;
477 goto found;
478 not_ipv4:
479 cur = *str;
480 }
481 /*
482 * then this should be a hostname which can be empty
483 */
484 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
485 NEXT(cur);
486 found:
487 if (uri != NULL) {
488 if (uri->authority != NULL) xmlFree(uri->authority);
489 uri->authority = NULL;
490 if (uri->server != NULL) xmlFree(uri->server);
491 if (cur != host) {
492 if (uri->cleanup & 2)
493 uri->server = STRNDUP(host, cur - host);
494 else
495 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
496 } else
497 uri->server = NULL;
498 }
499 *str = cur;
500 return(0);
501 }
502
503 /**
504 * xmlParse3986Authority:
505 * @uri: pointer to an URI structure
506 * @str: the string to analyze
507 *
508 * Parse an authority part and fills in the appropriate fields
509 * of the @uri structure
510 *
511 * authority = [ userinfo "@" ] host [ ":" port ]
512 *
513 * Returns 0 or the error code
514 */
515 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)516 xmlParse3986Authority(xmlURIPtr uri, const char **str)
517 {
518 const char *cur;
519 int ret;
520
521 cur = *str;
522 /*
523 * try to parse an userinfo and check for the trailing @
524 */
525 ret = xmlParse3986Userinfo(uri, &cur);
526 if ((ret != 0) || (*cur != '@'))
527 cur = *str;
528 else
529 cur++;
530 ret = xmlParse3986Host(uri, &cur);
531 if (ret != 0) return(ret);
532 if (*cur == ':') {
533 cur++;
534 ret = xmlParse3986Port(uri, &cur);
535 if (ret != 0) return(ret);
536 }
537 *str = cur;
538 return(0);
539 }
540
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip a path segment, stopping at the first character which is not a
 * pchar or which is equal to @forbid.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * A zero-length segment is only accepted when @empty is set; this
 * includes the case where the very first character is @forbid.
 * *@str is not modified on failure.
 *
 * Returns 0 on success, 1 if the segment is empty while @empty is 0
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur;

    cur = *str;
    while (ISA_PCHAR(cur) && (*cur != forbid))
        NEXT(cur);

    /* nothing consumed: either no pchar or the forbidden one came first */
    if ((cur == *str) && (!empty))
        return(1);

    *str = cur;
    return (0);
}
573
574 /**
575 * xmlParse3986PathAbEmpty:
576 * @uri: pointer to an URI structure
577 * @str: the string to analyze
578 *
579 * Parse an path absolute or empty and fills in the appropriate fields
580 * of the @uri structure
581 *
582 * path-abempty = *( "/" segment )
583 *
584 * Returns 0 or the error code
585 */
586 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)587 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
588 {
589 const char *cur;
590 int ret;
591
592 cur = *str;
593
594 while (*cur == '/') {
595 cur++;
596 ret = xmlParse3986Segment(&cur, 0, 1);
597 if (ret != 0) return(ret);
598 }
599 if (uri != NULL) {
600 if (uri->path != NULL) xmlFree(uri->path);
601 if (*str != cur) {
602 if (uri->cleanup & 2)
603 uri->path = STRNDUP(*str, cur - *str);
604 else
605 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
606 } else {
607 uri->path = NULL;
608 }
609 }
610 *str = cur;
611 return (0);
612 }
613
614 /**
615 * xmlParse3986PathAbsolute:
616 * @uri: pointer to an URI structure
617 * @str: the string to analyze
618 *
619 * Parse an path absolute and fills in the appropriate fields
620 * of the @uri structure
621 *
622 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
623 *
624 * Returns 0 or the error code
625 */
626 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)627 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
628 {
629 const char *cur;
630 int ret;
631
632 cur = *str;
633
634 if (*cur != '/')
635 return(1);
636 cur++;
637 ret = xmlParse3986Segment(&cur, 0, 0);
638 if (ret == 0) {
639 while (*cur == '/') {
640 cur++;
641 ret = xmlParse3986Segment(&cur, 0, 1);
642 if (ret != 0) return(ret);
643 }
644 }
645 if (uri != NULL) {
646 if (uri->path != NULL) xmlFree(uri->path);
647 if (cur != *str) {
648 if (uri->cleanup & 2)
649 uri->path = STRNDUP(*str, cur - *str);
650 else
651 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
652 } else {
653 uri->path = NULL;
654 }
655 }
656 *str = cur;
657 return (0);
658 }
659
660 /**
661 * xmlParse3986PathRootless:
662 * @uri: pointer to an URI structure
663 * @str: the string to analyze
664 *
665 * Parse an path without root and fills in the appropriate fields
666 * of the @uri structure
667 *
668 * path-rootless = segment-nz *( "/" segment )
669 *
670 * Returns 0 or the error code
671 */
672 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)673 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
674 {
675 const char *cur;
676 int ret;
677
678 cur = *str;
679
680 ret = xmlParse3986Segment(&cur, 0, 0);
681 if (ret != 0) return(ret);
682 while (*cur == '/') {
683 cur++;
684 ret = xmlParse3986Segment(&cur, 0, 1);
685 if (ret != 0) return(ret);
686 }
687 if (uri != NULL) {
688 if (uri->path != NULL) xmlFree(uri->path);
689 if (cur != *str) {
690 if (uri->cleanup & 2)
691 uri->path = STRNDUP(*str, cur - *str);
692 else
693 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
694 } else {
695 uri->path = NULL;
696 }
697 }
698 *str = cur;
699 return (0);
700 }
701
702 /**
703 * xmlParse3986PathNoScheme:
704 * @uri: pointer to an URI structure
705 * @str: the string to analyze
706 *
707 * Parse an path which is not a scheme and fills in the appropriate fields
708 * of the @uri structure
709 *
710 * path-noscheme = segment-nz-nc *( "/" segment )
711 *
712 * Returns 0 or the error code
713 */
714 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)715 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
716 {
717 const char *cur;
718 int ret;
719
720 cur = *str;
721
722 ret = xmlParse3986Segment(&cur, ':', 0);
723 if (ret != 0) return(ret);
724 while (*cur == '/') {
725 cur++;
726 ret = xmlParse3986Segment(&cur, 0, 1);
727 if (ret != 0) return(ret);
728 }
729 if (uri != NULL) {
730 if (uri->path != NULL) xmlFree(uri->path);
731 if (cur != *str) {
732 if (uri->cleanup & 2)
733 uri->path = STRNDUP(*str, cur - *str);
734 else
735 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
736 } else {
737 uri->path = NULL;
738 }
739 }
740 *str = cur;
741 return (0);
742 }
743
744 /**
745 * xmlParse3986HierPart:
746 * @uri: pointer to an URI structure
747 * @str: the string to analyze
748 *
749 * Parse an hierarchical part and fills in the appropriate fields
750 * of the @uri structure
751 *
752 * hier-part = "//" authority path-abempty
753 * / path-absolute
754 * / path-rootless
755 * / path-empty
756 *
757 * Returns 0 or the error code
758 */
759 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)760 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
761 {
762 const char *cur;
763 int ret;
764
765 cur = *str;
766
767 if ((*cur == '/') && (*(cur + 1) == '/')) {
768 cur += 2;
769 ret = xmlParse3986Authority(uri, &cur);
770 if (ret != 0) return(ret);
771 if (uri->server == NULL)
772 uri->port = -1;
773 ret = xmlParse3986PathAbEmpty(uri, &cur);
774 if (ret != 0) return(ret);
775 *str = cur;
776 return(0);
777 } else if (*cur == '/') {
778 ret = xmlParse3986PathAbsolute(uri, &cur);
779 if (ret != 0) return(ret);
780 } else if (ISA_PCHAR(cur)) {
781 ret = xmlParse3986PathRootless(uri, &cur);
782 if (ret != 0) return(ret);
783 } else {
784 /* path-empty is effectively empty */
785 if (uri != NULL) {
786 if (uri->path != NULL) xmlFree(uri->path);
787 uri->path = NULL;
788 }
789 }
790 *str = cur;
791 return (0);
792 }
793
794 /**
795 * xmlParse3986RelativeRef:
796 * @uri: pointer to an URI structure
797 * @str: the string to analyze
798 *
799 * Parse an URI string and fills in the appropriate fields
800 * of the @uri structure
801 *
802 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
803 * relative-part = "//" authority path-abempty
804 * / path-absolute
805 * / path-noscheme
806 * / path-empty
807 *
808 * Returns 0 or the error code
809 */
810 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)811 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
812 int ret;
813
814 if ((*str == '/') && (*(str + 1) == '/')) {
815 str += 2;
816 ret = xmlParse3986Authority(uri, &str);
817 if (ret != 0) return(ret);
818 ret = xmlParse3986PathAbEmpty(uri, &str);
819 if (ret != 0) return(ret);
820 } else if (*str == '/') {
821 ret = xmlParse3986PathAbsolute(uri, &str);
822 if (ret != 0) return(ret);
823 } else if (ISA_PCHAR(str)) {
824 ret = xmlParse3986PathNoScheme(uri, &str);
825 if (ret != 0) return(ret);
826 } else {
827 /* path-empty is effectively empty */
828 if (uri != NULL) {
829 if (uri->path != NULL) xmlFree(uri->path);
830 uri->path = NULL;
831 }
832 }
833
834 if (*str == '?') {
835 str++;
836 ret = xmlParse3986Query(uri, &str);
837 if (ret != 0) return(ret);
838 }
839 if (*str == '#') {
840 str++;
841 ret = xmlParse3986Fragment(uri, &str);
842 if (ret != 0) return(ret);
843 }
844 if (*str != 0) {
845 xmlCleanURI(uri);
846 return(1);
847 }
848 return(0);
849 }
850
851
852 /**
853 * xmlParse3986URI:
854 * @uri: pointer to an URI structure
855 * @str: the string to analyze
856 *
857 * Parse an URI string and fills in the appropriate fields
858 * of the @uri structure
859 *
860 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
861 *
862 * Returns 0 or the error code
863 */
864 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)865 xmlParse3986URI(xmlURIPtr uri, const char *str) {
866 int ret;
867
868 ret = xmlParse3986Scheme(uri, &str);
869 if (ret != 0) return(ret);
870 if (*str != ':') {
871 return(1);
872 }
873 str++;
874 ret = xmlParse3986HierPart(uri, &str);
875 if (ret != 0) return(ret);
876 if (*str == '?') {
877 str++;
878 ret = xmlParse3986Query(uri, &str);
879 if (ret != 0) return(ret);
880 }
881 if (*str == '#') {
882 str++;
883 ret = xmlParse3986Fragment(uri, &str);
884 if (ret != 0) return(ret);
885 }
886 if (*str != 0) {
887 xmlCleanURI(uri);
888 return(1);
889 }
890 return(0);
891 }
892
893 /**
894 * xmlParse3986URIReference:
895 * @uri: pointer to an URI structure
896 * @str: the string to analyze
897 *
898 * Parse an URI reference string and fills in the appropriate fields
899 * of the @uri structure
900 *
901 * URI-reference = URI / relative-ref
902 *
903 * Returns 0 or the error code
904 */
905 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)906 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
907 int ret;
908
909 if (str == NULL)
910 return(-1);
911 xmlCleanURI(uri);
912
913 /*
914 * Try first to parse absolute refs, then fallback to relative if
915 * it fails.
916 */
917 ret = xmlParse3986URI(uri, str);
918 if (ret != 0) {
919 xmlCleanURI(uri);
920 ret = xmlParse3986RelativeRef(uri, str);
921 if (ret != 0) {
922 xmlCleanURI(uri);
923 return(ret);
924 }
925 }
926 return(0);
927 }
928
929 /**
930 * xmlParseURI:
931 * @str: the URI string to analyze
932 *
933 * Parse an URI based on RFC 3986
934 *
935 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
936 *
937 * Returns a newly built xmlURIPtr or NULL in case of error
938 */
939 xmlURIPtr
xmlParseURI(const char * str)940 xmlParseURI(const char *str) {
941 xmlURIPtr uri;
942 int ret;
943
944 if (str == NULL)
945 return(NULL);
946 uri = xmlCreateURI();
947 if (uri != NULL) {
948 ret = xmlParse3986URIReference(uri, str);
949 if (ret) {
950 xmlFreeURI(uri);
951 return(NULL);
952 }
953 }
954 return(uri);
955 }
956
957 /**
958 * xmlParseURIReference:
959 * @uri: pointer to an URI structure
960 * @str: the string to analyze
961 *
962 * Parse an URI reference string based on RFC 3986 and fills in the
963 * appropriate fields of the @uri structure
964 *
965 * URI-reference = URI / relative-ref
966 *
967 * Returns 0 or the error code
968 */
969 int
xmlParseURIReference(xmlURIPtr uri,const char * str)970 xmlParseURIReference(xmlURIPtr uri, const char *str) {
971 return(xmlParse3986URIReference(uri, str));
972 }
973
974 /**
975 * xmlParseURIRaw:
976 * @str: the URI string to analyze
977 * @raw: if 1 unescaping of URI pieces are disabled
978 *
979 * Parse an URI but allows to keep intact the original fragments.
980 *
981 * URI-reference = URI / relative-ref
982 *
983 * Returns a newly built xmlURIPtr or NULL in case of error
984 */
985 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)986 xmlParseURIRaw(const char *str, int raw) {
987 xmlURIPtr uri;
988 int ret;
989
990 if (str == NULL)
991 return(NULL);
992 uri = xmlCreateURI();
993 if (uri != NULL) {
994 if (raw) {
995 uri->cleanup |= 2;
996 }
997 ret = xmlParseURIReference(uri, str);
998 if (ret) {
999 xmlFreeURI(uri);
1000 return(NULL);
1001 }
1002 }
1003 return(uri);
1004 }
1005
1006 /************************************************************************
1007 * *
1008 * Generic URI structure functions *
1009 * *
1010 ************************************************************************/
1011
1012 /**
1013 * xmlCreateURI:
1014 *
1015 * Simply creates an empty xmlURI
1016 *
1017 * Returns the new structure or NULL in case of error
1018 */
1019 xmlURIPtr
xmlCreateURI(void)1020 xmlCreateURI(void) {
1021 xmlURIPtr ret;
1022
1023 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1024 if (ret == NULL) {
1025 xmlURIErrMemory("creating URI structure\n");
1026 return(NULL);
1027 }
1028 memset(ret, 0, sizeof(xmlURI));
1029 return(ret);
1030 }
1031
1032 /**
1033 * xmlSaveUriRealloc:
1034 *
1035 * Function to handle properly a reallocation when saving an URI
1036 * Also imposes some limit on the length of an URI string output
1037 */
1038 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1039 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1040 xmlChar *temp;
1041 int tmp;
1042
1043 if (*max > MAX_URI_LENGTH) {
1044 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1045 return(NULL);
1046 }
1047 tmp = *max * 2;
1048 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1049 if (temp == NULL) {
1050 xmlURIErrMemory("saving URI\n");
1051 return(NULL);
1052 }
1053 *max = tmp;
1054 return(temp);
1055 }
1056
1057 /**
1058 * xmlSaveUri:
1059 * @uri: pointer to an xmlURI
1060 *
1061 * Save the URI as an escaped string
1062 *
1063 * Returns a new string (to be deallocated by caller)
1064 */
1065 xmlChar *
xmlSaveUri(xmlURIPtr uri)1066 xmlSaveUri(xmlURIPtr uri) {
1067 xmlChar *ret = NULL;
1068 xmlChar *temp;
1069 const char *p;
1070 int len;
1071 int max;
1072
1073 if (uri == NULL) return(NULL);
1074
1075
1076 max = 80;
1077 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1078 if (ret == NULL) {
1079 xmlURIErrMemory("saving URI\n");
1080 return(NULL);
1081 }
1082 len = 0;
1083
1084 if (uri->scheme != NULL) {
1085 p = uri->scheme;
1086 while (*p != 0) {
1087 if (len >= max) {
1088 temp = xmlSaveUriRealloc(ret, &max);
1089 if (temp == NULL) goto mem_error;
1090 ret = temp;
1091 }
1092 ret[len++] = *p++;
1093 }
1094 if (len >= max) {
1095 temp = xmlSaveUriRealloc(ret, &max);
1096 if (temp == NULL) goto mem_error;
1097 ret = temp;
1098 }
1099 ret[len++] = ':';
1100 }
1101 if (uri->opaque != NULL) {
1102 p = uri->opaque;
1103 while (*p != 0) {
1104 if (len + 3 >= max) {
1105 temp = xmlSaveUriRealloc(ret, &max);
1106 if (temp == NULL) goto mem_error;
1107 ret = temp;
1108 }
1109 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1110 ret[len++] = *p++;
1111 else {
1112 int val = *(unsigned char *)p++;
1113 int hi = val / 0x10, lo = val % 0x10;
1114 ret[len++] = '%';
1115 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1116 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1117 }
1118 }
1119 } else {
1120 if ((uri->server != NULL) || (uri->port == -1)) {
1121 if (len + 3 >= max) {
1122 temp = xmlSaveUriRealloc(ret, &max);
1123 if (temp == NULL) goto mem_error;
1124 ret = temp;
1125 }
1126 ret[len++] = '/';
1127 ret[len++] = '/';
1128 if (uri->user != NULL) {
1129 p = uri->user;
1130 while (*p != 0) {
1131 if (len + 3 >= max) {
1132 temp = xmlSaveUriRealloc(ret, &max);
1133 if (temp == NULL) goto mem_error;
1134 ret = temp;
1135 }
1136 if ((IS_UNRESERVED(*(p))) ||
1137 ((*(p) == ';')) || ((*(p) == ':')) ||
1138 ((*(p) == '&')) || ((*(p) == '=')) ||
1139 ((*(p) == '+')) || ((*(p) == '$')) ||
1140 ((*(p) == ',')))
1141 ret[len++] = *p++;
1142 else {
1143 int val = *(unsigned char *)p++;
1144 int hi = val / 0x10, lo = val % 0x10;
1145 ret[len++] = '%';
1146 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1147 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1148 }
1149 }
1150 if (len + 3 >= max) {
1151 temp = xmlSaveUriRealloc(ret, &max);
1152 if (temp == NULL) goto mem_error;
1153 ret = temp;
1154 }
1155 ret[len++] = '@';
1156 }
1157 if (uri->server != NULL) {
1158 p = uri->server;
1159 while (*p != 0) {
1160 if (len >= max) {
1161 temp = xmlSaveUriRealloc(ret, &max);
1162 if (temp == NULL) goto mem_error;
1163 ret = temp;
1164 }
1165 ret[len++] = *p++;
1166 }
1167 if (uri->port > 0) {
1168 if (len + 10 >= max) {
1169 temp = xmlSaveUriRealloc(ret, &max);
1170 if (temp == NULL) goto mem_error;
1171 ret = temp;
1172 }
1173 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1174 }
1175 }
1176 } else if (uri->authority != NULL) {
1177 if (len + 3 >= max) {
1178 temp = xmlSaveUriRealloc(ret, &max);
1179 if (temp == NULL) goto mem_error;
1180 ret = temp;
1181 }
1182 ret[len++] = '/';
1183 ret[len++] = '/';
1184 p = uri->authority;
1185 while (*p != 0) {
1186 if (len + 3 >= max) {
1187 temp = xmlSaveUriRealloc(ret, &max);
1188 if (temp == NULL) goto mem_error;
1189 ret = temp;
1190 }
1191 if ((IS_UNRESERVED(*(p))) ||
1192 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1193 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1194 ((*(p) == '=')) || ((*(p) == '+')))
1195 ret[len++] = *p++;
1196 else {
1197 int val = *(unsigned char *)p++;
1198 int hi = val / 0x10, lo = val % 0x10;
1199 ret[len++] = '%';
1200 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1201 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1202 }
1203 }
1204 } else if (uri->scheme != NULL) {
1205 if (len + 3 >= max) {
1206 temp = xmlSaveUriRealloc(ret, &max);
1207 if (temp == NULL) goto mem_error;
1208 ret = temp;
1209 }
1210 ret[len++] = '/';
1211 ret[len++] = '/';
1212 }
1213 if (uri->path != NULL) {
1214 p = uri->path;
1215 /*
1216 * the colon in file:///d: should not be escaped or
1217 * Windows accesses fail later.
1218 */
1219 if ((uri->scheme != NULL) &&
1220 (p[0] == '/') &&
1221 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1222 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1223 (p[2] == ':') &&
1224 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1225 if (len + 3 >= max) {
1226 temp = xmlSaveUriRealloc(ret, &max);
1227 if (temp == NULL) goto mem_error;
1228 ret = temp;
1229 }
1230 ret[len++] = *p++;
1231 ret[len++] = *p++;
1232 ret[len++] = *p++;
1233 }
1234 while (*p != 0) {
1235 if (len + 3 >= max) {
1236 temp = xmlSaveUriRealloc(ret, &max);
1237 if (temp == NULL) goto mem_error;
1238 ret = temp;
1239 }
1240 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1241 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1242 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1243 ((*(p) == ',')))
1244 ret[len++] = *p++;
1245 else {
1246 int val = *(unsigned char *)p++;
1247 int hi = val / 0x10, lo = val % 0x10;
1248 ret[len++] = '%';
1249 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1250 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1251 }
1252 }
1253 }
1254 if (uri->query_raw != NULL) {
1255 if (len + 1 >= max) {
1256 temp = xmlSaveUriRealloc(ret, &max);
1257 if (temp == NULL) goto mem_error;
1258 ret = temp;
1259 }
1260 ret[len++] = '?';
1261 p = uri->query_raw;
1262 while (*p != 0) {
1263 if (len + 1 >= max) {
1264 temp = xmlSaveUriRealloc(ret, &max);
1265 if (temp == NULL) goto mem_error;
1266 ret = temp;
1267 }
1268 ret[len++] = *p++;
1269 }
1270 } else if (uri->query != NULL) {
1271 if (len + 3 >= max) {
1272 temp = xmlSaveUriRealloc(ret, &max);
1273 if (temp == NULL) goto mem_error;
1274 ret = temp;
1275 }
1276 ret[len++] = '?';
1277 p = uri->query;
1278 while (*p != 0) {
1279 if (len + 3 >= max) {
1280 temp = xmlSaveUriRealloc(ret, &max);
1281 if (temp == NULL) goto mem_error;
1282 ret = temp;
1283 }
1284 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1285 ret[len++] = *p++;
1286 else {
1287 int val = *(unsigned char *)p++;
1288 int hi = val / 0x10, lo = val % 0x10;
1289 ret[len++] = '%';
1290 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1291 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1292 }
1293 }
1294 }
1295 }
1296 if (uri->fragment != NULL) {
1297 if (len + 3 >= max) {
1298 temp = xmlSaveUriRealloc(ret, &max);
1299 if (temp == NULL) goto mem_error;
1300 ret = temp;
1301 }
1302 ret[len++] = '#';
1303 p = uri->fragment;
1304 while (*p != 0) {
1305 if (len + 3 >= max) {
1306 temp = xmlSaveUriRealloc(ret, &max);
1307 if (temp == NULL) goto mem_error;
1308 ret = temp;
1309 }
1310 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1311 ret[len++] = *p++;
1312 else {
1313 int val = *(unsigned char *)p++;
1314 int hi = val / 0x10, lo = val % 0x10;
1315 ret[len++] = '%';
1316 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1317 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1318 }
1319 }
1320 }
1321 if (len >= max) {
1322 temp = xmlSaveUriRealloc(ret, &max);
1323 if (temp == NULL) goto mem_error;
1324 ret = temp;
1325 }
1326 ret[len] = 0;
1327 return(ret);
1328
1329 mem_error:
1330 xmlFree(ret);
1331 return(NULL);
1332 }
1333
1334 /**
1335 * xmlPrintURI:
1336 * @stream: a FILE* for the output
1337 * @uri: pointer to an xmlURI
1338 *
1339 * Prints the URI in the stream @stream.
1340 */
1341 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1342 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1343 xmlChar *out;
1344
1345 out = xmlSaveUri(uri);
1346 if (out != NULL) {
1347 fprintf(stream, "%s", (char *) out);
1348 xmlFree(out);
1349 }
1350 }
1351
1352 /**
1353 * xmlCleanURI:
1354 * @uri: pointer to an xmlURI
1355 *
1356 * Make sure the xmlURI struct is free of content
1357 */
1358 static void
xmlCleanURI(xmlURIPtr uri)1359 xmlCleanURI(xmlURIPtr uri) {
1360 if (uri == NULL) return;
1361
1362 if (uri->scheme != NULL) xmlFree(uri->scheme);
1363 uri->scheme = NULL;
1364 if (uri->server != NULL) xmlFree(uri->server);
1365 uri->server = NULL;
1366 if (uri->user != NULL) xmlFree(uri->user);
1367 uri->user = NULL;
1368 if (uri->path != NULL) xmlFree(uri->path);
1369 uri->path = NULL;
1370 if (uri->fragment != NULL) xmlFree(uri->fragment);
1371 uri->fragment = NULL;
1372 if (uri->opaque != NULL) xmlFree(uri->opaque);
1373 uri->opaque = NULL;
1374 if (uri->authority != NULL) xmlFree(uri->authority);
1375 uri->authority = NULL;
1376 if (uri->query != NULL) xmlFree(uri->query);
1377 uri->query = NULL;
1378 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1379 uri->query_raw = NULL;
1380 }
1381
1382 /**
1383 * xmlFreeURI:
1384 * @uri: pointer to an xmlURI
1385 *
1386 * Free up the xmlURI struct
1387 */
1388 void
xmlFreeURI(xmlURIPtr uri)1389 xmlFreeURI(xmlURIPtr uri) {
1390 if (uri == NULL) return;
1391
1392 if (uri->scheme != NULL) xmlFree(uri->scheme);
1393 if (uri->server != NULL) xmlFree(uri->server);
1394 if (uri->user != NULL) xmlFree(uri->user);
1395 if (uri->path != NULL) xmlFree(uri->path);
1396 if (uri->fragment != NULL) xmlFree(uri->fragment);
1397 if (uri->opaque != NULL) xmlFree(uri->opaque);
1398 if (uri->authority != NULL) xmlFree(uri->authority);
1399 if (uri->query != NULL) xmlFree(uri->query);
1400 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1401 xmlFree(uri);
1402 }
1403
1404 /************************************************************************
1405 * *
1406 * Helper functions *
1407 * *
1408 ************************************************************************/
1409
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. Runs of '/' inside the path are
 * collapsed to a single '/' as well; leading slashes are kept untouched.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads the input, "out" writes the compacted result; both
     * always point at the start of a segment at the top of the loop.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If there are no previous segments, then keep going from here.
         * "No previous segment" means that only slashes (or nothing at
         * all) precede "cur".  Merely landing on the first byte of the
         * buffer is not enough: a one-character first segment such as the
         * "a" in "a/b/../.." sits exactly there and must be re-examined.
         */
        if (cur == path)
            continue;
        segp = cur - 1;
        while ((segp > path) && (segp[0] == '/'))
            --segp;
        if (segp[0] == '/')
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1598
/*
 * is_hex:
 * Tells whether @c is an ASCII hexadecimal digit.
 * Returns 1 for '0'-'9', 'a'-'f' and 'A'-'F', 0 for anything else.
 */
static int is_hex(char c) {
    if ((c >= '0') && (c <= '9'))
        return(1);
    if ((c >= 'a') && (c <= 'f'))
        return(1);
    if ((c >= 'A') && (c <= 'F'))
        return(1);
    return(0);
}
1606
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding).
 * A '%' that is not followed by two hexadecimal digits inside the first
 * @len bytes is copied unchanged.
 * Note that the length of the result can only be smaller or same size as
 * the input string, so a caller-supplied @target needs room for @len bytes
 * plus the terminating 0.
 *
 * Returns @target when it is not NULL, otherwise a newly allocated copy of
 * the string, but unescaped. Returns NULL only in case of error: @str is
 * NULL, the full string is longer than INT_MAX, or allocation failed.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *ret, *out;
    const char *in;

    if (str == NULL)
        return(NULL);
    if (len <= 0) {
        size_t full = strlen(str);

        /* refuse lengths that cannot be represented instead of letting
         * the conversion to int wrap to an arbitrary value */
        if (full > (size_t) INT_MAX)
            return(NULL);
        len = (int) full;
    }

    if (target == NULL) {
        ret = (char *) xmlMallocAtomic(len + 1);
        if (ret == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    } else
        ret = target;

    in = str;
    out = ret;
    while (len > 0) {
        if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
            int value = 0;
            int i;

            /* both digits were validated by is_hex() above */
            for (i = 1; i <= 2; i++) {
                char digit = in[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else
                    value += (digit - 'A') + 10;
            }
            /* store the byte through unsigned char so that values above
             * 0x7F do not depend on the signedness of plain char */
            *((unsigned char *) out) = (unsigned char) value;
            out++;
            in += 3;
            len -= 3;
        } else {
            *out++ = *in++;
            len--;
        }
    }
    *out = 0;
    return(ret);
}
1668
1669 /**
1670 * xmlURIEscapeStr:
1671 * @str: string to escape
1672 * @list: exception list string of chars not to escape
1673 *
1674 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1675 * and the characters in the exception list.
1676 *
1677 * Returns a new escaped string or NULL in case of error.
1678 */
1679 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1680 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1681 xmlChar *ret, ch;
1682 xmlChar *temp;
1683 const xmlChar *in;
1684 int len, out;
1685
1686 if (str == NULL)
1687 return(NULL);
1688 if (str[0] == 0)
1689 return(xmlStrdup(str));
1690 len = xmlStrlen(str);
1691 if (!(len > 0)) return(NULL);
1692
1693 len += 20;
1694 ret = (xmlChar *) xmlMallocAtomic(len);
1695 if (ret == NULL) {
1696 xmlURIErrMemory("escaping URI value\n");
1697 return(NULL);
1698 }
1699 in = (const xmlChar *) str;
1700 out = 0;
1701 while(*in != 0) {
1702 if (len - out <= 3) {
1703 temp = xmlSaveUriRealloc(ret, &len);
1704 if (temp == NULL) {
1705 xmlURIErrMemory("escaping URI value\n");
1706 xmlFree(ret);
1707 return(NULL);
1708 }
1709 ret = temp;
1710 }
1711
1712 ch = *in;
1713
1714 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1715 unsigned char val;
1716 ret[out++] = '%';
1717 val = ch >> 4;
1718 if (val <= 9)
1719 ret[out++] = '0' + val;
1720 else
1721 ret[out++] = 'A' + val - 0xA;
1722 val = ch & 0xF;
1723 if (val <= 9)
1724 ret[out++] = '0' + val;
1725 else
1726 ret[out++] = 'A' + val - 0xA;
1727 in++;
1728 } else {
1729 ret[out++] = *in++;
1730 }
1731
1732 }
1733 ret[out] = 0;
1734 return(ret);
1735 }
1736
1737 /**
1738 * xmlURIEscape:
1739 * @str: the string of the URI to escape
1740 *
1741 * Escaping routine, does not do validity checks !
1742 * It will try to escape the chars needing this, but this is heuristic
1743 * based it's impossible to be sure.
1744 *
1745 * Returns an copy of the string, but escaped
1746 *
1747 * 25 May 2001
1748 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1749 * according to RFC2396.
1750 * - Carl Douglas
1751 */
1752 xmlChar *
xmlURIEscape(const xmlChar * str)1753 xmlURIEscape(const xmlChar * str)
1754 {
1755 xmlChar *ret, *segment = NULL;
1756 xmlURIPtr uri;
1757 int ret2;
1758
1759 if (str == NULL)
1760 return (NULL);
1761
1762 uri = xmlCreateURI();
1763 if (uri != NULL) {
1764 /*
1765 * Allow escaping errors in the unescaped form
1766 */
1767 uri->cleanup = 1;
1768 ret2 = xmlParseURIReference(uri, (const char *)str);
1769 if (ret2) {
1770 xmlFreeURI(uri);
1771 return (NULL);
1772 }
1773 }
1774
1775 if (!uri)
1776 return NULL;
1777
1778 ret = NULL;
1779
1780 #define NULLCHK(p) if(!p) { \
1781 xmlURIErrMemory("escaping URI value\n"); \
1782 xmlFreeURI(uri); \
1783 xmlFree(ret); \
1784 return NULL; } \
1785
1786 if (uri->scheme) {
1787 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1788 NULLCHK(segment)
1789 ret = xmlStrcat(ret, segment);
1790 ret = xmlStrcat(ret, BAD_CAST ":");
1791 xmlFree(segment);
1792 }
1793
1794 if (uri->authority) {
1795 segment =
1796 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1797 NULLCHK(segment)
1798 ret = xmlStrcat(ret, BAD_CAST "//");
1799 ret = xmlStrcat(ret, segment);
1800 xmlFree(segment);
1801 }
1802
1803 if (uri->user) {
1804 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1805 NULLCHK(segment)
1806 ret = xmlStrcat(ret,BAD_CAST "//");
1807 ret = xmlStrcat(ret, segment);
1808 ret = xmlStrcat(ret, BAD_CAST "@");
1809 xmlFree(segment);
1810 }
1811
1812 if (uri->server) {
1813 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1814 NULLCHK(segment)
1815 if (uri->user == NULL)
1816 ret = xmlStrcat(ret, BAD_CAST "//");
1817 ret = xmlStrcat(ret, segment);
1818 xmlFree(segment);
1819 }
1820
1821 if (uri->port) {
1822 xmlChar port[10];
1823
1824 snprintf((char *) port, 10, "%d", uri->port);
1825 ret = xmlStrcat(ret, BAD_CAST ":");
1826 ret = xmlStrcat(ret, port);
1827 }
1828
1829 if (uri->path) {
1830 segment =
1831 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1832 NULLCHK(segment)
1833 ret = xmlStrcat(ret, segment);
1834 xmlFree(segment);
1835 }
1836
1837 if (uri->query_raw) {
1838 ret = xmlStrcat(ret, BAD_CAST "?");
1839 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1840 }
1841 else if (uri->query) {
1842 segment =
1843 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1844 NULLCHK(segment)
1845 ret = xmlStrcat(ret, BAD_CAST "?");
1846 ret = xmlStrcat(ret, segment);
1847 xmlFree(segment);
1848 }
1849
1850 if (uri->opaque) {
1851 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1852 NULLCHK(segment)
1853 ret = xmlStrcat(ret, segment);
1854 xmlFree(segment);
1855 }
1856
1857 if (uri->fragment) {
1858 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1859 NULLCHK(segment)
1860 ret = xmlStrcat(ret, BAD_CAST "#");
1861 ret = xmlStrcat(ret, segment);
1862 xmlFree(segment);
1863 }
1864
1865 xmlFreeURI(uri);
1866 #undef NULLCHK
1867
1868 return (ret);
1869 }
1870
1871 /************************************************************************
1872 * *
1873 * Public functions *
1874 * *
1875 ************************************************************************/
1876
1877 /**
1878 * xmlBuildURI:
1879 * @URI: the URI instance found in the document
1880 * @base: the base value
1881 *
1882 * Computes he final URI of the reference done by checking that
1883 * the given URI is valid, and building the final URI using the
1884 * base URI. This is processed according to section 5.2 of the
1885 * RFC 2396
1886 *
1887 * 5.2. Resolving Relative References to Absolute Form
1888 *
1889 * Returns a new URI string (to be freed by the caller) or NULL in case
1890 * of error.
1891 */
1892 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1893 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1894 xmlChar *val = NULL;
1895 int ret, len, indx, cur, out;
1896 xmlURIPtr ref = NULL;
1897 xmlURIPtr bas = NULL;
1898 xmlURIPtr res = NULL;
1899
1900 /*
1901 * 1) The URI reference is parsed into the potential four components and
1902 * fragment identifier, as described in Section 4.3.
1903 *
1904 * NOTE that a completely empty URI is treated by modern browsers
1905 * as a reference to "." rather than as a synonym for the current
1906 * URI. Should we do that here?
1907 */
1908 if (URI == NULL)
1909 ret = -1;
1910 else {
1911 if (*URI) {
1912 ref = xmlCreateURI();
1913 if (ref == NULL)
1914 goto done;
1915 ret = xmlParseURIReference(ref, (const char *) URI);
1916 }
1917 else
1918 ret = 0;
1919 }
1920 if (ret != 0)
1921 goto done;
1922 if ((ref != NULL) && (ref->scheme != NULL)) {
1923 /*
1924 * The URI is absolute don't modify.
1925 */
1926 val = xmlStrdup(URI);
1927 goto done;
1928 }
1929 if (base == NULL)
1930 ret = -1;
1931 else {
1932 bas = xmlCreateURI();
1933 if (bas == NULL)
1934 goto done;
1935 ret = xmlParseURIReference(bas, (const char *) base);
1936 }
1937 if (ret != 0) {
1938 if (ref)
1939 val = xmlSaveUri(ref);
1940 goto done;
1941 }
1942 if (ref == NULL) {
1943 /*
1944 * the base fragment must be ignored
1945 */
1946 if (bas->fragment != NULL) {
1947 xmlFree(bas->fragment);
1948 bas->fragment = NULL;
1949 }
1950 val = xmlSaveUri(bas);
1951 goto done;
1952 }
1953
1954 /*
1955 * 2) If the path component is empty and the scheme, authority, and
1956 * query components are undefined, then it is a reference to the
1957 * current document and we are done. Otherwise, the reference URI's
1958 * query and fragment components are defined as found (or not found)
1959 * within the URI reference and not inherited from the base URI.
1960 *
1961 * NOTE that in modern browsers, the parsing differs from the above
1962 * in the following aspect: the query component is allowed to be
1963 * defined while still treating this as a reference to the current
1964 * document.
1965 */
1966 res = xmlCreateURI();
1967 if (res == NULL)
1968 goto done;
1969 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1970 ((ref->authority == NULL) && (ref->server == NULL))) {
1971 if (bas->scheme != NULL)
1972 res->scheme = xmlMemStrdup(bas->scheme);
1973 if (bas->authority != NULL)
1974 res->authority = xmlMemStrdup(bas->authority);
1975 else if ((bas->server != NULL) || (bas->port == -1)) {
1976 if (bas->server != NULL)
1977 res->server = xmlMemStrdup(bas->server);
1978 if (bas->user != NULL)
1979 res->user = xmlMemStrdup(bas->user);
1980 res->port = bas->port;
1981 }
1982 if (bas->path != NULL)
1983 res->path = xmlMemStrdup(bas->path);
1984 if (ref->query_raw != NULL)
1985 res->query_raw = xmlMemStrdup (ref->query_raw);
1986 else if (ref->query != NULL)
1987 res->query = xmlMemStrdup(ref->query);
1988 else if (bas->query_raw != NULL)
1989 res->query_raw = xmlMemStrdup(bas->query_raw);
1990 else if (bas->query != NULL)
1991 res->query = xmlMemStrdup(bas->query);
1992 if (ref->fragment != NULL)
1993 res->fragment = xmlMemStrdup(ref->fragment);
1994 goto step_7;
1995 }
1996
1997 /*
1998 * 3) If the scheme component is defined, indicating that the reference
1999 * starts with a scheme name, then the reference is interpreted as an
2000 * absolute URI and we are done. Otherwise, the reference URI's
2001 * scheme is inherited from the base URI's scheme component.
2002 */
2003 if (ref->scheme != NULL) {
2004 val = xmlSaveUri(ref);
2005 goto done;
2006 }
2007 if (bas->scheme != NULL)
2008 res->scheme = xmlMemStrdup(bas->scheme);
2009
2010 if (ref->query_raw != NULL)
2011 res->query_raw = xmlMemStrdup(ref->query_raw);
2012 else if (ref->query != NULL)
2013 res->query = xmlMemStrdup(ref->query);
2014 if (ref->fragment != NULL)
2015 res->fragment = xmlMemStrdup(ref->fragment);
2016
2017 /*
2018 * 4) If the authority component is defined, then the reference is a
2019 * network-path and we skip to step 7. Otherwise, the reference
2020 * URI's authority is inherited from the base URI's authority
2021 * component, which will also be undefined if the URI scheme does not
2022 * use an authority component.
2023 */
2024 if ((ref->authority != NULL) || (ref->server != NULL)) {
2025 if (ref->authority != NULL)
2026 res->authority = xmlMemStrdup(ref->authority);
2027 else {
2028 res->server = xmlMemStrdup(ref->server);
2029 if (ref->user != NULL)
2030 res->user = xmlMemStrdup(ref->user);
2031 res->port = ref->port;
2032 }
2033 if (ref->path != NULL)
2034 res->path = xmlMemStrdup(ref->path);
2035 goto step_7;
2036 }
2037 if (bas->authority != NULL)
2038 res->authority = xmlMemStrdup(bas->authority);
2039 else if ((bas->server != NULL) || (bas->port == -1)) {
2040 if (bas->server != NULL)
2041 res->server = xmlMemStrdup(bas->server);
2042 if (bas->user != NULL)
2043 res->user = xmlMemStrdup(bas->user);
2044 res->port = bas->port;
2045 }
2046
2047 /*
2048 * 5) If the path component begins with a slash character ("/"), then
2049 * the reference is an absolute-path and we skip to step 7.
2050 */
2051 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2052 res->path = xmlMemStrdup(ref->path);
2053 goto step_7;
2054 }
2055
2056
2057 /*
2058 * 6) If this step is reached, then we are resolving a relative-path
2059 * reference. The relative path needs to be merged with the base
2060 * URI's path. Although there are many ways to do this, we will
2061 * describe a simple method using a separate string buffer.
2062 *
2063 * Allocate a buffer large enough for the result string.
2064 */
2065 len = 2; /* extra / and 0 */
2066 if (ref->path != NULL)
2067 len += strlen(ref->path);
2068 if (bas->path != NULL)
2069 len += strlen(bas->path);
2070 res->path = (char *) xmlMallocAtomic(len);
2071 if (res->path == NULL) {
2072 xmlURIErrMemory("resolving URI against base\n");
2073 goto done;
2074 }
2075 res->path[0] = 0;
2076
2077 /*
2078 * a) All but the last segment of the base URI's path component is
2079 * copied to the buffer. In other words, any characters after the
2080 * last (right-most) slash character, if any, are excluded.
2081 */
2082 cur = 0;
2083 out = 0;
2084 if (bas->path != NULL) {
2085 while (bas->path[cur] != 0) {
2086 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2087 cur++;
2088 if (bas->path[cur] == 0)
2089 break;
2090
2091 cur++;
2092 while (out < cur) {
2093 res->path[out] = bas->path[out];
2094 out++;
2095 }
2096 }
2097 }
2098 res->path[out] = 0;
2099
2100 /*
2101 * b) The reference's path component is appended to the buffer
2102 * string.
2103 */
2104 if (ref->path != NULL && ref->path[0] != 0) {
2105 indx = 0;
2106 /*
2107 * Ensure the path includes a '/'
2108 */
2109 if ((out == 0) && (bas->server != NULL))
2110 res->path[out++] = '/';
2111 while (ref->path[indx] != 0) {
2112 res->path[out++] = ref->path[indx++];
2113 }
2114 }
2115 res->path[out] = 0;
2116
2117 /*
2118 * Steps c) to h) are really path normalization steps
2119 */
2120 xmlNormalizeURIPath(res->path);
2121
2122 step_7:
2123
2124 /*
2125 * 7) The resulting URI components, including any inherited from the
2126 * base URI, are recombined to give the absolute form of the URI
2127 * reference.
2128 */
2129 val = xmlSaveUri(res);
2130
2131 done:
2132 if (ref != NULL)
2133 xmlFreeURI(ref);
2134 if (bas != NULL)
2135 xmlFreeURI(bas);
2136 if (res != NULL)
2137 xmlFreeURI(res);
2138 return(val);
2139 }
2140
2141 /**
2142 * xmlBuildRelativeURI:
2143 * @URI: the URI reference under consideration
2144 * @base: the base value
2145 *
2146 * Expresses the URI of the reference in terms relative to the
2147 * base. Some examples of this operation include:
2148 * base = "http://site1.com/docs/book1.html"
2149 * URI input URI returned
2150 * docs/pic1.gif pic1.gif
2151 * docs/img/pic1.gif img/pic1.gif
2152 * img/pic1.gif ../img/pic1.gif
2153 * http://site1.com/docs/pic1.gif pic1.gif
2154 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2155 *
2156 * base = "docs/book1.html"
2157 * URI input URI returned
2158 * docs/pic1.gif pic1.gif
2159 * docs/img/pic1.gif img/pic1.gif
2160 * img/pic1.gif ../img/pic1.gif
2161 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2162 *
2163 *
2164 * Note: if the URI reference is really weird or complicated, it may be
2165 * worthwhile to first convert it into a "nice" one by calling
2166 * xmlBuildURI (using 'base') before calling this routine,
2167 * since this routine (for reasonable efficiency) assumes URI has
2168 * already been through some validation.
2169 *
2170 * Returns a new URI string (to be freed by the caller) or NULL in case
2171 * error.
2172 */
2173 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2174 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2175 {
2176 xmlChar *val = NULL;
2177 int ret;
2178 int ix;
2179 int nbslash = 0;
2180 int len;
2181 xmlURIPtr ref = NULL;
2182 xmlURIPtr bas = NULL;
2183 xmlChar *bptr, *uptr, *vptr;
2184 int remove_path = 0;
2185
2186 if ((URI == NULL) || (*URI == 0))
2187 return NULL;
2188
2189 /*
2190 * First parse URI into a standard form
2191 */
2192 ref = xmlCreateURI ();
2193 if (ref == NULL)
2194 return NULL;
2195 /* If URI not already in "relative" form */
2196 if (URI[0] != '.') {
2197 ret = xmlParseURIReference (ref, (const char *) URI);
2198 if (ret != 0)
2199 goto done; /* Error in URI, return NULL */
2200 } else
2201 ref->path = (char *)xmlStrdup(URI);
2202
2203 /*
2204 * Next parse base into the same standard form
2205 */
2206 if ((base == NULL) || (*base == 0)) {
2207 val = xmlStrdup (URI);
2208 goto done;
2209 }
2210 bas = xmlCreateURI ();
2211 if (bas == NULL)
2212 goto done;
2213 if (base[0] != '.') {
2214 ret = xmlParseURIReference (bas, (const char *) base);
2215 if (ret != 0)
2216 goto done; /* Error in base, return NULL */
2217 } else
2218 bas->path = (char *)xmlStrdup(base);
2219
2220 /*
2221 * If the scheme / server on the URI differs from the base,
2222 * just return the URI
2223 */
2224 if ((ref->scheme != NULL) &&
2225 ((bas->scheme == NULL) ||
2226 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2227 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2228 val = xmlStrdup (URI);
2229 goto done;
2230 }
2231 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2232 val = xmlStrdup(BAD_CAST "");
2233 goto done;
2234 }
2235 if (bas->path == NULL) {
2236 val = xmlStrdup((xmlChar *)ref->path);
2237 goto done;
2238 }
2239 if (ref->path == NULL) {
2240 ref->path = (char *) "/";
2241 remove_path = 1;
2242 }
2243
2244 /*
2245 * At this point (at last!) we can compare the two paths
2246 *
2247 * First we take care of the special case where either of the
2248 * two path components may be missing (bug 316224)
2249 */
2250 bptr = (xmlChar *)bas->path;
2251 {
2252 xmlChar *rptr = (xmlChar *) ref->path;
2253 int pos = 0;
2254
2255 /*
2256 * Next we compare the two strings and find where they first differ
2257 */
2258 if ((*rptr == '.') && (rptr[1] == '/'))
2259 rptr += 2;
2260 if ((*bptr == '.') && (bptr[1] == '/'))
2261 bptr += 2;
2262 else if ((*bptr == '/') && (*rptr != '/'))
2263 bptr++;
2264 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2265 pos++;
2266
2267 if (bptr[pos] == rptr[pos]) {
2268 val = xmlStrdup(BAD_CAST "");
2269 goto done; /* (I can't imagine why anyone would do this) */
2270 }
2271
2272 /*
2273 * In URI, "back up" to the last '/' encountered. This will be the
2274 * beginning of the "unique" suffix of URI
2275 */
2276 ix = pos;
2277 for (; ix > 0; ix--) {
2278 if (rptr[ix - 1] == '/')
2279 break;
2280 }
2281 uptr = (xmlChar *)&rptr[ix];
2282
2283 /*
2284 * In base, count the number of '/' from the differing point
2285 */
2286 for (; bptr[ix] != 0; ix++) {
2287 if (bptr[ix] == '/')
2288 nbslash++;
2289 }
2290
2291 /*
2292 * e.g: URI="foo/" base="foo/bar" -> "./"
2293 */
2294 if (nbslash == 0 && !uptr[0]) {
2295 val = xmlStrdup(BAD_CAST "./");
2296 goto done;
2297 }
2298
2299 len = xmlStrlen (uptr) + 1;
2300 }
2301
2302 if (nbslash == 0) {
2303 if (uptr != NULL)
2304 /* exception characters from xmlSaveUri */
2305 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2306 goto done;
2307 }
2308
2309 /*
2310 * Allocate just enough space for the returned string -
2311 * length of the remainder of the URI, plus enough space
2312 * for the "../" groups, plus one for the terminator
2313 */
2314 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2315 if (val == NULL) {
2316 xmlURIErrMemory("building relative URI\n");
2317 goto done;
2318 }
2319 vptr = val;
2320 /*
2321 * Put in as many "../" as needed
2322 */
2323 for (; nbslash>0; nbslash--) {
2324 *vptr++ = '.';
2325 *vptr++ = '.';
2326 *vptr++ = '/';
2327 }
2328 /*
2329 * Finish up with the end of the URI
2330 */
2331 if (uptr != NULL) {
2332 if ((vptr > val) && (len > 0) &&
2333 (uptr[0] == '/') && (vptr[-1] == '/')) {
2334 memcpy (vptr, uptr + 1, len - 1);
2335 vptr[len - 2] = 0;
2336 } else {
2337 memcpy (vptr, uptr, len);
2338 vptr[len - 1] = 0;
2339 }
2340 } else {
2341 vptr[len - 1] = 0;
2342 }
2343
2344 /* escape the freshly-built path */
2345 vptr = val;
2346 /* exception characters from xmlSaveUri */
2347 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2348 xmlFree(vptr);
2349
2350 done:
2351 /*
2352 * Free the working variables
2353 */
2354 if (remove_path != 0)
2355 ref->path = NULL;
2356 if (ref != NULL)
2357 xmlFreeURI (ref);
2358 if (bas != NULL)
2359 xmlFreeURI (bas);
2360
2361 return val;
2362 }
2363
2364 /**
2365 * xmlCanonicPath:
2366 * @path: the resource locator in a filesystem notation
2367 *
2368 * Constructs a canonic path from the specified path.
2369 *
2370 * Returns a new canonic path, or a duplicate of the path parameter if the
2371 * construction fails. The caller is responsible for freeing the memory occupied
2372 * by the returned string. If there is insufficient memory available, or the
2373 * argument is NULL, the function returns NULL.
2374 */
2375 #define IS_WINDOWS_PATH(p) \
2376 ((p != NULL) && \
2377 (((p[0] >= 'a') && (p[0] <= 'z')) || \
2378 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \
2379 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\')))
2380 xmlChar *
xmlCanonicPath(const xmlChar * path)2381 xmlCanonicPath(const xmlChar *path)
2382 {
2383 /*
2384 * For Windows implementations, additional work needs to be done to
2385 * replace backslashes in pathnames with "forward slashes"
2386 */
2387 #if defined(_WIN32) && !defined(__CYGWIN__)
2388 int len = 0;
2389 char *p = NULL;
2390 #endif
2391 xmlURIPtr uri;
2392 xmlChar *ret;
2393 const xmlChar *absuri;
2394
2395 if (path == NULL)
2396 return(NULL);
2397
2398 #if defined(_WIN32)
2399 /*
2400 * We must not change the backslashes to slashes if the the path
2401 * starts with \\?\
2402 * Those paths can be up to 32k characters long.
2403 * Was added specifically for OpenOffice, those paths can't be converted
2404 * to URIs anyway.
2405 */
2406 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2407 (path[3] == '\\') )
2408 return xmlStrdup((const xmlChar *) path);
2409 #endif
2410
2411 /* sanitize filename starting with // so it can be used as URI */
2412 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2413 path++;
2414
2415 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2416 xmlFreeURI(uri);
2417 return xmlStrdup(path);
2418 }
2419
2420 /* Check if this is an "absolute uri" */
2421 absuri = xmlStrstr(path, BAD_CAST "://");
2422 if (absuri != NULL) {
2423 int l, j;
2424 unsigned char c;
2425 xmlChar *escURI;
2426
2427 /*
2428 * this looks like an URI where some parts have not been
2429 * escaped leading to a parsing problem. Check that the first
2430 * part matches a protocol.
2431 */
2432 l = absuri - path;
2433 /* Bypass if first part (part before the '://') is > 20 chars */
2434 if ((l <= 0) || (l > 20))
2435 goto path_processing;
2436 /* Bypass if any non-alpha characters are present in first part */
2437 for (j = 0;j < l;j++) {
2438 c = path[j];
2439 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2440 goto path_processing;
2441 }
2442
2443 /* Escape all except the characters specified in the supplied path */
2444 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2445 if (escURI != NULL) {
2446 /* Try parsing the escaped path */
2447 uri = xmlParseURI((const char *) escURI);
2448 /* If successful, return the escaped string */
2449 if (uri != NULL) {
2450 xmlFreeURI(uri);
2451 return escURI;
2452 }
2453 xmlFree(escURI);
2454 }
2455 }
2456
2457 path_processing:
2458 /* For Windows implementations, replace backslashes with 'forward slashes' */
2459 #if defined(_WIN32) && !defined(__CYGWIN__)
2460 /*
2461 * Create a URI structure
2462 */
2463 uri = xmlCreateURI();
2464 if (uri == NULL) { /* Guard against 'out of memory' */
2465 return(NULL);
2466 }
2467
2468 len = xmlStrlen(path);
2469 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2470 /* make the scheme 'file' */
2471 uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2472 /* allocate space for leading '/' + path + string terminator */
2473 uri->path = xmlMallocAtomic(len + 2);
2474 if (uri->path == NULL) {
2475 xmlFreeURI(uri); /* Guard against 'out of memory' */
2476 return(NULL);
2477 }
2478 /* Put in leading '/' plus path */
2479 uri->path[0] = '/';
2480 p = uri->path + 1;
2481 strncpy(p, (char *) path, len + 1);
2482 } else {
2483 uri->path = (char *) xmlStrdup(path);
2484 if (uri->path == NULL) {
2485 xmlFreeURI(uri);
2486 return(NULL);
2487 }
2488 p = uri->path;
2489 }
2490 /* Now change all occurrences of '\' to '/' */
2491 while (*p != '\0') {
2492 if (*p == '\\')
2493 *p = '/';
2494 p++;
2495 }
2496
2497 if (uri->scheme == NULL) {
2498 ret = xmlStrdup((const xmlChar *) uri->path);
2499 } else {
2500 ret = xmlSaveUri(uri);
2501 }
2502
2503 xmlFreeURI(uri);
2504 #else
2505 ret = xmlStrdup((const xmlChar *) path);
2506 #endif
2507 return(ret);
2508 }
2509
2510 /**
2511 * xmlPathToURI:
2512 * @path: the resource locator in a filesystem notation
2513 *
2514 * Constructs an URI expressing the existing path
2515 *
2516 * Returns a new URI, or a duplicate of the path parameter if the
2517 * construction fails. The caller is responsible for freeing the memory
2518 * occupied by the returned string. If there is insufficient memory available,
2519 * or the argument is NULL, the function returns NULL.
2520 */
2521 xmlChar *
xmlPathToURI(const xmlChar * path)2522 xmlPathToURI(const xmlChar *path)
2523 {
2524 xmlURIPtr uri;
2525 xmlURI temp;
2526 xmlChar *ret, *cal;
2527
2528 if (path == NULL)
2529 return(NULL);
2530
2531 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2532 xmlFreeURI(uri);
2533 return xmlStrdup(path);
2534 }
2535 cal = xmlCanonicPath(path);
2536 if (cal == NULL)
2537 return(NULL);
2538 #if defined(_WIN32) && !defined(__CYGWIN__)
2539 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2540 If 'cal' is a valid URI already then we are done here, as continuing would make
2541 it invalid. */
2542 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2543 xmlFreeURI(uri);
2544 return cal;
2545 }
2546 /* 'cal' can contain a relative path with backslashes. If that is processed
2547 by xmlSaveURI, they will be escaped and the external entity loader machinery
2548 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2549 ret = cal;
2550 while (*ret != '\0') {
2551 if (*ret == '\\')
2552 *ret = '/';
2553 ret++;
2554 }
2555 #endif
2556 memset(&temp, 0, sizeof(temp));
2557 temp.path = (char *) cal;
2558 ret = xmlSaveUri(&temp);
2559 xmlFree(cal);
2560 return(ret);
2561 }
2562 #define bottom_uri
2563 #include "elfgcchack.h"
2564