1 /* libxml2 - Library for parsing XML documents
2 * Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 *
4 * This file is not part of the GNU gettext program, but is used with
5 * GNU gettext.
6 *
7 * The original copyright notice is as follows:
8 */
9
10 /*
11 * Copyright (C) 1998-2012 Daniel Veillard. All Rights Reserved.
12 *
13 * Permission is hereby granted, free of charge, to any person obtaining a copy
14 * of this software and associated documentation files (the "Software"), to deal
15 * in the Software without restriction, including without limitation the rights
16 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17 * copies of the Software, and to permit persons to whom the Software is fur-
18 * nished to do so, subject to the following conditions:
19 *
20 * The above copyright notice and this permission notice shall be included in
21 * all copies or substantial portions of the Software.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FIT-
25 * NESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 * THE SOFTWARE.
30 *
31 * daniel@veillard.com
32 */
33
34 /**
35 * uri.c: set of generic URI related routines
36 *
37 * Reference: RFCs 3986, 2732 and 2373
38 */
39
40 #define IN_LIBXML
41 #include "libxml.h"
42
43 #include <string.h>
44
45 #include <libxml/xmlmemory.h>
46 #include <libxml/uri.h>
47 #include <libxml/globals.h>
48 #include <libxml/xmlerror.h>
49
/**
 * MAX_URI_LENGTH:
 *
 * The definition of the URI regexp in the above RFCs has no size limit.
 * In practice URIs are usually relatively short, except for the
 * data URI scheme as defined in RFC 2397. Even for data URIs the usual
 * maximum size before hitting random practical limits is around 64 KB,
 * and 4 KB is usually a maximum admitted limit for proper operations.
 * The value below is more a security limit than anything else and
 * really should never be hit by 'normal' operations.
 * Set to 1 MByte in 2012, this is only enforced on output.
 *
 * The expansion is parenthesized so that the macro acts as a single
 * value when it is used inside a larger expression.
 */
#define MAX_URI_LENGTH (1024 * 1024)
63
64 static void
xmlURIErrMemory(const char * extra)65 xmlURIErrMemory(const char *extra)
66 {
67 if (extra)
68 __xmlRaiseError(NULL, NULL, NULL,
69 NULL, NULL, XML_FROM_URI,
70 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
71 extra, NULL, NULL, 0, 0,
72 "Memory allocation failed : %s\n", extra);
73 else
74 __xmlRaiseError(NULL, NULL, NULL,
75 NULL, NULL, XML_FROM_URI,
76 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0,
77 NULL, NULL, NULL, 0, 0,
78 "Memory allocation failed\n");
79 }
80
81 static void xmlCleanURI(xmlURIPtr uri);
82
/*
 * Character classes from the old RFC 2396 rules, used in legacy handling
 * code. Every IS_xxx(x) macro takes a character value, except IS_UNWISE(p)
 * which takes a pointer to the character to test. Arguments may be
 * evaluated more than once.
 */

/*
 * lowalpha = "a" | "b" | "c" | ... | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | ... | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 *
 * Any earlier IS_DIGIT definition is discarded first.
 */
#undef IS_DIGIT
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x)                                                      \
    (((x) == '-') || ((x) == '_') || ((x) == '.') || ((x) == '!') ||    \
     ((x) == '~') || ((x) == '*') || ((x) == '\'') ||                   \
     ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike its siblings this macro takes a pointer to the character.
 */
#define IS_UNWISE(p)                                                    \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||           \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||          \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x)                                                  \
    (((x) == ';') || ((x) == '/') || ((x) == '?') || ((x) == ':') ||    \
     ((x) == '@') || ((x) == '&') || ((x) == '=') || ((x) == '+') ||    \
     ((x) == '$') || ((x) == ',') || ((x) == '[') || ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
150
/*
 * Skip to the next character, handling escaped sequences: when p points
 * at '%' the pointer moves over the whole three-character "%XX" escape,
 * otherwise it moves by one character.
 *
 * p must be a modifiable pointer lvalue and is evaluated more than once.
 * The macro itself does not check that two characters follow a '%'.
 * The parameter is parenthesized so that arguments such as *pp advance
 * the intended object.
 */

#define NEXT(p) ((*(p) == '%') ? ((p) += 3) : (p)++)
156
157 /*
158 * Productions from the spec.
159 *
160 * authority = server | reg_name
161 * reg_name = 1*( unreserved | escaped | "$" | "," |
162 * ";" | ":" | "@" | "&" | "=" | "+" )
163 *
164 * path = [ abs_path | opaque_part ]
165 */
166
/*
 * Wrapper around xmlStrndup() taking a char * source and yielding a
 * char * result. The whole expansion is parenthesized so the cast
 * cannot bind unexpectedly inside a larger expression.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))
168
169 /************************************************************************
170 * *
171 * RFC 3986 parser *
172 * *
173 ************************************************************************/
174
/*
 * Character classification macros for the RFC 3986 parser. Each macro
 * takes a pointer p and tests the character(s) it points to. p may be
 * evaluated several times, so it must not carry side effects.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                               \
      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||           \
       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The second and third characters are only read once the preceding one
 * matched, so the test never reads past a terminating 0. The parameter
 * is parenthesized before the offset is added so that any pointer
 * expression can be passed.
 */
#define ISA_PCT_ENCODED(p)                                              \
     ((*(p) == '%') && (ISA_HEXDIG((p) + 1)) && (ISA_HEXDIG((p) + 2)))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                    \
     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||    \
      ((*(p) == ':')) || ((*(p) == '@')))
224
225 /**
226 * xmlParse3986Scheme:
227 * @uri: pointer to an URI structure
228 * @str: pointer to the string to analyze
229 *
230 * Parse an URI scheme
231 *
232 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
233 *
234 * Returns 0 or the error code
235 */
236 static int
xmlParse3986Scheme(xmlURIPtr uri,const char ** str)237 xmlParse3986Scheme(xmlURIPtr uri, const char **str) {
238 const char *cur;
239
240 if (str == NULL)
241 return(-1);
242
243 cur = *str;
244 if (!ISA_ALPHA(cur))
245 return(2);
246 cur++;
247 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
248 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
249 if (uri != NULL) {
250 if (uri->scheme != NULL) xmlFree(uri->scheme);
251 uri->scheme = STRNDUP(*str, cur - *str);
252 }
253 *str = cur;
254 return(0);
255 }
256
257 /**
258 * xmlParse3986Fragment:
259 * @uri: pointer to an URI structure
260 * @str: pointer to the string to analyze
261 *
262 * Parse the query part of an URI
263 *
264 * fragment = *( pchar / "/" / "?" )
265 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
266 * in the fragment identifier but this is used very broadly for
267 * xpointer scheme selection, so we are allowing it here to not break
268 * for example all the DocBook processing chains.
269 *
270 * Returns 0 or the error code
271 */
272 static int
xmlParse3986Fragment(xmlURIPtr uri,const char ** str)273 xmlParse3986Fragment(xmlURIPtr uri, const char **str)
274 {
275 const char *cur;
276
277 if (str == NULL)
278 return (-1);
279
280 cur = *str;
281
282 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
283 (*cur == '[') || (*cur == ']') ||
284 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
285 NEXT(cur);
286 if (uri != NULL) {
287 if (uri->fragment != NULL)
288 xmlFree(uri->fragment);
289 if (uri->cleanup & 2)
290 uri->fragment = STRNDUP(*str, cur - *str);
291 else
292 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL);
293 }
294 *str = cur;
295 return (0);
296 }
297
298 /**
299 * xmlParse3986Query:
300 * @uri: pointer to an URI structure
301 * @str: pointer to the string to analyze
302 *
303 * Parse the query part of an URI
304 *
305 * query = *uric
306 *
307 * Returns 0 or the error code
308 */
309 static int
xmlParse3986Query(xmlURIPtr uri,const char ** str)310 xmlParse3986Query(xmlURIPtr uri, const char **str)
311 {
312 const char *cur;
313
314 if (str == NULL)
315 return (-1);
316
317 cur = *str;
318
319 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
320 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
321 NEXT(cur);
322 if (uri != NULL) {
323 if (uri->query != NULL)
324 xmlFree(uri->query);
325 if (uri->cleanup & 2)
326 uri->query = STRNDUP(*str, cur - *str);
327 else
328 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL);
329
330 /* Save the raw bytes of the query as well.
331 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114
332 */
333 if (uri->query_raw != NULL)
334 xmlFree (uri->query_raw);
335 uri->query_raw = STRNDUP (*str, cur - *str);
336 }
337 *str = cur;
338 return (0);
339 }
340
341 /**
342 * xmlParse3986Port:
343 * @uri: pointer to an URI structure
344 * @str: the string to analyze
345 *
346 * Parse a port part and fills in the appropriate fields
347 * of the @uri structure
348 *
349 * port = *DIGIT
350 *
351 * Returns 0 or the error code
352 */
353 static int
xmlParse3986Port(xmlURIPtr uri,const char ** str)354 xmlParse3986Port(xmlURIPtr uri, const char **str)
355 {
356 const char *cur = *str;
357 unsigned port = 0; /* unsigned for defined overflow behavior */
358
359 if (ISA_DIGIT(cur)) {
360 while (ISA_DIGIT(cur)) {
361 port = port * 10 + (*cur - '0');
362
363 cur++;
364 }
365 if (uri != NULL)
366 uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */
367 *str = cur;
368 return(0);
369 }
370 return(1);
371 }
372
373 /**
374 * xmlParse3986Userinfo:
375 * @uri: pointer to an URI structure
376 * @str: the string to analyze
377 *
378 * Parse an user informations part and fills in the appropriate fields
379 * of the @uri structure
380 *
381 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
382 *
383 * Returns 0 or the error code
384 */
385 static int
xmlParse3986Userinfo(xmlURIPtr uri,const char ** str)386 xmlParse3986Userinfo(xmlURIPtr uri, const char **str)
387 {
388 const char *cur;
389
390 cur = *str;
391 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
392 ISA_SUB_DELIM(cur) || (*cur == ':'))
393 NEXT(cur);
394 if (*cur == '@') {
395 if (uri != NULL) {
396 if (uri->user != NULL) xmlFree(uri->user);
397 if (uri->cleanup & 2)
398 uri->user = STRNDUP(*str, cur - *str);
399 else
400 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL);
401 }
402 *str = cur;
403 return(0);
404 }
405 return(1);
406 }
407
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze, advanced past the octet on success
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. At most three digits are consumed; a multi-digit
 * octet must not start with '0' and its value must not exceed 255.
 * @str is left untouched on failure.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /*
     * Collect up to three leading digits. The scan stops at the first
     * non-digit, so it never reads past the terminating 0.
     */
    while ((ndigits < 3) &&
           (cur[ndigits] >= '0') && (cur[ndigits] <= '9')) {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    if (ndigits == 0)
        return(1);
    /* "0" alone is fine, "01" or "012" are not part of the grammar */
    if ((ndigits > 1) && (cur[0] == '0'))
        return(1);
    /* the 250-255 production stops at 255 */
    if (value > 255)
        return(1);

    *str = cur + ndigits;
    return(0);
}
445 /**
446 * xmlParse3986Host:
447 * @uri: pointer to an URI structure
448 * @str: the string to analyze
449 *
450 * Parse an host part and fills in the appropriate fields
451 * of the @uri structure
452 *
453 * host = IP-literal / IPv4address / reg-name
454 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
455 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
456 * reg-name = *( unreserved / pct-encoded / sub-delims )
457 *
458 * Returns 0 or the error code
459 */
460 static int
xmlParse3986Host(xmlURIPtr uri,const char ** str)461 xmlParse3986Host(xmlURIPtr uri, const char **str)
462 {
463 const char *cur = *str;
464 const char *host;
465
466 host = cur;
467 /*
468 * IPv6 and future adressing scheme are enclosed between brackets
469 */
470 if (*cur == '[') {
471 cur++;
472 while ((*cur != ']') && (*cur != 0))
473 cur++;
474 if (*cur != ']')
475 return(1);
476 cur++;
477 goto found;
478 }
479 /*
480 * try to parse an IPv4
481 */
482 if (ISA_DIGIT(cur)) {
483 if (xmlParse3986DecOctet(&cur) != 0)
484 goto not_ipv4;
485 if (*cur != '.')
486 goto not_ipv4;
487 cur++;
488 if (xmlParse3986DecOctet(&cur) != 0)
489 goto not_ipv4;
490 if (*cur != '.')
491 goto not_ipv4;
492 if (xmlParse3986DecOctet(&cur) != 0)
493 goto not_ipv4;
494 if (*cur != '.')
495 goto not_ipv4;
496 if (xmlParse3986DecOctet(&cur) != 0)
497 goto not_ipv4;
498 goto found;
499 not_ipv4:
500 cur = *str;
501 }
502 /*
503 * then this should be a hostname which can be empty
504 */
505 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
506 NEXT(cur);
507 found:
508 if (uri != NULL) {
509 if (uri->authority != NULL) xmlFree(uri->authority);
510 uri->authority = NULL;
511 if (uri->server != NULL) xmlFree(uri->server);
512 if (cur != host) {
513 if (uri->cleanup & 2)
514 uri->server = STRNDUP(host, cur - host);
515 else
516 uri->server = xmlURIUnescapeString(host, cur - host, NULL);
517 } else
518 uri->server = NULL;
519 }
520 *str = cur;
521 return(0);
522 }
523
524 /**
525 * xmlParse3986Authority:
526 * @uri: pointer to an URI structure
527 * @str: the string to analyze
528 *
529 * Parse an authority part and fills in the appropriate fields
530 * of the @uri structure
531 *
532 * authority = [ userinfo "@" ] host [ ":" port ]
533 *
534 * Returns 0 or the error code
535 */
536 static int
xmlParse3986Authority(xmlURIPtr uri,const char ** str)537 xmlParse3986Authority(xmlURIPtr uri, const char **str)
538 {
539 const char *cur;
540 int ret;
541
542 cur = *str;
543 /*
544 * try to parse an userinfo and check for the trailing @
545 */
546 ret = xmlParse3986Userinfo(uri, &cur);
547 if ((ret != 0) || (*cur != '@'))
548 cur = *str;
549 else
550 cur++;
551 ret = xmlParse3986Host(uri, &cur);
552 if (ret != 0) return(ret);
553 if (*cur == ':') {
554 cur++;
555 ret = xmlParse3986Port(uri, &cur);
556 if (ret != 0) return(ret);
557 }
558 *str = cur;
559 return(0);
560 }
561
/**
 * xmlParse3986Segment:
 * @str:  the string to analyze, advanced past the segment
 * @forbid: an optional forbidden character, 0 for none
 * @empty: allow an empty segment
 *
 * Skip over a path segment
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Scanning stops at the first character which is not a pchar or which
 * is equal to @forbid.
 *
 * Returns 0 on success, 1 if the string does not start with a pchar and
 *         @empty is zero
 */
static int
xmlParse3986Segment(const char **str, char forbid, int empty)
{
    const char *cur = *str;

    if (!ISA_PCHAR(cur))
        return(empty ? 0 : 1);

    for (;;) {
        if (!ISA_PCHAR(cur) || (*cur == forbid))
            break;
        NEXT(cur);
    }
    *str = cur;
    return (0);
}
594
595 /**
596 * xmlParse3986PathAbEmpty:
597 * @uri: pointer to an URI structure
598 * @str: the string to analyze
599 *
600 * Parse an path absolute or empty and fills in the appropriate fields
601 * of the @uri structure
602 *
603 * path-abempty = *( "/" segment )
604 *
605 * Returns 0 or the error code
606 */
607 static int
xmlParse3986PathAbEmpty(xmlURIPtr uri,const char ** str)608 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str)
609 {
610 const char *cur;
611 int ret;
612
613 cur = *str;
614
615 while (*cur == '/') {
616 cur++;
617 ret = xmlParse3986Segment(&cur, 0, 1);
618 if (ret != 0) return(ret);
619 }
620 if (uri != NULL) {
621 if (uri->path != NULL) xmlFree(uri->path);
622 if (*str != cur) {
623 if (uri->cleanup & 2)
624 uri->path = STRNDUP(*str, cur - *str);
625 else
626 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
627 } else {
628 uri->path = NULL;
629 }
630 }
631 *str = cur;
632 return (0);
633 }
634
635 /**
636 * xmlParse3986PathAbsolute:
637 * @uri: pointer to an URI structure
638 * @str: the string to analyze
639 *
640 * Parse an path absolute and fills in the appropriate fields
641 * of the @uri structure
642 *
643 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
644 *
645 * Returns 0 or the error code
646 */
647 static int
xmlParse3986PathAbsolute(xmlURIPtr uri,const char ** str)648 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str)
649 {
650 const char *cur;
651 int ret;
652
653 cur = *str;
654
655 if (*cur != '/')
656 return(1);
657 cur++;
658 ret = xmlParse3986Segment(&cur, 0, 0);
659 if (ret == 0) {
660 while (*cur == '/') {
661 cur++;
662 ret = xmlParse3986Segment(&cur, 0, 1);
663 if (ret != 0) return(ret);
664 }
665 }
666 if (uri != NULL) {
667 if (uri->path != NULL) xmlFree(uri->path);
668 if (cur != *str) {
669 if (uri->cleanup & 2)
670 uri->path = STRNDUP(*str, cur - *str);
671 else
672 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
673 } else {
674 uri->path = NULL;
675 }
676 }
677 *str = cur;
678 return (0);
679 }
680
681 /**
682 * xmlParse3986PathRootless:
683 * @uri: pointer to an URI structure
684 * @str: the string to analyze
685 *
686 * Parse an path without root and fills in the appropriate fields
687 * of the @uri structure
688 *
689 * path-rootless = segment-nz *( "/" segment )
690 *
691 * Returns 0 or the error code
692 */
693 static int
xmlParse3986PathRootless(xmlURIPtr uri,const char ** str)694 xmlParse3986PathRootless(xmlURIPtr uri, const char **str)
695 {
696 const char *cur;
697 int ret;
698
699 cur = *str;
700
701 ret = xmlParse3986Segment(&cur, 0, 0);
702 if (ret != 0) return(ret);
703 while (*cur == '/') {
704 cur++;
705 ret = xmlParse3986Segment(&cur, 0, 1);
706 if (ret != 0) return(ret);
707 }
708 if (uri != NULL) {
709 if (uri->path != NULL) xmlFree(uri->path);
710 if (cur != *str) {
711 if (uri->cleanup & 2)
712 uri->path = STRNDUP(*str, cur - *str);
713 else
714 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
715 } else {
716 uri->path = NULL;
717 }
718 }
719 *str = cur;
720 return (0);
721 }
722
723 /**
724 * xmlParse3986PathNoScheme:
725 * @uri: pointer to an URI structure
726 * @str: the string to analyze
727 *
728 * Parse an path which is not a scheme and fills in the appropriate fields
729 * of the @uri structure
730 *
731 * path-noscheme = segment-nz-nc *( "/" segment )
732 *
733 * Returns 0 or the error code
734 */
735 static int
xmlParse3986PathNoScheme(xmlURIPtr uri,const char ** str)736 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str)
737 {
738 const char *cur;
739 int ret;
740
741 cur = *str;
742
743 ret = xmlParse3986Segment(&cur, ':', 0);
744 if (ret != 0) return(ret);
745 while (*cur == '/') {
746 cur++;
747 ret = xmlParse3986Segment(&cur, 0, 1);
748 if (ret != 0) return(ret);
749 }
750 if (uri != NULL) {
751 if (uri->path != NULL) xmlFree(uri->path);
752 if (cur != *str) {
753 if (uri->cleanup & 2)
754 uri->path = STRNDUP(*str, cur - *str);
755 else
756 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL);
757 } else {
758 uri->path = NULL;
759 }
760 }
761 *str = cur;
762 return (0);
763 }
764
765 /**
766 * xmlParse3986HierPart:
767 * @uri: pointer to an URI structure
768 * @str: the string to analyze
769 *
770 * Parse an hierarchical part and fills in the appropriate fields
771 * of the @uri structure
772 *
773 * hier-part = "//" authority path-abempty
774 * / path-absolute
775 * / path-rootless
776 * / path-empty
777 *
778 * Returns 0 or the error code
779 */
780 static int
xmlParse3986HierPart(xmlURIPtr uri,const char ** str)781 xmlParse3986HierPart(xmlURIPtr uri, const char **str)
782 {
783 const char *cur;
784 int ret;
785
786 cur = *str;
787
788 if ((*cur == '/') && (*(cur + 1) == '/')) {
789 cur += 2;
790 ret = xmlParse3986Authority(uri, &cur);
791 if (ret != 0) return(ret);
792 if (uri->server == NULL)
793 uri->port = -1;
794 ret = xmlParse3986PathAbEmpty(uri, &cur);
795 if (ret != 0) return(ret);
796 *str = cur;
797 return(0);
798 } else if (*cur == '/') {
799 ret = xmlParse3986PathAbsolute(uri, &cur);
800 if (ret != 0) return(ret);
801 } else if (ISA_PCHAR(cur)) {
802 ret = xmlParse3986PathRootless(uri, &cur);
803 if (ret != 0) return(ret);
804 } else {
805 /* path-empty is effectively empty */
806 if (uri != NULL) {
807 if (uri->path != NULL) xmlFree(uri->path);
808 uri->path = NULL;
809 }
810 }
811 *str = cur;
812 return (0);
813 }
814
815 /**
816 * xmlParse3986RelativeRef:
817 * @uri: pointer to an URI structure
818 * @str: the string to analyze
819 *
820 * Parse an URI string and fills in the appropriate fields
821 * of the @uri structure
822 *
823 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
824 * relative-part = "//" authority path-abempty
825 * / path-absolute
826 * / path-noscheme
827 * / path-empty
828 *
829 * Returns 0 or the error code
830 */
831 static int
xmlParse3986RelativeRef(xmlURIPtr uri,const char * str)832 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) {
833 int ret;
834
835 if ((*str == '/') && (*(str + 1) == '/')) {
836 str += 2;
837 ret = xmlParse3986Authority(uri, &str);
838 if (ret != 0) return(ret);
839 ret = xmlParse3986PathAbEmpty(uri, &str);
840 if (ret != 0) return(ret);
841 } else if (*str == '/') {
842 ret = xmlParse3986PathAbsolute(uri, &str);
843 if (ret != 0) return(ret);
844 } else if (ISA_PCHAR(str)) {
845 ret = xmlParse3986PathNoScheme(uri, &str);
846 if (ret != 0) return(ret);
847 } else {
848 /* path-empty is effectively empty */
849 if (uri != NULL) {
850 if (uri->path != NULL) xmlFree(uri->path);
851 uri->path = NULL;
852 }
853 }
854
855 if (*str == '?') {
856 str++;
857 ret = xmlParse3986Query(uri, &str);
858 if (ret != 0) return(ret);
859 }
860 if (*str == '#') {
861 str++;
862 ret = xmlParse3986Fragment(uri, &str);
863 if (ret != 0) return(ret);
864 }
865 if (*str != 0) {
866 xmlCleanURI(uri);
867 return(1);
868 }
869 return(0);
870 }
871
872
873 /**
874 * xmlParse3986URI:
875 * @uri: pointer to an URI structure
876 * @str: the string to analyze
877 *
878 * Parse an URI string and fills in the appropriate fields
879 * of the @uri structure
880 *
881 * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
882 *
883 * Returns 0 or the error code
884 */
885 static int
xmlParse3986URI(xmlURIPtr uri,const char * str)886 xmlParse3986URI(xmlURIPtr uri, const char *str) {
887 int ret;
888
889 ret = xmlParse3986Scheme(uri, &str);
890 if (ret != 0) return(ret);
891 if (*str != ':') {
892 return(1);
893 }
894 str++;
895 ret = xmlParse3986HierPart(uri, &str);
896 if (ret != 0) return(ret);
897 if (*str == '?') {
898 str++;
899 ret = xmlParse3986Query(uri, &str);
900 if (ret != 0) return(ret);
901 }
902 if (*str == '#') {
903 str++;
904 ret = xmlParse3986Fragment(uri, &str);
905 if (ret != 0) return(ret);
906 }
907 if (*str != 0) {
908 xmlCleanURI(uri);
909 return(1);
910 }
911 return(0);
912 }
913
914 /**
915 * xmlParse3986URIReference:
916 * @uri: pointer to an URI structure
917 * @str: the string to analyze
918 *
919 * Parse an URI reference string and fills in the appropriate fields
920 * of the @uri structure
921 *
922 * URI-reference = URI / relative-ref
923 *
924 * Returns 0 or the error code
925 */
926 static int
xmlParse3986URIReference(xmlURIPtr uri,const char * str)927 xmlParse3986URIReference(xmlURIPtr uri, const char *str) {
928 int ret;
929
930 if (str == NULL)
931 return(-1);
932 xmlCleanURI(uri);
933
934 /*
935 * Try first to parse absolute refs, then fallback to relative if
936 * it fails.
937 */
938 ret = xmlParse3986URI(uri, str);
939 if (ret != 0) {
940 xmlCleanURI(uri);
941 ret = xmlParse3986RelativeRef(uri, str);
942 if (ret != 0) {
943 xmlCleanURI(uri);
944 return(ret);
945 }
946 }
947 return(0);
948 }
949
950 /**
951 * xmlParseURI:
952 * @str: the URI string to analyze
953 *
954 * Parse an URI based on RFC 3986
955 *
956 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
957 *
958 * Returns a newly built xmlURIPtr or NULL in case of error
959 */
960 xmlURIPtr
xmlParseURI(const char * str)961 xmlParseURI(const char *str) {
962 xmlURIPtr uri;
963 int ret;
964
965 if (str == NULL)
966 return(NULL);
967 uri = xmlCreateURI();
968 if (uri != NULL) {
969 ret = xmlParse3986URIReference(uri, str);
970 if (ret) {
971 xmlFreeURI(uri);
972 return(NULL);
973 }
974 }
975 return(uri);
976 }
977
978 /**
979 * xmlParseURIReference:
980 * @uri: pointer to an URI structure
981 * @str: the string to analyze
982 *
983 * Parse an URI reference string based on RFC 3986 and fills in the
984 * appropriate fields of the @uri structure
985 *
986 * URI-reference = URI / relative-ref
987 *
988 * Returns 0 or the error code
989 */
990 int
xmlParseURIReference(xmlURIPtr uri,const char * str)991 xmlParseURIReference(xmlURIPtr uri, const char *str) {
992 return(xmlParse3986URIReference(uri, str));
993 }
994
995 /**
996 * xmlParseURIRaw:
997 * @str: the URI string to analyze
998 * @raw: if 1 unescaping of URI pieces are disabled
999 *
1000 * Parse an URI but allows to keep intact the original fragments.
1001 *
1002 * URI-reference = URI / relative-ref
1003 *
1004 * Returns a newly built xmlURIPtr or NULL in case of error
1005 */
1006 xmlURIPtr
xmlParseURIRaw(const char * str,int raw)1007 xmlParseURIRaw(const char *str, int raw) {
1008 xmlURIPtr uri;
1009 int ret;
1010
1011 if (str == NULL)
1012 return(NULL);
1013 uri = xmlCreateURI();
1014 if (uri != NULL) {
1015 if (raw) {
1016 uri->cleanup |= 2;
1017 }
1018 ret = xmlParseURIReference(uri, str);
1019 if (ret) {
1020 xmlFreeURI(uri);
1021 return(NULL);
1022 }
1023 }
1024 return(uri);
1025 }
1026
1027 /************************************************************************
1028 * *
1029 * Generic URI structure functions *
1030 * *
1031 ************************************************************************/
1032
1033 /**
1034 * xmlCreateURI:
1035 *
1036 * Simply creates an empty xmlURI
1037 *
1038 * Returns the new structure or NULL in case of error
1039 */
1040 xmlURIPtr
xmlCreateURI(void)1041 xmlCreateURI(void) {
1042 xmlURIPtr ret;
1043
1044 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI));
1045 if (ret == NULL) {
1046 xmlURIErrMemory("creating URI structure\n");
1047 return(NULL);
1048 }
1049 memset(ret, 0, sizeof(xmlURI));
1050 return(ret);
1051 }
1052
1053 /**
1054 * xmlSaveUriRealloc:
1055 *
1056 * Function to handle properly a reallocation when saving an URI
1057 * Also imposes some limit on the length of an URI string output
1058 */
1059 static xmlChar *
xmlSaveUriRealloc(xmlChar * ret,int * max)1060 xmlSaveUriRealloc(xmlChar *ret, int *max) {
1061 xmlChar *temp;
1062 int tmp;
1063
1064 if (*max > MAX_URI_LENGTH) {
1065 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n");
1066 return(NULL);
1067 }
1068 tmp = *max * 2;
1069 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1));
1070 if (temp == NULL) {
1071 xmlURIErrMemory("saving URI\n");
1072 return(NULL);
1073 }
1074 *max = tmp;
1075 return(temp);
1076 }
1077
1078 /**
1079 * xmlSaveUri:
1080 * @uri: pointer to an xmlURI
1081 *
1082 * Save the URI as an escaped string
1083 *
1084 * Returns a new string (to be deallocated by caller)
1085 */
1086 xmlChar *
xmlSaveUri(xmlURIPtr uri)1087 xmlSaveUri(xmlURIPtr uri) {
1088 xmlChar *ret = NULL;
1089 xmlChar *temp;
1090 const char *p;
1091 int len;
1092 int max;
1093
1094 if (uri == NULL) return(NULL);
1095
1096
1097 max = 80;
1098 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar));
1099 if (ret == NULL) {
1100 xmlURIErrMemory("saving URI\n");
1101 return(NULL);
1102 }
1103 len = 0;
1104
1105 if (uri->scheme != NULL) {
1106 p = uri->scheme;
1107 while (*p != 0) {
1108 if (len >= max) {
1109 temp = xmlSaveUriRealloc(ret, &max);
1110 if (temp == NULL) goto mem_error;
1111 ret = temp;
1112 }
1113 ret[len++] = *p++;
1114 }
1115 if (len >= max) {
1116 temp = xmlSaveUriRealloc(ret, &max);
1117 if (temp == NULL) goto mem_error;
1118 ret = temp;
1119 }
1120 ret[len++] = ':';
1121 }
1122 if (uri->opaque != NULL) {
1123 p = uri->opaque;
1124 while (*p != 0) {
1125 if (len + 3 >= max) {
1126 temp = xmlSaveUriRealloc(ret, &max);
1127 if (temp == NULL) goto mem_error;
1128 ret = temp;
1129 }
1130 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
1131 ret[len++] = *p++;
1132 else {
1133 int val = *(unsigned char *)p++;
1134 int hi = val / 0x10, lo = val % 0x10;
1135 ret[len++] = '%';
1136 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1137 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1138 }
1139 }
1140 } else {
1141 if ((uri->server != NULL) || (uri->port == -1)) {
1142 if (len + 3 >= max) {
1143 temp = xmlSaveUriRealloc(ret, &max);
1144 if (temp == NULL) goto mem_error;
1145 ret = temp;
1146 }
1147 ret[len++] = '/';
1148 ret[len++] = '/';
1149 if (uri->user != NULL) {
1150 p = uri->user;
1151 while (*p != 0) {
1152 if (len + 3 >= max) {
1153 temp = xmlSaveUriRealloc(ret, &max);
1154 if (temp == NULL) goto mem_error;
1155 ret = temp;
1156 }
1157 if ((IS_UNRESERVED(*(p))) ||
1158 ((*(p) == ';')) || ((*(p) == ':')) ||
1159 ((*(p) == '&')) || ((*(p) == '=')) ||
1160 ((*(p) == '+')) || ((*(p) == '$')) ||
1161 ((*(p) == ',')))
1162 ret[len++] = *p++;
1163 else {
1164 int val = *(unsigned char *)p++;
1165 int hi = val / 0x10, lo = val % 0x10;
1166 ret[len++] = '%';
1167 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1168 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1169 }
1170 }
1171 if (len + 3 >= max) {
1172 temp = xmlSaveUriRealloc(ret, &max);
1173 if (temp == NULL) goto mem_error;
1174 ret = temp;
1175 }
1176 ret[len++] = '@';
1177 }
1178 if (uri->server != NULL) {
1179 p = uri->server;
1180 while (*p != 0) {
1181 if (len >= max) {
1182 temp = xmlSaveUriRealloc(ret, &max);
1183 if (temp == NULL) goto mem_error;
1184 ret = temp;
1185 }
1186 ret[len++] = *p++;
1187 }
1188 if (uri->port > 0) {
1189 if (len + 10 >= max) {
1190 temp = xmlSaveUriRealloc(ret, &max);
1191 if (temp == NULL) goto mem_error;
1192 ret = temp;
1193 }
1194 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port);
1195 }
1196 }
1197 } else if (uri->authority != NULL) {
1198 if (len + 3 >= max) {
1199 temp = xmlSaveUriRealloc(ret, &max);
1200 if (temp == NULL) goto mem_error;
1201 ret = temp;
1202 }
1203 ret[len++] = '/';
1204 ret[len++] = '/';
1205 p = uri->authority;
1206 while (*p != 0) {
1207 if (len + 3 >= max) {
1208 temp = xmlSaveUriRealloc(ret, &max);
1209 if (temp == NULL) goto mem_error;
1210 ret = temp;
1211 }
1212 if ((IS_UNRESERVED(*(p))) ||
1213 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
1214 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1215 ((*(p) == '=')) || ((*(p) == '+')))
1216 ret[len++] = *p++;
1217 else {
1218 int val = *(unsigned char *)p++;
1219 int hi = val / 0x10, lo = val % 0x10;
1220 ret[len++] = '%';
1221 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1222 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1223 }
1224 }
1225 } else if (uri->scheme != NULL) {
1226 if (len + 3 >= max) {
1227 temp = xmlSaveUriRealloc(ret, &max);
1228 if (temp == NULL) goto mem_error;
1229 ret = temp;
1230 }
1231 }
1232 if (uri->path != NULL) {
1233 p = uri->path;
1234 /*
1235 * the colon in file:///d: should not be escaped or
1236 * Windows accesses fail later.
1237 */
1238 if ((uri->scheme != NULL) &&
1239 (p[0] == '/') &&
1240 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1241 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1242 (p[2] == ':') &&
1243 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) {
1244 if (len + 3 >= max) {
1245 temp = xmlSaveUriRealloc(ret, &max);
1246 if (temp == NULL) goto mem_error;
1247 ret = temp;
1248 }
1249 ret[len++] = *p++;
1250 ret[len++] = *p++;
1251 ret[len++] = *p++;
1252 }
1253 while (*p != 0) {
1254 if (len + 3 >= max) {
1255 temp = xmlSaveUriRealloc(ret, &max);
1256 if (temp == NULL) goto mem_error;
1257 ret = temp;
1258 }
1259 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1260 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1261 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1262 ((*(p) == ',')))
1263 ret[len++] = *p++;
1264 else {
1265 int val = *(unsigned char *)p++;
1266 int hi = val / 0x10, lo = val % 0x10;
1267 ret[len++] = '%';
1268 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1269 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1270 }
1271 }
1272 }
1273 if (uri->query_raw != NULL) {
1274 if (len + 1 >= max) {
1275 temp = xmlSaveUriRealloc(ret, &max);
1276 if (temp == NULL) goto mem_error;
1277 ret = temp;
1278 }
1279 ret[len++] = '?';
1280 p = uri->query_raw;
1281 while (*p != 0) {
1282 if (len + 1 >= max) {
1283 temp = xmlSaveUriRealloc(ret, &max);
1284 if (temp == NULL) goto mem_error;
1285 ret = temp;
1286 }
1287 ret[len++] = *p++;
1288 }
1289 } else if (uri->query != NULL) {
1290 if (len + 3 >= max) {
1291 temp = xmlSaveUriRealloc(ret, &max);
1292 if (temp == NULL) goto mem_error;
1293 ret = temp;
1294 }
1295 ret[len++] = '?';
1296 p = uri->query;
1297 while (*p != 0) {
1298 if (len + 3 >= max) {
1299 temp = xmlSaveUriRealloc(ret, &max);
1300 if (temp == NULL) goto mem_error;
1301 ret = temp;
1302 }
1303 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1304 ret[len++] = *p++;
1305 else {
1306 int val = *(unsigned char *)p++;
1307 int hi = val / 0x10, lo = val % 0x10;
1308 ret[len++] = '%';
1309 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1310 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1311 }
1312 }
1313 }
1314 }
1315 if (uri->fragment != NULL) {
1316 if (len + 3 >= max) {
1317 temp = xmlSaveUriRealloc(ret, &max);
1318 if (temp == NULL) goto mem_error;
1319 ret = temp;
1320 }
1321 ret[len++] = '#';
1322 p = uri->fragment;
1323 while (*p != 0) {
1324 if (len + 3 >= max) {
1325 temp = xmlSaveUriRealloc(ret, &max);
1326 if (temp == NULL) goto mem_error;
1327 ret = temp;
1328 }
1329 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
1330 ret[len++] = *p++;
1331 else {
1332 int val = *(unsigned char *)p++;
1333 int hi = val / 0x10, lo = val % 0x10;
1334 ret[len++] = '%';
1335 ret[len++] = hi + (hi > 9? 'A'-10 : '0');
1336 ret[len++] = lo + (lo > 9? 'A'-10 : '0');
1337 }
1338 }
1339 }
1340 if (len >= max) {
1341 temp = xmlSaveUriRealloc(ret, &max);
1342 if (temp == NULL) goto mem_error;
1343 ret = temp;
1344 }
1345 ret[len] = 0;
1346 return(ret);
1347
1348 mem_error:
1349 xmlFree(ret);
1350 return(NULL);
1351 }
1352
1353 /**
1354 * xmlPrintURI:
1355 * @stream: a FILE* for the output
1356 * @uri: pointer to an xmlURI
1357 *
1358 * Prints the URI in the stream @stream.
1359 */
1360 void
xmlPrintURI(FILE * stream,xmlURIPtr uri)1361 xmlPrintURI(FILE *stream, xmlURIPtr uri) {
1362 xmlChar *out;
1363
1364 out = xmlSaveUri(uri);
1365 if (out != NULL) {
1366 fprintf(stream, "%s", (char *) out);
1367 xmlFree(out);
1368 }
1369 }
1370
1371 /**
1372 * xmlCleanURI:
1373 * @uri: pointer to an xmlURI
1374 *
1375 * Make sure the xmlURI struct is free of content
1376 */
1377 static void
xmlCleanURI(xmlURIPtr uri)1378 xmlCleanURI(xmlURIPtr uri) {
1379 if (uri == NULL) return;
1380
1381 if (uri->scheme != NULL) xmlFree(uri->scheme);
1382 uri->scheme = NULL;
1383 if (uri->server != NULL) xmlFree(uri->server);
1384 uri->server = NULL;
1385 if (uri->user != NULL) xmlFree(uri->user);
1386 uri->user = NULL;
1387 if (uri->path != NULL) xmlFree(uri->path);
1388 uri->path = NULL;
1389 if (uri->fragment != NULL) xmlFree(uri->fragment);
1390 uri->fragment = NULL;
1391 if (uri->opaque != NULL) xmlFree(uri->opaque);
1392 uri->opaque = NULL;
1393 if (uri->authority != NULL) xmlFree(uri->authority);
1394 uri->authority = NULL;
1395 if (uri->query != NULL) xmlFree(uri->query);
1396 uri->query = NULL;
1397 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1398 uri->query_raw = NULL;
1399 }
1400
1401 /**
1402 * xmlFreeURI:
1403 * @uri: pointer to an xmlURI
1404 *
1405 * Free up the xmlURI struct
1406 */
1407 void
xmlFreeURI(xmlURIPtr uri)1408 xmlFreeURI(xmlURIPtr uri) {
1409 if (uri == NULL) return;
1410
1411 if (uri->scheme != NULL) xmlFree(uri->scheme);
1412 if (uri->server != NULL) xmlFree(uri->server);
1413 if (uri->user != NULL) xmlFree(uri->user);
1414 if (uri->path != NULL) xmlFree(uri->path);
1415 if (uri->fragment != NULL) xmlFree(uri->fragment);
1416 if (uri->opaque != NULL) xmlFree(uri->opaque);
1417 if (uri->authority != NULL) xmlFree(uri->authority);
1418 if (uri->query != NULL) xmlFree(uri->query);
1419 if (uri->query_raw != NULL) xmlFree(uri->query_raw);
1420 xmlFree(uri);
1421 }
1422
1423 /************************************************************************
1424 * *
1425 * Helper functions *
1426 * *
1427 ************************************************************************/
1428
/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g. In addition, runs of '/' found
 * after the first segment are collapsed into a single '/'; the leading
 * slashes of the path are kept as they are.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far. */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment. */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : keep only the last slash of a run */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence. */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur + 3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done. */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The regions overlap, so copy by hand instead of using strcpy. */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Look backwards for the last character of a previous segment. */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        /*
         * There is no previous segment when "cur" is the very start of
         * the path or when only slashes precede it.  Reaching "path"
         * alone is not enough to conclude that: a one-character first
         * segment (as in "a/b/../..") also stops the scan at "path" and
         * must still be re-examined, otherwise "a/.." would be left
         * behind unresolved.
         */
        if ((segp == path) && ((cur == path) || (segp[0] == '/')))
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path (absolute paths only).
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}
1617
/*
 * is_hex:
 * Returns 1 when @c is one of 0-9, a-f or A-F, 0 otherwise.
 */
static int is_hex(char c) {
    int decimal = (c >= '0') && (c <= '9');
    int lower = (c >= 'a') && (c <= 'f');
    int upper = (c >= 'A') && (c <= 'F');

    return(decimal || lower || upper);
}
1625
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" triplet made of two hexadecimal digits is replaced by the single
 * byte it encodes (no character-encoding conversion); every other byte,
 * including a '%' not followed by two hex digits, is copied unchanged.
 * The result is NUL-terminated and is never longer than the input.
 *
 * When @target is NULL a buffer of @len + 1 bytes is allocated and must be
 * freed by the caller; otherwise the output is written into @target,
 * which therefore needs room for up to @len + 1 bytes.
 *
 * Returns the unescaped string (@target when it was supplied), or NULL
 * if @str is NULL or the allocation failed
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    const char *src;
    char *result, *dst;
    int remaining = len;

    if (str == NULL)
        return(NULL);
    if (remaining <= 0)
        remaining = strlen(str);
    /* a length that does not fit an int shows up as negative here */
    if (remaining < 0)
        return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(remaining + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (remaining > 0) {
        if ((remaining > 2) && (src[0] == '%') &&
            (is_hex(src[1])) && (is_hex(src[2]))) {
            int value = 0;
            int i;

            /* fold the two hex digits following '%' into one byte */
            for (i = 1; i <= 2; i++) {
                char digit = src[i];

                value *= 16;
                if ((digit >= '0') && (digit <= '9'))
                    value += digit - '0';
                else if ((digit >= 'a') && (digit <= 'f'))
                    value += (digit - 'a') + 10;
                else if ((digit >= 'A') && (digit <= 'F'))
                    value += (digit - 'A') + 10;
            }
            *dst++ = value;
            src += 3;
            remaining -= 3;
        } else {
            *dst++ = *src++;
            remaining--;
        }
    }
    *dst = 0;
    return(result);
}
1687
1688 /**
1689 * xmlURIEscapeStr:
1690 * @str: string to escape
1691 * @list: exception list string of chars not to escape
1692 *
1693 * This routine escapes a string to hex, ignoring reserved characters (a-z)
1694 * and the characters in the exception list.
1695 *
1696 * Returns a new escaped string or NULL in case of error.
1697 */
1698 xmlChar *
xmlURIEscapeStr(const xmlChar * str,const xmlChar * list)1699 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) {
1700 xmlChar *ret, ch;
1701 xmlChar *temp;
1702 const xmlChar *in;
1703 int len, out;
1704
1705 if (str == NULL)
1706 return(NULL);
1707 if (str[0] == 0)
1708 return(xmlStrdup(str));
1709 len = xmlStrlen(str);
1710 if (!(len > 0)) return(NULL);
1711
1712 len += 20;
1713 ret = (xmlChar *) xmlMallocAtomic(len);
1714 if (ret == NULL) {
1715 xmlURIErrMemory("escaping URI value\n");
1716 return(NULL);
1717 }
1718 in = (const xmlChar *) str;
1719 out = 0;
1720 while(*in != 0) {
1721 if (len - out <= 3) {
1722 temp = xmlSaveUriRealloc(ret, &len);
1723 if (temp == NULL) {
1724 xmlURIErrMemory("escaping URI value\n");
1725 xmlFree(ret);
1726 return(NULL);
1727 }
1728 ret = temp;
1729 }
1730
1731 ch = *in;
1732
1733 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) {
1734 unsigned char val;
1735 ret[out++] = '%';
1736 val = ch >> 4;
1737 if (val <= 9)
1738 ret[out++] = '0' + val;
1739 else
1740 ret[out++] = 'A' + val - 0xA;
1741 val = ch & 0xF;
1742 if (val <= 9)
1743 ret[out++] = '0' + val;
1744 else
1745 ret[out++] = 'A' + val - 0xA;
1746 in++;
1747 } else {
1748 ret[out++] = *in++;
1749 }
1750
1751 }
1752 ret[out] = 0;
1753 return(ret);
1754 }
1755
1756 /**
1757 * xmlURIEscape:
1758 * @str: the string of the URI to escape
1759 *
1760 * Escaping routine, does not do validity checks !
1761 * It will try to escape the chars needing this, but this is heuristic
1762 * based it's impossible to be sure.
1763 *
1764 * Returns an copy of the string, but escaped
1765 *
1766 * 25 May 2001
1767 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly
1768 * according to RFC2396.
1769 * - Carl Douglas
1770 */
1771 xmlChar *
xmlURIEscape(const xmlChar * str)1772 xmlURIEscape(const xmlChar * str)
1773 {
1774 xmlChar *ret, *segment = NULL;
1775 xmlURIPtr uri;
1776 int ret2;
1777
1778 #define NULLCHK(p) if(!p) { \
1779 xmlURIErrMemory("escaping URI value\n"); \
1780 xmlFreeURI(uri); \
1781 return NULL; } \
1782
1783 if (str == NULL)
1784 return (NULL);
1785
1786 uri = xmlCreateURI();
1787 if (uri != NULL) {
1788 /*
1789 * Allow escaping errors in the unescaped form
1790 */
1791 uri->cleanup = 1;
1792 ret2 = xmlParseURIReference(uri, (const char *)str);
1793 if (ret2) {
1794 xmlFreeURI(uri);
1795 return (NULL);
1796 }
1797 }
1798
1799 if (!uri)
1800 return NULL;
1801
1802 ret = NULL;
1803
1804 if (uri->scheme) {
1805 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-.");
1806 NULLCHK(segment)
1807 ret = xmlStrcat(ret, segment);
1808 ret = xmlStrcat(ret, BAD_CAST ":");
1809 xmlFree(segment);
1810 }
1811
1812 if (uri->authority) {
1813 segment =
1814 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@");
1815 NULLCHK(segment)
1816 ret = xmlStrcat(ret, BAD_CAST "//");
1817 ret = xmlStrcat(ret, segment);
1818 xmlFree(segment);
1819 }
1820
1821 if (uri->user) {
1822 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,");
1823 NULLCHK(segment)
1824 ret = xmlStrcat(ret,BAD_CAST "//");
1825 ret = xmlStrcat(ret, segment);
1826 ret = xmlStrcat(ret, BAD_CAST "@");
1827 xmlFree(segment);
1828 }
1829
1830 if (uri->server) {
1831 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@");
1832 NULLCHK(segment)
1833 if (uri->user == NULL)
1834 ret = xmlStrcat(ret, BAD_CAST "//");
1835 ret = xmlStrcat(ret, segment);
1836 xmlFree(segment);
1837 }
1838
1839 if (uri->port) {
1840 xmlChar port[10];
1841
1842 snprintf((char *) port, 10, "%d", uri->port);
1843 ret = xmlStrcat(ret, BAD_CAST ":");
1844 ret = xmlStrcat(ret, port);
1845 }
1846
1847 if (uri->path) {
1848 segment =
1849 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;");
1850 NULLCHK(segment)
1851 ret = xmlStrcat(ret, segment);
1852 xmlFree(segment);
1853 }
1854
1855 if (uri->query_raw) {
1856 ret = xmlStrcat(ret, BAD_CAST "?");
1857 ret = xmlStrcat(ret, BAD_CAST uri->query_raw);
1858 }
1859 else if (uri->query) {
1860 segment =
1861 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$");
1862 NULLCHK(segment)
1863 ret = xmlStrcat(ret, BAD_CAST "?");
1864 ret = xmlStrcat(ret, segment);
1865 xmlFree(segment);
1866 }
1867
1868 if (uri->opaque) {
1869 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST "");
1870 NULLCHK(segment)
1871 ret = xmlStrcat(ret, segment);
1872 xmlFree(segment);
1873 }
1874
1875 if (uri->fragment) {
1876 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#");
1877 NULLCHK(segment)
1878 ret = xmlStrcat(ret, BAD_CAST "#");
1879 ret = xmlStrcat(ret, segment);
1880 xmlFree(segment);
1881 }
1882
1883 xmlFreeURI(uri);
1884 #undef NULLCHK
1885
1886 return (ret);
1887 }
1888
1889 /************************************************************************
1890 * *
1891 * Public functions *
1892 * *
1893 ************************************************************************/
1894
1895 /**
1896 * xmlBuildURI:
1897 * @URI: the URI instance found in the document
1898 * @base: the base value
1899 *
1900 * Computes he final URI of the reference done by checking that
1901 * the given URI is valid, and building the final URI using the
1902 * base URI. This is processed according to section 5.2 of the
1903 * RFC 2396
1904 *
1905 * 5.2. Resolving Relative References to Absolute Form
1906 *
1907 * Returns a new URI string (to be freed by the caller) or NULL in case
1908 * of error.
1909 */
1910 xmlChar *
xmlBuildURI(const xmlChar * URI,const xmlChar * base)1911 xmlBuildURI(const xmlChar *URI, const xmlChar *base) {
1912 xmlChar *val = NULL;
1913 int ret, len, indx, cur, out;
1914 xmlURIPtr ref = NULL;
1915 xmlURIPtr bas = NULL;
1916 xmlURIPtr res = NULL;
1917
1918 /*
1919 * 1) The URI reference is parsed into the potential four components and
1920 * fragment identifier, as described in Section 4.3.
1921 *
1922 * NOTE that a completely empty URI is treated by modern browsers
1923 * as a reference to "." rather than as a synonym for the current
1924 * URI. Should we do that here?
1925 */
1926 if (URI == NULL)
1927 ret = -1;
1928 else {
1929 if (*URI) {
1930 ref = xmlCreateURI();
1931 if (ref == NULL)
1932 goto done;
1933 ret = xmlParseURIReference(ref, (const char *) URI);
1934 }
1935 else
1936 ret = 0;
1937 }
1938 if (ret != 0)
1939 goto done;
1940 if ((ref != NULL) && (ref->scheme != NULL)) {
1941 /*
1942 * The URI is absolute don't modify.
1943 */
1944 val = xmlStrdup(URI);
1945 goto done;
1946 }
1947 if (base == NULL)
1948 ret = -1;
1949 else {
1950 bas = xmlCreateURI();
1951 if (bas == NULL)
1952 goto done;
1953 ret = xmlParseURIReference(bas, (const char *) base);
1954 }
1955 if (ret != 0) {
1956 if (ref)
1957 val = xmlSaveUri(ref);
1958 goto done;
1959 }
1960 if (ref == NULL) {
1961 /*
1962 * the base fragment must be ignored
1963 */
1964 if (bas->fragment != NULL) {
1965 xmlFree(bas->fragment);
1966 bas->fragment = NULL;
1967 }
1968 val = xmlSaveUri(bas);
1969 goto done;
1970 }
1971
1972 /*
1973 * 2) If the path component is empty and the scheme, authority, and
1974 * query components are undefined, then it is a reference to the
1975 * current document and we are done. Otherwise, the reference URI's
1976 * query and fragment components are defined as found (or not found)
1977 * within the URI reference and not inherited from the base URI.
1978 *
1979 * NOTE that in modern browsers, the parsing differs from the above
1980 * in the following aspect: the query component is allowed to be
1981 * defined while still treating this as a reference to the current
1982 * document.
1983 */
1984 res = xmlCreateURI();
1985 if (res == NULL)
1986 goto done;
1987 if ((ref->scheme == NULL) && (ref->path == NULL) &&
1988 ((ref->authority == NULL) && (ref->server == NULL))) {
1989 if (bas->scheme != NULL)
1990 res->scheme = xmlMemStrdup(bas->scheme);
1991 if (bas->authority != NULL)
1992 res->authority = xmlMemStrdup(bas->authority);
1993 else if ((bas->server != NULL) || (bas->port == -1)) {
1994 if (bas->server != NULL)
1995 res->server = xmlMemStrdup(bas->server);
1996 if (bas->user != NULL)
1997 res->user = xmlMemStrdup(bas->user);
1998 res->port = bas->port;
1999 }
2000 if (bas->path != NULL)
2001 res->path = xmlMemStrdup(bas->path);
2002 if (ref->query_raw != NULL)
2003 res->query_raw = xmlMemStrdup (ref->query_raw);
2004 else if (ref->query != NULL)
2005 res->query = xmlMemStrdup(ref->query);
2006 else if (bas->query_raw != NULL)
2007 res->query_raw = xmlMemStrdup(bas->query_raw);
2008 else if (bas->query != NULL)
2009 res->query = xmlMemStrdup(bas->query);
2010 if (ref->fragment != NULL)
2011 res->fragment = xmlMemStrdup(ref->fragment);
2012 goto step_7;
2013 }
2014
2015 /*
2016 * 3) If the scheme component is defined, indicating that the reference
2017 * starts with a scheme name, then the reference is interpreted as an
2018 * absolute URI and we are done. Otherwise, the reference URI's
2019 * scheme is inherited from the base URI's scheme component.
2020 */
2021 if (ref->scheme != NULL) {
2022 val = xmlSaveUri(ref);
2023 goto done;
2024 }
2025 if (bas->scheme != NULL)
2026 res->scheme = xmlMemStrdup(bas->scheme);
2027
2028 if (ref->query_raw != NULL)
2029 res->query_raw = xmlMemStrdup(ref->query_raw);
2030 else if (ref->query != NULL)
2031 res->query = xmlMemStrdup(ref->query);
2032 if (ref->fragment != NULL)
2033 res->fragment = xmlMemStrdup(ref->fragment);
2034
2035 /*
2036 * 4) If the authority component is defined, then the reference is a
2037 * network-path and we skip to step 7. Otherwise, the reference
2038 * URI's authority is inherited from the base URI's authority
2039 * component, which will also be undefined if the URI scheme does not
2040 * use an authority component.
2041 */
2042 if ((ref->authority != NULL) || (ref->server != NULL)) {
2043 if (ref->authority != NULL)
2044 res->authority = xmlMemStrdup(ref->authority);
2045 else {
2046 res->server = xmlMemStrdup(ref->server);
2047 if (ref->user != NULL)
2048 res->user = xmlMemStrdup(ref->user);
2049 res->port = ref->port;
2050 }
2051 if (ref->path != NULL)
2052 res->path = xmlMemStrdup(ref->path);
2053 goto step_7;
2054 }
2055 if (bas->authority != NULL)
2056 res->authority = xmlMemStrdup(bas->authority);
2057 else if ((bas->server != NULL) || (bas->port == -1)) {
2058 if (bas->server != NULL)
2059 res->server = xmlMemStrdup(bas->server);
2060 if (bas->user != NULL)
2061 res->user = xmlMemStrdup(bas->user);
2062 res->port = bas->port;
2063 }
2064
2065 /*
2066 * 5) If the path component begins with a slash character ("/"), then
2067 * the reference is an absolute-path and we skip to step 7.
2068 */
2069 if ((ref->path != NULL) && (ref->path[0] == '/')) {
2070 res->path = xmlMemStrdup(ref->path);
2071 goto step_7;
2072 }
2073
2074
2075 /*
2076 * 6) If this step is reached, then we are resolving a relative-path
2077 * reference. The relative path needs to be merged with the base
2078 * URI's path. Although there are many ways to do this, we will
2079 * describe a simple method using a separate string buffer.
2080 *
2081 * Allocate a buffer large enough for the result string.
2082 */
2083 len = 2; /* extra / and 0 */
2084 if (ref->path != NULL)
2085 len += strlen(ref->path);
2086 if (bas->path != NULL)
2087 len += strlen(bas->path);
2088 res->path = (char *) xmlMallocAtomic(len);
2089 if (res->path == NULL) {
2090 xmlURIErrMemory("resolving URI against base\n");
2091 goto done;
2092 }
2093 res->path[0] = 0;
2094
2095 /*
2096 * a) All but the last segment of the base URI's path component is
2097 * copied to the buffer. In other words, any characters after the
2098 * last (right-most) slash character, if any, are excluded.
2099 */
2100 cur = 0;
2101 out = 0;
2102 if (bas->path != NULL) {
2103 while (bas->path[cur] != 0) {
2104 while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
2105 cur++;
2106 if (bas->path[cur] == 0)
2107 break;
2108
2109 cur++;
2110 while (out < cur) {
2111 res->path[out] = bas->path[out];
2112 out++;
2113 }
2114 }
2115 }
2116 res->path[out] = 0;
2117
2118 /*
2119 * b) The reference's path component is appended to the buffer
2120 * string.
2121 */
2122 if (ref->path != NULL && ref->path[0] != 0) {
2123 indx = 0;
2124 /*
2125 * Ensure the path includes a '/'
2126 */
2127 if ((out == 0) && (bas->server != NULL))
2128 res->path[out++] = '/';
2129 while (ref->path[indx] != 0) {
2130 res->path[out++] = ref->path[indx++];
2131 }
2132 }
2133 res->path[out] = 0;
2134
2135 /*
2136 * Steps c) to h) are really path normalization steps
2137 */
2138 xmlNormalizeURIPath(res->path);
2139
2140 step_7:
2141
2142 /*
2143 * 7) The resulting URI components, including any inherited from the
2144 * base URI, are recombined to give the absolute form of the URI
2145 * reference.
2146 */
2147 val = xmlSaveUri(res);
2148
2149 done:
2150 if (ref != NULL)
2151 xmlFreeURI(ref);
2152 if (bas != NULL)
2153 xmlFreeURI(bas);
2154 if (res != NULL)
2155 xmlFreeURI(res);
2156 return(val);
2157 }
2158
2159 /**
2160 * xmlBuildRelativeURI:
2161 * @URI: the URI reference under consideration
2162 * @base: the base value
2163 *
2164 * Expresses the URI of the reference in terms relative to the
2165 * base. Some examples of this operation include:
2166 * base = "http://site1.com/docs/book1.html"
2167 * URI input URI returned
2168 * docs/pic1.gif pic1.gif
2169 * docs/img/pic1.gif img/pic1.gif
2170 * img/pic1.gif ../img/pic1.gif
2171 * http://site1.com/docs/pic1.gif pic1.gif
2172 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif
2173 *
2174 * base = "docs/book1.html"
2175 * URI input URI returned
2176 * docs/pic1.gif pic1.gif
2177 * docs/img/pic1.gif img/pic1.gif
2178 * img/pic1.gif ../img/pic1.gif
2179 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif
2180 *
2181 *
2182 * Note: if the URI reference is really wierd or complicated, it may be
2183 * worthwhile to first convert it into a "nice" one by calling
2184 * xmlBuildURI (using 'base') before calling this routine,
2185 * since this routine (for reasonable efficiency) assumes URI has
2186 * already been through some validation.
2187 *
2188 * Returns a new URI string (to be freed by the caller) or NULL in case
2189 * error.
2190 */
2191 xmlChar *
xmlBuildRelativeURI(const xmlChar * URI,const xmlChar * base)2192 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base)
2193 {
2194 xmlChar *val = NULL;
2195 int ret;
2196 int ix;
2197 int nbslash = 0;
2198 int len;
2199 xmlURIPtr ref = NULL;
2200 xmlURIPtr bas = NULL;
2201 xmlChar *bptr, *uptr, *vptr;
2202 int remove_path = 0;
2203
2204 if ((URI == NULL) || (*URI == 0))
2205 return NULL;
2206
2207 /*
2208 * First parse URI into a standard form
2209 */
2210 ref = xmlCreateURI ();
2211 if (ref == NULL)
2212 return NULL;
2213 /* If URI not already in "relative" form */
2214 if (URI[0] != '.') {
2215 ret = xmlParseURIReference (ref, (const char *) URI);
2216 if (ret != 0)
2217 goto done; /* Error in URI, return NULL */
2218 } else
2219 ref->path = (char *)xmlStrdup(URI);
2220
2221 /*
2222 * Next parse base into the same standard form
2223 */
2224 if ((base == NULL) || (*base == 0)) {
2225 val = xmlStrdup (URI);
2226 goto done;
2227 }
2228 bas = xmlCreateURI ();
2229 if (bas == NULL)
2230 goto done;
2231 if (base[0] != '.') {
2232 ret = xmlParseURIReference (bas, (const char *) base);
2233 if (ret != 0)
2234 goto done; /* Error in base, return NULL */
2235 } else
2236 bas->path = (char *)xmlStrdup(base);
2237
2238 /*
2239 * If the scheme / server on the URI differs from the base,
2240 * just return the URI
2241 */
2242 if ((ref->scheme != NULL) &&
2243 ((bas->scheme == NULL) ||
2244 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) ||
2245 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) {
2246 val = xmlStrdup (URI);
2247 goto done;
2248 }
2249 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) {
2250 val = xmlStrdup(BAD_CAST "");
2251 goto done;
2252 }
2253 if (bas->path == NULL) {
2254 val = xmlStrdup((xmlChar *)ref->path);
2255 goto done;
2256 }
2257 if (ref->path == NULL) {
2258 ref->path = (char *) "/";
2259 remove_path = 1;
2260 }
2261
2262 /*
2263 * At this point (at last!) we can compare the two paths
2264 *
2265 * First we take care of the special case where either of the
2266 * two path components may be missing (bug 316224)
2267 */
2268 bptr = (xmlChar *)bas->path;
2269 {
2270 xmlChar *rptr = (xmlChar *) ref->path;
2271 int pos = 0;
2272
2273 /*
2274 * Next we compare the two strings and find where they first differ
2275 */
2276 if ((*rptr == '.') && (rptr[1] == '/'))
2277 rptr += 2;
2278 if ((*bptr == '.') && (bptr[1] == '/'))
2279 bptr += 2;
2280 else if ((*bptr == '/') && (*rptr != '/'))
2281 bptr++;
2282 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0))
2283 pos++;
2284
2285 if (bptr[pos] == rptr[pos]) {
2286 val = xmlStrdup(BAD_CAST "");
2287 goto done; /* (I can't imagine why anyone would do this) */
2288 }
2289
2290 /*
2291 * In URI, "back up" to the last '/' encountered. This will be the
2292 * beginning of the "unique" suffix of URI
2293 */
2294 ix = pos;
2295 for (; ix > 0; ix--) {
2296 if (rptr[ix - 1] == '/')
2297 break;
2298 }
2299 uptr = (xmlChar *)&rptr[ix];
2300
2301 /*
2302 * In base, count the number of '/' from the differing point
2303 */
2304 for (; bptr[ix] != 0; ix++) {
2305 if (bptr[ix] == '/')
2306 nbslash++;
2307 }
2308
2309 /*
2310 * e.g: URI="foo/" base="foo/bar" -> "./"
2311 */
2312 if (nbslash == 0 && !uptr[0]) {
2313 val = xmlStrdup(BAD_CAST "./");
2314 goto done;
2315 }
2316
2317 len = xmlStrlen (uptr) + 1;
2318 }
2319
2320 if (nbslash == 0) {
2321 if (uptr != NULL)
2322 /* exception characters from xmlSaveUri */
2323 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,");
2324 goto done;
2325 }
2326
2327 /*
2328 * Allocate just enough space for the returned string -
2329 * length of the remainder of the URI, plus enough space
2330 * for the "../" groups, plus one for the terminator
2331 */
2332 val = (xmlChar *) xmlMalloc (len + 3 * nbslash);
2333 if (val == NULL) {
2334 xmlURIErrMemory("building relative URI\n");
2335 goto done;
2336 }
2337 vptr = val;
2338 /*
2339 * Put in as many "../" as needed
2340 */
2341 for (; nbslash>0; nbslash--) {
2342 *vptr++ = '.';
2343 *vptr++ = '.';
2344 *vptr++ = '/';
2345 }
2346 /*
2347 * Finish up with the end of the URI
2348 */
2349 if (uptr != NULL) {
2350 if ((vptr > val) && (len > 0) &&
2351 (uptr[0] == '/') && (vptr[-1] == '/')) {
2352 memcpy (vptr, uptr + 1, len - 1);
2353 vptr[len - 2] = 0;
2354 } else {
2355 memcpy (vptr, uptr, len);
2356 vptr[len - 1] = 0;
2357 }
2358 } else {
2359 vptr[len - 1] = 0;
2360 }
2361
2362 /* escape the freshly-built path */
2363 vptr = val;
2364 /* exception characters from xmlSaveUri */
2365 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,");
2366 xmlFree(vptr);
2367
2368 done:
2369 /*
2370 * Free the working variables
2371 */
2372 if (remove_path != 0)
2373 ref->path = NULL;
2374 if (ref != NULL)
2375 xmlFreeURI (ref);
2376 if (bas != NULL)
2377 xmlFreeURI (bas);
2378
2379 return val;
2380 }
2381
2382 /**
2383 * xmlCanonicPath:
2384 * @path: the resource locator in a filesystem notation
2385 *
2386 * Constructs a canonic path from the specified path.
2387 *
2388 * Returns a new canonic path, or a duplicate of the path parameter if the
2389 * construction fails. The caller is responsible for freeing the memory occupied
2390 * by the returned string. If there is insufficient memory available, or the
2391 * argument is NULL, the function returns NULL.
2392 */
/*
 * IS_WINDOWS_PATH:
 * @p: candidate path string (any character pointer type), may be NULL
 *
 * Evaluates to non-zero when @p is non-NULL and starts with an ASCII
 * letter, then ':', then either a forward slash or a backslash
 * (for example "C:/" or "c:" followed by a backslash).
 *
 * The argument is expanded several times, so it must be free of side
 * effects. Every use is parenthesized so that compound expressions such
 * as (s + 1) can be passed safely.
 */
#define IS_WINDOWS_PATH(p)                                          \
    (((p) != NULL) &&                                               \
     ((((p)[0] >= 'a') && ((p)[0] <= 'z')) ||                       \
      (((p)[0] >= 'A') && ((p)[0] <= 'Z'))) &&                      \
     ((p)[1] == ':') && (((p)[2] == '/') || ((p)[2] == '\\')))
2398 xmlChar *
xmlCanonicPath(const xmlChar * path)2399 xmlCanonicPath(const xmlChar *path)
2400 {
2401 /*
2402 * For Windows implementations, additional work needs to be done to
2403 * replace backslashes in pathnames with "forward slashes"
2404 */
2405 #if defined(_WIN32) && !defined(__CYGWIN__)
2406 int len = 0;
2407 char *p = NULL;
2408 #endif
2409 xmlURIPtr uri;
2410 xmlChar *ret;
2411 const xmlChar *absuri;
2412
2413 if (path == NULL)
2414 return(NULL);
2415
2416 #if defined(_WIN32)
2417 /*
2418 * We must not change the backslashes to slashes if the the path
2419 * starts with \\?\
2420 * Those paths can be up to 32k characters long.
2421 * Was added specifically for OpenOffice, those paths can't be converted
2422 * to URIs anyway.
2423 */
2424 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') &&
2425 (path[3] == '\\') )
2426 return xmlStrdup((const xmlChar *) path);
2427 #endif
2428
2429 /* sanitize filename starting with // so it can be used as URI */
2430 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/'))
2431 path++;
2432
2433 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2434 xmlFreeURI(uri);
2435 return xmlStrdup(path);
2436 }
2437
2438 /* Check if this is an "absolute uri" */
2439 absuri = xmlStrstr(path, BAD_CAST "://");
2440 if (absuri != NULL) {
2441 int l, j;
2442 unsigned char c;
2443 xmlChar *escURI;
2444
2445 /*
2446 * this looks like an URI where some parts have not been
2447 * escaped leading to a parsing problem. Check that the first
2448 * part matches a protocol.
2449 */
2450 l = absuri - path;
2451 /* Bypass if first part (part before the '://') is > 20 chars */
2452 if ((l <= 0) || (l > 20))
2453 goto path_processing;
2454 /* Bypass if any non-alpha characters are present in first part */
2455 for (j = 0;j < l;j++) {
2456 c = path[j];
2457 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z'))))
2458 goto path_processing;
2459 }
2460
2461 /* Escape all except the characters specified in the supplied path */
2462 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;=");
2463 if (escURI != NULL) {
2464 /* Try parsing the escaped path */
2465 uri = xmlParseURI((const char *) escURI);
2466 /* If successful, return the escaped string */
2467 if (uri != NULL) {
2468 xmlFreeURI(uri);
2469 return escURI;
2470 }
2471 xmlFree(escURI);
2472 }
2473 }
2474
2475 path_processing:
2476 /* For Windows implementations, replace backslashes with 'forward slashes' */
2477 #if defined(_WIN32) && !defined(__CYGWIN__)
2478 /*
2479 * Create a URI structure
2480 */
2481 uri = xmlCreateURI();
2482 if (uri == NULL) { /* Guard against 'out of memory' */
2483 return(NULL);
2484 }
2485
2486 len = xmlStrlen(path);
2487 if ((len > 2) && IS_WINDOWS_PATH(path)) {
2488 /* make the scheme 'file' */
2489 uri->scheme = (char *) xmlStrdup(BAD_CAST "file");
2490 /* allocate space for leading '/' + path + string terminator */
2491 uri->path = xmlMallocAtomic(len + 2);
2492 if (uri->path == NULL) {
2493 xmlFreeURI(uri); /* Guard agains 'out of memory' */
2494 return(NULL);
2495 }
2496 /* Put in leading '/' plus path */
2497 uri->path[0] = '/';
2498 p = uri->path + 1;
2499 strncpy(p, (char *) path, len + 1);
2500 } else {
2501 uri->path = (char *) xmlStrdup(path);
2502 if (uri->path == NULL) {
2503 xmlFreeURI(uri);
2504 return(NULL);
2505 }
2506 p = uri->path;
2507 }
2508 /* Now change all occurences of '\' to '/' */
2509 while (*p != '\0') {
2510 if (*p == '\\')
2511 *p = '/';
2512 p++;
2513 }
2514
2515 if (uri->scheme == NULL) {
2516 ret = xmlStrdup((const xmlChar *) uri->path);
2517 } else {
2518 ret = xmlSaveUri(uri);
2519 }
2520
2521 xmlFreeURI(uri);
2522 #else
2523 ret = xmlStrdup((const xmlChar *) path);
2524 #endif
2525 return(ret);
2526 }
2527
2528 /**
2529 * xmlPathToURI:
2530 * @path: the resource locator in a filesystem notation
2531 *
2532 * Constructs an URI expressing the existing path
2533 *
2534 * Returns a new URI, or a duplicate of the path parameter if the
2535 * construction fails. The caller is responsible for freeing the memory
2536 * occupied by the returned string. If there is insufficient memory available,
2537 * or the argument is NULL, the function returns NULL.
2538 */
2539 xmlChar *
xmlPathToURI(const xmlChar * path)2540 xmlPathToURI(const xmlChar *path)
2541 {
2542 xmlURIPtr uri;
2543 xmlURI temp;
2544 xmlChar *ret, *cal;
2545
2546 if (path == NULL)
2547 return(NULL);
2548
2549 if ((uri = xmlParseURI((const char *) path)) != NULL) {
2550 xmlFreeURI(uri);
2551 return xmlStrdup(path);
2552 }
2553 cal = xmlCanonicPath(path);
2554 if (cal == NULL)
2555 return(NULL);
2556 #if defined(_WIN32) && !defined(__CYGWIN__)
2557 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?)
2558 If 'cal' is a valid URI allready then we are done here, as continuing would make
2559 it invalid. */
2560 if ((uri = xmlParseURI((const char *) cal)) != NULL) {
2561 xmlFreeURI(uri);
2562 return cal;
2563 }
2564 /* 'cal' can contain a relative path with backslashes. If that is processed
2565 by xmlSaveURI, they will be escaped and the external entity loader machinery
2566 will fail. So convert them to slashes. Misuse 'ret' for walking. */
2567 ret = cal;
2568 while (*ret != '\0') {
2569 if (*ret == '\\')
2570 *ret = '/';
2571 ret++;
2572 }
2573 #endif
2574 memset(&temp, 0, sizeof(temp));
2575 temp.path = (char *) cal;
2576 ret = xmlSaveUri(&temp);
2577 xmlFree(cal);
2578 return(ret);
2579 }
2580 #define bottom_uri
2581 #include "elfgcchack.h"
2582