1 /** 2 * uri.c: set of generic URI related routines 3 * 4 * Reference: RFCs 3986, 2732 and 2373 5 * 6 * See Copyright for the status of this software. 7 * 8 * daniel@veillard.com 9 */ 10 11 #define IN_LIBXML 12 #include "libxml.h" 13 14 #include <string.h> 15 16 #include <libxml/xmlmemory.h> 17 #include <libxml/uri.h> 18 #include <libxml/globals.h> 19 #include <libxml/xmlerror.h> 20 21 /** 22 * MAX_URI_LENGTH: 23 * 24 * The definition of the URI regexp in the above RFC has no size limit 25 * In practice they are usually relativey short except for the 26 * data URI scheme as defined in RFC 2397. Even for data URI the usual 27 * maximum size before hitting random practical limits is around 64 KB 28 * and 4KB is usually a maximum admitted limit for proper operations. 29 * The value below is more a security limit than anything else and 30 * really should never be hit by 'normal' operations 31 * Set to 1 MByte in 2012, this is only enforced on output 32 */ 33 #define MAX_URI_LENGTH 1024 * 1024 34 35 static void 36 xmlURIErrMemory(const char *extra) 37 { 38 if (extra) 39 __xmlRaiseError(NULL, NULL, NULL, 40 NULL, NULL, XML_FROM_URI, 41 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 42 extra, NULL, NULL, 0, 0, 43 "Memory allocation failed : %s\n", extra); 44 else 45 __xmlRaiseError(NULL, NULL, NULL, 46 NULL, NULL, XML_FROM_URI, 47 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 48 NULL, NULL, NULL, 0, 0, 49 "Memory allocation failed\n"); 50 } 51 52 static void xmlCleanURI(xmlURIPtr uri); 53 54 /* 55 * Old rule from 2396 used in legacy handling code 56 * alpha = lowalpha | upalpha 57 */ 58 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 59 60 61 /* 62 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 63 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 64 * "u" | "v" | "w" | "x" | "y" | "z" 65 */ 66 67 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 68 69 /* 70 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 71 
* "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | 72 * "U" | "V" | "W" | "X" | "Y" | "Z" 73 */ 74 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) 75 76 #ifdef IS_DIGIT 77 #undef IS_DIGIT 78 #endif 79 /* 80 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 81 */ 82 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) 83 84 /* 85 * alphanum = alpha | digit 86 */ 87 88 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) 89 90 /* 91 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" 92 */ 93 94 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ 95 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ 96 ((x) == '(') || ((x) == ')')) 97 98 /* 99 * unwise = "{" | "}" | "|" | "\" | "^" | "`" 100 */ 101 102 #define IS_UNWISE(p) \ 103 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ 104 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ 105 ((*(p) == ']')) || ((*(p) == '`'))) 106 /* 107 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | 108 * "[" | "]" 109 */ 110 111 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ 112 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ 113 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ 114 ((x) == ']')) 115 116 /* 117 * unreserved = alphanum | mark 118 */ 119 120 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) 121 122 /* 123 * Skip to next pointer char, handle escaped sequences 124 */ 125 126 #define NEXT(p) ((*p == '%')? p += 3 : p++) 127 128 /* 129 * Productions from the spec. 
130 * 131 * authority = server | reg_name 132 * reg_name = 1*( unreserved | escaped | "$" | "," | 133 * ";" | ":" | "@" | "&" | "=" | "+" ) 134 * 135 * path = [ abs_path | opaque_part ] 136 */ 137 138 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n)) 139 140 /************************************************************************ 141 * * 142 * RFC 3986 parser * 143 * * 144 ************************************************************************/ 145 146 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) 147 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ 148 ((*(p) >= 'A') && (*(p) <= 'Z'))) 149 #define ISA_HEXDIG(p) \ 150 (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ 151 ((*(p) >= 'A') && (*(p) <= 'F'))) 152 153 /* 154 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 155 * / "*" / "+" / "," / ";" / "=" 156 */ 157 #define ISA_SUB_DELIM(p) \ 158 (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ 159 ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ 160 ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ 161 ((*(p) == '=')) || ((*(p) == '\''))) 162 163 /* 164 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 165 */ 166 #define ISA_GEN_DELIM(p) \ 167 (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ 168 ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ 169 ((*(p) == '@'))) 170 171 /* 172 * reserved = gen-delims / sub-delims 173 */ 174 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) 175 176 /* 177 * unreserved = ALPHA / DIGIT / "-" / "." 
/ "_" / "~" 178 */ 179 #define ISA_UNRESERVED(p) \ 180 ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ 181 ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) 182 183 /* 184 * pct-encoded = "%" HEXDIG HEXDIG 185 */ 186 #define ISA_PCT_ENCODED(p) \ 187 ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) 188 189 /* 190 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 191 */ 192 #define ISA_PCHAR(p) \ 193 (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ 194 ((*(p) == ':')) || ((*(p) == '@'))) 195 196 /** 197 * xmlParse3986Scheme: 198 * @uri: pointer to an URI structure 199 * @str: pointer to the string to analyze 200 * 201 * Parse an URI scheme 202 * 203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 204 * 205 * Returns 0 or the error code 206 */ 207 static int 208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) { 209 const char *cur; 210 211 if (str == NULL) 212 return(-1); 213 214 cur = *str; 215 if (!ISA_ALPHA(cur)) 216 return(2); 217 cur++; 218 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || 219 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; 220 if (uri != NULL) { 221 if (uri->scheme != NULL) xmlFree(uri->scheme); 222 uri->scheme = STRNDUP(*str, cur - *str); 223 } 224 *str = cur; 225 return(0); 226 } 227 228 /** 229 * xmlParse3986Fragment: 230 * @uri: pointer to an URI structure 231 * @str: pointer to the string to analyze 232 * 233 * Parse the query part of an URI 234 * 235 * fragment = *( pchar / "/" / "?" ) 236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' 237 * in the fragment identifier but this is used very broadly for 238 * xpointer scheme selection, so we are allowing it here to not break 239 * for example all the DocBook processing chains. 
240 * 241 * Returns 0 or the error code 242 */ 243 static int 244 xmlParse3986Fragment(xmlURIPtr uri, const char **str) 245 { 246 const char *cur; 247 248 if (str == NULL) 249 return (-1); 250 251 cur = *str; 252 253 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 254 (*cur == '[') || (*cur == ']') || 255 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 256 NEXT(cur); 257 if (uri != NULL) { 258 if (uri->fragment != NULL) 259 xmlFree(uri->fragment); 260 if (uri->cleanup & 2) 261 uri->fragment = STRNDUP(*str, cur - *str); 262 else 263 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 264 } 265 *str = cur; 266 return (0); 267 } 268 269 /** 270 * xmlParse3986Query: 271 * @uri: pointer to an URI structure 272 * @str: pointer to the string to analyze 273 * 274 * Parse the query part of an URI 275 * 276 * query = *uric 277 * 278 * Returns 0 or the error code 279 */ 280 static int 281 xmlParse3986Query(xmlURIPtr uri, const char **str) 282 { 283 const char *cur; 284 285 if (str == NULL) 286 return (-1); 287 288 cur = *str; 289 290 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 291 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 292 NEXT(cur); 293 if (uri != NULL) { 294 if (uri->query != NULL) 295 xmlFree(uri->query); 296 if (uri->cleanup & 2) 297 uri->query = STRNDUP(*str, cur - *str); 298 else 299 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 300 301 /* Save the raw bytes of the query as well. 
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114 303 */ 304 if (uri->query_raw != NULL) 305 xmlFree (uri->query_raw); 306 uri->query_raw = STRNDUP (*str, cur - *str); 307 } 308 *str = cur; 309 return (0); 310 } 311 312 /** 313 * xmlParse3986Port: 314 * @uri: pointer to an URI structure 315 * @str: the string to analyze 316 * 317 * Parse a port part and fills in the appropriate fields 318 * of the @uri structure 319 * 320 * port = *DIGIT 321 * 322 * Returns 0 or the error code 323 */ 324 static int 325 xmlParse3986Port(xmlURIPtr uri, const char **str) 326 { 327 const char *cur = *str; 328 unsigned port = 0; /* unsigned for defined overflow behavior */ 329 330 if (ISA_DIGIT(cur)) { 331 while (ISA_DIGIT(cur)) { 332 port = port * 10 + (*cur - '0'); 333 334 cur++; 335 } 336 if (uri != NULL) 337 uri->port = port & INT_MAX; /* port value modulo INT_MAX+1 */ 338 *str = cur; 339 return(0); 340 } 341 return(1); 342 } 343 344 /** 345 * xmlParse3986Userinfo: 346 * @uri: pointer to an URI structure 347 * @str: the string to analyze 348 * 349 * Parse an user informations part and fills in the appropriate fields 350 * of the @uri structure 351 * 352 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 353 * 354 * Returns 0 or the error code 355 */ 356 static int 357 xmlParse3986Userinfo(xmlURIPtr uri, const char **str) 358 { 359 const char *cur; 360 361 cur = *str; 362 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || 363 ISA_SUB_DELIM(cur) || (*cur == ':')) 364 NEXT(cur); 365 if (*cur == '@') { 366 if (uri != NULL) { 367 if (uri->user != NULL) xmlFree(uri->user); 368 if (uri->cleanup & 2) 369 uri->user = STRNDUP(*str, cur - *str); 370 else 371 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 372 } 373 *str = cur; 374 return(0); 375 } 376 return(1); 377 } 378 379 /** 380 * xmlParse3986DecOctet: 381 * @str: the string to analyze 382 * 383 * dec-octet = DIGIT ; 0-9 384 * / %x31-39 DIGIT ; 10-99 385 * / "1" 2DIGIT ; 100-199 386 * / "2" 
%x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;

    if (!(ISA_DIGIT(cur)))
        return(1);
    if (!ISA_DIGIT(cur+1))
        /* single digit: 0-9 */
        cur++;
    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
        /* two digits without leading zero: 10-99 */
        cur += 2;
    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
        /* 100-199 */
        cur += 3;
    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
             (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
        /* 200-249 */
        cur += 3;
    else if ((*cur == '2') && (*(cur + 1) == '5') &&
             (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
        /*
         * 250-255: both bounds apply to the third digit, so that
         * 256-259 are rejected
         */
        cur += 3;
    else
        return(1);
    *str = cur;
    return(0);
}
/**
 * xmlParse3986Host:
 * @uri:  pointer to an URI structure
 * @str:  the string to analyze
 *
 * Parse an host part and fills in the appropriate fields
 * of the @uri structure
 *
 * host          = IP-literal / IPv4address / reg-name
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "."
dec-octet 427 * reg-name = *( unreserved / pct-encoded / sub-delims ) 428 * 429 * Returns 0 or the error code 430 */ 431 static int 432 xmlParse3986Host(xmlURIPtr uri, const char **str) 433 { 434 const char *cur = *str; 435 const char *host; 436 437 host = cur; 438 /* 439 * IPv6 and future adressing scheme are enclosed between brackets 440 */ 441 if (*cur == '[') { 442 cur++; 443 while ((*cur != ']') && (*cur != 0)) 444 cur++; 445 if (*cur != ']') 446 return(1); 447 cur++; 448 goto found; 449 } 450 /* 451 * try to parse an IPv4 452 */ 453 if (ISA_DIGIT(cur)) { 454 if (xmlParse3986DecOctet(&cur) != 0) 455 goto not_ipv4; 456 if (*cur != '.') 457 goto not_ipv4; 458 cur++; 459 if (xmlParse3986DecOctet(&cur) != 0) 460 goto not_ipv4; 461 if (*cur != '.') 462 goto not_ipv4; 463 if (xmlParse3986DecOctet(&cur) != 0) 464 goto not_ipv4; 465 if (*cur != '.') 466 goto not_ipv4; 467 if (xmlParse3986DecOctet(&cur) != 0) 468 goto not_ipv4; 469 goto found; 470 not_ipv4: 471 cur = *str; 472 } 473 /* 474 * then this should be a hostname which can be empty 475 */ 476 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) 477 NEXT(cur); 478 found: 479 if (uri != NULL) { 480 if (uri->authority != NULL) xmlFree(uri->authority); 481 uri->authority = NULL; 482 if (uri->server != NULL) xmlFree(uri->server); 483 if (cur != host) { 484 if (uri->cleanup & 2) 485 uri->server = STRNDUP(host, cur - host); 486 else 487 uri->server = xmlURIUnescapeString(host, cur - host, NULL); 488 } else 489 uri->server = NULL; 490 } 491 *str = cur; 492 return(0); 493 } 494 495 /** 496 * xmlParse3986Authority: 497 * @uri: pointer to an URI structure 498 * @str: the string to analyze 499 * 500 * Parse an authority part and fills in the appropriate fields 501 * of the @uri structure 502 * 503 * authority = [ userinfo "@" ] host [ ":" port ] 504 * 505 * Returns 0 or the error code 506 */ 507 static int 508 xmlParse3986Authority(xmlURIPtr uri, const char **str) 509 { 510 const char *cur; 511 int 
ret; 512 513 cur = *str; 514 /* 515 * try to parse an userinfo and check for the trailing @ 516 */ 517 ret = xmlParse3986Userinfo(uri, &cur); 518 if ((ret != 0) || (*cur != '@')) 519 cur = *str; 520 else 521 cur++; 522 ret = xmlParse3986Host(uri, &cur); 523 if (ret != 0) return(ret); 524 if (*cur == ':') { 525 cur++; 526 ret = xmlParse3986Port(uri, &cur); 527 if (ret != 0) return(ret); 528 } 529 *str = cur; 530 return(0); 531 } 532 533 /** 534 * xmlParse3986Segment: 535 * @str: the string to analyze 536 * @forbid: an optional forbidden character 537 * @empty: allow an empty segment 538 * 539 * Parse a segment and fills in the appropriate fields 540 * of the @uri structure 541 * 542 * segment = *pchar 543 * segment-nz = 1*pchar 544 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 545 * ; non-zero-length segment without any colon ":" 546 * 547 * Returns 0 or the error code 548 */ 549 static int 550 xmlParse3986Segment(const char **str, char forbid, int empty) 551 { 552 const char *cur; 553 554 cur = *str; 555 if (!ISA_PCHAR(cur)) { 556 if (empty) 557 return(0); 558 return(1); 559 } 560 while (ISA_PCHAR(cur) && (*cur != forbid)) 561 NEXT(cur); 562 *str = cur; 563 return (0); 564 } 565 566 /** 567 * xmlParse3986PathAbEmpty: 568 * @uri: pointer to an URI structure 569 * @str: the string to analyze 570 * 571 * Parse an path absolute or empty and fills in the appropriate fields 572 * of the @uri structure 573 * 574 * path-abempty = *( "/" segment ) 575 * 576 * Returns 0 or the error code 577 */ 578 static int 579 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str) 580 { 581 const char *cur; 582 int ret; 583 584 cur = *str; 585 586 while (*cur == '/') { 587 cur++; 588 ret = xmlParse3986Segment(&cur, 0, 1); 589 if (ret != 0) return(ret); 590 } 591 if (uri != NULL) { 592 if (uri->path != NULL) xmlFree(uri->path); 593 if (*str != cur) { 594 if (uri->cleanup & 2) 595 uri->path = STRNDUP(*str, cur - *str); 596 else 597 uri->path = 
xmlURIUnescapeString(*str, cur - *str, NULL); 598 } else { 599 uri->path = NULL; 600 } 601 } 602 *str = cur; 603 return (0); 604 } 605 606 /** 607 * xmlParse3986PathAbsolute: 608 * @uri: pointer to an URI structure 609 * @str: the string to analyze 610 * 611 * Parse an path absolute and fills in the appropriate fields 612 * of the @uri structure 613 * 614 * path-absolute = "/" [ segment-nz *( "/" segment ) ] 615 * 616 * Returns 0 or the error code 617 */ 618 static int 619 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str) 620 { 621 const char *cur; 622 int ret; 623 624 cur = *str; 625 626 if (*cur != '/') 627 return(1); 628 cur++; 629 ret = xmlParse3986Segment(&cur, 0, 0); 630 if (ret == 0) { 631 while (*cur == '/') { 632 cur++; 633 ret = xmlParse3986Segment(&cur, 0, 1); 634 if (ret != 0) return(ret); 635 } 636 } 637 if (uri != NULL) { 638 if (uri->path != NULL) xmlFree(uri->path); 639 if (cur != *str) { 640 if (uri->cleanup & 2) 641 uri->path = STRNDUP(*str, cur - *str); 642 else 643 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 644 } else { 645 uri->path = NULL; 646 } 647 } 648 *str = cur; 649 return (0); 650 } 651 652 /** 653 * xmlParse3986PathRootless: 654 * @uri: pointer to an URI structure 655 * @str: the string to analyze 656 * 657 * Parse an path without root and fills in the appropriate fields 658 * of the @uri structure 659 * 660 * path-rootless = segment-nz *( "/" segment ) 661 * 662 * Returns 0 or the error code 663 */ 664 static int 665 xmlParse3986PathRootless(xmlURIPtr uri, const char **str) 666 { 667 const char *cur; 668 int ret; 669 670 cur = *str; 671 672 ret = xmlParse3986Segment(&cur, 0, 0); 673 if (ret != 0) return(ret); 674 while (*cur == '/') { 675 cur++; 676 ret = xmlParse3986Segment(&cur, 0, 1); 677 if (ret != 0) return(ret); 678 } 679 if (uri != NULL) { 680 if (uri->path != NULL) xmlFree(uri->path); 681 if (cur != *str) { 682 if (uri->cleanup & 2) 683 uri->path = STRNDUP(*str, cur - *str); 684 else 685 uri->path = 
xmlURIUnescapeString(*str, cur - *str, NULL); 686 } else { 687 uri->path = NULL; 688 } 689 } 690 *str = cur; 691 return (0); 692 } 693 694 /** 695 * xmlParse3986PathNoScheme: 696 * @uri: pointer to an URI structure 697 * @str: the string to analyze 698 * 699 * Parse an path which is not a scheme and fills in the appropriate fields 700 * of the @uri structure 701 * 702 * path-noscheme = segment-nz-nc *( "/" segment ) 703 * 704 * Returns 0 or the error code 705 */ 706 static int 707 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str) 708 { 709 const char *cur; 710 int ret; 711 712 cur = *str; 713 714 ret = xmlParse3986Segment(&cur, ':', 0); 715 if (ret != 0) return(ret); 716 while (*cur == '/') { 717 cur++; 718 ret = xmlParse3986Segment(&cur, 0, 1); 719 if (ret != 0) return(ret); 720 } 721 if (uri != NULL) { 722 if (uri->path != NULL) xmlFree(uri->path); 723 if (cur != *str) { 724 if (uri->cleanup & 2) 725 uri->path = STRNDUP(*str, cur - *str); 726 else 727 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 728 } else { 729 uri->path = NULL; 730 } 731 } 732 *str = cur; 733 return (0); 734 } 735 736 /** 737 * xmlParse3986HierPart: 738 * @uri: pointer to an URI structure 739 * @str: the string to analyze 740 * 741 * Parse an hierarchical part and fills in the appropriate fields 742 * of the @uri structure 743 * 744 * hier-part = "//" authority path-abempty 745 * / path-absolute 746 * / path-rootless 747 * / path-empty 748 * 749 * Returns 0 or the error code 750 */ 751 static int 752 xmlParse3986HierPart(xmlURIPtr uri, const char **str) 753 { 754 const char *cur; 755 int ret; 756 757 cur = *str; 758 759 if ((*cur == '/') && (*(cur + 1) == '/')) { 760 cur += 2; 761 ret = xmlParse3986Authority(uri, &cur); 762 if (ret != 0) return(ret); 763 if (uri->server == NULL) 764 uri->port = -1; 765 ret = xmlParse3986PathAbEmpty(uri, &cur); 766 if (ret != 0) return(ret); 767 *str = cur; 768 return(0); 769 } else if (*cur == '/') { 770 ret = xmlParse3986PathAbsolute(uri, 
&cur); 771 if (ret != 0) return(ret); 772 } else if (ISA_PCHAR(cur)) { 773 ret = xmlParse3986PathRootless(uri, &cur); 774 if (ret != 0) return(ret); 775 } else { 776 /* path-empty is effectively empty */ 777 if (uri != NULL) { 778 if (uri->path != NULL) xmlFree(uri->path); 779 uri->path = NULL; 780 } 781 } 782 *str = cur; 783 return (0); 784 } 785 786 /** 787 * xmlParse3986RelativeRef: 788 * @uri: pointer to an URI structure 789 * @str: the string to analyze 790 * 791 * Parse an URI string and fills in the appropriate fields 792 * of the @uri structure 793 * 794 * relative-ref = relative-part [ "?" query ] [ "#" fragment ] 795 * relative-part = "//" authority path-abempty 796 * / path-absolute 797 * / path-noscheme 798 * / path-empty 799 * 800 * Returns 0 or the error code 801 */ 802 static int 803 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) { 804 int ret; 805 806 if ((*str == '/') && (*(str + 1) == '/')) { 807 str += 2; 808 ret = xmlParse3986Authority(uri, &str); 809 if (ret != 0) return(ret); 810 ret = xmlParse3986PathAbEmpty(uri, &str); 811 if (ret != 0) return(ret); 812 } else if (*str == '/') { 813 ret = xmlParse3986PathAbsolute(uri, &str); 814 if (ret != 0) return(ret); 815 } else if (ISA_PCHAR(str)) { 816 ret = xmlParse3986PathNoScheme(uri, &str); 817 if (ret != 0) return(ret); 818 } else { 819 /* path-empty is effectively empty */ 820 if (uri != NULL) { 821 if (uri->path != NULL) xmlFree(uri->path); 822 uri->path = NULL; 823 } 824 } 825 826 if (*str == '?') { 827 str++; 828 ret = xmlParse3986Query(uri, &str); 829 if (ret != 0) return(ret); 830 } 831 if (*str == '#') { 832 str++; 833 ret = xmlParse3986Fragment(uri, &str); 834 if (ret != 0) return(ret); 835 } 836 if (*str != 0) { 837 xmlCleanURI(uri); 838 return(1); 839 } 840 return(0); 841 } 842 843 844 /** 845 * xmlParse3986URI: 846 * @uri: pointer to an URI structure 847 * @str: the string to analyze 848 * 849 * Parse an URI string and fills in the appropriate fields 850 * of the @uri structure 
851 * 852 * scheme ":" hier-part [ "?" query ] [ "#" fragment ] 853 * 854 * Returns 0 or the error code 855 */ 856 static int 857 xmlParse3986URI(xmlURIPtr uri, const char *str) { 858 int ret; 859 860 ret = xmlParse3986Scheme(uri, &str); 861 if (ret != 0) return(ret); 862 if (*str != ':') { 863 return(1); 864 } 865 str++; 866 ret = xmlParse3986HierPart(uri, &str); 867 if (ret != 0) return(ret); 868 if (*str == '?') { 869 str++; 870 ret = xmlParse3986Query(uri, &str); 871 if (ret != 0) return(ret); 872 } 873 if (*str == '#') { 874 str++; 875 ret = xmlParse3986Fragment(uri, &str); 876 if (ret != 0) return(ret); 877 } 878 if (*str != 0) { 879 xmlCleanURI(uri); 880 return(1); 881 } 882 return(0); 883 } 884 885 /** 886 * xmlParse3986URIReference: 887 * @uri: pointer to an URI structure 888 * @str: the string to analyze 889 * 890 * Parse an URI reference string and fills in the appropriate fields 891 * of the @uri structure 892 * 893 * URI-reference = URI / relative-ref 894 * 895 * Returns 0 or the error code 896 */ 897 static int 898 xmlParse3986URIReference(xmlURIPtr uri, const char *str) { 899 int ret; 900 901 if (str == NULL) 902 return(-1); 903 xmlCleanURI(uri); 904 905 /* 906 * Try first to parse absolute refs, then fallback to relative if 907 * it fails. 
908 */ 909 ret = xmlParse3986URI(uri, str); 910 if (ret != 0) { 911 xmlCleanURI(uri); 912 ret = xmlParse3986RelativeRef(uri, str); 913 if (ret != 0) { 914 xmlCleanURI(uri); 915 return(ret); 916 } 917 } 918 return(0); 919 } 920 921 /** 922 * xmlParseURI: 923 * @str: the URI string to analyze 924 * 925 * Parse an URI based on RFC 3986 926 * 927 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 928 * 929 * Returns a newly built xmlURIPtr or NULL in case of error 930 */ 931 xmlURIPtr 932 xmlParseURI(const char *str) { 933 xmlURIPtr uri; 934 int ret; 935 936 if (str == NULL) 937 return(NULL); 938 uri = xmlCreateURI(); 939 if (uri != NULL) { 940 ret = xmlParse3986URIReference(uri, str); 941 if (ret) { 942 xmlFreeURI(uri); 943 return(NULL); 944 } 945 } 946 return(uri); 947 } 948 949 /** 950 * xmlParseURIReference: 951 * @uri: pointer to an URI structure 952 * @str: the string to analyze 953 * 954 * Parse an URI reference string based on RFC 3986 and fills in the 955 * appropriate fields of the @uri structure 956 * 957 * URI-reference = URI / relative-ref 958 * 959 * Returns 0 or the error code 960 */ 961 int 962 xmlParseURIReference(xmlURIPtr uri, const char *str) { 963 return(xmlParse3986URIReference(uri, str)); 964 } 965 966 /** 967 * xmlParseURIRaw: 968 * @str: the URI string to analyze 969 * @raw: if 1 unescaping of URI pieces are disabled 970 * 971 * Parse an URI but allows to keep intact the original fragments. 
972 * 973 * URI-reference = URI / relative-ref 974 * 975 * Returns a newly built xmlURIPtr or NULL in case of error 976 */ 977 xmlURIPtr 978 xmlParseURIRaw(const char *str, int raw) { 979 xmlURIPtr uri; 980 int ret; 981 982 if (str == NULL) 983 return(NULL); 984 uri = xmlCreateURI(); 985 if (uri != NULL) { 986 if (raw) { 987 uri->cleanup |= 2; 988 } 989 ret = xmlParseURIReference(uri, str); 990 if (ret) { 991 xmlFreeURI(uri); 992 return(NULL); 993 } 994 } 995 return(uri); 996 } 997 998 /************************************************************************ 999 * * 1000 * Generic URI structure functions * 1001 * * 1002 ************************************************************************/ 1003 1004 /** 1005 * xmlCreateURI: 1006 * 1007 * Simply creates an empty xmlURI 1008 * 1009 * Returns the new structure or NULL in case of error 1010 */ 1011 xmlURIPtr 1012 xmlCreateURI(void) { 1013 xmlURIPtr ret; 1014 1015 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 1016 if (ret == NULL) { 1017 xmlURIErrMemory("creating URI structure\n"); 1018 return(NULL); 1019 } 1020 memset(ret, 0, sizeof(xmlURI)); 1021 return(ret); 1022 } 1023 1024 /** 1025 * xmlSaveUriRealloc: 1026 * 1027 * Function to handle properly a reallocation when saving an URI 1028 * Also imposes some limit on the length of an URI string output 1029 */ 1030 static xmlChar * 1031 xmlSaveUriRealloc(xmlChar *ret, int *max) { 1032 xmlChar *temp; 1033 int tmp; 1034 1035 if (*max > MAX_URI_LENGTH) { 1036 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n"); 1037 return(NULL); 1038 } 1039 tmp = *max * 2; 1040 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1)); 1041 if (temp == NULL) { 1042 xmlURIErrMemory("saving URI\n"); 1043 return(NULL); 1044 } 1045 *max = tmp; 1046 return(temp); 1047 } 1048 1049 /** 1050 * xmlSaveUri: 1051 * @uri: pointer to an xmlURI 1052 * 1053 * Save the URI as an escaped string 1054 * 1055 * Returns a new string (to be deallocated by caller) 1056 */ 1057 xmlChar * 1058 xmlSaveUri(xmlURIPtr 
uri) { 1059 xmlChar *ret = NULL; 1060 xmlChar *temp; 1061 const char *p; 1062 int len; 1063 int max; 1064 1065 if (uri == NULL) return(NULL); 1066 1067 1068 max = 80; 1069 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 1070 if (ret == NULL) { 1071 xmlURIErrMemory("saving URI\n"); 1072 return(NULL); 1073 } 1074 len = 0; 1075 1076 if (uri->scheme != NULL) { 1077 p = uri->scheme; 1078 while (*p != 0) { 1079 if (len >= max) { 1080 temp = xmlSaveUriRealloc(ret, &max); 1081 if (temp == NULL) goto mem_error; 1082 ret = temp; 1083 } 1084 ret[len++] = *p++; 1085 } 1086 if (len >= max) { 1087 temp = xmlSaveUriRealloc(ret, &max); 1088 if (temp == NULL) goto mem_error; 1089 ret = temp; 1090 } 1091 ret[len++] = ':'; 1092 } 1093 if (uri->opaque != NULL) { 1094 p = uri->opaque; 1095 while (*p != 0) { 1096 if (len + 3 >= max) { 1097 temp = xmlSaveUriRealloc(ret, &max); 1098 if (temp == NULL) goto mem_error; 1099 ret = temp; 1100 } 1101 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 1102 ret[len++] = *p++; 1103 else { 1104 int val = *(unsigned char *)p++; 1105 int hi = val / 0x10, lo = val % 0x10; 1106 ret[len++] = '%'; 1107 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1108 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1109 } 1110 } 1111 } else { 1112 if ((uri->server != NULL) || (uri->port == -1)) { 1113 if (len + 3 >= max) { 1114 temp = xmlSaveUriRealloc(ret, &max); 1115 if (temp == NULL) goto mem_error; 1116 ret = temp; 1117 } 1118 ret[len++] = '/'; 1119 ret[len++] = '/'; 1120 if (uri->user != NULL) { 1121 p = uri->user; 1122 while (*p != 0) { 1123 if (len + 3 >= max) { 1124 temp = xmlSaveUriRealloc(ret, &max); 1125 if (temp == NULL) goto mem_error; 1126 ret = temp; 1127 } 1128 if ((IS_UNRESERVED(*(p))) || 1129 ((*(p) == ';')) || ((*(p) == ':')) || 1130 ((*(p) == '&')) || ((*(p) == '=')) || 1131 ((*(p) == '+')) || ((*(p) == '$')) || 1132 ((*(p) == ','))) 1133 ret[len++] = *p++; 1134 else { 1135 int val = *(unsigned char *)p++; 1136 int hi = val / 0x10, lo = val % 0x10; 1137 ret[len++] = '%'; 1138 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1139 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1140 } 1141 } 1142 if (len + 3 >= max) { 1143 temp = xmlSaveUriRealloc(ret, &max); 1144 if (temp == NULL) goto mem_error; 1145 ret = temp; 1146 } 1147 ret[len++] = '@'; 1148 } 1149 if (uri->server != NULL) { 1150 p = uri->server; 1151 while (*p != 0) { 1152 if (len >= max) { 1153 temp = xmlSaveUriRealloc(ret, &max); 1154 if (temp == NULL) goto mem_error; 1155 ret = temp; 1156 } 1157 ret[len++] = *p++; 1158 } 1159 if (uri->port > 0) { 1160 if (len + 10 >= max) { 1161 temp = xmlSaveUriRealloc(ret, &max); 1162 if (temp == NULL) goto mem_error; 1163 ret = temp; 1164 } 1165 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 1166 } 1167 } 1168 } else if (uri->authority != NULL) { 1169 if (len + 3 >= max) { 1170 temp = xmlSaveUriRealloc(ret, &max); 1171 if (temp == NULL) goto mem_error; 1172 ret = temp; 1173 } 1174 ret[len++] = '/'; 1175 ret[len++] = '/'; 1176 p = uri->authority; 1177 while (*p != 0) { 1178 if (len + 3 >= max) { 1179 temp = xmlSaveUriRealloc(ret, &max); 1180 if (temp == NULL) goto mem_error; 1181 ret = temp; 1182 } 1183 if ((IS_UNRESERVED(*(p))) || 1184 
((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || 1185 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 1186 ((*(p) == '=')) || ((*(p) == '+'))) 1187 ret[len++] = *p++; 1188 else { 1189 int val = *(unsigned char *)p++; 1190 int hi = val / 0x10, lo = val % 0x10; 1191 ret[len++] = '%'; 1192 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1193 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1194 } 1195 } 1196 } else if (uri->scheme != NULL) { 1197 if (len + 3 >= max) { 1198 temp = xmlSaveUriRealloc(ret, &max); 1199 if (temp == NULL) goto mem_error; 1200 ret = temp; 1201 } 1202 } 1203 if (uri->path != NULL) { 1204 p = uri->path; 1205 /* 1206 * the colon in file:///d: should not be escaped or 1207 * Windows accesses fail later. 1208 */ 1209 if ((uri->scheme != NULL) && 1210 (p[0] == '/') && 1211 (((p[1] >= 'a') && (p[1] <= 'z')) || 1212 ((p[1] >= 'A') && (p[1] <= 'Z'))) && 1213 (p[2] == ':') && 1214 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) { 1215 if (len + 3 >= max) { 1216 temp = xmlSaveUriRealloc(ret, &max); 1217 if (temp == NULL) goto mem_error; 1218 ret = temp; 1219 } 1220 ret[len++] = *p++; 1221 ret[len++] = *p++; 1222 ret[len++] = *p++; 1223 } 1224 while (*p != 0) { 1225 if (len + 3 >= max) { 1226 temp = xmlSaveUriRealloc(ret, &max); 1227 if (temp == NULL) goto mem_error; 1228 ret = temp; 1229 } 1230 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 1231 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 1232 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || 1233 ((*(p) == ','))) 1234 ret[len++] = *p++; 1235 else { 1236 int val = *(unsigned char *)p++; 1237 int hi = val / 0x10, lo = val % 0x10; 1238 ret[len++] = '%'; 1239 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1240 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1241 } 1242 } 1243 } 1244 if (uri->query_raw != NULL) { 1245 if (len + 1 >= max) { 1246 temp = xmlSaveUriRealloc(ret, &max); 1247 if (temp == NULL) goto mem_error; 1248 ret = temp; 1249 } 1250 ret[len++] = '?'; 1251 p = uri->query_raw; 1252 while (*p != 0) { 1253 if (len + 1 >= max) { 1254 temp = xmlSaveUriRealloc(ret, &max); 1255 if (temp == NULL) goto mem_error; 1256 ret = temp; 1257 } 1258 ret[len++] = *p++; 1259 } 1260 } else if (uri->query != NULL) { 1261 if (len + 3 >= max) { 1262 temp = xmlSaveUriRealloc(ret, &max); 1263 if (temp == NULL) goto mem_error; 1264 ret = temp; 1265 } 1266 ret[len++] = '?'; 1267 p = uri->query; 1268 while (*p != 0) { 1269 if (len + 3 >= max) { 1270 temp = xmlSaveUriRealloc(ret, &max); 1271 if (temp == NULL) goto mem_error; 1272 ret = temp; 1273 } 1274 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1275 ret[len++] = *p++; 1276 else { 1277 int val = *(unsigned char *)p++; 1278 int hi = val / 0x10, lo = val % 0x10; 1279 ret[len++] = '%'; 1280 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1281 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1282 } 1283 } 1284 } 1285 } 1286 if (uri->fragment != NULL) { 1287 if (len + 3 >= max) { 1288 temp = xmlSaveUriRealloc(ret, &max); 1289 if (temp == NULL) goto mem_error; 1290 ret = temp; 1291 } 1292 ret[len++] = '#'; 1293 p = uri->fragment; 1294 while (*p != 0) { 1295 if (len + 3 >= max) { 1296 temp = xmlSaveUriRealloc(ret, &max); 1297 if (temp == NULL) goto mem_error; 1298 ret = temp; 1299 } 1300 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1301 ret[len++] = *p++; 1302 else { 1303 int val = *(unsigned char *)p++; 1304 int hi = val / 0x10, lo = val % 0x10; 1305 ret[len++] = '%'; 1306 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1307 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1308 } 1309 } 1310 } 1311 if (len >= max) { 1312 temp = xmlSaveUriRealloc(ret, &max); 1313 if (temp == NULL) goto mem_error; 1314 ret = temp; 1315 } 1316 ret[len] = 0; 1317 return(ret); 1318 1319 mem_error: 1320 xmlFree(ret); 1321 return(NULL); 1322 } 1323 1324 /** 1325 * xmlPrintURI: 1326 * @stream: a FILE* for the output 1327 * @uri: pointer to an xmlURI 1328 * 1329 * Prints the URI in the stream @stream. 1330 */ 1331 void 1332 xmlPrintURI(FILE *stream, xmlURIPtr uri) { 1333 xmlChar *out; 1334 1335 out = xmlSaveUri(uri); 1336 if (out != NULL) { 1337 fprintf(stream, "%s", (char *) out); 1338 xmlFree(out); 1339 } 1340 } 1341 1342 /** 1343 * xmlCleanURI: 1344 * @uri: pointer to an xmlURI 1345 * 1346 * Make sure the xmlURI struct is free of content 1347 */ 1348 static void 1349 xmlCleanURI(xmlURIPtr uri) { 1350 if (uri == NULL) return; 1351 1352 if (uri->scheme != NULL) xmlFree(uri->scheme); 1353 uri->scheme = NULL; 1354 if (uri->server != NULL) xmlFree(uri->server); 1355 uri->server = NULL; 1356 if (uri->user != NULL) xmlFree(uri->user); 1357 uri->user = NULL; 1358 if (uri->path != NULL) xmlFree(uri->path); 1359 uri->path = NULL; 1360 if (uri->fragment != NULL) xmlFree(uri->fragment); 1361 uri->fragment = NULL; 1362 if (uri->opaque != NULL) xmlFree(uri->opaque); 1363 uri->opaque = NULL; 1364 if (uri->authority != NULL) xmlFree(uri->authority); 1365 uri->authority = NULL; 1366 if (uri->query != NULL) xmlFree(uri->query); 1367 uri->query = NULL; 1368 if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1369 uri->query_raw = NULL; 1370 } 1371 1372 /** 1373 * xmlFreeURI: 1374 * @uri: pointer to an xmlURI 1375 * 1376 * Free up the xmlURI struct 1377 */ 1378 void 1379 xmlFreeURI(xmlURIPtr uri) { 1380 if (uri == NULL) return; 1381 1382 if (uri->scheme != NULL) xmlFree(uri->scheme); 1383 if (uri->server != NULL) xmlFree(uri->server); 1384 if (uri->user != NULL) xmlFree(uri->user); 1385 if (uri->path != NULL) xmlFree(uri->path); 1386 if (uri->fragment != NULL) 
/************************************************************************
 *									*
 *			Helper functions				*
 *									*
 ************************************************************************/

/**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done.
 * The result is never longer than the input.
 *
 * Returns 0 on success, or -1 if @path is NULL
 */
int
xmlNormalizeURIPath(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize // : collapse a run of slashes into a single one */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "<segment>/../", where <segment> is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "<segment>/..", where <segment>
     *    is a complete path segment not equal to "..", that
     *    "<segment>/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* The source and destination overlap, so copy by hand instead of
         * using strcpy(cur, segp + 3).
         */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* Step back over the slash(es) that precede "cur".  If that takes
         * us to the start of the buffer there is no previous segment, so
         * keep going from here.
         */
        segp = cur;
        while ((segp > path) && (segp[-1] == '/'))
            --segp;
        if (segp == path)
            continue;

        /* "segp - 1" is the last character of the previous segment; find
         * that segment's start.  We need to back up to the previous segment
         * and start over with that to handle things like "foo/bar/../..".
         * If we don't do this, then on the first pass we'll remove the
         * "bar/..", but be pointing at the second ".." so we won't realize
         * we can also remove the "foo/..".  Testing segp[-1] (rather than
         * the character segp lands on) also covers a one-character segment
         * that starts at path[0], e.g. "a/b/../..".
         */
        cur = segp - 1;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}

/*
 * is_hex:
 * @c:  the character to test
 *
 * Returns 1 if @c is an ASCII hexadecimal digit (0-9, a-f, A-F), else 0
 */
static int is_hex(char c) {
    if (((c >= '0') && (c <= '9')) ||
        ((c >= 'a') && (c <= 'f')) ||
        ((c >= 'A') && (c <= 'F')))
        return(1);
    return(0);
}
/**
 * xmlURIUnescapeString:
 * @str:  the string to unescape
 * @len:  the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. Each
 * "%XX" triplet with two hexadecimal digits becomes the single byte 0xXX
 * (no encoding is applied); every other byte is copied unchanged.
 * The result is never longer than the input, so a caller-supplied @target
 * needs room for the input length plus the terminating 0.
 *
 * Returns @target when one was supplied, otherwise a newly allocated
 * buffer; the result is 0-terminated. Returns NULL only in case of error.
 */
char *
xmlURIUnescapeString(const char *str, int len, char *target) {
    char *result, *dst;
    const char *src;

    if (str == NULL)
        return(NULL);
    if (len <= 0) len = strlen(str);
    if (len < 0) return(NULL);

    result = target;
    if (result == NULL) {
        result = (char *) xmlMallocAtomic(len + 1);
        if (result == NULL) {
            xmlURIErrMemory("unescaping URI value\n");
            return(NULL);
        }
    }

    src = str;
    dst = result;
    while (len > 0) {
        int value, i;

        /*
         * Anything that is not a complete "%XX" triplet inside the
         * remaining length is copied through verbatim. The length test
         * comes first so src[1] / src[2] are only read when in range.
         */
        if ((len <= 2) || (*src != '%') ||
            (!is_hex(src[1])) || (!is_hex(src[2]))) {
            *dst++ = *src++;
            len--;
            continue;
        }

        /* Fold the two hex digits into one byte value. */
        value = 0;
        for (i = 1; i <= 2; i++) {
            char digit = src[i];

            value *= 16;
            if ((digit >= '0') && (digit <= '9'))
                value += digit - '0';
            else if ((digit >= 'a') && (digit <= 'f'))
                value += (digit - 'a') + 10;
            else    /* is_hex() leaves only 'A'..'F' here */
                value += (digit - 'A') + 10;
        }
        *dst++ = (char) value;
        src += 3;
        len -= 3;
    }
    *dst = 0;
    return(result);
}
1668 */ 1669 xmlChar * 1670 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 1671 xmlChar *ret, ch; 1672 xmlChar *temp; 1673 const xmlChar *in; 1674 int len, out; 1675 1676 if (str == NULL) 1677 return(NULL); 1678 if (str[0] == 0) 1679 return(xmlStrdup(str)); 1680 len = xmlStrlen(str); 1681 if (!(len > 0)) return(NULL); 1682 1683 len += 20; 1684 ret = (xmlChar *) xmlMallocAtomic(len); 1685 if (ret == NULL) { 1686 xmlURIErrMemory("escaping URI value\n"); 1687 return(NULL); 1688 } 1689 in = (const xmlChar *) str; 1690 out = 0; 1691 while(*in != 0) { 1692 if (len - out <= 3) { 1693 temp = xmlSaveUriRealloc(ret, &len); 1694 if (temp == NULL) { 1695 xmlURIErrMemory("escaping URI value\n"); 1696 xmlFree(ret); 1697 return(NULL); 1698 } 1699 ret = temp; 1700 } 1701 1702 ch = *in; 1703 1704 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 1705 unsigned char val; 1706 ret[out++] = '%'; 1707 val = ch >> 4; 1708 if (val <= 9) 1709 ret[out++] = '0' + val; 1710 else 1711 ret[out++] = 'A' + val - 0xA; 1712 val = ch & 0xF; 1713 if (val <= 9) 1714 ret[out++] = '0' + val; 1715 else 1716 ret[out++] = 'A' + val - 0xA; 1717 in++; 1718 } else { 1719 ret[out++] = *in++; 1720 } 1721 1722 } 1723 ret[out] = 0; 1724 return(ret); 1725 } 1726 1727 /** 1728 * xmlURIEscape: 1729 * @str: the string of the URI to escape 1730 * 1731 * Escaping routine, does not do validity checks ! 1732 * It will try to escape the chars needing this, but this is heuristic 1733 * based it's impossible to be sure. 1734 * 1735 * Returns an copy of the string, but escaped 1736 * 1737 * 25 May 2001 1738 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 1739 * according to RFC2396. 
1740 * - Carl Douglas 1741 */ 1742 xmlChar * 1743 xmlURIEscape(const xmlChar * str) 1744 { 1745 xmlChar *ret, *segment = NULL; 1746 xmlURIPtr uri; 1747 int ret2; 1748 1749 #define NULLCHK(p) if(!p) { \ 1750 xmlURIErrMemory("escaping URI value\n"); \ 1751 xmlFreeURI(uri); \ 1752 return NULL; } \ 1753 1754 if (str == NULL) 1755 return (NULL); 1756 1757 uri = xmlCreateURI(); 1758 if (uri != NULL) { 1759 /* 1760 * Allow escaping errors in the unescaped form 1761 */ 1762 uri->cleanup = 1; 1763 ret2 = xmlParseURIReference(uri, (const char *)str); 1764 if (ret2) { 1765 xmlFreeURI(uri); 1766 return (NULL); 1767 } 1768 } 1769 1770 if (!uri) 1771 return NULL; 1772 1773 ret = NULL; 1774 1775 if (uri->scheme) { 1776 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 1777 NULLCHK(segment) 1778 ret = xmlStrcat(ret, segment); 1779 ret = xmlStrcat(ret, BAD_CAST ":"); 1780 xmlFree(segment); 1781 } 1782 1783 if (uri->authority) { 1784 segment = 1785 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 1786 NULLCHK(segment) 1787 ret = xmlStrcat(ret, BAD_CAST "//"); 1788 ret = xmlStrcat(ret, segment); 1789 xmlFree(segment); 1790 } 1791 1792 if (uri->user) { 1793 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 1794 NULLCHK(segment) 1795 ret = xmlStrcat(ret,BAD_CAST "//"); 1796 ret = xmlStrcat(ret, segment); 1797 ret = xmlStrcat(ret, BAD_CAST "@"); 1798 xmlFree(segment); 1799 } 1800 1801 if (uri->server) { 1802 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 1803 NULLCHK(segment) 1804 if (uri->user == NULL) 1805 ret = xmlStrcat(ret, BAD_CAST "//"); 1806 ret = xmlStrcat(ret, segment); 1807 xmlFree(segment); 1808 } 1809 1810 if (uri->port) { 1811 xmlChar port[10]; 1812 1813 snprintf((char *) port, 10, "%d", uri->port); 1814 ret = xmlStrcat(ret, BAD_CAST ":"); 1815 ret = xmlStrcat(ret, port); 1816 } 1817 1818 if (uri->path) { 1819 segment = 1820 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1821 NULLCHK(segment) 1822 
ret = xmlStrcat(ret, segment); 1823 xmlFree(segment); 1824 } 1825 1826 if (uri->query_raw) { 1827 ret = xmlStrcat(ret, BAD_CAST "?"); 1828 ret = xmlStrcat(ret, BAD_CAST uri->query_raw); 1829 } 1830 else if (uri->query) { 1831 segment = 1832 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); 1833 NULLCHK(segment) 1834 ret = xmlStrcat(ret, BAD_CAST "?"); 1835 ret = xmlStrcat(ret, segment); 1836 xmlFree(segment); 1837 } 1838 1839 if (uri->opaque) { 1840 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1841 NULLCHK(segment) 1842 ret = xmlStrcat(ret, segment); 1843 xmlFree(segment); 1844 } 1845 1846 if (uri->fragment) { 1847 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1848 NULLCHK(segment) 1849 ret = xmlStrcat(ret, BAD_CAST "#"); 1850 ret = xmlStrcat(ret, segment); 1851 xmlFree(segment); 1852 } 1853 1854 xmlFreeURI(uri); 1855 #undef NULLCHK 1856 1857 return (ret); 1858 } 1859 1860 /************************************************************************ 1861 * * 1862 * Public functions * 1863 * * 1864 ************************************************************************/ 1865 1866 /** 1867 * xmlBuildURI: 1868 * @URI: the URI instance found in the document 1869 * @base: the base value 1870 * 1871 * Computes he final URI of the reference done by checking that 1872 * the given URI is valid, and building the final URI using the 1873 * base URI. This is processed according to section 5.2 of the 1874 * RFC 2396 1875 * 1876 * 5.2. Resolving Relative References to Absolute Form 1877 * 1878 * Returns a new URI string (to be freed by the caller) or NULL in case 1879 * of error. 
1880 */ 1881 xmlChar * 1882 xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1883 xmlChar *val = NULL; 1884 int ret, len, indx, cur, out; 1885 xmlURIPtr ref = NULL; 1886 xmlURIPtr bas = NULL; 1887 xmlURIPtr res = NULL; 1888 1889 /* 1890 * 1) The URI reference is parsed into the potential four components and 1891 * fragment identifier, as described in Section 4.3. 1892 * 1893 * NOTE that a completely empty URI is treated by modern browsers 1894 * as a reference to "." rather than as a synonym for the current 1895 * URI. Should we do that here? 1896 */ 1897 if (URI == NULL) 1898 ret = -1; 1899 else { 1900 if (*URI) { 1901 ref = xmlCreateURI(); 1902 if (ref == NULL) 1903 goto done; 1904 ret = xmlParseURIReference(ref, (const char *) URI); 1905 } 1906 else 1907 ret = 0; 1908 } 1909 if (ret != 0) 1910 goto done; 1911 if ((ref != NULL) && (ref->scheme != NULL)) { 1912 /* 1913 * The URI is absolute don't modify. 1914 */ 1915 val = xmlStrdup(URI); 1916 goto done; 1917 } 1918 if (base == NULL) 1919 ret = -1; 1920 else { 1921 bas = xmlCreateURI(); 1922 if (bas == NULL) 1923 goto done; 1924 ret = xmlParseURIReference(bas, (const char *) base); 1925 } 1926 if (ret != 0) { 1927 if (ref) 1928 val = xmlSaveUri(ref); 1929 goto done; 1930 } 1931 if (ref == NULL) { 1932 /* 1933 * the base fragment must be ignored 1934 */ 1935 if (bas->fragment != NULL) { 1936 xmlFree(bas->fragment); 1937 bas->fragment = NULL; 1938 } 1939 val = xmlSaveUri(bas); 1940 goto done; 1941 } 1942 1943 /* 1944 * 2) If the path component is empty and the scheme, authority, and 1945 * query components are undefined, then it is a reference to the 1946 * current document and we are done. Otherwise, the reference URI's 1947 * query and fragment components are defined as found (or not found) 1948 * within the URI reference and not inherited from the base URI. 
1949 * 1950 * NOTE that in modern browsers, the parsing differs from the above 1951 * in the following aspect: the query component is allowed to be 1952 * defined while still treating this as a reference to the current 1953 * document. 1954 */ 1955 res = xmlCreateURI(); 1956 if (res == NULL) 1957 goto done; 1958 if ((ref->scheme == NULL) && (ref->path == NULL) && 1959 ((ref->authority == NULL) && (ref->server == NULL))) { 1960 if (bas->scheme != NULL) 1961 res->scheme = xmlMemStrdup(bas->scheme); 1962 if (bas->authority != NULL) 1963 res->authority = xmlMemStrdup(bas->authority); 1964 else if ((bas->server != NULL) || (bas->port == -1)) { 1965 if (bas->server != NULL) 1966 res->server = xmlMemStrdup(bas->server); 1967 if (bas->user != NULL) 1968 res->user = xmlMemStrdup(bas->user); 1969 res->port = bas->port; 1970 } 1971 if (bas->path != NULL) 1972 res->path = xmlMemStrdup(bas->path); 1973 if (ref->query_raw != NULL) 1974 res->query_raw = xmlMemStrdup (ref->query_raw); 1975 else if (ref->query != NULL) 1976 res->query = xmlMemStrdup(ref->query); 1977 else if (bas->query_raw != NULL) 1978 res->query_raw = xmlMemStrdup(bas->query_raw); 1979 else if (bas->query != NULL) 1980 res->query = xmlMemStrdup(bas->query); 1981 if (ref->fragment != NULL) 1982 res->fragment = xmlMemStrdup(ref->fragment); 1983 goto step_7; 1984 } 1985 1986 /* 1987 * 3) If the scheme component is defined, indicating that the reference 1988 * starts with a scheme name, then the reference is interpreted as an 1989 * absolute URI and we are done. Otherwise, the reference URI's 1990 * scheme is inherited from the base URI's scheme component. 
1991 */ 1992 if (ref->scheme != NULL) { 1993 val = xmlSaveUri(ref); 1994 goto done; 1995 } 1996 if (bas->scheme != NULL) 1997 res->scheme = xmlMemStrdup(bas->scheme); 1998 1999 if (ref->query_raw != NULL) 2000 res->query_raw = xmlMemStrdup(ref->query_raw); 2001 else if (ref->query != NULL) 2002 res->query = xmlMemStrdup(ref->query); 2003 if (ref->fragment != NULL) 2004 res->fragment = xmlMemStrdup(ref->fragment); 2005 2006 /* 2007 * 4) If the authority component is defined, then the reference is a 2008 * network-path and we skip to step 7. Otherwise, the reference 2009 * URI's authority is inherited from the base URI's authority 2010 * component, which will also be undefined if the URI scheme does not 2011 * use an authority component. 2012 */ 2013 if ((ref->authority != NULL) || (ref->server != NULL)) { 2014 if (ref->authority != NULL) 2015 res->authority = xmlMemStrdup(ref->authority); 2016 else { 2017 res->server = xmlMemStrdup(ref->server); 2018 if (ref->user != NULL) 2019 res->user = xmlMemStrdup(ref->user); 2020 res->port = ref->port; 2021 } 2022 if (ref->path != NULL) 2023 res->path = xmlMemStrdup(ref->path); 2024 goto step_7; 2025 } 2026 if (bas->authority != NULL) 2027 res->authority = xmlMemStrdup(bas->authority); 2028 else if ((bas->server != NULL) || (bas->port == -1)) { 2029 if (bas->server != NULL) 2030 res->server = xmlMemStrdup(bas->server); 2031 if (bas->user != NULL) 2032 res->user = xmlMemStrdup(bas->user); 2033 res->port = bas->port; 2034 } 2035 2036 /* 2037 * 5) If the path component begins with a slash character ("/"), then 2038 * the reference is an absolute-path and we skip to step 7. 2039 */ 2040 if ((ref->path != NULL) && (ref->path[0] == '/')) { 2041 res->path = xmlMemStrdup(ref->path); 2042 goto step_7; 2043 } 2044 2045 2046 /* 2047 * 6) If this step is reached, then we are resolving a relative-path 2048 * reference. The relative path needs to be merged with the base 2049 * URI's path. 
Although there are many ways to do this, we will 2050 * describe a simple method using a separate string buffer. 2051 * 2052 * Allocate a buffer large enough for the result string. 2053 */ 2054 len = 2; /* extra / and 0 */ 2055 if (ref->path != NULL) 2056 len += strlen(ref->path); 2057 if (bas->path != NULL) 2058 len += strlen(bas->path); 2059 res->path = (char *) xmlMallocAtomic(len); 2060 if (res->path == NULL) { 2061 xmlURIErrMemory("resolving URI against base\n"); 2062 goto done; 2063 } 2064 res->path[0] = 0; 2065 2066 /* 2067 * a) All but the last segment of the base URI's path component is 2068 * copied to the buffer. In other words, any characters after the 2069 * last (right-most) slash character, if any, are excluded. 2070 */ 2071 cur = 0; 2072 out = 0; 2073 if (bas->path != NULL) { 2074 while (bas->path[cur] != 0) { 2075 while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 2076 cur++; 2077 if (bas->path[cur] == 0) 2078 break; 2079 2080 cur++; 2081 while (out < cur) { 2082 res->path[out] = bas->path[out]; 2083 out++; 2084 } 2085 } 2086 } 2087 res->path[out] = 0; 2088 2089 /* 2090 * b) The reference's path component is appended to the buffer 2091 * string. 2092 */ 2093 if (ref->path != NULL && ref->path[0] != 0) { 2094 indx = 0; 2095 /* 2096 * Ensure the path includes a '/' 2097 */ 2098 if ((out == 0) && (bas->server != NULL)) 2099 res->path[out++] = '/'; 2100 while (ref->path[indx] != 0) { 2101 res->path[out++] = ref->path[indx++]; 2102 } 2103 } 2104 res->path[out] = 0; 2105 2106 /* 2107 * Steps c) to h) are really path normalization steps 2108 */ 2109 xmlNormalizeURIPath(res->path); 2110 2111 step_7: 2112 2113 /* 2114 * 7) The resulting URI components, including any inherited from the 2115 * base URI, are recombined to give the absolute form of the URI 2116 * reference. 
2117 */ 2118 val = xmlSaveUri(res); 2119 2120 done: 2121 if (ref != NULL) 2122 xmlFreeURI(ref); 2123 if (bas != NULL) 2124 xmlFreeURI(bas); 2125 if (res != NULL) 2126 xmlFreeURI(res); 2127 return(val); 2128 } 2129 2130 /** 2131 * xmlBuildRelativeURI: 2132 * @URI: the URI reference under consideration 2133 * @base: the base value 2134 * 2135 * Expresses the URI of the reference in terms relative to the 2136 * base. Some examples of this operation include: 2137 * base = "http://site1.com/docs/book1.html" 2138 * URI input URI returned 2139 * docs/pic1.gif pic1.gif 2140 * docs/img/pic1.gif img/pic1.gif 2141 * img/pic1.gif ../img/pic1.gif 2142 * http://site1.com/docs/pic1.gif pic1.gif 2143 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif 2144 * 2145 * base = "docs/book1.html" 2146 * URI input URI returned 2147 * docs/pic1.gif pic1.gif 2148 * docs/img/pic1.gif img/pic1.gif 2149 * img/pic1.gif ../img/pic1.gif 2150 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif 2151 * 2152 * 2153 * Note: if the URI reference is really wierd or complicated, it may be 2154 * worthwhile to first convert it into a "nice" one by calling 2155 * xmlBuildURI (using 'base') before calling this routine, 2156 * since this routine (for reasonable efficiency) assumes URI has 2157 * already been through some validation. 2158 * 2159 * Returns a new URI string (to be freed by the caller) or NULL in case 2160 * error. 
2161 */ 2162 xmlChar * 2163 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base) 2164 { 2165 xmlChar *val = NULL; 2166 int ret; 2167 int ix; 2168 int nbslash = 0; 2169 int len; 2170 xmlURIPtr ref = NULL; 2171 xmlURIPtr bas = NULL; 2172 xmlChar *bptr, *uptr, *vptr; 2173 int remove_path = 0; 2174 2175 if ((URI == NULL) || (*URI == 0)) 2176 return NULL; 2177 2178 /* 2179 * First parse URI into a standard form 2180 */ 2181 ref = xmlCreateURI (); 2182 if (ref == NULL) 2183 return NULL; 2184 /* If URI not already in "relative" form */ 2185 if (URI[0] != '.') { 2186 ret = xmlParseURIReference (ref, (const char *) URI); 2187 if (ret != 0) 2188 goto done; /* Error in URI, return NULL */ 2189 } else 2190 ref->path = (char *)xmlStrdup(URI); 2191 2192 /* 2193 * Next parse base into the same standard form 2194 */ 2195 if ((base == NULL) || (*base == 0)) { 2196 val = xmlStrdup (URI); 2197 goto done; 2198 } 2199 bas = xmlCreateURI (); 2200 if (bas == NULL) 2201 goto done; 2202 if (base[0] != '.') { 2203 ret = xmlParseURIReference (bas, (const char *) base); 2204 if (ret != 0) 2205 goto done; /* Error in base, return NULL */ 2206 } else 2207 bas->path = (char *)xmlStrdup(base); 2208 2209 /* 2210 * If the scheme / server on the URI differs from the base, 2211 * just return the URI 2212 */ 2213 if ((ref->scheme != NULL) && 2214 ((bas->scheme == NULL) || 2215 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) || 2216 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) { 2217 val = xmlStrdup (URI); 2218 goto done; 2219 } 2220 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { 2221 val = xmlStrdup(BAD_CAST ""); 2222 goto done; 2223 } 2224 if (bas->path == NULL) { 2225 val = xmlStrdup((xmlChar *)ref->path); 2226 goto done; 2227 } 2228 if (ref->path == NULL) { 2229 ref->path = (char *) "/"; 2230 remove_path = 1; 2231 } 2232 2233 /* 2234 * At this point (at last!) 
we can compare the two paths 2235 * 2236 * First we take care of the special case where either of the 2237 * two path components may be missing (bug 316224) 2238 */ 2239 if (bas->path == NULL) { 2240 if (ref->path != NULL) { 2241 uptr = (xmlChar *) ref->path; 2242 if (*uptr == '/') 2243 uptr++; 2244 /* exception characters from xmlSaveUri */ 2245 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2246 } 2247 goto done; 2248 } 2249 bptr = (xmlChar *)bas->path; 2250 if (ref->path == NULL) { 2251 for (ix = 0; bptr[ix] != 0; ix++) { 2252 if (bptr[ix] == '/') 2253 nbslash++; 2254 } 2255 uptr = NULL; 2256 len = 1; /* this is for a string terminator only */ 2257 } else { 2258 xmlChar *rptr = (xmlChar *) ref->path; 2259 int pos = 0; 2260 2261 /* 2262 * Next we compare the two strings and find where they first differ 2263 */ 2264 if ((*rptr == '.') && (rptr[1] == '/')) 2265 rptr += 2; 2266 if ((*bptr == '.') && (bptr[1] == '/')) 2267 bptr += 2; 2268 else if ((*bptr == '/') && (*rptr != '/')) 2269 bptr++; 2270 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0)) 2271 pos++; 2272 2273 if (bptr[pos] == rptr[pos]) { 2274 val = xmlStrdup(BAD_CAST ""); 2275 goto done; /* (I can't imagine why anyone would do this) */ 2276 } 2277 2278 /* 2279 * In URI, "back up" to the last '/' encountered. 
This will be the 2280 * beginning of the "unique" suffix of URI 2281 */ 2282 ix = pos; 2283 if ((rptr[ix] == '/') && (ix > 0)) 2284 ix--; 2285 else if ((rptr[ix] == 0) && (ix > 1) && (rptr[ix - 1] == '/')) 2286 ix -= 2; 2287 for (; ix > 0; ix--) { 2288 if (rptr[ix] == '/') 2289 break; 2290 } 2291 if (ix == 0) { 2292 uptr = (xmlChar *)rptr; 2293 } else { 2294 ix++; 2295 uptr = (xmlChar *)&rptr[ix]; 2296 } 2297 2298 /* 2299 * In base, count the number of '/' from the differing point 2300 */ 2301 if (bptr[pos] != rptr[pos]) {/* check for trivial URI == base */ 2302 for (; bptr[ix] != 0; ix++) { 2303 if (bptr[ix] == '/') 2304 nbslash++; 2305 } 2306 } 2307 len = xmlStrlen (uptr) + 1; 2308 } 2309 2310 if (nbslash == 0) { 2311 if (uptr != NULL) 2312 /* exception characters from xmlSaveUri */ 2313 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2314 goto done; 2315 } 2316 2317 /* 2318 * Allocate just enough space for the returned string - 2319 * length of the remainder of the URI, plus enough space 2320 * for the "../" groups, plus one for the terminator 2321 */ 2322 val = (xmlChar *) xmlMalloc (len + 3 * nbslash); 2323 if (val == NULL) { 2324 xmlURIErrMemory("building relative URI\n"); 2325 goto done; 2326 } 2327 vptr = val; 2328 /* 2329 * Put in as many "../" as needed 2330 */ 2331 for (; nbslash>0; nbslash--) { 2332 *vptr++ = '.'; 2333 *vptr++ = '.'; 2334 *vptr++ = '/'; 2335 } 2336 /* 2337 * Finish up with the end of the URI 2338 */ 2339 if (uptr != NULL) { 2340 if ((vptr > val) && (len > 0) && 2341 (uptr[0] == '/') && (vptr[-1] == '/')) { 2342 memcpy (vptr, uptr + 1, len - 1); 2343 vptr[len - 2] = 0; 2344 } else { 2345 memcpy (vptr, uptr, len); 2346 vptr[len - 1] = 0; 2347 } 2348 } else { 2349 vptr[len - 1] = 0; 2350 } 2351 2352 /* escape the freshly-built path */ 2353 vptr = val; 2354 /* exception characters from xmlSaveUri */ 2355 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,"); 2356 xmlFree(vptr); 2357 2358 done: 2359 /* 2360 * Free the working variables 2361 */ 
2362 if (remove_path != 0) 2363 ref->path = NULL; 2364 if (ref != NULL) 2365 xmlFreeURI (ref); 2366 if (bas != NULL) 2367 xmlFreeURI (bas); 2368 2369 return val; 2370 } 2371 2372 /** 2373 * xmlCanonicPath: 2374 * @path: the resource locator in a filesystem notation 2375 * 2376 * Constructs a canonic path from the specified path. 2377 * 2378 * Returns a new canonic path, or a duplicate of the path parameter if the 2379 * construction fails. The caller is responsible for freeing the memory occupied 2380 * by the returned string. If there is insufficient memory available, or the 2381 * argument is NULL, the function returns NULL. 2382 */ 2383 #define IS_WINDOWS_PATH(p) \ 2384 ((p != NULL) && \ 2385 (((p[0] >= 'a') && (p[0] <= 'z')) || \ 2386 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 2387 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 2388 xmlChar * 2389 xmlCanonicPath(const xmlChar *path) 2390 { 2391 /* 2392 * For Windows implementations, additional work needs to be done to 2393 * replace backslashes in pathnames with "forward slashes" 2394 */ 2395 #if defined(_WIN32) && !defined(__CYGWIN__) 2396 int len = 0; 2397 char *p = NULL; 2398 #endif 2399 xmlURIPtr uri; 2400 xmlChar *ret; 2401 const xmlChar *absuri; 2402 2403 if (path == NULL) 2404 return(NULL); 2405 2406 #if defined(_WIN32) 2407 /* 2408 * We must not change the backslashes to slashes if the the path 2409 * starts with \\?\ 2410 * Those paths can be up to 32k characters long. 2411 * Was added specifically for OpenOffice, those paths can't be converted 2412 * to URIs anyway. 
2413 */ 2414 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') && 2415 (path[3] == '\\') ) 2416 return xmlStrdup((const xmlChar *) path); 2417 #endif 2418 2419 /* sanitize filename starting with // so it can be used as URI */ 2420 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/')) 2421 path++; 2422 2423 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2424 xmlFreeURI(uri); 2425 return xmlStrdup(path); 2426 } 2427 2428 /* Check if this is an "absolute uri" */ 2429 absuri = xmlStrstr(path, BAD_CAST "://"); 2430 if (absuri != NULL) { 2431 int l, j; 2432 unsigned char c; 2433 xmlChar *escURI; 2434 2435 /* 2436 * this looks like an URI where some parts have not been 2437 * escaped leading to a parsing problem. Check that the first 2438 * part matches a protocol. 2439 */ 2440 l = absuri - path; 2441 /* Bypass if first part (part before the '://') is > 20 chars */ 2442 if ((l <= 0) || (l > 20)) 2443 goto path_processing; 2444 /* Bypass if any non-alpha characters are present in first part */ 2445 for (j = 0;j < l;j++) { 2446 c = path[j]; 2447 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) 2448 goto path_processing; 2449 } 2450 2451 /* Escape all except the characters specified in the supplied path */ 2452 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;="); 2453 if (escURI != NULL) { 2454 /* Try parsing the escaped path */ 2455 uri = xmlParseURI((const char *) escURI); 2456 /* If successful, return the escaped string */ 2457 if (uri != NULL) { 2458 xmlFreeURI(uri); 2459 return escURI; 2460 } 2461 xmlFree(escURI); 2462 } 2463 } 2464 2465 path_processing: 2466 /* For Windows implementations, replace backslashes with 'forward slashes' */ 2467 #if defined(_WIN32) && !defined(__CYGWIN__) 2468 /* 2469 * Create a URI structure 2470 */ 2471 uri = xmlCreateURI(); 2472 if (uri == NULL) { /* Guard against 'out of memory' */ 2473 return(NULL); 2474 } 2475 2476 len = xmlStrlen(path); 2477 if ((len > 2) && IS_WINDOWS_PATH(path)) { 2478 
/* make the scheme 'file' */ 2479 uri->scheme = (char *) xmlStrdup(BAD_CAST "file"); 2480 /* allocate space for leading '/' + path + string terminator */ 2481 uri->path = xmlMallocAtomic(len + 2); 2482 if (uri->path == NULL) { 2483 xmlFreeURI(uri); /* Guard agains 'out of memory' */ 2484 return(NULL); 2485 } 2486 /* Put in leading '/' plus path */ 2487 uri->path[0] = '/'; 2488 p = uri->path + 1; 2489 strncpy(p, (char *) path, len + 1); 2490 } else { 2491 uri->path = (char *) xmlStrdup(path); 2492 if (uri->path == NULL) { 2493 xmlFreeURI(uri); 2494 return(NULL); 2495 } 2496 p = uri->path; 2497 } 2498 /* Now change all occurences of '\' to '/' */ 2499 while (*p != '\0') { 2500 if (*p == '\\') 2501 *p = '/'; 2502 p++; 2503 } 2504 2505 if (uri->scheme == NULL) { 2506 ret = xmlStrdup((const xmlChar *) uri->path); 2507 } else { 2508 ret = xmlSaveUri(uri); 2509 } 2510 2511 xmlFreeURI(uri); 2512 #else 2513 ret = xmlStrdup((const xmlChar *) path); 2514 #endif 2515 return(ret); 2516 } 2517 2518 /** 2519 * xmlPathToURI: 2520 * @path: the resource locator in a filesystem notation 2521 * 2522 * Constructs an URI expressing the existing path 2523 * 2524 * Returns a new URI, or a duplicate of the path parameter if the 2525 * construction fails. The caller is responsible for freeing the memory 2526 * occupied by the returned string. If there is insufficient memory available, 2527 * or the argument is NULL, the function returns NULL. 2528 */ 2529 xmlChar * 2530 xmlPathToURI(const xmlChar *path) 2531 { 2532 xmlURIPtr uri; 2533 xmlURI temp; 2534 xmlChar *ret, *cal; 2535 2536 if (path == NULL) 2537 return(NULL); 2538 2539 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2540 xmlFreeURI(uri); 2541 return xmlStrdup(path); 2542 } 2543 cal = xmlCanonicPath(path); 2544 if (cal == NULL) 2545 return(NULL); 2546 #if defined(_WIN32) && !defined(__CYGWIN__) 2547 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 
2548 If 'cal' is a valid URI allready then we are done here, as continuing would make 2549 it invalid. */ 2550 if ((uri = xmlParseURI((const char *) cal)) != NULL) { 2551 xmlFreeURI(uri); 2552 return cal; 2553 } 2554 /* 'cal' can contain a relative path with backslashes. If that is processed 2555 by xmlSaveURI, they will be escaped and the external entity loader machinery 2556 will fail. So convert them to slashes. Misuse 'ret' for walking. */ 2557 ret = cal; 2558 while (*ret != '\0') { 2559 if (*ret == '\\') 2560 *ret = '/'; 2561 ret++; 2562 } 2563 #endif 2564 memset(&temp, 0, sizeof(temp)); 2565 temp.path = (char *) cal; 2566 ret = xmlSaveUri(&temp); 2567 xmlFree(cal); 2568 return(ret); 2569 } 2570 #define bottom_uri 2571 #include "elfgcchack.h" 2572