1 /** 2 * uri.c: set of generic URI related routines 3 * 4 * Reference: RFCs 3986, 2732 and 2373 5 * 6 * See Copyright for the status of this software. 7 * 8 * daniel@veillard.com 9 */ 10 11 #define IN_LIBXML 12 #include "libxml.h" 13 14 #include <limits.h> 15 #include <string.h> 16 17 #include <libxml/xmlmemory.h> 18 #include <libxml/uri.h> 19 #include <libxml/globals.h> 20 #include <libxml/xmlerror.h> 21 22 /** 23 * MAX_URI_LENGTH: 24 * 25 * The definition of the URI regexp in the above RFC has no size limit 26 * In practice they are usually relatively short except for the 27 * data URI scheme as defined in RFC 2397. Even for data URI the usual 28 * maximum size before hitting random practical limits is around 64 KB 29 * and 4KB is usually a maximum admitted limit for proper operations. 30 * The value below is more a security limit than anything else and 31 * really should never be hit by 'normal' operations 32 * Set to 1 MByte in 2012, this is only enforced on output 33 */ 34 #define MAX_URI_LENGTH 1024 * 1024 35 36 static void 37 xmlURIErrMemory(const char *extra) 38 { 39 if (extra) 40 __xmlRaiseError(NULL, NULL, NULL, 41 NULL, NULL, XML_FROM_URI, 42 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 43 extra, NULL, NULL, 0, 0, 44 "Memory allocation failed : %s\n", extra); 45 else 46 __xmlRaiseError(NULL, NULL, NULL, 47 NULL, NULL, XML_FROM_URI, 48 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 49 NULL, NULL, NULL, 0, 0, 50 "Memory allocation failed\n"); 51 } 52 53 static void xmlCleanURI(xmlURIPtr uri); 54 55 /* 56 * Old rule from 2396 used in legacy handling code 57 * alpha = lowalpha | upalpha 58 */ 59 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 60 61 62 /* 63 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 64 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 65 * "u" | "v" | "w" | "x" | "y" | "z" 66 */ 67 68 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 69 70 /* 71 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 72 * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | 73 * "U" | "V" | "W" | "X" | "Y" | "Z" 74 */ 75 #define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) 76 77 #ifdef IS_DIGIT 78 #undef IS_DIGIT 79 #endif 80 /* 81 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 82 */ 83 #define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) 84 85 /* 86 * alphanum = alpha | digit 87 */ 88 89 #define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) 90 91 /* 92 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" 93 */ 94 95 #define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ 96 ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ 97 ((x) == '(') || ((x) == ')')) 98 99 /* 100 * unwise = "{" | "}" | "|" | "\" | "^" | "`" 101 */ 102 103 #define IS_UNWISE(p) \ 104 (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ 105 ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ 106 ((*(p) == ']')) || ((*(p) == '`'))) 107 /* 108 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | 109 * "[" | "]" 110 */ 111 112 #define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ 113 ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ 114 ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ 115 ((x) == ']')) 116 117 /* 118 * unreserved = alphanum | mark 119 */ 120 121 #define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) 122 123 /* 124 * Skip to next pointer char, handle escaped sequences 125 */ 126 127 #define NEXT(p) ((*p == '%')? p += 3 : p++) 128 129 /* 130 * Productions from the spec. 131 * 132 * authority = server | reg_name 133 * reg_name = 1*( unreserved | escaped | "$" | "," | 134 * ";" | ":" | "@" | "&" | "=" | "+" ) 135 * 136 * path = [ abs_path | opaque_part ] 137 */ 138 139 #define STRNDUP(s, n) (char *) xmlStrndup((const xmlChar *)(s), (n)) 140 141 /************************************************************************ 142 * * 143 * RFC 3986 parser * 144 * * 145 ************************************************************************/ 146 147 #define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) 148 #define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ 149 ((*(p) >= 'A') && (*(p) <= 'Z'))) 150 #define ISA_HEXDIG(p) \ 151 (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ 152 ((*(p) >= 'A') && (*(p) <= 'F'))) 153 154 /* 155 * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 156 * / "*" / "+" / "," / ";" / "=" 157 */ 158 #define ISA_SUB_DELIM(p) \ 159 (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ 160 ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ 161 ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ 162 ((*(p) == '=')) || ((*(p) == '\''))) 163 164 /* 165 * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" 166 */ 167 #define ISA_GEN_DELIM(p) \ 168 (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ 169 ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ 170 ((*(p) == '@'))) 171 172 /* 173 * reserved = gen-delims / sub-delims 174 */ 175 #define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) 176 177 /* 178 * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" 179 */ 180 #define ISA_UNRESERVED(p) \ 181 ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ 182 ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) 183 184 /* 185 * pct-encoded = "%" HEXDIG HEXDIG 186 */ 187 #define ISA_PCT_ENCODED(p) \ 188 ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) 189 190 /* 191 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 192 */ 193 #define ISA_PCHAR(p) \ 194 (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ 195 ((*(p) == ':')) || ((*(p) == '@'))) 196 197 /** 198 * xmlParse3986Scheme: 199 * @uri: pointer to an URI structure 200 * @str: pointer to the string to analyze 201 * 202 * Parse an URI scheme 203 * 204 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 205 * 206 * Returns 0 or the error code 207 */ 208 static int 209 xmlParse3986Scheme(xmlURIPtr uri, const char **str) { 210 const char *cur; 211 212 if (str == NULL) 213 return(-1); 214 215 cur = *str; 216 if (!ISA_ALPHA(cur)) 217 return(2); 218 cur++; 219 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || 220 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; 221 if (uri != NULL) { 222 if (uri->scheme != NULL) xmlFree(uri->scheme); 223 uri->scheme = STRNDUP(*str, cur - *str); 224 } 225 *str = cur; 226 return(0); 227 } 228 229 /** 230 * xmlParse3986Fragment: 231 * @uri: pointer to an URI structure 232 * @str: pointer to the string to analyze 233 * 234 * Parse the query part of an URI 235 * 236 * fragment = *( pchar / "/" / "?" ) 237 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' 238 * in the fragment identifier but this is used very broadly for 239 * xpointer scheme selection, so we are allowing it here to not break 240 * for example all the DocBook processing chains. 241 * 242 * Returns 0 or the error code 243 */ 244 static int 245 xmlParse3986Fragment(xmlURIPtr uri, const char **str) 246 { 247 const char *cur; 248 249 if (str == NULL) 250 return (-1); 251 252 cur = *str; 253 254 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 255 (*cur == '[') || (*cur == ']') || 256 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 257 NEXT(cur); 258 if (uri != NULL) { 259 if (uri->fragment != NULL) 260 xmlFree(uri->fragment); 261 if (uri->cleanup & 2) 262 uri->fragment = STRNDUP(*str, cur - *str); 263 else 264 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 265 } 266 *str = cur; 267 return (0); 268 } 269 270 /** 271 * xmlParse3986Query: 272 * @uri: pointer to an URI structure 273 * @str: pointer to the string to analyze 274 * 275 * Parse the query part of an URI 276 * 277 * query = *uric 278 * 279 * Returns 0 or the error code 280 */ 281 static int 282 xmlParse3986Query(xmlURIPtr uri, const char **str) 283 { 284 const char *cur; 285 286 if (str == NULL) 287 return (-1); 288 289 cur = *str; 290 291 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 292 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 293 NEXT(cur); 294 if (uri != NULL) { 295 if (uri->query != NULL) 296 xmlFree(uri->query); 297 if (uri->cleanup & 2) 298 uri->query = STRNDUP(*str, cur - *str); 299 else 300 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 301 302 /* Save the raw bytes of the query as well. 303 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114 304 */ 305 if (uri->query_raw != NULL) 306 xmlFree (uri->query_raw); 307 uri->query_raw = STRNDUP (*str, cur - *str); 308 } 309 *str = cur; 310 return (0); 311 } 312 313 /** 314 * xmlParse3986Port: 315 * @uri: pointer to an URI structure 316 * @str: the string to analyze 317 * 318 * Parse a port part and fills in the appropriate fields 319 * of the @uri structure 320 * 321 * port = *DIGIT 322 * 323 * Returns 0 or the error code 324 */ 325 static int 326 xmlParse3986Port(xmlURIPtr uri, const char **str) 327 { 328 const char *cur = *str; 329 int port = 0; 330 331 if (ISA_DIGIT(cur)) { 332 while (ISA_DIGIT(cur)) { 333 int digit = *cur - '0'; 334 335 if (port > INT_MAX / 10) 336 return(1); 337 port *= 10; 338 if (port > INT_MAX - digit) 339 return(1); 340 port += digit; 341 342 cur++; 343 } 344 if (uri != NULL) 345 uri->port = port; 346 *str = cur; 347 return(0); 348 } 349 return(1); 350 } 351 352 /** 353 * xmlParse3986Userinfo: 354 * @uri: pointer to an URI structure 355 * @str: the string to analyze 356 * 357 * Parse an user information part and fills in the appropriate fields 358 * of the @uri structure 359 * 360 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 361 * 362 * Returns 0 or the error code 363 */ 364 static int 365 xmlParse3986Userinfo(xmlURIPtr uri, const char **str) 366 { 367 const char *cur; 368 369 cur = *str; 370 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || 371 ISA_SUB_DELIM(cur) || (*cur == ':')) 372 NEXT(cur); 373 if (*cur == '@') { 374 if (uri != NULL) { 375 if (uri->user != NULL) xmlFree(uri->user); 376 if (uri->cleanup & 2) 377 uri->user = STRNDUP(*str, cur - *str); 378 else 379 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 380 } 381 *str = cur; 382 return(0); 383 } 384 return(1); 385 } 386 387 /** 388 * xmlParse3986DecOctet: 389 * @str: the string to analyze 390 * 391 * dec-octet = DIGIT ; 0-9 392 * / %x31-39 DIGIT ; 10-99 393 * / "1" 2DIGIT ; 100-199 394 * / "2" %x30-34 DIGIT ; 200-249 395 * / "25" %x30-35 ; 250-255 396 * 397 * Skip a dec-octet. 398 * 399 * Returns 0 if found and skipped, 1 otherwise 400 */ 401 static int 402 xmlParse3986DecOctet(const char **str) { 403 const char *cur = *str; 404 405 if (!(ISA_DIGIT(cur))) 406 return(1); 407 if (!ISA_DIGIT(cur+1)) 408 cur++; 409 else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2))) 410 cur += 2; 411 else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) 412 cur += 3; 413 else if ((*cur == '2') && (*(cur + 1) >= '0') && 414 (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2))) 415 cur += 3; 416 else if ((*cur == '2') && (*(cur + 1) == '5') && 417 (*(cur + 2) >= '0') && (*(cur + 1) <= '5')) 418 cur += 3; 419 else 420 return(1); 421 *str = cur; 422 return(0); 423 } 424 /** 425 * xmlParse3986Host: 426 * @uri: pointer to an URI structure 427 * @str: the string to analyze 428 * 429 * Parse an host part and fills in the appropriate fields 430 * of the @uri structure 431 * 432 * host = IP-literal / IPv4address / reg-name 433 * IP-literal = "[" ( IPv6address / IPvFuture ) "]" 434 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet 435 * reg-name = *( unreserved / pct-encoded / sub-delims ) 436 * 437 * Returns 0 or the error code 438 */ 439 static int 440 xmlParse3986Host(xmlURIPtr uri, const char **str) 441 { 442 const char *cur = *str; 443 const char *host; 444 445 host = cur; 446 /* 447 * IPv6 and future addressing scheme are enclosed between brackets 448 */ 449 if (*cur == '[') { 450 cur++; 451 while ((*cur != ']') && (*cur != 0)) 452 cur++; 453 if (*cur != ']') 454 return(1); 455 cur++; 456 goto found; 457 } 458 /* 459 * try to parse an IPv4 460 */ 461 if (ISA_DIGIT(cur)) { 462 if (xmlParse3986DecOctet(&cur) != 0) 463 goto not_ipv4; 464 if (*cur != '.') 465 goto not_ipv4; 466 cur++; 467 if (xmlParse3986DecOctet(&cur) != 0) 468 goto not_ipv4; 469 if (*cur != '.') 470 goto not_ipv4; 471 if (xmlParse3986DecOctet(&cur) != 0) 472 goto not_ipv4; 473 if (*cur != '.') 474 goto not_ipv4; 475 if (xmlParse3986DecOctet(&cur) != 0) 476 goto not_ipv4; 477 goto found; 478 not_ipv4: 479 cur = *str; 480 } 481 /* 482 * then this should be a hostname which can be empty 483 */ 484 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) 485 NEXT(cur); 486 found: 487 if (uri != NULL) { 488 if (uri->authority != NULL) xmlFree(uri->authority); 489 uri->authority = NULL; 490 if (uri->server != NULL) xmlFree(uri->server); 491 if (cur != host) { 492 if (uri->cleanup & 2) 493 uri->server = STRNDUP(host, cur - host); 494 else 495 uri->server = xmlURIUnescapeString(host, cur - host, NULL); 496 } else 497 uri->server = NULL; 498 } 499 *str = cur; 500 return(0); 501 } 502 503 /** 504 * xmlParse3986Authority: 505 * @uri: pointer to an URI structure 506 * @str: the string to analyze 507 * 508 * Parse an authority part and fills in the appropriate fields 509 * of the @uri structure 510 * 511 * authority = [ userinfo "@" ] host [ ":" port ] 512 * 513 * Returns 0 or the error code 514 */ 515 static int 516 xmlParse3986Authority(xmlURIPtr uri, const char **str) 517 { 518 const char *cur; 519 int ret; 520 521 cur = *str; 522 /* 523 * try to parse an userinfo and check for the trailing @ 524 */ 525 ret = xmlParse3986Userinfo(uri, &cur); 526 if ((ret != 0) || (*cur != '@')) 527 cur = *str; 528 else 529 cur++; 530 ret = xmlParse3986Host(uri, &cur); 531 if (ret != 0) return(ret); 532 if (*cur == ':') { 533 cur++; 534 ret = xmlParse3986Port(uri, &cur); 535 if (ret != 0) return(ret); 536 } 537 *str = cur; 538 return(0); 539 } 540 541 /** 542 * xmlParse3986Segment: 543 * @str: the string to analyze 544 * @forbid: an optional forbidden character 545 * @empty: allow an empty segment 546 * 547 * Parse a segment and fills in the appropriate fields 548 * of the @uri structure 549 * 550 * segment = *pchar 551 * segment-nz = 1*pchar 552 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 553 * ; non-zero-length segment without any colon ":" 554 * 555 * Returns 0 or the error code 556 */ 557 static int 558 xmlParse3986Segment(const char **str, char forbid, int empty) 559 { 560 const char *cur; 561 562 cur = *str; 563 if (!ISA_PCHAR(cur)) { 564 if (empty) 565 return(0); 566 return(1); 567 } 568 while (ISA_PCHAR(cur) && (*cur != forbid)) 569 NEXT(cur); 570 *str = cur; 571 return (0); 572 } 573 574 /** 575 * xmlParse3986PathAbEmpty: 576 * @uri: pointer to an URI structure 577 * @str: the string to analyze 578 * 579 * Parse an path absolute or empty and fills in the appropriate fields 580 * of the @uri structure 581 * 582 * path-abempty = *( "/" segment ) 583 * 584 * Returns 0 or the error code 585 */ 586 static int 587 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str) 588 { 589 const char *cur; 590 int ret; 591 592 cur = *str; 593 594 while (*cur == '/') { 595 cur++; 596 ret = xmlParse3986Segment(&cur, 0, 1); 597 if (ret != 0) return(ret); 598 } 599 if (uri != NULL) { 600 if (uri->path != NULL) xmlFree(uri->path); 601 if (*str != cur) { 602 if (uri->cleanup & 2) 603 uri->path = STRNDUP(*str, cur - *str); 604 else 605 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 606 } else { 607 uri->path = NULL; 608 } 609 } 610 *str = cur; 611 return (0); 612 } 613 614 /** 615 * xmlParse3986PathAbsolute: 616 * @uri: pointer to an URI structure 617 * @str: the string to analyze 618 * 619 * Parse an path absolute and fills in the appropriate fields 620 * of the @uri structure 621 * 622 * path-absolute = "/" [ segment-nz *( "/" segment ) ] 623 * 624 * Returns 0 or the error code 625 */ 626 static int 627 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str) 628 { 629 const char *cur; 630 int ret; 631 632 cur = *str; 633 634 if (*cur != '/') 635 return(1); 636 cur++; 637 ret = xmlParse3986Segment(&cur, 0, 0); 638 if (ret == 0) { 639 while (*cur == '/') { 640 cur++; 641 ret = xmlParse3986Segment(&cur, 0, 1); 642 if (ret != 0) return(ret); 643 } 644 } 645 if (uri != NULL) { 646 if (uri->path != NULL) xmlFree(uri->path); 647 if (cur != *str) { 648 if (uri->cleanup & 2) 649 uri->path = STRNDUP(*str, cur - *str); 650 else 651 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 652 } else { 653 uri->path = NULL; 654 } 655 } 656 *str = cur; 657 return (0); 658 } 659 660 /** 661 * xmlParse3986PathRootless: 662 * @uri: pointer to an URI structure 663 * @str: the string to analyze 664 * 665 * Parse an path without root and fills in the appropriate fields 666 * of the @uri structure 667 * 668 * path-rootless = segment-nz *( "/" segment ) 669 * 670 * Returns 0 or the error code 671 */ 672 static int 673 xmlParse3986PathRootless(xmlURIPtr uri, const char **str) 674 { 675 const char *cur; 676 int ret; 677 678 cur = *str; 679 680 ret = xmlParse3986Segment(&cur, 0, 0); 681 if (ret != 0) return(ret); 682 while (*cur == '/') { 683 cur++; 684 ret = xmlParse3986Segment(&cur, 0, 1); 685 if (ret != 0) return(ret); 686 } 687 if (uri != NULL) { 688 if (uri->path != NULL) xmlFree(uri->path); 689 if (cur != *str) { 690 if (uri->cleanup & 2) 691 uri->path = STRNDUP(*str, cur - *str); 692 else 693 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 694 } else { 695 uri->path = NULL; 696 } 697 } 698 *str = cur; 699 return (0); 700 } 701 702 /** 703 * xmlParse3986PathNoScheme: 704 * @uri: pointer to an URI structure 705 * @str: the string to analyze 706 * 707 * Parse an path which is not a scheme and fills in the appropriate fields 708 * of the @uri structure 709 * 710 * path-noscheme = segment-nz-nc *( "/" segment ) 711 * 712 * Returns 0 or the error code 713 */ 714 static int 715 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str) 716 { 717 const char *cur; 718 int ret; 719 720 cur = *str; 721 722 ret = xmlParse3986Segment(&cur, ':', 0); 723 if (ret != 0) return(ret); 724 while (*cur == '/') { 725 cur++; 726 ret = xmlParse3986Segment(&cur, 0, 1); 727 if (ret != 0) return(ret); 728 } 729 if (uri != NULL) { 730 if (uri->path != NULL) xmlFree(uri->path); 731 if (cur != *str) { 732 if (uri->cleanup & 2) 733 uri->path = STRNDUP(*str, cur - *str); 734 else 735 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 736 } else { 737 uri->path = NULL; 738 } 739 } 740 *str = cur; 741 return (0); 742 } 743 744 /** 745 * xmlParse3986HierPart: 746 * @uri: pointer to an URI structure 747 * @str: the string to analyze 748 * 749 * Parse an hierarchical part and fills in the appropriate fields 750 * of the @uri structure 751 * 752 * hier-part = "//" authority path-abempty 753 * / path-absolute 754 * / path-rootless 755 * / path-empty 756 * 757 * Returns 0 or the error code 758 */ 759 static int 760 xmlParse3986HierPart(xmlURIPtr uri, const char **str) 761 { 762 const char *cur; 763 int ret; 764 765 cur = *str; 766 767 if ((*cur == '/') && (*(cur + 1) == '/')) { 768 cur += 2; 769 ret = xmlParse3986Authority(uri, &cur); 770 if (ret != 0) return(ret); 771 if (uri->server == NULL) 772 uri->port = -1; 773 ret = xmlParse3986PathAbEmpty(uri, &cur); 774 if (ret != 0) return(ret); 775 *str = cur; 776 return(0); 777 } else if (*cur == '/') { 778 ret = xmlParse3986PathAbsolute(uri, &cur); 779 if (ret != 0) return(ret); 780 } else if (ISA_PCHAR(cur)) { 781 ret = xmlParse3986PathRootless(uri, &cur); 782 if (ret != 0) return(ret); 783 } else { 784 /* path-empty is effectively empty */ 785 if (uri != NULL) { 786 if (uri->path != NULL) xmlFree(uri->path); 787 uri->path = NULL; 788 } 789 } 790 *str = cur; 791 return (0); 792 } 793 794 /** 795 * xmlParse3986RelativeRef: 796 * @uri: pointer to an URI structure 797 * @str: the string to analyze 798 * 799 * Parse an URI string and fills in the appropriate fields 800 * of the @uri structure 801 * 802 * relative-ref = relative-part [ "?" query ] [ "#" fragment ] 803 * relative-part = "//" authority path-abempty 804 * / path-absolute 805 * / path-noscheme 806 * / path-empty 807 * 808 * Returns 0 or the error code 809 */ 810 static int 811 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) { 812 int ret; 813 814 if ((*str == '/') && (*(str + 1) == '/')) { 815 str += 2; 816 ret = xmlParse3986Authority(uri, &str); 817 if (ret != 0) return(ret); 818 ret = xmlParse3986PathAbEmpty(uri, &str); 819 if (ret != 0) return(ret); 820 } else if (*str == '/') { 821 ret = xmlParse3986PathAbsolute(uri, &str); 822 if (ret != 0) return(ret); 823 } else if (ISA_PCHAR(str)) { 824 ret = xmlParse3986PathNoScheme(uri, &str); 825 if (ret != 0) return(ret); 826 } else { 827 /* path-empty is effectively empty */ 828 if (uri != NULL) { 829 if (uri->path != NULL) xmlFree(uri->path); 830 uri->path = NULL; 831 } 832 } 833 834 if (*str == '?') { 835 str++; 836 ret = xmlParse3986Query(uri, &str); 837 if (ret != 0) return(ret); 838 } 839 if (*str == '#') { 840 str++; 841 ret = xmlParse3986Fragment(uri, &str); 842 if (ret != 0) return(ret); 843 } 844 if (*str != 0) { 845 xmlCleanURI(uri); 846 return(1); 847 } 848 return(0); 849 } 850 851 852 /** 853 * xmlParse3986URI: 854 * @uri: pointer to an URI structure 855 * @str: the string to analyze 856 * 857 * Parse an URI string and fills in the appropriate fields 858 * of the @uri structure 859 * 860 * scheme ":" hier-part [ "?" query ] [ "#" fragment ] 861 * 862 * Returns 0 or the error code 863 */ 864 static int 865 xmlParse3986URI(xmlURIPtr uri, const char *str) { 866 int ret; 867 868 ret = xmlParse3986Scheme(uri, &str); 869 if (ret != 0) return(ret); 870 if (*str != ':') { 871 return(1); 872 } 873 str++; 874 ret = xmlParse3986HierPart(uri, &str); 875 if (ret != 0) return(ret); 876 if (*str == '?') { 877 str++; 878 ret = xmlParse3986Query(uri, &str); 879 if (ret != 0) return(ret); 880 } 881 if (*str == '#') { 882 str++; 883 ret = xmlParse3986Fragment(uri, &str); 884 if (ret != 0) return(ret); 885 } 886 if (*str != 0) { 887 xmlCleanURI(uri); 888 return(1); 889 } 890 return(0); 891 } 892 893 /** 894 * xmlParse3986URIReference: 895 * @uri: pointer to an URI structure 896 * @str: the string to analyze 897 * 898 * Parse an URI reference string and fills in the appropriate fields 899 * of the @uri structure 900 * 901 * URI-reference = URI / relative-ref 902 * 903 * Returns 0 or the error code 904 */ 905 static int 906 xmlParse3986URIReference(xmlURIPtr uri, const char *str) { 907 int ret; 908 909 if (str == NULL) 910 return(-1); 911 xmlCleanURI(uri); 912 913 /* 914 * Try first to parse absolute refs, then fallback to relative if 915 * it fails. 916 */ 917 ret = xmlParse3986URI(uri, str); 918 if (ret != 0) { 919 xmlCleanURI(uri); 920 ret = xmlParse3986RelativeRef(uri, str); 921 if (ret != 0) { 922 xmlCleanURI(uri); 923 return(ret); 924 } 925 } 926 return(0); 927 } 928 929 /** 930 * xmlParseURI: 931 * @str: the URI string to analyze 932 * 933 * Parse an URI based on RFC 3986 934 * 935 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 936 * 937 * Returns a newly built xmlURIPtr or NULL in case of error 938 */ 939 xmlURIPtr 940 xmlParseURI(const char *str) { 941 xmlURIPtr uri; 942 int ret; 943 944 if (str == NULL) 945 return(NULL); 946 uri = xmlCreateURI(); 947 if (uri != NULL) { 948 ret = xmlParse3986URIReference(uri, str); 949 if (ret) { 950 xmlFreeURI(uri); 951 return(NULL); 952 } 953 } 954 return(uri); 955 } 956 957 /** 958 * xmlParseURIReference: 959 * @uri: pointer to an URI structure 960 * @str: the string to analyze 961 * 962 * Parse an URI reference string based on RFC 3986 and fills in the 963 * appropriate fields of the @uri structure 964 * 965 * URI-reference = URI / relative-ref 966 * 967 * Returns 0 or the error code 968 */ 969 int 970 xmlParseURIReference(xmlURIPtr uri, const char *str) { 971 return(xmlParse3986URIReference(uri, str)); 972 } 973 974 /** 975 * xmlParseURIRaw: 976 * @str: the URI string to analyze 977 * @raw: if 1 unescaping of URI pieces are disabled 978 * 979 * Parse an URI but allows to keep intact the original fragments. 980 * 981 * URI-reference = URI / relative-ref 982 * 983 * Returns a newly built xmlURIPtr or NULL in case of error 984 */ 985 xmlURIPtr 986 xmlParseURIRaw(const char *str, int raw) { 987 xmlURIPtr uri; 988 int ret; 989 990 if (str == NULL) 991 return(NULL); 992 uri = xmlCreateURI(); 993 if (uri != NULL) { 994 if (raw) { 995 uri->cleanup |= 2; 996 } 997 ret = xmlParseURIReference(uri, str); 998 if (ret) { 999 xmlFreeURI(uri); 1000 return(NULL); 1001 } 1002 } 1003 return(uri); 1004 } 1005 1006 /************************************************************************ 1007 * * 1008 * Generic URI structure functions * 1009 * * 1010 ************************************************************************/ 1011 1012 /** 1013 * xmlCreateURI: 1014 * 1015 * Simply creates an empty xmlURI 1016 * 1017 * Returns the new structure or NULL in case of error 1018 */ 1019 xmlURIPtr 1020 xmlCreateURI(void) { 1021 xmlURIPtr ret; 1022 1023 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 1024 if (ret == NULL) { 1025 xmlURIErrMemory("creating URI structure\n"); 1026 return(NULL); 1027 } 1028 memset(ret, 0, sizeof(xmlURI)); 1029 return(ret); 1030 } 1031 1032 /** 1033 * xmlSaveUriRealloc: 1034 * 1035 * Function to handle properly a reallocation when saving an URI 1036 * Also imposes some limit on the length of an URI string output 1037 */ 1038 static xmlChar * 1039 xmlSaveUriRealloc(xmlChar *ret, int *max) { 1040 xmlChar *temp; 1041 int tmp; 1042 1043 if (*max > MAX_URI_LENGTH) { 1044 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n"); 1045 return(NULL); 1046 } 1047 tmp = *max * 2; 1048 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1)); 1049 if (temp == NULL) { 1050 xmlURIErrMemory("saving URI\n"); 1051 return(NULL); 1052 } 1053 *max = tmp; 1054 return(temp); 1055 } 1056 1057 /** 1058 * xmlSaveUri: 1059 * @uri: pointer to an xmlURI 1060 * 1061 * Save the URI as an escaped string 1062 * 1063 * Returns a new string (to be deallocated by caller) 1064 */ 1065 xmlChar * 1066 xmlSaveUri(xmlURIPtr uri) { 1067 xmlChar *ret = NULL; 1068 xmlChar *temp; 1069 const char *p; 1070 int len; 1071 int max; 1072 1073 if (uri == NULL) return(NULL); 1074 1075 1076 max = 80; 1077 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 1078 if (ret == NULL) { 1079 xmlURIErrMemory("saving URI\n"); 1080 return(NULL); 1081 } 1082 len = 0; 1083 1084 if (uri->scheme != NULL) { 1085 p = uri->scheme; 1086 while (*p != 0) { 1087 if (len >= max) { 1088 temp = xmlSaveUriRealloc(ret, &max); 1089 if (temp == NULL) goto mem_error; 1090 ret = temp; 1091 } 1092 ret[len++] = *p++; 1093 } 1094 if (len >= max) { 1095 temp = xmlSaveUriRealloc(ret, &max); 1096 if (temp == NULL) goto mem_error; 1097 ret = temp; 1098 } 1099 ret[len++] = ':'; 1100 } 1101 if (uri->opaque != NULL) { 1102 p = uri->opaque; 1103 while (*p != 0) { 1104 if (len + 3 >= max) { 1105 temp = xmlSaveUriRealloc(ret, &max); 1106 if (temp == NULL) goto mem_error; 1107 ret = temp; 1108 } 1109 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 1110 ret[len++] = *p++; 1111 else { 1112 int val = *(unsigned char *)p++; 1113 int hi = val / 0x10, lo = val % 0x10; 1114 ret[len++] = '%'; 1115 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1116 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1117 } 1118 } 1119 } else { 1120 if ((uri->server != NULL) || (uri->port == -1)) { 1121 if (len + 3 >= max) { 1122 temp = xmlSaveUriRealloc(ret, &max); 1123 if (temp == NULL) goto mem_error; 1124 ret = temp; 1125 } 1126 ret[len++] = '/'; 1127 ret[len++] = '/'; 1128 if (uri->user != NULL) { 1129 p = uri->user; 1130 while (*p != 0) { 1131 if (len + 3 >= max) { 1132 temp = xmlSaveUriRealloc(ret, &max); 1133 if (temp == NULL) goto mem_error; 1134 ret = temp; 1135 } 1136 if ((IS_UNRESERVED(*(p))) || 1137 ((*(p) == ';')) || ((*(p) == ':')) || 1138 ((*(p) == '&')) || ((*(p) == '=')) || 1139 ((*(p) == '+')) || ((*(p) == '$')) || 1140 ((*(p) == ','))) 1141 ret[len++] = *p++; 1142 else { 1143 int val = *(unsigned char *)p++; 1144 int hi = val / 0x10, lo = val % 0x10; 1145 ret[len++] = '%'; 1146 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1147 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1148 } 1149 } 1150 if (len + 3 >= max) { 1151 temp = xmlSaveUriRealloc(ret, &max); 1152 if (temp == NULL) goto mem_error; 1153 ret = temp; 1154 } 1155 ret[len++] = '@'; 1156 } 1157 if (uri->server != NULL) { 1158 p = uri->server; 1159 while (*p != 0) { 1160 if (len >= max) { 1161 temp = xmlSaveUriRealloc(ret, &max); 1162 if (temp == NULL) goto mem_error; 1163 ret = temp; 1164 } 1165 ret[len++] = *p++; 1166 } 1167 if (uri->port > 0) { 1168 if (len + 10 >= max) { 1169 temp = xmlSaveUriRealloc(ret, &max); 1170 if (temp == NULL) goto mem_error; 1171 ret = temp; 1172 } 1173 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 1174 } 1175 } 1176 } else if (uri->authority != NULL) { 1177 if (len + 3 >= max) { 1178 temp = xmlSaveUriRealloc(ret, &max); 1179 if (temp == NULL) goto mem_error; 1180 ret = temp; 1181 } 1182 ret[len++] = '/'; 1183 ret[len++] = '/'; 1184 p = uri->authority; 1185 while (*p != 0) { 1186 if (len + 3 >= max) { 1187 temp = xmlSaveUriRealloc(ret, &max); 1188 if (temp == NULL) goto mem_error; 1189 ret = temp; 1190 } 1191 if ((IS_UNRESERVED(*(p))) || 1192 ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || 1193 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 1194 ((*(p) == '=')) || ((*(p) == '+'))) 1195 ret[len++] = *p++; 1196 else { 1197 int val = *(unsigned char *)p++; 1198 int hi = val / 0x10, lo = val % 0x10; 1199 ret[len++] = '%'; 1200 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1201 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1202 } 1203 } 1204 } else if (uri->scheme != NULL) { 1205 if (len + 3 >= max) { 1206 temp = xmlSaveUriRealloc(ret, &max); 1207 if (temp == NULL) goto mem_error; 1208 ret = temp; 1209 } 1210 } 1211 if (uri->path != NULL) { 1212 p = uri->path; 1213 /* 1214 * the colon in file:///d: should not be escaped or 1215 * Windows accesses fail later. 1216 */ 1217 if ((uri->scheme != NULL) && 1218 (p[0] == '/') && 1219 (((p[1] >= 'a') && (p[1] <= 'z')) || 1220 ((p[1] >= 'A') && (p[1] <= 'Z'))) && 1221 (p[2] == ':') && 1222 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) { 1223 if (len + 3 >= max) { 1224 temp = xmlSaveUriRealloc(ret, &max); 1225 if (temp == NULL) goto mem_error; 1226 ret = temp; 1227 } 1228 ret[len++] = *p++; 1229 ret[len++] = *p++; 1230 ret[len++] = *p++; 1231 } 1232 while (*p != 0) { 1233 if (len + 3 >= max) { 1234 temp = xmlSaveUriRealloc(ret, &max); 1235 if (temp == NULL) goto mem_error; 1236 ret = temp; 1237 } 1238 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 1239 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 1240 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || 1241 ((*(p) == ','))) 1242 ret[len++] = *p++; 1243 else { 1244 int val = *(unsigned char *)p++; 1245 int hi = val / 0x10, lo = val % 0x10; 1246 ret[len++] = '%'; 1247 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1248 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1249 } 1250 } 1251 } 1252 if (uri->query_raw != NULL) { 1253 if (len + 1 >= max) { 1254 temp = xmlSaveUriRealloc(ret, &max); 1255 if (temp == NULL) goto mem_error; 1256 ret = temp; 1257 } 1258 ret[len++] = '?'; 1259 p = uri->query_raw; 1260 while (*p != 0) { 1261 if (len + 1 >= max) { 1262 temp = xmlSaveUriRealloc(ret, &max); 1263 if (temp == NULL) goto mem_error; 1264 ret = temp; 1265 } 1266 ret[len++] = *p++; 1267 } 1268 } else if (uri->query != NULL) { 1269 if (len + 3 >= max) { 1270 temp = xmlSaveUriRealloc(ret, &max); 1271 if (temp == NULL) goto mem_error; 1272 ret = temp; 1273 } 1274 ret[len++] = '?'; 1275 p = uri->query; 1276 while (*p != 0) { 1277 if (len + 3 >= max) { 1278 temp = xmlSaveUriRealloc(ret, &max); 1279 if (temp == NULL) goto mem_error; 1280 ret = temp; 1281 } 1282 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1283 ret[len++] = *p++; 1284 else { 1285 int val = *(unsigned char *)p++; 1286 int hi = val / 0x10, lo = val % 0x10; 1287 ret[len++] = '%'; 1288 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1289 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1290 } 1291 } 1292 } 1293 } 1294 if (uri->fragment != NULL) { 1295 if (len + 3 >= max) { 1296 temp = xmlSaveUriRealloc(ret, &max); 1297 if (temp == NULL) goto mem_error; 1298 ret = temp; 1299 } 1300 ret[len++] = '#'; 1301 p = uri->fragment; 1302 while (*p != 0) { 1303 if (len + 3 >= max) { 1304 temp = xmlSaveUriRealloc(ret, &max); 1305 if (temp == NULL) goto mem_error; 1306 ret = temp; 1307 } 1308 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1309 ret[len++] = *p++; 1310 else { 1311 int val = *(unsigned char *)p++; 1312 int hi = val / 0x10, lo = val % 0x10; 1313 ret[len++] = '%'; 1314 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1315 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1316 } 1317 } 1318 } 1319 if (len >= max) { 1320 temp = xmlSaveUriRealloc(ret, &max); 1321 if (temp == NULL) goto mem_error; 1322 ret = temp; 1323 } 1324 ret[len] = 0; 1325 return(ret); 1326 1327 mem_error: 1328 xmlFree(ret); 1329 return(NULL); 1330 } 1331 1332 /** 1333 * xmlPrintURI: 1334 * @stream: a FILE* for the output 1335 * @uri: pointer to an xmlURI 1336 * 1337 * Prints the URI in the stream @stream. 1338 */ 1339 void 1340 xmlPrintURI(FILE *stream, xmlURIPtr uri) { 1341 xmlChar *out; 1342 1343 out = xmlSaveUri(uri); 1344 if (out != NULL) { 1345 fprintf(stream, "%s", (char *) out); 1346 xmlFree(out); 1347 } 1348 } 1349 1350 /** 1351 * xmlCleanURI: 1352 * @uri: pointer to an xmlURI 1353 * 1354 * Make sure the xmlURI struct is free of content 1355 */ 1356 static void 1357 xmlCleanURI(xmlURIPtr uri) { 1358 if (uri == NULL) return; 1359 1360 if (uri->scheme != NULL) xmlFree(uri->scheme); 1361 uri->scheme = NULL; 1362 if (uri->server != NULL) xmlFree(uri->server); 1363 uri->server = NULL; 1364 if (uri->user != NULL) xmlFree(uri->user); 1365 uri->user = NULL; 1366 if (uri->path != NULL) xmlFree(uri->path); 1367 uri->path = NULL; 1368 if (uri->fragment != NULL) xmlFree(uri->fragment); 1369 uri->fragment = NULL; 1370 if (uri->opaque != NULL) xmlFree(uri->opaque); 1371 uri->opaque = NULL; 1372 if (uri->authority != NULL) xmlFree(uri->authority); 1373 uri->authority = NULL; 1374 if (uri->query != NULL) xmlFree(uri->query); 1375 uri->query = NULL; 1376 if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1377 uri->query_raw = NULL; 1378 } 1379 1380 /** 1381 * xmlFreeURI: 1382 * @uri: pointer to an xmlURI 1383 * 1384 * Free up the xmlURI struct 1385 */ 1386 void 1387 xmlFreeURI(xmlURIPtr uri) { 1388 if (uri == NULL) return; 1389 1390 if (uri->scheme != NULL) xmlFree(uri->scheme); 1391 if (uri->server != NULL) xmlFree(uri->server); 1392 if (uri->user != NULL) xmlFree(uri->user); 1393 if (uri->path != NULL) xmlFree(uri->path); 1394 if (uri->fragment != NULL) xmlFree(uri->fragment); 1395 if (uri->opaque != NULL) xmlFree(uri->opaque); 1396 if (uri->authority != NULL) xmlFree(uri->authority); 1397 if (uri->query != NULL) xmlFree(uri->query); 1398 if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1399 xmlFree(uri); 1400 } 1401 1402 /************************************************************************ 1403 * * 1404 * Helper functions * 1405 * * 1406 ************************************************************************/ 1407 1408 /** 1409 * xmlNormalizeURIPath: 1410 * @path: pointer to the path string 1411 * 1412 * Applies the 5 normalization steps to a path string--that is, RFC 2396 1413 * Section 5.2, steps 6.c through 6.g. 1414 * 1415 * Normalization occurs directly on the string, no new allocation is done 1416 * 1417 * Returns 0 or an error code 1418 */ 1419 int 1420 xmlNormalizeURIPath(char *path) { 1421 char *cur, *out; 1422 1423 if (path == NULL) 1424 return(-1); 1425 1426 /* Skip all initial "/" chars. We want to get to the beginning of the 1427 * first non-empty segment. 1428 */ 1429 cur = path; 1430 while (cur[0] == '/') 1431 ++cur; 1432 if (cur[0] == '\0') 1433 return(0); 1434 1435 /* Keep everything we've seen so far. */ 1436 out = cur; 1437 1438 /* 1439 * Analyze each segment in sequence for cases (c) and (d). 1440 */ 1441 while (cur[0] != '\0') { 1442 /* 1443 * c) All occurrences of "./", where "." is a complete path segment, 1444 * are removed from the buffer string. 1445 */ 1446 if ((cur[0] == '.') && (cur[1] == '/')) { 1447 cur += 2; 1448 /* '//' normalization should be done at this point too */ 1449 while (cur[0] == '/') 1450 cur++; 1451 continue; 1452 } 1453 1454 /* 1455 * d) If the buffer string ends with "." as a complete path segment, 1456 * that "." is removed. 1457 */ 1458 if ((cur[0] == '.') && (cur[1] == '\0')) 1459 break; 1460 1461 /* Otherwise keep the segment. */ 1462 while (cur[0] != '/') { 1463 if (cur[0] == '\0') 1464 goto done_cd; 1465 (out++)[0] = (cur++)[0]; 1466 } 1467 /* normalize // */ 1468 while ((cur[0] == '/') && (cur[1] == '/')) 1469 cur++; 1470 1471 (out++)[0] = (cur++)[0]; 1472 } 1473 done_cd: 1474 out[0] = '\0'; 1475 1476 /* Reset to the beginning of the first segment for the next sequence. */ 1477 cur = path; 1478 while (cur[0] == '/') 1479 ++cur; 1480 if (cur[0] == '\0') 1481 return(0); 1482 1483 /* 1484 * Analyze each segment in sequence for cases (e) and (f). 1485 * 1486 * e) All occurrences of "<segment>/../", where <segment> is a 1487 * complete path segment not equal to "..", are removed from the 1488 * buffer string. Removal of these path segments is performed 1489 * iteratively, removing the leftmost matching pattern on each 1490 * iteration, until no matching pattern remains. 1491 * 1492 * f) If the buffer string ends with "<segment>/..", where <segment> 1493 * is a complete path segment not equal to "..", that 1494 * "<segment>/.." is removed. 1495 * 1496 * To satisfy the "iterative" clause in (e), we need to collapse the 1497 * string every time we find something that needs to be removed. Thus, 1498 * we don't need to keep two pointers into the string: we only need a 1499 * "current position" pointer. 1500 */ 1501 while (1) { 1502 char *segp, *tmp; 1503 1504 /* At the beginning of each iteration of this loop, "cur" points to 1505 * the first character of the segment we want to examine. 1506 */ 1507 1508 /* Find the end of the current segment. */ 1509 segp = cur; 1510 while ((segp[0] != '/') && (segp[0] != '\0')) 1511 ++segp; 1512 1513 /* If this is the last segment, we're done (we need at least two 1514 * segments to meet the criteria for the (e) and (f) cases). 1515 */ 1516 if (segp[0] == '\0') 1517 break; 1518 1519 /* If the first segment is "..", or if the next segment _isn't_ "..", 1520 * keep this segment and try the next one. 1521 */ 1522 ++segp; 1523 if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) 1524 || ((segp[0] != '.') || (segp[1] != '.') 1525 || ((segp[2] != '/') && (segp[2] != '\0')))) { 1526 cur = segp; 1527 continue; 1528 } 1529 1530 /* If we get here, remove this segment and the next one and back up 1531 * to the previous segment (if there is one), to implement the 1532 * "iteratively" clause. It's pretty much impossible to back up 1533 * while maintaining two pointers into the buffer, so just compact 1534 * the whole buffer now. 1535 */ 1536 1537 /* If this is the end of the buffer, we're done. */ 1538 if (segp[2] == '\0') { 1539 cur[0] = '\0'; 1540 break; 1541 } 1542 /* Valgrind complained, strcpy(cur, segp + 3); */ 1543 /* string will overlap, do not use strcpy */ 1544 tmp = cur; 1545 segp += 3; 1546 while ((*tmp++ = *segp++) != 0) 1547 ; 1548 1549 /* If there are no previous segments, then keep going from here. */ 1550 segp = cur; 1551 while ((segp > path) && ((--segp)[0] == '/')) 1552 ; 1553 if (segp == path) 1554 continue; 1555 1556 /* "segp" is pointing to the end of a previous segment; find it's 1557 * start. We need to back up to the previous segment and start 1558 * over with that to handle things like "foo/bar/../..". If we 1559 * don't do this, then on the first pass we'll remove the "bar/..", 1560 * but be pointing at the second ".." so we won't realize we can also 1561 * remove the "foo/..". 1562 */ 1563 cur = segp; 1564 while ((cur > path) && (cur[-1] != '/')) 1565 --cur; 1566 } 1567 out[0] = '\0'; 1568 1569 /* 1570 * g) If the resulting buffer string still begins with one or more 1571 * complete path segments of "..", then the reference is 1572 * considered to be in error. Implementations may handle this 1573 * error by retaining these components in the resolved path (i.e., 1574 * treating them as part of the final URI), by removing them from 1575 * the resolved path (i.e., discarding relative levels above the 1576 * root), or by avoiding traversal of the reference. 1577 * 1578 * We discard them from the final path. 1579 */ 1580 if (path[0] == '/') { 1581 cur = path; 1582 while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') 1583 && ((cur[3] == '/') || (cur[3] == '\0'))) 1584 cur += 3; 1585 1586 if (cur != path) { 1587 out = path; 1588 while (cur[0] != '\0') 1589 (out++)[0] = (cur++)[0]; 1590 out[0] = 0; 1591 } 1592 } 1593 1594 return(0); 1595 } 1596 1597 static int is_hex(char c) { 1598 if (((c >= '0') && (c <= '9')) || 1599 ((c >= 'a') && (c <= 'f')) || 1600 ((c >= 'A') && (c <= 'F'))) 1601 return(1); 1602 return(0); 1603 } 1604 1605 /** 1606 * xmlURIUnescapeString: 1607 * @str: the string to unescape 1608 * @len: the length in bytes to unescape (or <= 0 to indicate full string) 1609 * @target: optional destination buffer 1610 * 1611 * Unescaping routine, but does not check that the string is an URI. The 1612 * output is a direct unsigned char translation of %XX values (no encoding) 1613 * Note that the length of the result can only be smaller or same size as 1614 * the input string. 1615 * 1616 * Returns a copy of the string, but unescaped, will return NULL only in case 1617 * of error 1618 */ 1619 char * 1620 xmlURIUnescapeString(const char *str, int len, char *target) { 1621 char *ret, *out; 1622 const char *in; 1623 1624 if (str == NULL) 1625 return(NULL); 1626 if (len <= 0) len = strlen(str); 1627 if (len < 0) return(NULL); 1628 1629 if (target == NULL) { 1630 ret = (char *) xmlMallocAtomic(len + 1); 1631 if (ret == NULL) { 1632 xmlURIErrMemory("unescaping URI value\n"); 1633 return(NULL); 1634 } 1635 } else 1636 ret = target; 1637 in = str; 1638 out = ret; 1639 while(len > 0) { 1640 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { 1641 in++; 1642 if ((*in >= '0') && (*in <= '9')) 1643 *out = (*in - '0'); 1644 else if ((*in >= 'a') && (*in <= 'f')) 1645 *out = (*in - 'a') + 10; 1646 else if ((*in >= 'A') && (*in <= 'F')) 1647 *out = (*in - 'A') + 10; 1648 in++; 1649 if ((*in >= '0') && (*in <= '9')) 1650 *out = *out * 16 + (*in - '0'); 1651 else if ((*in >= 'a') && (*in <= 'f')) 1652 *out = *out * 16 + (*in - 'a') + 10; 1653 else if ((*in >= 'A') && (*in <= 'F')) 1654 *out = *out * 16 + (*in - 'A') + 10; 1655 in++; 1656 len -= 3; 1657 out++; 1658 } else { 1659 *out++ = *in++; 1660 len--; 1661 } 1662 } 1663 *out = 0; 1664 return(ret); 1665 } 1666 1667 /** 1668 * xmlURIEscapeStr: 1669 * @str: string to escape 1670 * @list: exception list string of chars not to escape 1671 * 1672 * This routine escapes a string to hex, ignoring reserved characters (a-z) 1673 * and the characters in the exception list. 1674 * 1675 * Returns a new escaped string or NULL in case of error. 1676 */ 1677 xmlChar * 1678 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 1679 xmlChar *ret, ch; 1680 xmlChar *temp; 1681 const xmlChar *in; 1682 int len, out; 1683 1684 if (str == NULL) 1685 return(NULL); 1686 if (str[0] == 0) 1687 return(xmlStrdup(str)); 1688 len = xmlStrlen(str); 1689 if (!(len > 0)) return(NULL); 1690 1691 len += 20; 1692 ret = (xmlChar *) xmlMallocAtomic(len); 1693 if (ret == NULL) { 1694 xmlURIErrMemory("escaping URI value\n"); 1695 return(NULL); 1696 } 1697 in = (const xmlChar *) str; 1698 out = 0; 1699 while(*in != 0) { 1700 if (len - out <= 3) { 1701 temp = xmlSaveUriRealloc(ret, &len); 1702 if (temp == NULL) { 1703 xmlURIErrMemory("escaping URI value\n"); 1704 xmlFree(ret); 1705 return(NULL); 1706 } 1707 ret = temp; 1708 } 1709 1710 ch = *in; 1711 1712 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 1713 unsigned char val; 1714 ret[out++] = '%'; 1715 val = ch >> 4; 1716 if (val <= 9) 1717 ret[out++] = '0' + val; 1718 else 1719 ret[out++] = 'A' + val - 0xA; 1720 val = ch & 0xF; 1721 if (val <= 9) 1722 ret[out++] = '0' + val; 1723 else 1724 ret[out++] = 'A' + val - 0xA; 1725 in++; 1726 } else { 1727 ret[out++] = *in++; 1728 } 1729 1730 } 1731 ret[out] = 0; 1732 return(ret); 1733 } 1734 1735 /** 1736 * xmlURIEscape: 1737 * @str: the string of the URI to escape 1738 * 1739 * Escaping routine, does not do validity checks ! 1740 * It will try to escape the chars needing this, but this is heuristic 1741 * based it's impossible to be sure. 1742 * 1743 * Returns an copy of the string, but escaped 1744 * 1745 * 25 May 2001 1746 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 1747 * according to RFC2396. 1748 * - Carl Douglas 1749 */ 1750 xmlChar * 1751 xmlURIEscape(const xmlChar * str) 1752 { 1753 xmlChar *ret, *segment = NULL; 1754 xmlURIPtr uri; 1755 int ret2; 1756 1757 if (str == NULL) 1758 return (NULL); 1759 1760 uri = xmlCreateURI(); 1761 if (uri != NULL) { 1762 /* 1763 * Allow escaping errors in the unescaped form 1764 */ 1765 uri->cleanup = 1; 1766 ret2 = xmlParseURIReference(uri, (const char *)str); 1767 if (ret2) { 1768 xmlFreeURI(uri); 1769 return (NULL); 1770 } 1771 } 1772 1773 if (!uri) 1774 return NULL; 1775 1776 ret = NULL; 1777 1778 #define NULLCHK(p) if(!p) { \ 1779 xmlURIErrMemory("escaping URI value\n"); \ 1780 xmlFreeURI(uri); \ 1781 xmlFree(ret); \ 1782 return NULL; } \ 1783 1784 if (uri->scheme) { 1785 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 1786 NULLCHK(segment) 1787 ret = xmlStrcat(ret, segment); 1788 ret = xmlStrcat(ret, BAD_CAST ":"); 1789 xmlFree(segment); 1790 } 1791 1792 if (uri->authority) { 1793 segment = 1794 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 1795 NULLCHK(segment) 1796 ret = xmlStrcat(ret, BAD_CAST "//"); 1797 ret = xmlStrcat(ret, segment); 1798 xmlFree(segment); 1799 } 1800 1801 if (uri->user) { 1802 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 1803 NULLCHK(segment) 1804 ret = xmlStrcat(ret,BAD_CAST "//"); 1805 ret = xmlStrcat(ret, segment); 1806 ret = xmlStrcat(ret, BAD_CAST "@"); 1807 xmlFree(segment); 1808 } 1809 1810 if (uri->server) { 1811 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 1812 NULLCHK(segment) 1813 if (uri->user == NULL) 1814 ret = xmlStrcat(ret, BAD_CAST "//"); 1815 ret = xmlStrcat(ret, segment); 1816 xmlFree(segment); 1817 } 1818 1819 if (uri->port) { 1820 xmlChar port[10]; 1821 1822 snprintf((char *) port, 10, "%d", uri->port); 1823 ret = xmlStrcat(ret, BAD_CAST ":"); 1824 ret = xmlStrcat(ret, port); 1825 } 1826 1827 if (uri->path) { 1828 segment = 1829 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1830 NULLCHK(segment) 1831 ret = xmlStrcat(ret, segment); 1832 xmlFree(segment); 1833 } 1834 1835 if (uri->query_raw) { 1836 ret = xmlStrcat(ret, BAD_CAST "?"); 1837 ret = xmlStrcat(ret, BAD_CAST uri->query_raw); 1838 } 1839 else if (uri->query) { 1840 segment = 1841 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); 1842 NULLCHK(segment) 1843 ret = xmlStrcat(ret, BAD_CAST "?"); 1844 ret = xmlStrcat(ret, segment); 1845 xmlFree(segment); 1846 } 1847 1848 if (uri->opaque) { 1849 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1850 NULLCHK(segment) 1851 ret = xmlStrcat(ret, segment); 1852 xmlFree(segment); 1853 } 1854 1855 if (uri->fragment) { 1856 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1857 NULLCHK(segment) 1858 ret = xmlStrcat(ret, BAD_CAST "#"); 1859 ret = xmlStrcat(ret, segment); 1860 xmlFree(segment); 1861 } 1862 1863 xmlFreeURI(uri); 1864 #undef NULLCHK 1865 1866 return (ret); 1867 } 1868 1869 /************************************************************************ 1870 * * 1871 * Public functions * 1872 * * 1873 ************************************************************************/ 1874 1875 /** 1876 * xmlBuildURI: 1877 * @URI: the URI instance found in the document 1878 * @base: the base value 1879 * 1880 * Computes he final URI of the reference done by checking that 1881 * the given URI is valid, and building the final URI using the 1882 * base URI. This is processed according to section 5.2 of the 1883 * RFC 2396 1884 * 1885 * 5.2. Resolving Relative References to Absolute Form 1886 * 1887 * Returns a new URI string (to be freed by the caller) or NULL in case 1888 * of error. 1889 */ 1890 xmlChar * 1891 xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1892 xmlChar *val = NULL; 1893 int ret, len, indx, cur, out; 1894 xmlURIPtr ref = NULL; 1895 xmlURIPtr bas = NULL; 1896 xmlURIPtr res = NULL; 1897 1898 /* 1899 * 1) The URI reference is parsed into the potential four components and 1900 * fragment identifier, as described in Section 4.3. 1901 * 1902 * NOTE that a completely empty URI is treated by modern browsers 1903 * as a reference to "." rather than as a synonym for the current 1904 * URI. Should we do that here? 1905 */ 1906 if (URI == NULL) 1907 ret = -1; 1908 else { 1909 if (*URI) { 1910 ref = xmlCreateURI(); 1911 if (ref == NULL) 1912 goto done; 1913 ret = xmlParseURIReference(ref, (const char *) URI); 1914 } 1915 else 1916 ret = 0; 1917 } 1918 if (ret != 0) 1919 goto done; 1920 if ((ref != NULL) && (ref->scheme != NULL)) { 1921 /* 1922 * The URI is absolute don't modify. 1923 */ 1924 val = xmlStrdup(URI); 1925 goto done; 1926 } 1927 if (base == NULL) 1928 ret = -1; 1929 else { 1930 bas = xmlCreateURI(); 1931 if (bas == NULL) 1932 goto done; 1933 ret = xmlParseURIReference(bas, (const char *) base); 1934 } 1935 if (ret != 0) { 1936 if (ref) 1937 val = xmlSaveUri(ref); 1938 goto done; 1939 } 1940 if (ref == NULL) { 1941 /* 1942 * the base fragment must be ignored 1943 */ 1944 if (bas->fragment != NULL) { 1945 xmlFree(bas->fragment); 1946 bas->fragment = NULL; 1947 } 1948 val = xmlSaveUri(bas); 1949 goto done; 1950 } 1951 1952 /* 1953 * 2) If the path component is empty and the scheme, authority, and 1954 * query components are undefined, then it is a reference to the 1955 * current document and we are done. Otherwise, the reference URI's 1956 * query and fragment components are defined as found (or not found) 1957 * within the URI reference and not inherited from the base URI. 1958 * 1959 * NOTE that in modern browsers, the parsing differs from the above 1960 * in the following aspect: the query component is allowed to be 1961 * defined while still treating this as a reference to the current 1962 * document. 1963 */ 1964 res = xmlCreateURI(); 1965 if (res == NULL) 1966 goto done; 1967 if ((ref->scheme == NULL) && (ref->path == NULL) && 1968 ((ref->authority == NULL) && (ref->server == NULL))) { 1969 if (bas->scheme != NULL) 1970 res->scheme = xmlMemStrdup(bas->scheme); 1971 if (bas->authority != NULL) 1972 res->authority = xmlMemStrdup(bas->authority); 1973 else if ((bas->server != NULL) || (bas->port == -1)) { 1974 if (bas->server != NULL) 1975 res->server = xmlMemStrdup(bas->server); 1976 if (bas->user != NULL) 1977 res->user = xmlMemStrdup(bas->user); 1978 res->port = bas->port; 1979 } 1980 if (bas->path != NULL) 1981 res->path = xmlMemStrdup(bas->path); 1982 if (ref->query_raw != NULL) 1983 res->query_raw = xmlMemStrdup (ref->query_raw); 1984 else if (ref->query != NULL) 1985 res->query = xmlMemStrdup(ref->query); 1986 else if (bas->query_raw != NULL) 1987 res->query_raw = xmlMemStrdup(bas->query_raw); 1988 else if (bas->query != NULL) 1989 res->query = xmlMemStrdup(bas->query); 1990 if (ref->fragment != NULL) 1991 res->fragment = xmlMemStrdup(ref->fragment); 1992 goto step_7; 1993 } 1994 1995 /* 1996 * 3) If the scheme component is defined, indicating that the reference 1997 * starts with a scheme name, then the reference is interpreted as an 1998 * absolute URI and we are done. Otherwise, the reference URI's 1999 * scheme is inherited from the base URI's scheme component. 2000 */ 2001 if (ref->scheme != NULL) { 2002 val = xmlSaveUri(ref); 2003 goto done; 2004 } 2005 if (bas->scheme != NULL) 2006 res->scheme = xmlMemStrdup(bas->scheme); 2007 2008 if (ref->query_raw != NULL) 2009 res->query_raw = xmlMemStrdup(ref->query_raw); 2010 else if (ref->query != NULL) 2011 res->query = xmlMemStrdup(ref->query); 2012 if (ref->fragment != NULL) 2013 res->fragment = xmlMemStrdup(ref->fragment); 2014 2015 /* 2016 * 4) If the authority component is defined, then the reference is a 2017 * network-path and we skip to step 7. Otherwise, the reference 2018 * URI's authority is inherited from the base URI's authority 2019 * component, which will also be undefined if the URI scheme does not 2020 * use an authority component. 2021 */ 2022 if ((ref->authority != NULL) || (ref->server != NULL)) { 2023 if (ref->authority != NULL) 2024 res->authority = xmlMemStrdup(ref->authority); 2025 else { 2026 res->server = xmlMemStrdup(ref->server); 2027 if (ref->user != NULL) 2028 res->user = xmlMemStrdup(ref->user); 2029 res->port = ref->port; 2030 } 2031 if (ref->path != NULL) 2032 res->path = xmlMemStrdup(ref->path); 2033 goto step_7; 2034 } 2035 if (bas->authority != NULL) 2036 res->authority = xmlMemStrdup(bas->authority); 2037 else if ((bas->server != NULL) || (bas->port == -1)) { 2038 if (bas->server != NULL) 2039 res->server = xmlMemStrdup(bas->server); 2040 if (bas->user != NULL) 2041 res->user = xmlMemStrdup(bas->user); 2042 res->port = bas->port; 2043 } 2044 2045 /* 2046 * 5) If the path component begins with a slash character ("/"), then 2047 * the reference is an absolute-path and we skip to step 7. 2048 */ 2049 if ((ref->path != NULL) && (ref->path[0] == '/')) { 2050 res->path = xmlMemStrdup(ref->path); 2051 goto step_7; 2052 } 2053 2054 2055 /* 2056 * 6) If this step is reached, then we are resolving a relative-path 2057 * reference. The relative path needs to be merged with the base 2058 * URI's path. Although there are many ways to do this, we will 2059 * describe a simple method using a separate string buffer. 2060 * 2061 * Allocate a buffer large enough for the result string. 2062 */ 2063 len = 2; /* extra / and 0 */ 2064 if (ref->path != NULL) 2065 len += strlen(ref->path); 2066 if (bas->path != NULL) 2067 len += strlen(bas->path); 2068 res->path = (char *) xmlMallocAtomic(len); 2069 if (res->path == NULL) { 2070 xmlURIErrMemory("resolving URI against base\n"); 2071 goto done; 2072 } 2073 res->path[0] = 0; 2074 2075 /* 2076 * a) All but the last segment of the base URI's path component is 2077 * copied to the buffer. In other words, any characters after the 2078 * last (right-most) slash character, if any, are excluded. 2079 */ 2080 cur = 0; 2081 out = 0; 2082 if (bas->path != NULL) { 2083 while (bas->path[cur] != 0) { 2084 while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 2085 cur++; 2086 if (bas->path[cur] == 0) 2087 break; 2088 2089 cur++; 2090 while (out < cur) { 2091 res->path[out] = bas->path[out]; 2092 out++; 2093 } 2094 } 2095 } 2096 res->path[out] = 0; 2097 2098 /* 2099 * b) The reference's path component is appended to the buffer 2100 * string. 2101 */ 2102 if (ref->path != NULL && ref->path[0] != 0) { 2103 indx = 0; 2104 /* 2105 * Ensure the path includes a '/' 2106 */ 2107 if ((out == 0) && (bas->server != NULL)) 2108 res->path[out++] = '/'; 2109 while (ref->path[indx] != 0) { 2110 res->path[out++] = ref->path[indx++]; 2111 } 2112 } 2113 res->path[out] = 0; 2114 2115 /* 2116 * Steps c) to h) are really path normalization steps 2117 */ 2118 xmlNormalizeURIPath(res->path); 2119 2120 step_7: 2121 2122 /* 2123 * 7) The resulting URI components, including any inherited from the 2124 * base URI, are recombined to give the absolute form of the URI 2125 * reference. 2126 */ 2127 val = xmlSaveUri(res); 2128 2129 done: 2130 if (ref != NULL) 2131 xmlFreeURI(ref); 2132 if (bas != NULL) 2133 xmlFreeURI(bas); 2134 if (res != NULL) 2135 xmlFreeURI(res); 2136 return(val); 2137 } 2138 2139 /** 2140 * xmlBuildRelativeURI: 2141 * @URI: the URI reference under consideration 2142 * @base: the base value 2143 * 2144 * Expresses the URI of the reference in terms relative to the 2145 * base. Some examples of this operation include: 2146 * base = "http://site1.com/docs/book1.html" 2147 * URI input URI returned 2148 * docs/pic1.gif pic1.gif 2149 * docs/img/pic1.gif img/pic1.gif 2150 * img/pic1.gif ../img/pic1.gif 2151 * http://site1.com/docs/pic1.gif pic1.gif 2152 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif 2153 * 2154 * base = "docs/book1.html" 2155 * URI input URI returned 2156 * docs/pic1.gif pic1.gif 2157 * docs/img/pic1.gif img/pic1.gif 2158 * img/pic1.gif ../img/pic1.gif 2159 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif 2160 * 2161 * 2162 * Note: if the URI reference is really weird or complicated, it may be 2163 * worthwhile to first convert it into a "nice" one by calling 2164 * xmlBuildURI (using 'base') before calling this routine, 2165 * since this routine (for reasonable efficiency) assumes URI has 2166 * already been through some validation. 2167 * 2168 * Returns a new URI string (to be freed by the caller) or NULL in case 2169 * error. 2170 */ 2171 xmlChar * 2172 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base) 2173 { 2174 xmlChar *val = NULL; 2175 int ret; 2176 int ix; 2177 int nbslash = 0; 2178 int len; 2179 xmlURIPtr ref = NULL; 2180 xmlURIPtr bas = NULL; 2181 xmlChar *bptr, *uptr, *vptr; 2182 int remove_path = 0; 2183 2184 if ((URI == NULL) || (*URI == 0)) 2185 return NULL; 2186 2187 /* 2188 * First parse URI into a standard form 2189 */ 2190 ref = xmlCreateURI (); 2191 if (ref == NULL) 2192 return NULL; 2193 /* If URI not already in "relative" form */ 2194 if (URI[0] != '.') { 2195 ret = xmlParseURIReference (ref, (const char *) URI); 2196 if (ret != 0) 2197 goto done; /* Error in URI, return NULL */ 2198 } else 2199 ref->path = (char *)xmlStrdup(URI); 2200 2201 /* 2202 * Next parse base into the same standard form 2203 */ 2204 if ((base == NULL) || (*base == 0)) { 2205 val = xmlStrdup (URI); 2206 goto done; 2207 } 2208 bas = xmlCreateURI (); 2209 if (bas == NULL) 2210 goto done; 2211 if (base[0] != '.') { 2212 ret = xmlParseURIReference (bas, (const char *) base); 2213 if (ret != 0) 2214 goto done; /* Error in base, return NULL */ 2215 } else 2216 bas->path = (char *)xmlStrdup(base); 2217 2218 /* 2219 * If the scheme / server on the URI differs from the base, 2220 * just return the URI 2221 */ 2222 if ((ref->scheme != NULL) && 2223 ((bas->scheme == NULL) || 2224 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) || 2225 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) { 2226 val = xmlStrdup (URI); 2227 goto done; 2228 } 2229 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { 2230 val = xmlStrdup(BAD_CAST ""); 2231 goto done; 2232 } 2233 if (bas->path == NULL) { 2234 val = xmlStrdup((xmlChar *)ref->path); 2235 goto done; 2236 } 2237 if (ref->path == NULL) { 2238 ref->path = (char *) "/"; 2239 remove_path = 1; 2240 } 2241 2242 /* 2243 * At this point (at last!) we can compare the two paths 2244 * 2245 * First we take care of the special case where either of the 2246 * two path components may be missing (bug 316224) 2247 */ 2248 bptr = (xmlChar *)bas->path; 2249 { 2250 xmlChar *rptr = (xmlChar *) ref->path; 2251 int pos = 0; 2252 2253 /* 2254 * Next we compare the two strings and find where they first differ 2255 */ 2256 if ((*rptr == '.') && (rptr[1] == '/')) 2257 rptr += 2; 2258 if ((*bptr == '.') && (bptr[1] == '/')) 2259 bptr += 2; 2260 else if ((*bptr == '/') && (*rptr != '/')) 2261 bptr++; 2262 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0)) 2263 pos++; 2264 2265 if (bptr[pos] == rptr[pos]) { 2266 val = xmlStrdup(BAD_CAST ""); 2267 goto done; /* (I can't imagine why anyone would do this) */ 2268 } 2269 2270 /* 2271 * In URI, "back up" to the last '/' encountered. This will be the 2272 * beginning of the "unique" suffix of URI 2273 */ 2274 ix = pos; 2275 for (; ix > 0; ix--) { 2276 if (rptr[ix - 1] == '/') 2277 break; 2278 } 2279 uptr = (xmlChar *)&rptr[ix]; 2280 2281 /* 2282 * In base, count the number of '/' from the differing point 2283 */ 2284 for (; bptr[ix] != 0; ix++) { 2285 if (bptr[ix] == '/') 2286 nbslash++; 2287 } 2288 2289 /* 2290 * e.g: URI="foo/" base="foo/bar" -> "./" 2291 */ 2292 if (nbslash == 0 && !uptr[0]) { 2293 val = xmlStrdup(BAD_CAST "./"); 2294 goto done; 2295 } 2296 2297 len = xmlStrlen (uptr) + 1; 2298 } 2299 2300 if (nbslash == 0) { 2301 if (uptr != NULL) 2302 /* exception characters from xmlSaveUri */ 2303 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2304 goto done; 2305 } 2306 2307 /* 2308 * Allocate just enough space for the returned string - 2309 * length of the remainder of the URI, plus enough space 2310 * for the "../" groups, plus one for the terminator 2311 */ 2312 val = (xmlChar *) xmlMalloc (len + 3 * nbslash); 2313 if (val == NULL) { 2314 xmlURIErrMemory("building relative URI\n"); 2315 goto done; 2316 } 2317 vptr = val; 2318 /* 2319 * Put in as many "../" as needed 2320 */ 2321 for (; nbslash>0; nbslash--) { 2322 *vptr++ = '.'; 2323 *vptr++ = '.'; 2324 *vptr++ = '/'; 2325 } 2326 /* 2327 * Finish up with the end of the URI 2328 */ 2329 if (uptr != NULL) { 2330 if ((vptr > val) && (len > 0) && 2331 (uptr[0] == '/') && (vptr[-1] == '/')) { 2332 memcpy (vptr, uptr + 1, len - 1); 2333 vptr[len - 2] = 0; 2334 } else { 2335 memcpy (vptr, uptr, len); 2336 vptr[len - 1] = 0; 2337 } 2338 } else { 2339 vptr[len - 1] = 0; 2340 } 2341 2342 /* escape the freshly-built path */ 2343 vptr = val; 2344 /* exception characters from xmlSaveUri */ 2345 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,"); 2346 xmlFree(vptr); 2347 2348 done: 2349 /* 2350 * Free the working variables 2351 */ 2352 if (remove_path != 0) 2353 ref->path = NULL; 2354 if (ref != NULL) 2355 xmlFreeURI (ref); 2356 if (bas != NULL) 2357 xmlFreeURI (bas); 2358 2359 return val; 2360 } 2361 2362 /** 2363 * xmlCanonicPath: 2364 * @path: the resource locator in a filesystem notation 2365 * 2366 * Constructs a canonic path from the specified path. 2367 * 2368 * Returns a new canonic path, or a duplicate of the path parameter if the 2369 * construction fails. The caller is responsible for freeing the memory occupied 2370 * by the returned string. If there is insufficient memory available, or the 2371 * argument is NULL, the function returns NULL. 2372 */ 2373 #define IS_WINDOWS_PATH(p) \ 2374 ((p != NULL) && \ 2375 (((p[0] >= 'a') && (p[0] <= 'z')) || \ 2376 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 2377 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 2378 xmlChar * 2379 xmlCanonicPath(const xmlChar *path) 2380 { 2381 /* 2382 * For Windows implementations, additional work needs to be done to 2383 * replace backslashes in pathnames with "forward slashes" 2384 */ 2385 #if defined(_WIN32) && !defined(__CYGWIN__) 2386 int len = 0; 2387 char *p = NULL; 2388 #endif 2389 xmlURIPtr uri; 2390 xmlChar *ret; 2391 const xmlChar *absuri; 2392 2393 if (path == NULL) 2394 return(NULL); 2395 2396 #if defined(_WIN32) 2397 /* 2398 * We must not change the backslashes to slashes if the the path 2399 * starts with \\?\ 2400 * Those paths can be up to 32k characters long. 2401 * Was added specifically for OpenOffice, those paths can't be converted 2402 * to URIs anyway. 2403 */ 2404 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') && 2405 (path[3] == '\\') ) 2406 return xmlStrdup((const xmlChar *) path); 2407 #endif 2408 2409 /* sanitize filename starting with // so it can be used as URI */ 2410 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/')) 2411 path++; 2412 2413 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2414 xmlFreeURI(uri); 2415 return xmlStrdup(path); 2416 } 2417 2418 /* Check if this is an "absolute uri" */ 2419 absuri = xmlStrstr(path, BAD_CAST "://"); 2420 if (absuri != NULL) { 2421 int l, j; 2422 unsigned char c; 2423 xmlChar *escURI; 2424 2425 /* 2426 * this looks like an URI where some parts have not been 2427 * escaped leading to a parsing problem. Check that the first 2428 * part matches a protocol. 2429 */ 2430 l = absuri - path; 2431 /* Bypass if first part (part before the '://') is > 20 chars */ 2432 if ((l <= 0) || (l > 20)) 2433 goto path_processing; 2434 /* Bypass if any non-alpha characters are present in first part */ 2435 for (j = 0;j < l;j++) { 2436 c = path[j]; 2437 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) 2438 goto path_processing; 2439 } 2440 2441 /* Escape all except the characters specified in the supplied path */ 2442 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;="); 2443 if (escURI != NULL) { 2444 /* Try parsing the escaped path */ 2445 uri = xmlParseURI((const char *) escURI); 2446 /* If successful, return the escaped string */ 2447 if (uri != NULL) { 2448 xmlFreeURI(uri); 2449 return escURI; 2450 } 2451 xmlFree(escURI); 2452 } 2453 } 2454 2455 path_processing: 2456 /* For Windows implementations, replace backslashes with 'forward slashes' */ 2457 #if defined(_WIN32) && !defined(__CYGWIN__) 2458 /* 2459 * Create a URI structure 2460 */ 2461 uri = xmlCreateURI(); 2462 if (uri == NULL) { /* Guard against 'out of memory' */ 2463 return(NULL); 2464 } 2465 2466 len = xmlStrlen(path); 2467 if ((len > 2) && IS_WINDOWS_PATH(path)) { 2468 /* make the scheme 'file' */ 2469 uri->scheme = (char *) xmlStrdup(BAD_CAST "file"); 2470 /* allocate space for leading '/' + path + string terminator */ 2471 uri->path = xmlMallocAtomic(len + 2); 2472 if (uri->path == NULL) { 2473 xmlFreeURI(uri); /* Guard against 'out of memory' */ 2474 return(NULL); 2475 } 2476 /* Put in leading '/' plus path */ 2477 uri->path[0] = '/'; 2478 p = uri->path + 1; 2479 strncpy(p, (char *) path, len + 1); 2480 } else { 2481 uri->path = (char *) xmlStrdup(path); 2482 if (uri->path == NULL) { 2483 xmlFreeURI(uri); 2484 return(NULL); 2485 } 2486 p = uri->path; 2487 } 2488 /* Now change all occurrences of '\' to '/' */ 2489 while (*p != '\0') { 2490 if (*p == '\\') 2491 *p = '/'; 2492 p++; 2493 } 2494 2495 if (uri->scheme == NULL) { 2496 ret = xmlStrdup((const xmlChar *) uri->path); 2497 } else { 2498 ret = xmlSaveUri(uri); 2499 } 2500 2501 xmlFreeURI(uri); 2502 #else 2503 ret = xmlStrdup((const xmlChar *) path); 2504 #endif 2505 return(ret); 2506 } 2507 2508 /** 2509 * xmlPathToURI: 2510 * @path: the resource locator in a filesystem notation 2511 * 2512 * Constructs an URI expressing the existing path 2513 * 2514 * Returns a new URI, or a duplicate of the path parameter if the 2515 * construction fails. The caller is responsible for freeing the memory 2516 * occupied by the returned string. If there is insufficient memory available, 2517 * or the argument is NULL, the function returns NULL. 2518 */ 2519 xmlChar * 2520 xmlPathToURI(const xmlChar *path) 2521 { 2522 xmlURIPtr uri; 2523 xmlURI temp; 2524 xmlChar *ret, *cal; 2525 2526 if (path == NULL) 2527 return(NULL); 2528 2529 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2530 xmlFreeURI(uri); 2531 return xmlStrdup(path); 2532 } 2533 cal = xmlCanonicPath(path); 2534 if (cal == NULL) 2535 return(NULL); 2536 #if defined(_WIN32) && !defined(__CYGWIN__) 2537 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 2538 If 'cal' is a valid URI already then we are done here, as continuing would make 2539 it invalid. */ 2540 if ((uri = xmlParseURI((const char *) cal)) != NULL) { 2541 xmlFreeURI(uri); 2542 return cal; 2543 } 2544 /* 'cal' can contain a relative path with backslashes. If that is processed 2545 by xmlSaveURI, they will be escaped and the external entity loader machinery 2546 will fail. So convert them to slashes. Misuse 'ret' for walking. */ 2547 ret = cal; 2548 while (*ret != '\0') { 2549 if (*ret == '\\') 2550 *ret = '/'; 2551 ret++; 2552 } 2553 #endif 2554 memset(&temp, 0, sizeof(temp)); 2555 temp.path = (char *) cal; 2556 ret = xmlSaveUri(&temp); 2557 xmlFree(cal); 2558 return(ret); 2559 } 2560 #define bottom_uri 2561 #include "elfgcchack.h" 2562