1 /** 2 * uri.c: set of generic URI related routines 3 * 4 * Reference: RFCs 3986, 2732 and 2373 5 * 6 * See Copyright for the status of this software. 7 * 8 * daniel@veillard.com 9 */ 10 11 #define IN_LIBXML 12 #include "libxml.h" 13 14 #include <string.h> 15 16 #include <libxml/xmlmemory.h> 17 #include <libxml/uri.h> 18 #include <libxml/globals.h> 19 #include <libxml/xmlerror.h> 20 21 /** 22 * MAX_URI_LENGTH: 23 * 24 * The definition of the URI regexp in the above RFC has no size limit 25 * In practice they are usually relatively short except for the 26 * data URI scheme as defined in RFC 2397. Even for data URI the usual 27 * maximum size before hitting random practical limits is around 64 KB 28 * and 4KB is usually a maximum admitted limit for proper operations. 29 * The value below is more a security limit than anything else and 30 * really should never be hit by 'normal' operations 31 * Set to 1 MByte in 2012, this is only enforced on output 32 */ 33 #define MAX_URI_LENGTH 1024 * 1024 34 35 static void 36 xmlURIErrMemory(const char *extra) 37 { 38 if (extra) 39 __xmlRaiseError(NULL, NULL, NULL, 40 NULL, NULL, XML_FROM_URI, 41 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 42 extra, NULL, NULL, 0, 0, 43 "Memory allocation failed : %s\n", extra); 44 else 45 __xmlRaiseError(NULL, NULL, NULL, 46 NULL, NULL, XML_FROM_URI, 47 XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, 48 NULL, NULL, NULL, 0, 0, 49 "Memory allocation failed\n"); 50 } 51 52 static void xmlCleanURI(xmlURIPtr uri); 53 54 /* 55 * Old rule from 2396 used in legacy handling code 56 * alpha = lowalpha | upalpha 57 */ 58 #define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) 59 60 61 /* 62 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | 63 * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | 64 * "u" | "v" | "w" | "x" | "y" | "z" 65 */ 66 67 #define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) 68 69 /* 70 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | 
/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */

#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */

#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * Unlike the other legacy macros, this one takes a POINTER to the
 * character to test, not the character itself.
 */

#define IS_UNWISE(p)                                                    \
      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
       ((*(p) == ']')) || ((*(p) == '`')))
/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */

#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
        ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */

#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Skip to next pointer char, handle escaped sequences: advances by 3 on
 * a '%' and by 1 otherwise. The '%' case does not check that two more
 * characters follow; callers must have validated the escape beforehand.
 * The argument is parenthesized so that any lvalue expression
 * (e.g. NEXT(*pp)) advances the intended pointer.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)

/*
 * Productions from the spec.
 *
 *    authority     = server | reg_name
 *    reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                        ";" | ":" | "@" | "&" | "=" | "+" )
 *
 * path          = [ abs_path | opaque_part ]
 */

/*
 * Duplicate the first n bytes of s as a char string (via xmlStrndup).
 * The whole expansion is parenthesized so the cast cannot bind
 * unexpectedly when the macro is used inside a larger expression.
 */
#define STRNDUP(s, n) ((char *) xmlStrndup((const xmlChar *)(s), (n)))

/************************************************************************
 *                                                                      *
 *                         RFC 3986 parser                              *
 *                                                                      *
 ************************************************************************/

/*
 * All ISA_* macros take a POINTER to the character to classify.
 */
#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||               \
                      ((*(p) >= 'A') && (*(p) <= 'Z')))
#define ISA_HEXDIG(p)                                                   \
       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||             \
        ((*(p) >= 'A') && (*(p) <= 'F')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                \
      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||         \
       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||         \
       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||         \
       ((*(p) == '=')) || ((*(p) == '\'')))

/*
 *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 */
#define ISA_GEN_DELIM(p)                                                \
      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
       ((*(p) == '@')))

/*
 *    reserved      = gen-delims / sub-delims
 */
#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))

/ "_" / "~" 178 */ 179 #define ISA_UNRESERVED(p) \ 180 ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ 181 ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) 182 183 /* 184 * pct-encoded = "%" HEXDIG HEXDIG 185 */ 186 #define ISA_PCT_ENCODED(p) \ 187 ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) 188 189 /* 190 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" 191 */ 192 #define ISA_PCHAR(p) \ 193 (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ 194 ((*(p) == ':')) || ((*(p) == '@'))) 195 196 /** 197 * xmlParse3986Scheme: 198 * @uri: pointer to an URI structure 199 * @str: pointer to the string to analyze 200 * 201 * Parse an URI scheme 202 * 203 * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 204 * 205 * Returns 0 or the error code 206 */ 207 static int 208 xmlParse3986Scheme(xmlURIPtr uri, const char **str) { 209 const char *cur; 210 211 if (str == NULL) 212 return(-1); 213 214 cur = *str; 215 if (!ISA_ALPHA(cur)) 216 return(2); 217 cur++; 218 while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || 219 (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; 220 if (uri != NULL) { 221 if (uri->scheme != NULL) xmlFree(uri->scheme); 222 uri->scheme = STRNDUP(*str, cur - *str); 223 } 224 *str = cur; 225 return(0); 226 } 227 228 /** 229 * xmlParse3986Fragment: 230 * @uri: pointer to an URI structure 231 * @str: pointer to the string to analyze 232 * 233 * Parse the query part of an URI 234 * 235 * fragment = *( pchar / "/" / "?" ) 236 * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' 237 * in the fragment identifier but this is used very broadly for 238 * xpointer scheme selection, so we are allowing it here to not break 239 * for example all the DocBook processing chains. 
240 * 241 * Returns 0 or the error code 242 */ 243 static int 244 xmlParse3986Fragment(xmlURIPtr uri, const char **str) 245 { 246 const char *cur; 247 248 if (str == NULL) 249 return (-1); 250 251 cur = *str; 252 253 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 254 (*cur == '[') || (*cur == ']') || 255 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 256 NEXT(cur); 257 if (uri != NULL) { 258 if (uri->fragment != NULL) 259 xmlFree(uri->fragment); 260 if (uri->cleanup & 2) 261 uri->fragment = STRNDUP(*str, cur - *str); 262 else 263 uri->fragment = xmlURIUnescapeString(*str, cur - *str, NULL); 264 } 265 *str = cur; 266 return (0); 267 } 268 269 /** 270 * xmlParse3986Query: 271 * @uri: pointer to an URI structure 272 * @str: pointer to the string to analyze 273 * 274 * Parse the query part of an URI 275 * 276 * query = *uric 277 * 278 * Returns 0 or the error code 279 */ 280 static int 281 xmlParse3986Query(xmlURIPtr uri, const char **str) 282 { 283 const char *cur; 284 285 if (str == NULL) 286 return (-1); 287 288 cur = *str; 289 290 while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || 291 ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) 292 NEXT(cur); 293 if (uri != NULL) { 294 if (uri->query != NULL) 295 xmlFree(uri->query); 296 if (uri->cleanup & 2) 297 uri->query = STRNDUP(*str, cur - *str); 298 else 299 uri->query = xmlURIUnescapeString(*str, cur - *str, NULL); 300 301 /* Save the raw bytes of the query as well. 
302 * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00114 303 */ 304 if (uri->query_raw != NULL) 305 xmlFree (uri->query_raw); 306 uri->query_raw = STRNDUP (*str, cur - *str); 307 } 308 *str = cur; 309 return (0); 310 } 311 312 /** 313 * xmlParse3986Port: 314 * @uri: pointer to an URI structure 315 * @str: the string to analyze 316 * 317 * Parse a port part and fills in the appropriate fields 318 * of the @uri structure 319 * 320 * port = *DIGIT 321 * 322 * Returns 0 or the error code 323 */ 324 static int 325 xmlParse3986Port(xmlURIPtr uri, const char **str) 326 { 327 const char *cur = *str; 328 int port = 0; 329 330 if (ISA_DIGIT(cur)) { 331 while (ISA_DIGIT(cur)) { 332 port = port * 10 + (*cur - '0'); 333 if (port > 99999999) 334 port = 99999999; 335 336 cur++; 337 } 338 if (uri != NULL) 339 uri->port = port; 340 *str = cur; 341 return(0); 342 } 343 return(1); 344 } 345 346 /** 347 * xmlParse3986Userinfo: 348 * @uri: pointer to an URI structure 349 * @str: the string to analyze 350 * 351 * Parse an user informations part and fills in the appropriate fields 352 * of the @uri structure 353 * 354 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 355 * 356 * Returns 0 or the error code 357 */ 358 static int 359 xmlParse3986Userinfo(xmlURIPtr uri, const char **str) 360 { 361 const char *cur; 362 363 cur = *str; 364 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || 365 ISA_SUB_DELIM(cur) || (*cur == ':')) 366 NEXT(cur); 367 if (*cur == '@') { 368 if (uri != NULL) { 369 if (uri->user != NULL) xmlFree(uri->user); 370 if (uri->cleanup & 2) 371 uri->user = STRNDUP(*str, cur - *str); 372 else 373 uri->user = xmlURIUnescapeString(*str, cur - *str, NULL); 374 } 375 *str = cur; 376 return(0); 377 } 378 return(1); 379 } 380 381 /** 382 * xmlParse3986DecOctet: 383 * @str: the string to analyze 384 * 385 * dec-octet = DIGIT ; 0-9 386 * / %x31-39 DIGIT ; 10-99 387 * / "1" 2DIGIT ; 100-199 388 * / "2" %x30-34 DIGIT ; 200-249 389 * / "25" %x30-35 ; 
/**
 * xmlParse3986DecOctet:
 * @str:  the string to analyze
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. A multi-digit octet may not start with '0', and a
 * three-digit candidate above 255 is rejected as a whole (it is not
 * re-read as a shorter octet). @str is only updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
xmlParse3986DecOctet(const char **str) {
    const char *cur = *str;
    int len;

    if ((cur[0] < '0') || (cur[0] > '9'))
        return(1);

    /*
     * cur[1] is only read once cur[0] is a digit, and cur[2] only once
     * cur[1] is a digit, so the terminating 0 is never read past.
     */
    if ((cur[1] < '0') || (cur[1] > '9')) {
        len = 1;                                /* 0-9 */
    } else if (cur[0] == '0') {
        return(1);                              /* no leading zero */
    } else if ((cur[2] < '0') || (cur[2] > '9')) {
        len = 2;                                /* 10-99 */
    } else if (cur[0] == '1') {
        len = 3;                                /* 100-199 */
    } else if ((cur[0] == '2') && (cur[1] <= '4')) {
        len = 3;                                /* 200-249 */
    } else if ((cur[0] == '2') && (cur[1] == '5') && (cur[2] <= '5')) {
        len = 3;                                /* 250-255 */
    } else {
        return(1);
    }
    *str = cur + len;
    return(0);
}
dec-octet 429 * reg-name = *( unreserved / pct-encoded / sub-delims ) 430 * 431 * Returns 0 or the error code 432 */ 433 static int 434 xmlParse3986Host(xmlURIPtr uri, const char **str) 435 { 436 const char *cur = *str; 437 const char *host; 438 439 host = cur; 440 /* 441 * IPv6 and future addressing scheme are enclosed between brackets 442 */ 443 if (*cur == '[') { 444 cur++; 445 while ((*cur != ']') && (*cur != 0)) 446 cur++; 447 if (*cur != ']') 448 return(1); 449 cur++; 450 goto found; 451 } 452 /* 453 * try to parse an IPv4 454 */ 455 if (ISA_DIGIT(cur)) { 456 if (xmlParse3986DecOctet(&cur) != 0) 457 goto not_ipv4; 458 if (*cur != '.') 459 goto not_ipv4; 460 cur++; 461 if (xmlParse3986DecOctet(&cur) != 0) 462 goto not_ipv4; 463 if (*cur != '.') 464 goto not_ipv4; 465 if (xmlParse3986DecOctet(&cur) != 0) 466 goto not_ipv4; 467 if (*cur != '.') 468 goto not_ipv4; 469 if (xmlParse3986DecOctet(&cur) != 0) 470 goto not_ipv4; 471 goto found; 472 not_ipv4: 473 cur = *str; 474 } 475 /* 476 * then this should be a hostname which can be empty 477 */ 478 while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) 479 NEXT(cur); 480 found: 481 if (uri != NULL) { 482 if (uri->authority != NULL) xmlFree(uri->authority); 483 uri->authority = NULL; 484 if (uri->server != NULL) xmlFree(uri->server); 485 if (cur != host) { 486 if (uri->cleanup & 2) 487 uri->server = STRNDUP(host, cur - host); 488 else 489 uri->server = xmlURIUnescapeString(host, cur - host, NULL); 490 } else 491 uri->server = NULL; 492 } 493 *str = cur; 494 return(0); 495 } 496 497 /** 498 * xmlParse3986Authority: 499 * @uri: pointer to an URI structure 500 * @str: the string to analyze 501 * 502 * Parse an authority part and fills in the appropriate fields 503 * of the @uri structure 504 * 505 * authority = [ userinfo "@" ] host [ ":" port ] 506 * 507 * Returns 0 or the error code 508 */ 509 static int 510 xmlParse3986Authority(xmlURIPtr uri, const char **str) 511 { 512 const char *cur; 513 int 
ret; 514 515 cur = *str; 516 /* 517 * try to parse an userinfo and check for the trailing @ 518 */ 519 ret = xmlParse3986Userinfo(uri, &cur); 520 if ((ret != 0) || (*cur != '@')) 521 cur = *str; 522 else 523 cur++; 524 ret = xmlParse3986Host(uri, &cur); 525 if (ret != 0) return(ret); 526 if (*cur == ':') { 527 cur++; 528 ret = xmlParse3986Port(uri, &cur); 529 if (ret != 0) return(ret); 530 } 531 *str = cur; 532 return(0); 533 } 534 535 /** 536 * xmlParse3986Segment: 537 * @str: the string to analyze 538 * @forbid: an optional forbidden character 539 * @empty: allow an empty segment 540 * 541 * Parse a segment and fills in the appropriate fields 542 * of the @uri structure 543 * 544 * segment = *pchar 545 * segment-nz = 1*pchar 546 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) 547 * ; non-zero-length segment without any colon ":" 548 * 549 * Returns 0 or the error code 550 */ 551 static int 552 xmlParse3986Segment(const char **str, char forbid, int empty) 553 { 554 const char *cur; 555 556 cur = *str; 557 if (!ISA_PCHAR(cur)) { 558 if (empty) 559 return(0); 560 return(1); 561 } 562 while (ISA_PCHAR(cur) && (*cur != forbid)) 563 NEXT(cur); 564 *str = cur; 565 return (0); 566 } 567 568 /** 569 * xmlParse3986PathAbEmpty: 570 * @uri: pointer to an URI structure 571 * @str: the string to analyze 572 * 573 * Parse an path absolute or empty and fills in the appropriate fields 574 * of the @uri structure 575 * 576 * path-abempty = *( "/" segment ) 577 * 578 * Returns 0 or the error code 579 */ 580 static int 581 xmlParse3986PathAbEmpty(xmlURIPtr uri, const char **str) 582 { 583 const char *cur; 584 int ret; 585 586 cur = *str; 587 588 while (*cur == '/') { 589 cur++; 590 ret = xmlParse3986Segment(&cur, 0, 1); 591 if (ret != 0) return(ret); 592 } 593 if (uri != NULL) { 594 if (uri->path != NULL) xmlFree(uri->path); 595 if (*str != cur) { 596 if (uri->cleanup & 2) 597 uri->path = STRNDUP(*str, cur - *str); 598 else 599 uri->path = 
xmlURIUnescapeString(*str, cur - *str, NULL); 600 } else { 601 uri->path = NULL; 602 } 603 } 604 *str = cur; 605 return (0); 606 } 607 608 /** 609 * xmlParse3986PathAbsolute: 610 * @uri: pointer to an URI structure 611 * @str: the string to analyze 612 * 613 * Parse an path absolute and fills in the appropriate fields 614 * of the @uri structure 615 * 616 * path-absolute = "/" [ segment-nz *( "/" segment ) ] 617 * 618 * Returns 0 or the error code 619 */ 620 static int 621 xmlParse3986PathAbsolute(xmlURIPtr uri, const char **str) 622 { 623 const char *cur; 624 int ret; 625 626 cur = *str; 627 628 if (*cur != '/') 629 return(1); 630 cur++; 631 ret = xmlParse3986Segment(&cur, 0, 0); 632 if (ret == 0) { 633 while (*cur == '/') { 634 cur++; 635 ret = xmlParse3986Segment(&cur, 0, 1); 636 if (ret != 0) return(ret); 637 } 638 } 639 if (uri != NULL) { 640 if (uri->path != NULL) xmlFree(uri->path); 641 if (cur != *str) { 642 if (uri->cleanup & 2) 643 uri->path = STRNDUP(*str, cur - *str); 644 else 645 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 646 } else { 647 uri->path = NULL; 648 } 649 } 650 *str = cur; 651 return (0); 652 } 653 654 /** 655 * xmlParse3986PathRootless: 656 * @uri: pointer to an URI structure 657 * @str: the string to analyze 658 * 659 * Parse an path without root and fills in the appropriate fields 660 * of the @uri structure 661 * 662 * path-rootless = segment-nz *( "/" segment ) 663 * 664 * Returns 0 or the error code 665 */ 666 static int 667 xmlParse3986PathRootless(xmlURIPtr uri, const char **str) 668 { 669 const char *cur; 670 int ret; 671 672 cur = *str; 673 674 ret = xmlParse3986Segment(&cur, 0, 0); 675 if (ret != 0) return(ret); 676 while (*cur == '/') { 677 cur++; 678 ret = xmlParse3986Segment(&cur, 0, 1); 679 if (ret != 0) return(ret); 680 } 681 if (uri != NULL) { 682 if (uri->path != NULL) xmlFree(uri->path); 683 if (cur != *str) { 684 if (uri->cleanup & 2) 685 uri->path = STRNDUP(*str, cur - *str); 686 else 687 uri->path = 
xmlURIUnescapeString(*str, cur - *str, NULL); 688 } else { 689 uri->path = NULL; 690 } 691 } 692 *str = cur; 693 return (0); 694 } 695 696 /** 697 * xmlParse3986PathNoScheme: 698 * @uri: pointer to an URI structure 699 * @str: the string to analyze 700 * 701 * Parse an path which is not a scheme and fills in the appropriate fields 702 * of the @uri structure 703 * 704 * path-noscheme = segment-nz-nc *( "/" segment ) 705 * 706 * Returns 0 or the error code 707 */ 708 static int 709 xmlParse3986PathNoScheme(xmlURIPtr uri, const char **str) 710 { 711 const char *cur; 712 int ret; 713 714 cur = *str; 715 716 ret = xmlParse3986Segment(&cur, ':', 0); 717 if (ret != 0) return(ret); 718 while (*cur == '/') { 719 cur++; 720 ret = xmlParse3986Segment(&cur, 0, 1); 721 if (ret != 0) return(ret); 722 } 723 if (uri != NULL) { 724 if (uri->path != NULL) xmlFree(uri->path); 725 if (cur != *str) { 726 if (uri->cleanup & 2) 727 uri->path = STRNDUP(*str, cur - *str); 728 else 729 uri->path = xmlURIUnescapeString(*str, cur - *str, NULL); 730 } else { 731 uri->path = NULL; 732 } 733 } 734 *str = cur; 735 return (0); 736 } 737 738 /** 739 * xmlParse3986HierPart: 740 * @uri: pointer to an URI structure 741 * @str: the string to analyze 742 * 743 * Parse an hierarchical part and fills in the appropriate fields 744 * of the @uri structure 745 * 746 * hier-part = "//" authority path-abempty 747 * / path-absolute 748 * / path-rootless 749 * / path-empty 750 * 751 * Returns 0 or the error code 752 */ 753 static int 754 xmlParse3986HierPart(xmlURIPtr uri, const char **str) 755 { 756 const char *cur; 757 int ret; 758 759 cur = *str; 760 761 if ((*cur == '/') && (*(cur + 1) == '/')) { 762 cur += 2; 763 ret = xmlParse3986Authority(uri, &cur); 764 if (ret != 0) return(ret); 765 if (uri->server == NULL) 766 uri->port = -1; 767 ret = xmlParse3986PathAbEmpty(uri, &cur); 768 if (ret != 0) return(ret); 769 *str = cur; 770 return(0); 771 } else if (*cur == '/') { 772 ret = xmlParse3986PathAbsolute(uri, 
&cur); 773 if (ret != 0) return(ret); 774 } else if (ISA_PCHAR(cur)) { 775 ret = xmlParse3986PathRootless(uri, &cur); 776 if (ret != 0) return(ret); 777 } else { 778 /* path-empty is effectively empty */ 779 if (uri != NULL) { 780 if (uri->path != NULL) xmlFree(uri->path); 781 uri->path = NULL; 782 } 783 } 784 *str = cur; 785 return (0); 786 } 787 788 /** 789 * xmlParse3986RelativeRef: 790 * @uri: pointer to an URI structure 791 * @str: the string to analyze 792 * 793 * Parse an URI string and fills in the appropriate fields 794 * of the @uri structure 795 * 796 * relative-ref = relative-part [ "?" query ] [ "#" fragment ] 797 * relative-part = "//" authority path-abempty 798 * / path-absolute 799 * / path-noscheme 800 * / path-empty 801 * 802 * Returns 0 or the error code 803 */ 804 static int 805 xmlParse3986RelativeRef(xmlURIPtr uri, const char *str) { 806 int ret; 807 808 if ((*str == '/') && (*(str + 1) == '/')) { 809 str += 2; 810 ret = xmlParse3986Authority(uri, &str); 811 if (ret != 0) return(ret); 812 ret = xmlParse3986PathAbEmpty(uri, &str); 813 if (ret != 0) return(ret); 814 } else if (*str == '/') { 815 ret = xmlParse3986PathAbsolute(uri, &str); 816 if (ret != 0) return(ret); 817 } else if (ISA_PCHAR(str)) { 818 ret = xmlParse3986PathNoScheme(uri, &str); 819 if (ret != 0) return(ret); 820 } else { 821 /* path-empty is effectively empty */ 822 if (uri != NULL) { 823 if (uri->path != NULL) xmlFree(uri->path); 824 uri->path = NULL; 825 } 826 } 827 828 if (*str == '?') { 829 str++; 830 ret = xmlParse3986Query(uri, &str); 831 if (ret != 0) return(ret); 832 } 833 if (*str == '#') { 834 str++; 835 ret = xmlParse3986Fragment(uri, &str); 836 if (ret != 0) return(ret); 837 } 838 if (*str != 0) { 839 xmlCleanURI(uri); 840 return(1); 841 } 842 return(0); 843 } 844 845 846 /** 847 * xmlParse3986URI: 848 * @uri: pointer to an URI structure 849 * @str: the string to analyze 850 * 851 * Parse an URI string and fills in the appropriate fields 852 * of the @uri structure 
853 * 854 * scheme ":" hier-part [ "?" query ] [ "#" fragment ] 855 * 856 * Returns 0 or the error code 857 */ 858 static int 859 xmlParse3986URI(xmlURIPtr uri, const char *str) { 860 int ret; 861 862 ret = xmlParse3986Scheme(uri, &str); 863 if (ret != 0) return(ret); 864 if (*str != ':') { 865 return(1); 866 } 867 str++; 868 ret = xmlParse3986HierPart(uri, &str); 869 if (ret != 0) return(ret); 870 if (*str == '?') { 871 str++; 872 ret = xmlParse3986Query(uri, &str); 873 if (ret != 0) return(ret); 874 } 875 if (*str == '#') { 876 str++; 877 ret = xmlParse3986Fragment(uri, &str); 878 if (ret != 0) return(ret); 879 } 880 if (*str != 0) { 881 xmlCleanURI(uri); 882 return(1); 883 } 884 return(0); 885 } 886 887 /** 888 * xmlParse3986URIReference: 889 * @uri: pointer to an URI structure 890 * @str: the string to analyze 891 * 892 * Parse an URI reference string and fills in the appropriate fields 893 * of the @uri structure 894 * 895 * URI-reference = URI / relative-ref 896 * 897 * Returns 0 or the error code 898 */ 899 static int 900 xmlParse3986URIReference(xmlURIPtr uri, const char *str) { 901 int ret; 902 903 if (str == NULL) 904 return(-1); 905 xmlCleanURI(uri); 906 907 /* 908 * Try first to parse absolute refs, then fallback to relative if 909 * it fails. 
910 */ 911 ret = xmlParse3986URI(uri, str); 912 if (ret != 0) { 913 xmlCleanURI(uri); 914 ret = xmlParse3986RelativeRef(uri, str); 915 if (ret != 0) { 916 xmlCleanURI(uri); 917 return(ret); 918 } 919 } 920 return(0); 921 } 922 923 /** 924 * xmlParseURI: 925 * @str: the URI string to analyze 926 * 927 * Parse an URI based on RFC 3986 928 * 929 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 930 * 931 * Returns a newly built xmlURIPtr or NULL in case of error 932 */ 933 xmlURIPtr 934 xmlParseURI(const char *str) { 935 xmlURIPtr uri; 936 int ret; 937 938 if (str == NULL) 939 return(NULL); 940 uri = xmlCreateURI(); 941 if (uri != NULL) { 942 ret = xmlParse3986URIReference(uri, str); 943 if (ret) { 944 xmlFreeURI(uri); 945 return(NULL); 946 } 947 } 948 return(uri); 949 } 950 951 /** 952 * xmlParseURIReference: 953 * @uri: pointer to an URI structure 954 * @str: the string to analyze 955 * 956 * Parse an URI reference string based on RFC 3986 and fills in the 957 * appropriate fields of the @uri structure 958 * 959 * URI-reference = URI / relative-ref 960 * 961 * Returns 0 or the error code 962 */ 963 int 964 xmlParseURIReference(xmlURIPtr uri, const char *str) { 965 return(xmlParse3986URIReference(uri, str)); 966 } 967 968 /** 969 * xmlParseURIRaw: 970 * @str: the URI string to analyze 971 * @raw: if 1 unescaping of URI pieces are disabled 972 * 973 * Parse an URI but allows to keep intact the original fragments. 
974 * 975 * URI-reference = URI / relative-ref 976 * 977 * Returns a newly built xmlURIPtr or NULL in case of error 978 */ 979 xmlURIPtr 980 xmlParseURIRaw(const char *str, int raw) { 981 xmlURIPtr uri; 982 int ret; 983 984 if (str == NULL) 985 return(NULL); 986 uri = xmlCreateURI(); 987 if (uri != NULL) { 988 if (raw) { 989 uri->cleanup |= 2; 990 } 991 ret = xmlParseURIReference(uri, str); 992 if (ret) { 993 xmlFreeURI(uri); 994 return(NULL); 995 } 996 } 997 return(uri); 998 } 999 1000 /************************************************************************ 1001 * * 1002 * Generic URI structure functions * 1003 * * 1004 ************************************************************************/ 1005 1006 /** 1007 * xmlCreateURI: 1008 * 1009 * Simply creates an empty xmlURI 1010 * 1011 * Returns the new structure or NULL in case of error 1012 */ 1013 xmlURIPtr 1014 xmlCreateURI(void) { 1015 xmlURIPtr ret; 1016 1017 ret = (xmlURIPtr) xmlMalloc(sizeof(xmlURI)); 1018 if (ret == NULL) { 1019 xmlURIErrMemory("creating URI structure\n"); 1020 return(NULL); 1021 } 1022 memset(ret, 0, sizeof(xmlURI)); 1023 return(ret); 1024 } 1025 1026 /** 1027 * xmlSaveUriRealloc: 1028 * 1029 * Function to handle properly a reallocation when saving an URI 1030 * Also imposes some limit on the length of an URI string output 1031 */ 1032 static xmlChar * 1033 xmlSaveUriRealloc(xmlChar *ret, int *max) { 1034 xmlChar *temp; 1035 int tmp; 1036 1037 if (*max > MAX_URI_LENGTH) { 1038 xmlURIErrMemory("reaching arbitrary MAX_URI_LENGTH limit\n"); 1039 return(NULL); 1040 } 1041 tmp = *max * 2; 1042 temp = (xmlChar *) xmlRealloc(ret, (tmp + 1)); 1043 if (temp == NULL) { 1044 xmlURIErrMemory("saving URI\n"); 1045 return(NULL); 1046 } 1047 *max = tmp; 1048 return(temp); 1049 } 1050 1051 /** 1052 * xmlSaveUri: 1053 * @uri: pointer to an xmlURI 1054 * 1055 * Save the URI as an escaped string 1056 * 1057 * Returns a new string (to be deallocated by caller) 1058 */ 1059 xmlChar * 1060 xmlSaveUri(xmlURIPtr 
uri) { 1061 xmlChar *ret = NULL; 1062 xmlChar *temp; 1063 const char *p; 1064 int len; 1065 int max; 1066 1067 if (uri == NULL) return(NULL); 1068 1069 1070 max = 80; 1071 ret = (xmlChar *) xmlMallocAtomic((max + 1) * sizeof(xmlChar)); 1072 if (ret == NULL) { 1073 xmlURIErrMemory("saving URI\n"); 1074 return(NULL); 1075 } 1076 len = 0; 1077 1078 if (uri->scheme != NULL) { 1079 p = uri->scheme; 1080 while (*p != 0) { 1081 if (len >= max) { 1082 temp = xmlSaveUriRealloc(ret, &max); 1083 if (temp == NULL) goto mem_error; 1084 ret = temp; 1085 } 1086 ret[len++] = *p++; 1087 } 1088 if (len >= max) { 1089 temp = xmlSaveUriRealloc(ret, &max); 1090 if (temp == NULL) goto mem_error; 1091 ret = temp; 1092 } 1093 ret[len++] = ':'; 1094 } 1095 if (uri->opaque != NULL) { 1096 p = uri->opaque; 1097 while (*p != 0) { 1098 if (len + 3 >= max) { 1099 temp = xmlSaveUriRealloc(ret, &max); 1100 if (temp == NULL) goto mem_error; 1101 ret = temp; 1102 } 1103 if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) 1104 ret[len++] = *p++; 1105 else { 1106 int val = *(unsigned char *)p++; 1107 int hi = val / 0x10, lo = val % 0x10; 1108 ret[len++] = '%'; 1109 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1110 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1111 } 1112 } 1113 } else { 1114 if ((uri->server != NULL) || (uri->port == -1)) { 1115 if (len + 3 >= max) { 1116 temp = xmlSaveUriRealloc(ret, &max); 1117 if (temp == NULL) goto mem_error; 1118 ret = temp; 1119 } 1120 ret[len++] = '/'; 1121 ret[len++] = '/'; 1122 if (uri->user != NULL) { 1123 p = uri->user; 1124 while (*p != 0) { 1125 if (len + 3 >= max) { 1126 temp = xmlSaveUriRealloc(ret, &max); 1127 if (temp == NULL) goto mem_error; 1128 ret = temp; 1129 } 1130 if ((IS_UNRESERVED(*(p))) || 1131 ((*(p) == ';')) || ((*(p) == ':')) || 1132 ((*(p) == '&')) || ((*(p) == '=')) || 1133 ((*(p) == '+')) || ((*(p) == '$')) || 1134 ((*(p) == ','))) 1135 ret[len++] = *p++; 1136 else { 1137 int val = *(unsigned char *)p++; 1138 int hi = val / 0x10, lo = val % 0x10; 1139 ret[len++] = '%'; 1140 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1141 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1142 } 1143 } 1144 if (len + 3 >= max) { 1145 temp = xmlSaveUriRealloc(ret, &max); 1146 if (temp == NULL) goto mem_error; 1147 ret = temp; 1148 } 1149 ret[len++] = '@'; 1150 } 1151 if (uri->server != NULL) { 1152 p = uri->server; 1153 while (*p != 0) { 1154 if (len >= max) { 1155 temp = xmlSaveUriRealloc(ret, &max); 1156 if (temp == NULL) goto mem_error; 1157 ret = temp; 1158 } 1159 ret[len++] = *p++; 1160 } 1161 if (uri->port > 0) { 1162 if (len + 10 >= max) { 1163 temp = xmlSaveUriRealloc(ret, &max); 1164 if (temp == NULL) goto mem_error; 1165 ret = temp; 1166 } 1167 len += snprintf((char *) &ret[len], max - len, ":%d", uri->port); 1168 } 1169 } 1170 } else if (uri->authority != NULL) { 1171 if (len + 3 >= max) { 1172 temp = xmlSaveUriRealloc(ret, &max); 1173 if (temp == NULL) goto mem_error; 1174 ret = temp; 1175 } 1176 ret[len++] = '/'; 1177 ret[len++] = '/'; 1178 p = uri->authority; 1179 while (*p != 0) { 1180 if (len + 3 >= max) { 1181 temp = xmlSaveUriRealloc(ret, &max); 1182 if (temp == NULL) goto mem_error; 1183 ret = temp; 1184 } 1185 if ((IS_UNRESERVED(*(p))) || 1186 
((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || 1187 ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || 1188 ((*(p) == '=')) || ((*(p) == '+'))) 1189 ret[len++] = *p++; 1190 else { 1191 int val = *(unsigned char *)p++; 1192 int hi = val / 0x10, lo = val % 0x10; 1193 ret[len++] = '%'; 1194 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1195 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1196 } 1197 } 1198 } else if (uri->scheme != NULL) { 1199 if (len + 3 >= max) { 1200 temp = xmlSaveUriRealloc(ret, &max); 1201 if (temp == NULL) goto mem_error; 1202 ret = temp; 1203 } 1204 } 1205 if (uri->path != NULL) { 1206 p = uri->path; 1207 /* 1208 * the colon in file:///d: should not be escaped or 1209 * Windows accesses fail later. 1210 */ 1211 if ((uri->scheme != NULL) && 1212 (p[0] == '/') && 1213 (((p[1] >= 'a') && (p[1] <= 'z')) || 1214 ((p[1] >= 'A') && (p[1] <= 'Z'))) && 1215 (p[2] == ':') && 1216 (xmlStrEqual(BAD_CAST uri->scheme, BAD_CAST "file"))) { 1217 if (len + 3 >= max) { 1218 temp = xmlSaveUriRealloc(ret, &max); 1219 if (temp == NULL) goto mem_error; 1220 ret = temp; 1221 } 1222 ret[len++] = *p++; 1223 ret[len++] = *p++; 1224 ret[len++] = *p++; 1225 } 1226 while (*p != 0) { 1227 if (len + 3 >= max) { 1228 temp = xmlSaveUriRealloc(ret, &max); 1229 if (temp == NULL) goto mem_error; 1230 ret = temp; 1231 } 1232 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || 1233 ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || 1234 ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || 1235 ((*(p) == ','))) 1236 ret[len++] = *p++; 1237 else { 1238 int val = *(unsigned char *)p++; 1239 int hi = val / 0x10, lo = val % 0x10; 1240 ret[len++] = '%'; 1241 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1242 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1243 } 1244 } 1245 } 1246 if (uri->query_raw != NULL) { 1247 if (len + 1 >= max) { 1248 temp = xmlSaveUriRealloc(ret, &max); 1249 if (temp == NULL) goto mem_error; 1250 ret = temp; 1251 } 1252 ret[len++] = '?'; 1253 p = uri->query_raw; 1254 while (*p != 0) { 1255 if (len + 1 >= max) { 1256 temp = xmlSaveUriRealloc(ret, &max); 1257 if (temp == NULL) goto mem_error; 1258 ret = temp; 1259 } 1260 ret[len++] = *p++; 1261 } 1262 } else if (uri->query != NULL) { 1263 if (len + 3 >= max) { 1264 temp = xmlSaveUriRealloc(ret, &max); 1265 if (temp == NULL) goto mem_error; 1266 ret = temp; 1267 } 1268 ret[len++] = '?'; 1269 p = uri->query; 1270 while (*p != 0) { 1271 if (len + 3 >= max) { 1272 temp = xmlSaveUriRealloc(ret, &max); 1273 if (temp == NULL) goto mem_error; 1274 ret = temp; 1275 } 1276 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1277 ret[len++] = *p++; 1278 else { 1279 int val = *(unsigned char *)p++; 1280 int hi = val / 0x10, lo = val % 0x10; 1281 ret[len++] = '%'; 1282 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1283 ret[len++] = lo + (lo > 9? 'A'-10 : '0'); 1284 } 1285 } 1286 } 1287 } 1288 if (uri->fragment != NULL) { 1289 if (len + 3 >= max) { 1290 temp = xmlSaveUriRealloc(ret, &max); 1291 if (temp == NULL) goto mem_error; 1292 ret = temp; 1293 } 1294 ret[len++] = '#'; 1295 p = uri->fragment; 1296 while (*p != 0) { 1297 if (len + 3 >= max) { 1298 temp = xmlSaveUriRealloc(ret, &max); 1299 if (temp == NULL) goto mem_error; 1300 ret = temp; 1301 } 1302 if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) 1303 ret[len++] = *p++; 1304 else { 1305 int val = *(unsigned char *)p++; 1306 int hi = val / 0x10, lo = val % 0x10; 1307 ret[len++] = '%'; 1308 ret[len++] = hi + (hi > 9? 'A'-10 : '0'); 1309 ret[len++] = lo + (lo > 9? 
'A'-10 : '0'); 1310 } 1311 } 1312 } 1313 if (len >= max) { 1314 temp = xmlSaveUriRealloc(ret, &max); 1315 if (temp == NULL) goto mem_error; 1316 ret = temp; 1317 } 1318 ret[len] = 0; 1319 return(ret); 1320 1321 mem_error: 1322 xmlFree(ret); 1323 return(NULL); 1324 } 1325 1326 /** 1327 * xmlPrintURI: 1328 * @stream: a FILE* for the output 1329 * @uri: pointer to an xmlURI 1330 * 1331 * Prints the URI in the stream @stream. 1332 */ 1333 void 1334 xmlPrintURI(FILE *stream, xmlURIPtr uri) { 1335 xmlChar *out; 1336 1337 out = xmlSaveUri(uri); 1338 if (out != NULL) { 1339 fprintf(stream, "%s", (char *) out); 1340 xmlFree(out); 1341 } 1342 } 1343 1344 /** 1345 * xmlCleanURI: 1346 * @uri: pointer to an xmlURI 1347 * 1348 * Make sure the xmlURI struct is free of content 1349 */ 1350 static void 1351 xmlCleanURI(xmlURIPtr uri) { 1352 if (uri == NULL) return; 1353 1354 if (uri->scheme != NULL) xmlFree(uri->scheme); 1355 uri->scheme = NULL; 1356 if (uri->server != NULL) xmlFree(uri->server); 1357 uri->server = NULL; 1358 if (uri->user != NULL) xmlFree(uri->user); 1359 uri->user = NULL; 1360 if (uri->path != NULL) xmlFree(uri->path); 1361 uri->path = NULL; 1362 if (uri->fragment != NULL) xmlFree(uri->fragment); 1363 uri->fragment = NULL; 1364 if (uri->opaque != NULL) xmlFree(uri->opaque); 1365 uri->opaque = NULL; 1366 if (uri->authority != NULL) xmlFree(uri->authority); 1367 uri->authority = NULL; 1368 if (uri->query != NULL) xmlFree(uri->query); 1369 uri->query = NULL; 1370 if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1371 uri->query_raw = NULL; 1372 } 1373 1374 /** 1375 * xmlFreeURI: 1376 * @uri: pointer to an xmlURI 1377 * 1378 * Free up the xmlURI struct 1379 */ 1380 void 1381 xmlFreeURI(xmlURIPtr uri) { 1382 if (uri == NULL) return; 1383 1384 if (uri->scheme != NULL) xmlFree(uri->scheme); 1385 if (uri->server != NULL) xmlFree(uri->server); 1386 if (uri->user != NULL) xmlFree(uri->user); 1387 if (uri->path != NULL) xmlFree(uri->path); 1388 if (uri->fragment != NULL) 
xmlFree(uri->fragment); 1389 if (uri->opaque != NULL) xmlFree(uri->opaque); 1390 if (uri->authority != NULL) xmlFree(uri->authority); 1391 if (uri->query != NULL) xmlFree(uri->query); 1392 if (uri->query_raw != NULL) xmlFree(uri->query_raw); 1393 xmlFree(uri); 1394 } 1395 1396 /************************************************************************ 1397 * * 1398 * Helper functions * 1399 * * 1400 ************************************************************************/ 1401 1402 /** 1403 * xmlNormalizeURIPath: 1404 * @path: pointer to the path string 1405 * 1406 * Applies the 5 normalization steps to a path string--that is, RFC 2396 1407 * Section 5.2, steps 6.c through 6.g. 1408 * 1409 * Normalization occurs directly on the string, no new allocation is done 1410 * 1411 * Returns 0 or an error code 1412 */ 1413 int 1414 xmlNormalizeURIPath(char *path) { 1415 char *cur, *out; 1416 1417 if (path == NULL) 1418 return(-1); 1419 1420 /* Skip all initial "/" chars. We want to get to the beginning of the 1421 * first non-empty segment. 1422 */ 1423 cur = path; 1424 while (cur[0] == '/') 1425 ++cur; 1426 if (cur[0] == '\0') 1427 return(0); 1428 1429 /* Keep everything we've seen so far. */ 1430 out = cur; 1431 1432 /* 1433 * Analyze each segment in sequence for cases (c) and (d). 1434 */ 1435 while (cur[0] != '\0') { 1436 /* 1437 * c) All occurrences of "./", where "." is a complete path segment, 1438 * are removed from the buffer string. 1439 */ 1440 if ((cur[0] == '.') && (cur[1] == '/')) { 1441 cur += 2; 1442 /* '//' normalization should be done at this point too */ 1443 while (cur[0] == '/') 1444 cur++; 1445 continue; 1446 } 1447 1448 /* 1449 * d) If the buffer string ends with "." as a complete path segment, 1450 * that "." is removed. 1451 */ 1452 if ((cur[0] == '.') && (cur[1] == '\0')) 1453 break; 1454 1455 /* Otherwise keep the segment. 
*/ 1456 while (cur[0] != '/') { 1457 if (cur[0] == '\0') 1458 goto done_cd; 1459 (out++)[0] = (cur++)[0]; 1460 } 1461 /* normalize // */ 1462 while ((cur[0] == '/') && (cur[1] == '/')) 1463 cur++; 1464 1465 (out++)[0] = (cur++)[0]; 1466 } 1467 done_cd: 1468 out[0] = '\0'; 1469 1470 /* Reset to the beginning of the first segment for the next sequence. */ 1471 cur = path; 1472 while (cur[0] == '/') 1473 ++cur; 1474 if (cur[0] == '\0') 1475 return(0); 1476 1477 /* 1478 * Analyze each segment in sequence for cases (e) and (f). 1479 * 1480 * e) All occurrences of "<segment>/../", where <segment> is a 1481 * complete path segment not equal to "..", are removed from the 1482 * buffer string. Removal of these path segments is performed 1483 * iteratively, removing the leftmost matching pattern on each 1484 * iteration, until no matching pattern remains. 1485 * 1486 * f) If the buffer string ends with "<segment>/..", where <segment> 1487 * is a complete path segment not equal to "..", that 1488 * "<segment>/.." is removed. 1489 * 1490 * To satisfy the "iterative" clause in (e), we need to collapse the 1491 * string every time we find something that needs to be removed. Thus, 1492 * we don't need to keep two pointers into the string: we only need a 1493 * "current position" pointer. 1494 */ 1495 while (1) { 1496 char *segp, *tmp; 1497 1498 /* At the beginning of each iteration of this loop, "cur" points to 1499 * the first character of the segment we want to examine. 1500 */ 1501 1502 /* Find the end of the current segment. */ 1503 segp = cur; 1504 while ((segp[0] != '/') && (segp[0] != '\0')) 1505 ++segp; 1506 1507 /* If this is the last segment, we're done (we need at least two 1508 * segments to meet the criteria for the (e) and (f) cases). 1509 */ 1510 if (segp[0] == '\0') 1511 break; 1512 1513 /* If the first segment is "..", or if the next segment _isn't_ "..", 1514 * keep this segment and try the next one. 
1515 */ 1516 ++segp; 1517 if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) 1518 || ((segp[0] != '.') || (segp[1] != '.') 1519 || ((segp[2] != '/') && (segp[2] != '\0')))) { 1520 cur = segp; 1521 continue; 1522 } 1523 1524 /* If we get here, remove this segment and the next one and back up 1525 * to the previous segment (if there is one), to implement the 1526 * "iteratively" clause. It's pretty much impossible to back up 1527 * while maintaining two pointers into the buffer, so just compact 1528 * the whole buffer now. 1529 */ 1530 1531 /* If this is the end of the buffer, we're done. */ 1532 if (segp[2] == '\0') { 1533 cur[0] = '\0'; 1534 break; 1535 } 1536 /* Valgrind complained, strcpy(cur, segp + 3); */ 1537 /* string will overlap, do not use strcpy */ 1538 tmp = cur; 1539 segp += 3; 1540 while ((*tmp++ = *segp++) != 0) 1541 ; 1542 1543 /* If there are no previous segments, then keep going from here. */ 1544 segp = cur; 1545 while ((segp > path) && ((--segp)[0] == '/')) 1546 ; 1547 if (segp == path) 1548 continue; 1549 1550 /* "segp" is pointing to the end of a previous segment; find it's 1551 * start. We need to back up to the previous segment and start 1552 * over with that to handle things like "foo/bar/../..". If we 1553 * don't do this, then on the first pass we'll remove the "bar/..", 1554 * but be pointing at the second ".." so we won't realize we can also 1555 * remove the "foo/..". 1556 */ 1557 cur = segp; 1558 while ((cur > path) && (cur[-1] != '/')) 1559 --cur; 1560 } 1561 out[0] = '\0'; 1562 1563 /* 1564 * g) If the resulting buffer string still begins with one or more 1565 * complete path segments of "..", then the reference is 1566 * considered to be in error. 
Implementations may handle this 1567 * error by retaining these components in the resolved path (i.e., 1568 * treating them as part of the final URI), by removing them from 1569 * the resolved path (i.e., discarding relative levels above the 1570 * root), or by avoiding traversal of the reference. 1571 * 1572 * We discard them from the final path. 1573 */ 1574 if (path[0] == '/') { 1575 cur = path; 1576 while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') 1577 && ((cur[3] == '/') || (cur[3] == '\0'))) 1578 cur += 3; 1579 1580 if (cur != path) { 1581 out = path; 1582 while (cur[0] != '\0') 1583 (out++)[0] = (cur++)[0]; 1584 out[0] = 0; 1585 } 1586 } 1587 1588 return(0); 1589 } 1590 1591 static int is_hex(char c) { 1592 if (((c >= '0') && (c <= '9')) || 1593 ((c >= 'a') && (c <= 'f')) || 1594 ((c >= 'A') && (c <= 'F'))) 1595 return(1); 1596 return(0); 1597 } 1598 1599 /** 1600 * xmlURIUnescapeString: 1601 * @str: the string to unescape 1602 * @len: the length in bytes to unescape (or <= 0 to indicate full string) 1603 * @target: optional destination buffer 1604 * 1605 * Unescaping routine, but does not check that the string is an URI. The 1606 * output is a direct unsigned char translation of %XX values (no encoding) 1607 * Note that the length of the result can only be smaller or same size as 1608 * the input string. 
1609 * 1610 * Returns a copy of the string, but unescaped, will return NULL only in case 1611 * of error 1612 */ 1613 char * 1614 xmlURIUnescapeString(const char *str, int len, char *target) { 1615 char *ret, *out; 1616 const char *in; 1617 1618 if (str == NULL) 1619 return(NULL); 1620 if (len <= 0) len = strlen(str); 1621 if (len < 0) return(NULL); 1622 1623 if (target == NULL) { 1624 ret = (char *) xmlMallocAtomic(len + 1); 1625 if (ret == NULL) { 1626 xmlURIErrMemory("unescaping URI value\n"); 1627 return(NULL); 1628 } 1629 } else 1630 ret = target; 1631 in = str; 1632 out = ret; 1633 while(len > 0) { 1634 if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { 1635 in++; 1636 if ((*in >= '0') && (*in <= '9')) 1637 *out = (*in - '0'); 1638 else if ((*in >= 'a') && (*in <= 'f')) 1639 *out = (*in - 'a') + 10; 1640 else if ((*in >= 'A') && (*in <= 'F')) 1641 *out = (*in - 'A') + 10; 1642 in++; 1643 if ((*in >= '0') && (*in <= '9')) 1644 *out = *out * 16 + (*in - '0'); 1645 else if ((*in >= 'a') && (*in <= 'f')) 1646 *out = *out * 16 + (*in - 'a') + 10; 1647 else if ((*in >= 'A') && (*in <= 'F')) 1648 *out = *out * 16 + (*in - 'A') + 10; 1649 in++; 1650 len -= 3; 1651 out++; 1652 } else { 1653 *out++ = *in++; 1654 len--; 1655 } 1656 } 1657 *out = 0; 1658 return(ret); 1659 } 1660 1661 /** 1662 * xmlURIEscapeStr: 1663 * @str: string to escape 1664 * @list: exception list string of chars not to escape 1665 * 1666 * This routine escapes a string to hex, ignoring reserved characters (a-z) 1667 * and the characters in the exception list. 1668 * 1669 * Returns a new escaped string or NULL in case of error. 
1670 */ 1671 xmlChar * 1672 xmlURIEscapeStr(const xmlChar *str, const xmlChar *list) { 1673 xmlChar *ret, ch; 1674 xmlChar *temp; 1675 const xmlChar *in; 1676 int len, out; 1677 1678 if (str == NULL) 1679 return(NULL); 1680 if (str[0] == 0) 1681 return(xmlStrdup(str)); 1682 len = xmlStrlen(str); 1683 if (!(len > 0)) return(NULL); 1684 1685 len += 20; 1686 ret = (xmlChar *) xmlMallocAtomic(len); 1687 if (ret == NULL) { 1688 xmlURIErrMemory("escaping URI value\n"); 1689 return(NULL); 1690 } 1691 in = (const xmlChar *) str; 1692 out = 0; 1693 while(*in != 0) { 1694 if (len - out <= 3) { 1695 temp = xmlSaveUriRealloc(ret, &len); 1696 if (temp == NULL) { 1697 xmlURIErrMemory("escaping URI value\n"); 1698 xmlFree(ret); 1699 return(NULL); 1700 } 1701 ret = temp; 1702 } 1703 1704 ch = *in; 1705 1706 if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!xmlStrchr(list, ch))) { 1707 unsigned char val; 1708 ret[out++] = '%'; 1709 val = ch >> 4; 1710 if (val <= 9) 1711 ret[out++] = '0' + val; 1712 else 1713 ret[out++] = 'A' + val - 0xA; 1714 val = ch & 0xF; 1715 if (val <= 9) 1716 ret[out++] = '0' + val; 1717 else 1718 ret[out++] = 'A' + val - 0xA; 1719 in++; 1720 } else { 1721 ret[out++] = *in++; 1722 } 1723 1724 } 1725 ret[out] = 0; 1726 return(ret); 1727 } 1728 1729 /** 1730 * xmlURIEscape: 1731 * @str: the string of the URI to escape 1732 * 1733 * Escaping routine, does not do validity checks ! 1734 * It will try to escape the chars needing this, but this is heuristic 1735 * based it's impossible to be sure. 1736 * 1737 * Returns an copy of the string, but escaped 1738 * 1739 * 25 May 2001 1740 * Uses xmlParseURI and xmlURIEscapeStr to try to escape correctly 1741 * according to RFC2396. 
1742 * - Carl Douglas 1743 */ 1744 xmlChar * 1745 xmlURIEscape(const xmlChar * str) 1746 { 1747 xmlChar *ret, *segment = NULL; 1748 xmlURIPtr uri; 1749 int ret2; 1750 1751 #define NULLCHK(p) if(!p) { \ 1752 xmlURIErrMemory("escaping URI value\n"); \ 1753 xmlFreeURI(uri); \ 1754 return NULL; } \ 1755 1756 if (str == NULL) 1757 return (NULL); 1758 1759 uri = xmlCreateURI(); 1760 if (uri != NULL) { 1761 /* 1762 * Allow escaping errors in the unescaped form 1763 */ 1764 uri->cleanup = 1; 1765 ret2 = xmlParseURIReference(uri, (const char *)str); 1766 if (ret2) { 1767 xmlFreeURI(uri); 1768 return (NULL); 1769 } 1770 } 1771 1772 if (!uri) 1773 return NULL; 1774 1775 ret = NULL; 1776 1777 if (uri->scheme) { 1778 segment = xmlURIEscapeStr(BAD_CAST uri->scheme, BAD_CAST "+-."); 1779 NULLCHK(segment) 1780 ret = xmlStrcat(ret, segment); 1781 ret = xmlStrcat(ret, BAD_CAST ":"); 1782 xmlFree(segment); 1783 } 1784 1785 if (uri->authority) { 1786 segment = 1787 xmlURIEscapeStr(BAD_CAST uri->authority, BAD_CAST "/?;:@"); 1788 NULLCHK(segment) 1789 ret = xmlStrcat(ret, BAD_CAST "//"); 1790 ret = xmlStrcat(ret, segment); 1791 xmlFree(segment); 1792 } 1793 1794 if (uri->user) { 1795 segment = xmlURIEscapeStr(BAD_CAST uri->user, BAD_CAST ";:&=+$,"); 1796 NULLCHK(segment) 1797 ret = xmlStrcat(ret,BAD_CAST "//"); 1798 ret = xmlStrcat(ret, segment); 1799 ret = xmlStrcat(ret, BAD_CAST "@"); 1800 xmlFree(segment); 1801 } 1802 1803 if (uri->server) { 1804 segment = xmlURIEscapeStr(BAD_CAST uri->server, BAD_CAST "/?;:@"); 1805 NULLCHK(segment) 1806 if (uri->user == NULL) 1807 ret = xmlStrcat(ret, BAD_CAST "//"); 1808 ret = xmlStrcat(ret, segment); 1809 xmlFree(segment); 1810 } 1811 1812 if (uri->port) { 1813 xmlChar port[10]; 1814 1815 snprintf((char *) port, 10, "%d", uri->port); 1816 ret = xmlStrcat(ret, BAD_CAST ":"); 1817 ret = xmlStrcat(ret, port); 1818 } 1819 1820 if (uri->path) { 1821 segment = 1822 xmlURIEscapeStr(BAD_CAST uri->path, BAD_CAST ":@&=+$,/?;"); 1823 NULLCHK(segment) 1824 
ret = xmlStrcat(ret, segment); 1825 xmlFree(segment); 1826 } 1827 1828 if (uri->query_raw) { 1829 ret = xmlStrcat(ret, BAD_CAST "?"); 1830 ret = xmlStrcat(ret, BAD_CAST uri->query_raw); 1831 } 1832 else if (uri->query) { 1833 segment = 1834 xmlURIEscapeStr(BAD_CAST uri->query, BAD_CAST ";/?:@&=+,$"); 1835 NULLCHK(segment) 1836 ret = xmlStrcat(ret, BAD_CAST "?"); 1837 ret = xmlStrcat(ret, segment); 1838 xmlFree(segment); 1839 } 1840 1841 if (uri->opaque) { 1842 segment = xmlURIEscapeStr(BAD_CAST uri->opaque, BAD_CAST ""); 1843 NULLCHK(segment) 1844 ret = xmlStrcat(ret, segment); 1845 xmlFree(segment); 1846 } 1847 1848 if (uri->fragment) { 1849 segment = xmlURIEscapeStr(BAD_CAST uri->fragment, BAD_CAST "#"); 1850 NULLCHK(segment) 1851 ret = xmlStrcat(ret, BAD_CAST "#"); 1852 ret = xmlStrcat(ret, segment); 1853 xmlFree(segment); 1854 } 1855 1856 xmlFreeURI(uri); 1857 #undef NULLCHK 1858 1859 return (ret); 1860 } 1861 1862 /************************************************************************ 1863 * * 1864 * Public functions * 1865 * * 1866 ************************************************************************/ 1867 1868 /** 1869 * xmlBuildURI: 1870 * @URI: the URI instance found in the document 1871 * @base: the base value 1872 * 1873 * Computes he final URI of the reference done by checking that 1874 * the given URI is valid, and building the final URI using the 1875 * base URI. This is processed according to section 5.2 of the 1876 * RFC 2396 1877 * 1878 * 5.2. Resolving Relative References to Absolute Form 1879 * 1880 * Returns a new URI string (to be freed by the caller) or NULL in case 1881 * of error. 
1882 */ 1883 xmlChar * 1884 xmlBuildURI(const xmlChar *URI, const xmlChar *base) { 1885 xmlChar *val = NULL; 1886 int ret, len, indx, cur, out; 1887 xmlURIPtr ref = NULL; 1888 xmlURIPtr bas = NULL; 1889 xmlURIPtr res = NULL; 1890 1891 /* 1892 * 1) The URI reference is parsed into the potential four components and 1893 * fragment identifier, as described in Section 4.3. 1894 * 1895 * NOTE that a completely empty URI is treated by modern browsers 1896 * as a reference to "." rather than as a synonym for the current 1897 * URI. Should we do that here? 1898 */ 1899 if (URI == NULL) 1900 ret = -1; 1901 else { 1902 if (*URI) { 1903 ref = xmlCreateURI(); 1904 if (ref == NULL) 1905 goto done; 1906 ret = xmlParseURIReference(ref, (const char *) URI); 1907 } 1908 else 1909 ret = 0; 1910 } 1911 if (ret != 0) 1912 goto done; 1913 if ((ref != NULL) && (ref->scheme != NULL)) { 1914 /* 1915 * The URI is absolute don't modify. 1916 */ 1917 val = xmlStrdup(URI); 1918 goto done; 1919 } 1920 if (base == NULL) 1921 ret = -1; 1922 else { 1923 bas = xmlCreateURI(); 1924 if (bas == NULL) 1925 goto done; 1926 ret = xmlParseURIReference(bas, (const char *) base); 1927 } 1928 if (ret != 0) { 1929 if (ref) 1930 val = xmlSaveUri(ref); 1931 goto done; 1932 } 1933 if (ref == NULL) { 1934 /* 1935 * the base fragment must be ignored 1936 */ 1937 if (bas->fragment != NULL) { 1938 xmlFree(bas->fragment); 1939 bas->fragment = NULL; 1940 } 1941 val = xmlSaveUri(bas); 1942 goto done; 1943 } 1944 1945 /* 1946 * 2) If the path component is empty and the scheme, authority, and 1947 * query components are undefined, then it is a reference to the 1948 * current document and we are done. Otherwise, the reference URI's 1949 * query and fragment components are defined as found (or not found) 1950 * within the URI reference and not inherited from the base URI. 
1951 * 1952 * NOTE that in modern browsers, the parsing differs from the above 1953 * in the following aspect: the query component is allowed to be 1954 * defined while still treating this as a reference to the current 1955 * document. 1956 */ 1957 res = xmlCreateURI(); 1958 if (res == NULL) 1959 goto done; 1960 if ((ref->scheme == NULL) && (ref->path == NULL) && 1961 ((ref->authority == NULL) && (ref->server == NULL))) { 1962 if (bas->scheme != NULL) 1963 res->scheme = xmlMemStrdup(bas->scheme); 1964 if (bas->authority != NULL) 1965 res->authority = xmlMemStrdup(bas->authority); 1966 else if ((bas->server != NULL) || (bas->port == -1)) { 1967 if (bas->server != NULL) 1968 res->server = xmlMemStrdup(bas->server); 1969 if (bas->user != NULL) 1970 res->user = xmlMemStrdup(bas->user); 1971 res->port = bas->port; 1972 } 1973 if (bas->path != NULL) 1974 res->path = xmlMemStrdup(bas->path); 1975 if (ref->query_raw != NULL) 1976 res->query_raw = xmlMemStrdup (ref->query_raw); 1977 else if (ref->query != NULL) 1978 res->query = xmlMemStrdup(ref->query); 1979 else if (bas->query_raw != NULL) 1980 res->query_raw = xmlMemStrdup(bas->query_raw); 1981 else if (bas->query != NULL) 1982 res->query = xmlMemStrdup(bas->query); 1983 if (ref->fragment != NULL) 1984 res->fragment = xmlMemStrdup(ref->fragment); 1985 goto step_7; 1986 } 1987 1988 /* 1989 * 3) If the scheme component is defined, indicating that the reference 1990 * starts with a scheme name, then the reference is interpreted as an 1991 * absolute URI and we are done. Otherwise, the reference URI's 1992 * scheme is inherited from the base URI's scheme component. 
1993 */ 1994 if (ref->scheme != NULL) { 1995 val = xmlSaveUri(ref); 1996 goto done; 1997 } 1998 if (bas->scheme != NULL) 1999 res->scheme = xmlMemStrdup(bas->scheme); 2000 2001 if (ref->query_raw != NULL) 2002 res->query_raw = xmlMemStrdup(ref->query_raw); 2003 else if (ref->query != NULL) 2004 res->query = xmlMemStrdup(ref->query); 2005 if (ref->fragment != NULL) 2006 res->fragment = xmlMemStrdup(ref->fragment); 2007 2008 /* 2009 * 4) If the authority component is defined, then the reference is a 2010 * network-path and we skip to step 7. Otherwise, the reference 2011 * URI's authority is inherited from the base URI's authority 2012 * component, which will also be undefined if the URI scheme does not 2013 * use an authority component. 2014 */ 2015 if ((ref->authority != NULL) || (ref->server != NULL)) { 2016 if (ref->authority != NULL) 2017 res->authority = xmlMemStrdup(ref->authority); 2018 else { 2019 res->server = xmlMemStrdup(ref->server); 2020 if (ref->user != NULL) 2021 res->user = xmlMemStrdup(ref->user); 2022 res->port = ref->port; 2023 } 2024 if (ref->path != NULL) 2025 res->path = xmlMemStrdup(ref->path); 2026 goto step_7; 2027 } 2028 if (bas->authority != NULL) 2029 res->authority = xmlMemStrdup(bas->authority); 2030 else if ((bas->server != NULL) || (bas->port == -1)) { 2031 if (bas->server != NULL) 2032 res->server = xmlMemStrdup(bas->server); 2033 if (bas->user != NULL) 2034 res->user = xmlMemStrdup(bas->user); 2035 res->port = bas->port; 2036 } 2037 2038 /* 2039 * 5) If the path component begins with a slash character ("/"), then 2040 * the reference is an absolute-path and we skip to step 7. 2041 */ 2042 if ((ref->path != NULL) && (ref->path[0] == '/')) { 2043 res->path = xmlMemStrdup(ref->path); 2044 goto step_7; 2045 } 2046 2047 2048 /* 2049 * 6) If this step is reached, then we are resolving a relative-path 2050 * reference. The relative path needs to be merged with the base 2051 * URI's path. 
Although there are many ways to do this, we will 2052 * describe a simple method using a separate string buffer. 2053 * 2054 * Allocate a buffer large enough for the result string. 2055 */ 2056 len = 2; /* extra / and 0 */ 2057 if (ref->path != NULL) 2058 len += strlen(ref->path); 2059 if (bas->path != NULL) 2060 len += strlen(bas->path); 2061 res->path = (char *) xmlMallocAtomic(len); 2062 if (res->path == NULL) { 2063 xmlURIErrMemory("resolving URI against base\n"); 2064 goto done; 2065 } 2066 res->path[0] = 0; 2067 2068 /* 2069 * a) All but the last segment of the base URI's path component is 2070 * copied to the buffer. In other words, any characters after the 2071 * last (right-most) slash character, if any, are excluded. 2072 */ 2073 cur = 0; 2074 out = 0; 2075 if (bas->path != NULL) { 2076 while (bas->path[cur] != 0) { 2077 while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) 2078 cur++; 2079 if (bas->path[cur] == 0) 2080 break; 2081 2082 cur++; 2083 while (out < cur) { 2084 res->path[out] = bas->path[out]; 2085 out++; 2086 } 2087 } 2088 } 2089 res->path[out] = 0; 2090 2091 /* 2092 * b) The reference's path component is appended to the buffer 2093 * string. 2094 */ 2095 if (ref->path != NULL && ref->path[0] != 0) { 2096 indx = 0; 2097 /* 2098 * Ensure the path includes a '/' 2099 */ 2100 if ((out == 0) && (bas->server != NULL)) 2101 res->path[out++] = '/'; 2102 while (ref->path[indx] != 0) { 2103 res->path[out++] = ref->path[indx++]; 2104 } 2105 } 2106 res->path[out] = 0; 2107 2108 /* 2109 * Steps c) to h) are really path normalization steps 2110 */ 2111 xmlNormalizeURIPath(res->path); 2112 2113 step_7: 2114 2115 /* 2116 * 7) The resulting URI components, including any inherited from the 2117 * base URI, are recombined to give the absolute form of the URI 2118 * reference. 
2119 */ 2120 val = xmlSaveUri(res); 2121 2122 done: 2123 if (ref != NULL) 2124 xmlFreeURI(ref); 2125 if (bas != NULL) 2126 xmlFreeURI(bas); 2127 if (res != NULL) 2128 xmlFreeURI(res); 2129 return(val); 2130 } 2131 2132 /** 2133 * xmlBuildRelativeURI: 2134 * @URI: the URI reference under consideration 2135 * @base: the base value 2136 * 2137 * Expresses the URI of the reference in terms relative to the 2138 * base. Some examples of this operation include: 2139 * base = "http://site1.com/docs/book1.html" 2140 * URI input URI returned 2141 * docs/pic1.gif pic1.gif 2142 * docs/img/pic1.gif img/pic1.gif 2143 * img/pic1.gif ../img/pic1.gif 2144 * http://site1.com/docs/pic1.gif pic1.gif 2145 * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif 2146 * 2147 * base = "docs/book1.html" 2148 * URI input URI returned 2149 * docs/pic1.gif pic1.gif 2150 * docs/img/pic1.gif img/pic1.gif 2151 * img/pic1.gif ../img/pic1.gif 2152 * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif 2153 * 2154 * 2155 * Note: if the URI reference is really weird or complicated, it may be 2156 * worthwhile to first convert it into a "nice" one by calling 2157 * xmlBuildURI (using 'base') before calling this routine, 2158 * since this routine (for reasonable efficiency) assumes URI has 2159 * already been through some validation. 2160 * 2161 * Returns a new URI string (to be freed by the caller) or NULL in case 2162 * error. 
2163 */ 2164 xmlChar * 2165 xmlBuildRelativeURI (const xmlChar * URI, const xmlChar * base) 2166 { 2167 xmlChar *val = NULL; 2168 int ret; 2169 int ix; 2170 int nbslash = 0; 2171 int len; 2172 xmlURIPtr ref = NULL; 2173 xmlURIPtr bas = NULL; 2174 xmlChar *bptr, *uptr, *vptr; 2175 int remove_path = 0; 2176 2177 if ((URI == NULL) || (*URI == 0)) 2178 return NULL; 2179 2180 /* 2181 * First parse URI into a standard form 2182 */ 2183 ref = xmlCreateURI (); 2184 if (ref == NULL) 2185 return NULL; 2186 /* If URI not already in "relative" form */ 2187 if (URI[0] != '.') { 2188 ret = xmlParseURIReference (ref, (const char *) URI); 2189 if (ret != 0) 2190 goto done; /* Error in URI, return NULL */ 2191 } else 2192 ref->path = (char *)xmlStrdup(URI); 2193 2194 /* 2195 * Next parse base into the same standard form 2196 */ 2197 if ((base == NULL) || (*base == 0)) { 2198 val = xmlStrdup (URI); 2199 goto done; 2200 } 2201 bas = xmlCreateURI (); 2202 if (bas == NULL) 2203 goto done; 2204 if (base[0] != '.') { 2205 ret = xmlParseURIReference (bas, (const char *) base); 2206 if (ret != 0) 2207 goto done; /* Error in base, return NULL */ 2208 } else 2209 bas->path = (char *)xmlStrdup(base); 2210 2211 /* 2212 * If the scheme / server on the URI differs from the base, 2213 * just return the URI 2214 */ 2215 if ((ref->scheme != NULL) && 2216 ((bas->scheme == NULL) || 2217 (xmlStrcmp ((xmlChar *)bas->scheme, (xmlChar *)ref->scheme)) || 2218 (xmlStrcmp ((xmlChar *)bas->server, (xmlChar *)ref->server)))) { 2219 val = xmlStrdup (URI); 2220 goto done; 2221 } 2222 if (xmlStrEqual((xmlChar *)bas->path, (xmlChar *)ref->path)) { 2223 val = xmlStrdup(BAD_CAST ""); 2224 goto done; 2225 } 2226 if (bas->path == NULL) { 2227 val = xmlStrdup((xmlChar *)ref->path); 2228 goto done; 2229 } 2230 if (ref->path == NULL) { 2231 ref->path = (char *) "/"; 2232 remove_path = 1; 2233 } 2234 2235 /* 2236 * At this point (at last!) 
we can compare the two paths 2237 * 2238 * First we take care of the special case where either of the 2239 * two path components may be missing (bug 316224) 2240 */ 2241 bptr = (xmlChar *)bas->path; 2242 { 2243 xmlChar *rptr = (xmlChar *) ref->path; 2244 int pos = 0; 2245 2246 /* 2247 * Next we compare the two strings and find where they first differ 2248 */ 2249 if ((*rptr == '.') && (rptr[1] == '/')) 2250 rptr += 2; 2251 if ((*bptr == '.') && (bptr[1] == '/')) 2252 bptr += 2; 2253 else if ((*bptr == '/') && (*rptr != '/')) 2254 bptr++; 2255 while ((bptr[pos] == rptr[pos]) && (bptr[pos] != 0)) 2256 pos++; 2257 2258 if (bptr[pos] == rptr[pos]) { 2259 val = xmlStrdup(BAD_CAST ""); 2260 goto done; /* (I can't imagine why anyone would do this) */ 2261 } 2262 2263 /* 2264 * In URI, "back up" to the last '/' encountered. This will be the 2265 * beginning of the "unique" suffix of URI 2266 */ 2267 ix = pos; 2268 for (; ix > 0; ix--) { 2269 if (rptr[ix - 1] == '/') 2270 break; 2271 } 2272 uptr = (xmlChar *)&rptr[ix]; 2273 2274 /* 2275 * In base, count the number of '/' from the differing point 2276 */ 2277 for (; bptr[ix] != 0; ix++) { 2278 if (bptr[ix] == '/') 2279 nbslash++; 2280 } 2281 2282 /* 2283 * e.g: URI="foo/" base="foo/bar" -> "./" 2284 */ 2285 if (nbslash == 0 && !uptr[0]) { 2286 val = xmlStrdup(BAD_CAST "./"); 2287 goto done; 2288 } 2289 2290 len = xmlStrlen (uptr) + 1; 2291 } 2292 2293 if (nbslash == 0) { 2294 if (uptr != NULL) 2295 /* exception characters from xmlSaveUri */ 2296 val = xmlURIEscapeStr(uptr, BAD_CAST "/;&=+$,"); 2297 goto done; 2298 } 2299 2300 /* 2301 * Allocate just enough space for the returned string - 2302 * length of the remainder of the URI, plus enough space 2303 * for the "../" groups, plus one for the terminator 2304 */ 2305 val = (xmlChar *) xmlMalloc (len + 3 * nbslash); 2306 if (val == NULL) { 2307 xmlURIErrMemory("building relative URI\n"); 2308 goto done; 2309 } 2310 vptr = val; 2311 /* 2312 * Put in as many "../" as needed 2313 
*/ 2314 for (; nbslash>0; nbslash--) { 2315 *vptr++ = '.'; 2316 *vptr++ = '.'; 2317 *vptr++ = '/'; 2318 } 2319 /* 2320 * Finish up with the end of the URI 2321 */ 2322 if (uptr != NULL) { 2323 if ((vptr > val) && (len > 0) && 2324 (uptr[0] == '/') && (vptr[-1] == '/')) { 2325 memcpy (vptr, uptr + 1, len - 1); 2326 vptr[len - 2] = 0; 2327 } else { 2328 memcpy (vptr, uptr, len); 2329 vptr[len - 1] = 0; 2330 } 2331 } else { 2332 vptr[len - 1] = 0; 2333 } 2334 2335 /* escape the freshly-built path */ 2336 vptr = val; 2337 /* exception characters from xmlSaveUri */ 2338 val = xmlURIEscapeStr(vptr, BAD_CAST "/;&=+$,"); 2339 xmlFree(vptr); 2340 2341 done: 2342 /* 2343 * Free the working variables 2344 */ 2345 if (remove_path != 0) 2346 ref->path = NULL; 2347 if (ref != NULL) 2348 xmlFreeURI (ref); 2349 if (bas != NULL) 2350 xmlFreeURI (bas); 2351 2352 return val; 2353 } 2354 2355 /** 2356 * xmlCanonicPath: 2357 * @path: the resource locator in a filesystem notation 2358 * 2359 * Constructs a canonic path from the specified path. 2360 * 2361 * Returns a new canonic path, or a duplicate of the path parameter if the 2362 * construction fails. The caller is responsible for freeing the memory occupied 2363 * by the returned string. If there is insufficient memory available, or the 2364 * argument is NULL, the function returns NULL. 
2365 */ 2366 #define IS_WINDOWS_PATH(p) \ 2367 ((p != NULL) && \ 2368 (((p[0] >= 'a') && (p[0] <= 'z')) || \ 2369 ((p[0] >= 'A') && (p[0] <= 'Z'))) && \ 2370 (p[1] == ':') && ((p[2] == '/') || (p[2] == '\\'))) 2371 xmlChar * 2372 xmlCanonicPath(const xmlChar *path) 2373 { 2374 /* 2375 * For Windows implementations, additional work needs to be done to 2376 * replace backslashes in pathnames with "forward slashes" 2377 */ 2378 #if defined(_WIN32) && !defined(__CYGWIN__) 2379 int len = 0; 2380 char *p = NULL; 2381 #endif 2382 xmlURIPtr uri; 2383 xmlChar *ret; 2384 const xmlChar *absuri; 2385 2386 if (path == NULL) 2387 return(NULL); 2388 2389 #if defined(_WIN32) 2390 /* 2391 * We must not change the backslashes to slashes if the the path 2392 * starts with \\?\ 2393 * Those paths can be up to 32k characters long. 2394 * Was added specifically for OpenOffice, those paths can't be converted 2395 * to URIs anyway. 2396 */ 2397 if ((path[0] == '\\') && (path[1] == '\\') && (path[2] == '?') && 2398 (path[3] == '\\') ) 2399 return xmlStrdup((const xmlChar *) path); 2400 #endif 2401 2402 /* sanitize filename starting with // so it can be used as URI */ 2403 if ((path[0] == '/') && (path[1] == '/') && (path[2] != '/')) 2404 path++; 2405 2406 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2407 xmlFreeURI(uri); 2408 return xmlStrdup(path); 2409 } 2410 2411 /* Check if this is an "absolute uri" */ 2412 absuri = xmlStrstr(path, BAD_CAST "://"); 2413 if (absuri != NULL) { 2414 int l, j; 2415 unsigned char c; 2416 xmlChar *escURI; 2417 2418 /* 2419 * this looks like an URI where some parts have not been 2420 * escaped leading to a parsing problem. Check that the first 2421 * part matches a protocol. 
2422 */ 2423 l = absuri - path; 2424 /* Bypass if first part (part before the '://') is > 20 chars */ 2425 if ((l <= 0) || (l > 20)) 2426 goto path_processing; 2427 /* Bypass if any non-alpha characters are present in first part */ 2428 for (j = 0;j < l;j++) { 2429 c = path[j]; 2430 if (!(((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')))) 2431 goto path_processing; 2432 } 2433 2434 /* Escape all except the characters specified in the supplied path */ 2435 escURI = xmlURIEscapeStr(path, BAD_CAST ":/?_.#&;="); 2436 if (escURI != NULL) { 2437 /* Try parsing the escaped path */ 2438 uri = xmlParseURI((const char *) escURI); 2439 /* If successful, return the escaped string */ 2440 if (uri != NULL) { 2441 xmlFreeURI(uri); 2442 return escURI; 2443 } 2444 xmlFree(escURI); 2445 } 2446 } 2447 2448 path_processing: 2449 /* For Windows implementations, replace backslashes with 'forward slashes' */ 2450 #if defined(_WIN32) && !defined(__CYGWIN__) 2451 /* 2452 * Create a URI structure 2453 */ 2454 uri = xmlCreateURI(); 2455 if (uri == NULL) { /* Guard against 'out of memory' */ 2456 return(NULL); 2457 } 2458 2459 len = xmlStrlen(path); 2460 if ((len > 2) && IS_WINDOWS_PATH(path)) { 2461 /* make the scheme 'file' */ 2462 uri->scheme = (char *) xmlStrdup(BAD_CAST "file"); 2463 /* allocate space for leading '/' + path + string terminator */ 2464 uri->path = xmlMallocAtomic(len + 2); 2465 if (uri->path == NULL) { 2466 xmlFreeURI(uri); /* Guard against 'out of memory' */ 2467 return(NULL); 2468 } 2469 /* Put in leading '/' plus path */ 2470 uri->path[0] = '/'; 2471 p = uri->path + 1; 2472 strncpy(p, (char *) path, len + 1); 2473 } else { 2474 uri->path = (char *) xmlStrdup(path); 2475 if (uri->path == NULL) { 2476 xmlFreeURI(uri); 2477 return(NULL); 2478 } 2479 p = uri->path; 2480 } 2481 /* Now change all occurrences of '\' to '/' */ 2482 while (*p != '\0') { 2483 if (*p == '\\') 2484 *p = '/'; 2485 p++; 2486 } 2487 2488 if (uri->scheme == NULL) { 2489 ret = xmlStrdup((const 
xmlChar *) uri->path); 2490 } else { 2491 ret = xmlSaveUri(uri); 2492 } 2493 2494 xmlFreeURI(uri); 2495 #else 2496 ret = xmlStrdup((const xmlChar *) path); 2497 #endif 2498 return(ret); 2499 } 2500 2501 /** 2502 * xmlPathToURI: 2503 * @path: the resource locator in a filesystem notation 2504 * 2505 * Constructs an URI expressing the existing path 2506 * 2507 * Returns a new URI, or a duplicate of the path parameter if the 2508 * construction fails. The caller is responsible for freeing the memory 2509 * occupied by the returned string. If there is insufficient memory available, 2510 * or the argument is NULL, the function returns NULL. 2511 */ 2512 xmlChar * 2513 xmlPathToURI(const xmlChar *path) 2514 { 2515 xmlURIPtr uri; 2516 xmlURI temp; 2517 xmlChar *ret, *cal; 2518 2519 if (path == NULL) 2520 return(NULL); 2521 2522 if ((uri = xmlParseURI((const char *) path)) != NULL) { 2523 xmlFreeURI(uri); 2524 return xmlStrdup(path); 2525 } 2526 cal = xmlCanonicPath(path); 2527 if (cal == NULL) 2528 return(NULL); 2529 #if defined(_WIN32) && !defined(__CYGWIN__) 2530 /* xmlCanonicPath can return an URI on Windows (is that the intended behaviour?) 2531 If 'cal' is a valid URI already then we are done here, as continuing would make 2532 it invalid. */ 2533 if ((uri = xmlParseURI((const char *) cal)) != NULL) { 2534 xmlFreeURI(uri); 2535 return cal; 2536 } 2537 /* 'cal' can contain a relative path with backslashes. If that is processed 2538 by xmlSaveURI, they will be escaped and the external entity loader machinery 2539 will fail. So convert them to slashes. Misuse 'ret' for walking. */ 2540 ret = cal; 2541 while (*ret != '\0') { 2542 if (*ret == '\\') 2543 *ret = '/'; 2544 ret++; 2545 } 2546 #endif 2547 memset(&temp, 0, sizeof(temp)); 2548 temp.path = (char *) cal; 2549 ret = xmlSaveUri(&temp); 2550 xmlFree(cal); 2551 return(ret); 2552 } 2553 #define bottom_uri 2554 #include "elfgcchack.h" 2555