1 /* 2 * $Header: /home/jerenkrantz/tmp/commons/commons-convert/cvs/home/cvs/jakarta-commons//httpclient/src/java/org/apache/commons/httpclient/util/URIUtil.java,v 1.27 2004/05/05 20:34:01 olegk Exp $ 3 * $Revision: 507321 $ 4 * $Date: 2007-02-14 01:10:51 +0100 (Wed, 14 Feb 2007) $ 5 * 6 * ==================================================================== 7 * 8 * Licensed to the Apache Software Foundation (ASF) under one or more 9 * contributor license agreements. See the NOTICE file distributed with 10 * this work for additional information regarding copyright ownership. 11 * The ASF licenses this file to You under the Apache License, Version 2.0 12 * (the "License"); you may not use this file except in compliance with 13 * the License. You may obtain a copy of the License at 14 * 15 * http://www.apache.org/licenses/LICENSE-2.0 16 * 17 * Unless required by applicable law or agreed to in writing, software 18 * distributed under the License is distributed on an "AS IS" BASIS, 19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 * See the License for the specific language governing permissions and 21 * limitations under the License. 22 * ==================================================================== 23 * 24 * This software consists of voluntary contributions made by many 25 * individuals on behalf of the Apache Software Foundation. For more 26 * information on the Apache Software Foundation, please see 27 * <http://www.apache.org/>. 28 * 29 */ 30 31 package org.apache.commons.httpclient.util; 32 33 import java.util.BitSet; 34 35 import org.apache.commons.codec.DecoderException; 36 import org.apache.commons.codec.net.URLCodec; 37 import org.apache.commons.httpclient.URI; 38 import org.apache.commons.httpclient.URIException; 39 40 /** 41 * The URI escape and character encoding and decoding utility. 42 * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather 43 * than {@link org.apache.commons.httpclient.URI}. 44 * 45 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a> 46 * @version $Revision: 507321 $ $Date: 2002/03/14 15:14:01 47 */ 48 public class URIUtil { 49 50 // ----------------------------------------------------- Instance variables 51 52 protected static final BitSet empty = new BitSet(1); 53 54 // ---------------------------------------------------------- URI utilities 55 56 /** 57 * Get the basename of an URI. It's possibly an empty string. 58 * 59 * @param uri a string regarded an URI 60 * @return the basename string; an empty string if the path ends with slash 61 */ getName(String uri)62 public static String getName(String uri) { 63 if (uri == null || uri.length() == 0) { return uri; } 64 String path = URIUtil.getPath(uri); 65 int at = path.lastIndexOf("/"); 66 int to = path.length(); 67 return (at >= 0) ? path.substring(at + 1, to) : path; 68 } 69 70 71 /** 72 * Get the query of an URI. 73 * 74 * @param uri a string regarded an URI 75 * @return the query string; <code>null</code> if empty or undefined 76 */ getQuery(String uri)77 public static String getQuery(String uri) { 78 if (uri == null || uri.length() == 0) { return null; } 79 // consider of net_path 80 int at = uri.indexOf("//"); 81 int from = uri.indexOf( 82 "/", 83 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 84 ); 85 // the authority part of URI ignored 86 int to = uri.length(); 87 // reuse the at and from variables to consider the query 88 at = uri.indexOf("?", from); 89 if (at >= 0) { 90 from = at + 1; 91 } else { 92 return null; 93 } 94 // check the fragment 95 if (uri.lastIndexOf("#") > from) { 96 to = uri.lastIndexOf("#"); 97 } 98 // get the path and query. 99 return (from < 0 || from == to) ? null : uri.substring(from, to); 100 } 101 102 103 /** 104 * Get the path of an URI. 105 * 106 * @param uri a string regarded an URI 107 * @return the path string 108 */ getPath(String uri)109 public static String getPath(String uri) { 110 if (uri == null) { 111 return null; 112 } 113 // consider of net_path 114 int at = uri.indexOf("//"); 115 int from = uri.indexOf( 116 "/", 117 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 118 ); 119 // the authority part of URI ignored 120 int to = uri.length(); 121 // check the query 122 if (uri.indexOf('?', from) != -1) { 123 to = uri.indexOf('?', from); 124 } 125 // check the fragment 126 if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) { 127 to = uri.lastIndexOf("#"); 128 } 129 // get only the path. 130 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to); 131 } 132 133 134 /** 135 * Get the path and query of an URI. 136 * 137 * @param uri a string regarded an URI 138 * @return the path and query string 139 */ getPathQuery(String uri)140 public static String getPathQuery(String uri) { 141 if (uri == null) { 142 return null; 143 } 144 // consider of net_path 145 int at = uri.indexOf("//"); 146 int from = uri.indexOf( 147 "/", 148 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 149 ); 150 // the authority part of URI ignored 151 int to = uri.length(); 152 // Ignore the '?' mark so to ignore the query. 153 // check the fragment 154 if (uri.lastIndexOf("#") > from) { 155 to = uri.lastIndexOf("#"); 156 } 157 // get the path and query. 158 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to); 159 } 160 161 162 /** 163 * Get the path of an URI and its rest part. 164 * 165 * @param uri a string regarded an URI 166 * @return the string from the path part 167 */ getFromPath(String uri)168 public static String getFromPath(String uri) { 169 if (uri == null) { 170 return null; 171 } 172 // consider of net_path 173 int at = uri.indexOf("//"); 174 int from = uri.indexOf( 175 "/", 176 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0 177 ); 178 // get the path and its rest. 179 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from); 180 } 181 182 // ----------------------------------------------------- Encoding utilities 183 184 /** 185 * Get the all escaped and encoded string with the default protocl charset. 186 * It's the same function to use <code>encode(String unescaped, Bitset 187 * empty, URI.getDefaultProtocolCharset())</code>. 188 * 189 * @param unescaped an unescaped string 190 * @return the escaped string 191 * 192 * @throws URIException if the default protocol charset is not supported 193 * 194 * @see URI#getDefaultProtocolCharset 195 * @see #encode 196 */ encodeAll(String unescaped)197 public static String encodeAll(String unescaped) throws URIException { 198 return encodeAll(unescaped, URI.getDefaultProtocolCharset()); 199 } 200 201 202 /** 203 * Get the all escaped and encoded string with a given charset. 204 * It's the same function to use <code>encode(String unescaped, Bitset 205 * empty, String charset)</code>. 206 * 207 * @param unescaped an unescaped string 208 * @param charset the charset 209 * @return the escaped string 210 * 211 * @throws URIException if the charset is not supported 212 * 213 * @see #encode 214 */ encodeAll(String unescaped, String charset)215 public static String encodeAll(String unescaped, String charset) 216 throws URIException { 217 218 return encode(unescaped, empty, charset); 219 } 220 221 222 /** 223 * Escape and encode a string regarded as within the authority component of 224 * an URI with the default protocol charset. 225 * Within the authority component, the characters ";", ":", "@", "?", and 226 * "/" are reserved. 227 * 228 * @param unescaped an unescaped string 229 * @return the escaped string 230 * 231 * @throws URIException if the default protocol charset is not supported 232 * 233 * @see URI#getDefaultProtocolCharset 234 * @see #encode 235 */ encodeWithinAuthority(String unescaped)236 public static String encodeWithinAuthority(String unescaped) 237 throws URIException { 238 239 return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset()); 240 } 241 242 243 /** 244 * Escape and encode a string regarded as within the authority component of 245 * an URI with a given charset. 246 * Within the authority component, the characters ";", ":", "@", "?", and 247 * "/" are reserved. 248 * 249 * @param unescaped an unescaped string 250 * @param charset the charset 251 * @return the escaped string 252 * 253 * @throws URIException if the charset is not supported 254 * 255 * @see #encode 256 */ encodeWithinAuthority(String unescaped, String charset)257 public static String encodeWithinAuthority(String unescaped, String charset) 258 throws URIException { 259 260 return encode(unescaped, URI.allowed_within_authority, charset); 261 } 262 263 264 /** 265 * Escape and encode a string regarded as the path and query components of 266 * an URI with the default protocol charset. 267 * 268 * @param unescaped an unescaped string 269 * @return the escaped string 270 * 271 * @throws URIException if the default protocol charset is not supported 272 * 273 * @see URI#getDefaultProtocolCharset 274 * @see #encode 275 */ encodePathQuery(String unescaped)276 public static String encodePathQuery(String unescaped) throws URIException { 277 return encodePathQuery(unescaped, URI.getDefaultProtocolCharset()); 278 } 279 280 281 /** 282 * Escape and encode a string regarded as the path and query components of 283 * an URI with a given charset. 284 * 285 * @param unescaped an unescaped string 286 * @param charset the charset 287 * @return the escaped string 288 * 289 * @throws URIException if the charset is not supported 290 * 291 * @see #encode 292 */ encodePathQuery(String unescaped, String charset)293 public static String encodePathQuery(String unescaped, String charset) 294 throws URIException { 295 296 int at = unescaped.indexOf('?'); 297 if (at < 0) { 298 return encode(unescaped, URI.allowed_abs_path, charset); 299 } 300 // else 301 return encode(unescaped.substring(0, at), URI.allowed_abs_path, charset) 302 + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset); 303 } 304 305 306 /** 307 * Escape and encode a string regarded as within the path component of an 308 * URI with the default protocol charset. 309 * The path may consist of a sequence of path segments separated by a 310 * single slash "/" character. Within a path segment, the characters 311 * "/", ";", "=", and "?" are reserved. 312 * 313 * @param unescaped an unescaped string 314 * @return the escaped string 315 * 316 * @throws URIException if the default protocol charset is not supported 317 * 318 * @see URI#getDefaultProtocolCharset 319 * @see #encode 320 */ encodeWithinPath(String unescaped)321 public static String encodeWithinPath(String unescaped) 322 throws URIException { 323 324 return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset()); 325 } 326 327 328 /** 329 * Escape and encode a string regarded as within the path component of an 330 * URI with a given charset. 331 * The path may consist of a sequence of path segments separated by a 332 * single slash "/" character. Within a path segment, the characters 333 * "/", ";", "=", and "?" are reserved. 334 * 335 * @param unescaped an unescaped string 336 * @param charset the charset 337 * @return the escaped string 338 * 339 * @throws URIException if the charset is not supported 340 * 341 * @see #encode 342 */ encodeWithinPath(String unescaped, String charset)343 public static String encodeWithinPath(String unescaped, String charset) 344 throws URIException { 345 346 return encode(unescaped, URI.allowed_within_path, charset); 347 } 348 349 350 /** 351 * Escape and encode a string regarded as the path component of an URI with 352 * the default protocol charset. 353 * 354 * @param unescaped an unescaped string 355 * @return the escaped string 356 * 357 * @throws URIException if the default protocol charset is not supported 358 * 359 * @see URI#getDefaultProtocolCharset 360 * @see #encode 361 */ encodePath(String unescaped)362 public static String encodePath(String unescaped) throws URIException { 363 return encodePath(unescaped, URI.getDefaultProtocolCharset()); 364 } 365 366 367 /** 368 * Escape and encode a string regarded as the path component of an URI with 369 * a given charset. 370 * 371 * @param unescaped an unescaped string 372 * @param charset the charset 373 * @return the escaped string 374 * 375 * @throws URIException if the charset is not supported 376 * 377 * @see #encode 378 */ encodePath(String unescaped, String charset)379 public static String encodePath(String unescaped, String charset) 380 throws URIException { 381 382 return encode(unescaped, URI.allowed_abs_path, charset); 383 } 384 385 386 /** 387 * Escape and encode a string regarded as within the query component of an 388 * URI with the default protocol charset. 389 * When a query comprise the name and value pairs, it is used in order 390 * to encode each name and value string. The reserved special characters 391 * within a query component are being included in encoding the query. 392 * 393 * @param unescaped an unescaped string 394 * @return the escaped string 395 * 396 * @throws URIException if the default protocol charset is not supported 397 * 398 * @see URI#getDefaultProtocolCharset 399 * @see #encode 400 */ encodeWithinQuery(String unescaped)401 public static String encodeWithinQuery(String unescaped) 402 throws URIException { 403 404 return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset()); 405 } 406 407 408 /** 409 * Escape and encode a string regarded as within the query component of an 410 * URI with a given charset. 411 * When a query comprise the name and value pairs, it is used in order 412 * to encode each name and value string. The reserved special characters 413 * within a query component are being included in encoding the query. 414 * 415 * @param unescaped an unescaped string 416 * @param charset the charset 417 * @return the escaped string 418 * 419 * @throws URIException if the charset is not supported 420 * 421 * @see #encode 422 */ encodeWithinQuery(String unescaped, String charset)423 public static String encodeWithinQuery(String unescaped, String charset) 424 throws URIException { 425 426 return encode(unescaped, URI.allowed_within_query, charset); 427 } 428 429 430 /** 431 * Escape and encode a string regarded as the query component of an URI with 432 * the default protocol charset. 433 * When a query string is not misunderstood the reserved special characters 434 * ("&", "=", "+", ",", and "$") within a query component, this method 435 * is recommended to use in encoding the whole query. 436 * 437 * @param unescaped an unescaped string 438 * @return the escaped string 439 * 440 * @throws URIException if the default protocol charset is not supported 441 * 442 * @see URI#getDefaultProtocolCharset 443 * @see #encode 444 */ encodeQuery(String unescaped)445 public static String encodeQuery(String unescaped) throws URIException { 446 return encodeQuery(unescaped, URI.getDefaultProtocolCharset()); 447 } 448 449 450 /** 451 * Escape and encode a string regarded as the query component of an URI with 452 * a given charset. 453 * When a query string is not misunderstood the reserved special characters 454 * ("&", "=", "+", ",", and "$") within a query component, this method 455 * is recommended to use in encoding the whole query. 456 * 457 * @param unescaped an unescaped string 458 * @param charset the charset 459 * @return the escaped string 460 * 461 * @throws URIException if the charset is not supported 462 * 463 * @see #encode 464 */ encodeQuery(String unescaped, String charset)465 public static String encodeQuery(String unescaped, String charset) 466 throws URIException { 467 468 return encode(unescaped, URI.allowed_query, charset); 469 } 470 471 472 /** 473 * Escape and encode a given string with allowed characters not to be 474 * escaped and the default protocol charset. 475 * 476 * @param unescaped a string 477 * @param allowed allowed characters not to be escaped 478 * @return the escaped string 479 * 480 * @throws URIException if the default protocol charset is not supported 481 * 482 * @see URI#getDefaultProtocolCharset 483 */ encode(String unescaped, BitSet allowed)484 public static String encode(String unescaped, BitSet allowed) 485 throws URIException { 486 487 return encode(unescaped, allowed, URI.getDefaultProtocolCharset()); 488 } 489 490 491 /** 492 * Escape and encode a given string with allowed characters not to be 493 * escaped and a given charset. 494 * 495 * @param unescaped a string 496 * @param allowed allowed characters not to be escaped 497 * @param charset the charset 498 * @return the escaped string 499 */ encode(String unescaped, BitSet allowed, String charset)500 public static String encode(String unescaped, BitSet allowed, 501 String charset) throws URIException { 502 byte[] rawdata = URLCodec.encodeUrl(allowed, 503 EncodingUtil.getBytes(unescaped, charset)); 504 return EncodingUtil.getAsciiString(rawdata); 505 } 506 507 508 /** 509 * Unescape and decode a given string regarded as an escaped string with the 510 * default protocol charset. 511 * 512 * @param escaped a string 513 * @return the unescaped string 514 * 515 * @throws URIException if the string cannot be decoded (invalid) 516 * 517 * @see URI#getDefaultProtocolCharset 518 */ decode(String escaped)519 public static String decode(String escaped) throws URIException { 520 try { 521 byte[] rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(escaped)); 522 return EncodingUtil.getString(rawdata, URI.getDefaultProtocolCharset()); 523 } catch (DecoderException e) { 524 throw new URIException(e.getMessage()); 525 } 526 } 527 528 /** 529 * Unescape and decode a given string regarded as an escaped string. 530 * 531 * @param escaped a string 532 * @param charset the charset 533 * @return the unescaped string 534 * 535 * @throws URIException if the charset is not supported 536 * 537 * @see Coder#decode 538 */ decode(String escaped, String charset)539 public static String decode(String escaped, String charset) 540 throws URIException { 541 542 return Coder.decode(escaped.toCharArray(), charset); 543 } 544 545 // ---------------------------------------------------------- Inner classes 546 547 /** 548 * The basic and internal utility for URI escape and character encoding and 549 * decoding. 550 * 551 * @deprecated use org.apache.commons.codec.net.URLCodec 552 */ 553 protected static class Coder extends URI { 554 555 /** 556 * Escape and encode a given string with allowed characters not to be 557 * escaped. 558 * 559 * @param unescapedComponent an unescaped component 560 * @param allowed allowed characters not to be escaped 561 * @param charset the charset to encode 562 * @return the escaped and encoded string 563 * 564 * @throws URIException if the charset is not supported 565 * 566 * @deprecated use org.apache.commons.codec.net.URLCodec 567 */ encode(String unescapedComponent, BitSet allowed, String charset)568 public static char[] encode(String unescapedComponent, BitSet allowed, String charset) 569 throws URIException { 570 571 return URI.encode(unescapedComponent, allowed, charset); 572 } 573 574 575 /** 576 * Unescape and decode a given string. 577 * 578 * @param escapedComponent an being-unescaped component 579 * @param charset the charset to decode 580 * @return the escaped and encoded string 581 * 582 * @throws URIException if the charset is not supported 583 * 584 * @deprecated use org.apache.commons.codec.net.URLCodec 585 */ decode(char[] escapedComponent, String charset)586 public static String decode(char[] escapedComponent, String charset) 587 throws URIException { 588 589 return URI.decode(escapedComponent, charset); 590 } 591 592 593 /** 594 * Verify whether a given string is escaped or not 595 * 596 * @param original given characters 597 * @return true if the given character array is 7 bit ASCII-compatible. 598 */ verifyEscaped(char[] original)599 public static boolean verifyEscaped(char[] original) { 600 for (int i = 0; i < original.length; i++) { 601 int c = original[i]; 602 if (c > 128) { 603 return false; 604 } else if (c == '%') { 605 if (Character.digit(original[++i], 16) == -1 606 || Character.digit(original[++i], 16) == -1) { 607 return false; 608 } 609 } 610 } 611 return true; 612 } 613 614 615 /** 616 * Replace from a given character to given character in an array order 617 * for a given string. 618 * 619 * @param original a given string 620 * @param from a replacing character array 621 * @param to a replaced character array 622 * @return the replaced string 623 */ replace(String original, char[] from, char[] to)624 public static String replace(String original, char[] from, char[] to) { 625 for (int i = from.length; i > 0; --i) { 626 original = replace(original, from[i], to[i]); 627 } 628 return original; 629 } 630 631 632 /** 633 * Replace from a given character to given character for a given string. 634 * 635 * @param original a given string 636 * @param from a replacing character array 637 * @param to a replaced character array 638 * @return the replaced string 639 */ replace(String original, char from, char to)640 public static String replace(String original, char from, char to) { 641 StringBuffer result = new StringBuffer(original.length()); 642 int at, saved = 0; 643 do { 644 at = original.indexOf(from); 645 if (at >= 0) { 646 result.append(original.substring(0, at)); 647 result.append(to); 648 } else { 649 result.append(original.substring(saved)); 650 } 651 saved = at; 652 } while (at >= 0); 653 return result.toString(); 654 } 655 } 656 657 } 658 659