1 /** 2 * <copyright> 3 * 4 * Copyright (c) 2002, 2015 IBM Corporation and others. 5 * 6 * This program and the accompanying materials 7 * are made available under the terms of the Eclipse Public License 2.0 8 * which accompanies this distribution, and is available at 9 * https://www.eclipse.org/legal/epl-2.0/ 10 * 11 * SPDX-License-Identifier: EPL-2.0 12 * 13 * Contributors: 14 * IBM - Initial API and implementation 15 * 16 * </copyright> 17 * 18 * $Id: URI.java,v 1.34 2008/10/02 16:06:51 emerks Exp $ 19 */ 20 package org.eclipse.e4.ui.css.swt.helpers; 21 22 import java.io.File; 23 import java.lang.ref.WeakReference; 24 import java.util.ArrayList; 25 import java.util.Arrays; 26 import java.util.Collections; 27 import java.util.HashMap; 28 import java.util.HashSet; 29 import java.util.Iterator; 30 import java.util.List; 31 import java.util.Map; 32 import java.util.Objects; 33 import java.util.Set; 34 import java.util.StringTokenizer; 35 36 /** 37 * A representation of a Uniform Resource Identifier (URI), as specified by 38 * <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>, with certain 39 * enhancements. A <code>URI</code> instance can be created by specifying 40 * values for its components, or by providing a single URI string, which is 41 * parsed into its components. Static factory methods whose names begin 42 * with "create" are used for both forms of object creation. No public or 43 * protected constructors are provided; this class can not be subclassed. 44 * 45 * <p>Like <code>String</code>, <code>URI</code> is an immutable class; 46 * a <code>URI</code> instance offers several by-value methods that return a 47 * new <code>URI</code> object based on its current state. Most useful, 48 * a relative <code>URI</code> can be {@link #resolve(URI) resolve}d against 49 * a base absolute <code>URI</code> -- the latter typically identifies the 50 * document in which the former appears. 
* The inverse to this is {@link
 * #deresolve(URI) deresolve}, which answers the question, "what relative
 * URI will resolve, against the given base, to this absolute URI?"
 *
 * <p>In the <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC</a>, much
 * attention is focused on a hierarchical naming system used widely to
 * locate resources via common protocols such as HTTP, FTP, and Gopher, and
 * to identify files on a local file system.  Accordingly, most of this
 * class's functionality is for handling such URIs, which can be identified
 * via {@link #isHierarchical isHierarchical}.
 *
 * <p id="device_explanation">
 * The primary enhancement beyond the RFC description is an optional
 * device component.  Instead of treating the device as just another segment
 * in the path, it can be stored as a separate component (almost a
 * sub-authority), with the root below it.  For example, resolving
 * <code>/bar</code> against <code>file:///c:/foo</code> would result in
 * <code>file:///c:/bar</code> being returned.  Also, you cannot take
 * the parent of a device, so resolving <code>..</code> against
 * <code>file:///c:/</code> would not yield <code>file:///</code>, as you
 * might expect.  This feature is useful when working with file-scheme
 * URIs, as devices do not typically occur in protocol-based ones.  A
 * device-enabled <code>URI</code> is created by parsing a string with
 * {@link #createURI(String) createURI}; if the first segment of the path
 * ends with the <code>:</code> character, it is stored (including the colon)
 * as the device, instead.  Alternatively, either the {@link
 * #createHierarchicalURI(String, String, String, String, String) no-path}
 * or the {@link #createHierarchicalURI(String, String, String, String[],
 * String, String) absolute-path} form of <code>createHierarchicalURI()</code>
 * can be used, in which a non-null <code>device</code> parameter can be
 * specified.
*
 * <p id="archive_explanation">
 * The other enhancement provides support for the almost-hierarchical
 * form used for files within archives, such as the JAR scheme, defined
 * for the Java Platform in the documentation for {@link
 * java.net.JarURLConnection}. By default, this support is enabled for
 * absolute URIs with scheme equal to "jar", "zip", or "archive" (ignoring case), and
 * is implemented by a hierarchical URI, whose authority includes the
 * entire URI of the archive, up to and including the <code>!</code>
 * character.  The URI of the archive must have no fragment.  The whole
 * archive URI must have no device and an absolute path.  Special handling
 * is supported for {@link #createURI(String) creating}, {@link
 * #validArchiveAuthority validating}, {@link #devicePath getting the path}
 * from, and {@link #toString() displaying} archive URIs. In all other
 * operations, including {@link #resolve(URI) resolving} and {@link
 * #deresolve(URI) deresolving}, they are handled like any ordinary URI.
 * The schemes that identify archive URIs can be changed from their default
 * by setting the <code>org.eclipse.emf.common.util.URI.archiveSchemes</code>
 * system property. Multiple schemes should be space separated, and the test
 * of whether a URI's scheme matches is always case-insensitive.
 *
 * <p>This implementation does not impose all of the restrictions on
 * character validity that are specified in the RFC.  Static methods whose
 * names begin with "valid" are used to test whether a given string is a valid
 * value for the various URI components.  Presently, these tests place no
 * restrictions beyond what would have been required in order for {@link
 * #createURI(String) createURI} to have parsed them correctly from a single
 * URI string.  If necessary in the future, these tests may be made more
 * strict, to better conform to the RFC.
*
 * <p>Another group of static methods, whose names begin with "encode", use
 * percent escaping to encode any characters that are not permitted in the
 * various URI components. Another static method is provided to {@link
 * #decode decode} encoded strings.  An escaped character is represented as
 * a percent symbol (<code>%</code>), followed by two hex digits that specify
 * the character code.  These encoding methods are more strict than the
 * validation methods described above.  They ensure validity according to the
 * RFC, with one exception: non-ASCII characters.
 *
 * <p>The RFC allows only characters that can be mapped to 7-bit US-ASCII
 * representations.  Non-ASCII, single-byte characters can be used only via
 * percent escaping, as described above.  This implementation uses Java's
 * Unicode <code>char</code> and <code>String</code> representations, and
 * makes no attempt to encode characters 0xA0 and above.  Characters in the
 * range 0x80-0x9F are still escaped.  In this respect, EMF's notion of a URI
 * is actually more like an IRI (Internationalized Resource Identifier), for
 * which an RFC is now in <a href="http://www.w3.org/International/iri-edit/draft-duerst-iri-09.txt">draft
 * form</a>.
 *
 * <p>Finally, note the difference between a <code>null</code> parameter to
 * the static factory methods and an empty string.  The former signifies the
 * absence of a given URI component, while the latter simply makes the
 * component blank.  This can have a significant effect when resolving.  For
 * example, consider the following two URIs: <code>/bar</code> (with no
 * authority) and <code>///bar</code> (with a blank authority).  Imagine
 * resolving them against a base with an authority, such as
 * <code>http://www.eclipse.org/</code>.  The former case will yield
 * <code>http://www.eclipse.org/bar</code>, as the base authority will be
 * preserved.
In the latter case, the empty authority will override the 140 * base authority, resulting in <code>http:///bar</code>! 141 */ 142 public final class URI 143 { 144 // Common to all URI types. 145 private final int hashCode; 146 private static final int HIERARICHICAL_FLAG = 0x0100; 147 private final String scheme; // null -> relative URI reference 148 private final String authority; 149 private final String fragment; 150 private URI cachedTrimFragment; 151 private String cachedToString; 152 //private final boolean iri; 153 //private URI cachedASCIIURI; 154 155 // Applicable only to a hierarchical URI. 156 private final String device; 157 private static final int ABSOLUTE_PATH_FLAG = 0x0010; 158 private final String[] segments; // empty last segment -> trailing separator 159 private final String query; 160 161 // A cache of URIs, keyed by the strings from which they were created. 162 // The fragment of any URI is removed before caching it here, to minimize 163 // the size of the cache in the usual case where most URIs only differ by 164 // the fragment. 165 private static final URICache uriCache = new URICache(); 166 167 private static class URICache extends HashMap<String,WeakReference<URI>> 168 { 169 private static final long serialVersionUID = 1L; 170 171 static final int MIN_LIMIT = 1000; 172 int count; 173 int limit = MIN_LIMIT; 174 get(String key)175 public synchronized URI get(String key) 176 { 177 WeakReference<URI> reference = super.get(key); 178 return reference == null ? 
null : reference.get(); 179 } 180 put(String key, URI value)181 public synchronized void put(String key, URI value) 182 { 183 super.put(key, new WeakReference<>(value)); 184 if (++count > limit) 185 { 186 cleanGCedValues(); 187 } 188 } 189 cleanGCedValues()190 private void cleanGCedValues() 191 { 192 for (Iterator<Map.Entry<String,WeakReference<URI>>> i = entrySet().iterator(); i.hasNext(); ) 193 { 194 Map.Entry<String,WeakReference<URI>> entry = i.next(); 195 WeakReference<URI> reference = entry.getValue(); 196 if (reference.get() == null) 197 { 198 i.remove(); 199 } 200 } 201 count = 0; 202 limit = Math.max(MIN_LIMIT, size() / 2); 203 } 204 } 205 206 // The lower-cased schemes that will be used to identify archive URIs. 207 private static final Set<String> archiveSchemes; 208 209 // Identifies a file-type absolute URI. 210 private static final String SCHEME_FILE = "file"; 211 private static final String SCHEME_JAR = "jar"; 212 private static final String SCHEME_ZIP = "zip"; 213 private static final String SCHEME_ARCHIVE = "archive"; 214 private static final String SCHEME_PLATFORM = "platform"; 215 216 // Special segment values interpreted at resolve and resolve time. 217 private static final String SEGMENT_EMPTY = ""; 218 private static final String SEGMENT_SELF = "."; 219 private static final String SEGMENT_PARENT = ".."; 220 private static final String[] NO_SEGMENTS = new String[0]; 221 222 // Separators for parsing a URI string. 
223 private static final char SCHEME_SEPARATOR = ':'; 224 private static final String AUTHORITY_SEPARATOR = "//"; 225 private static final char DEVICE_IDENTIFIER = ':'; 226 private static final char SEGMENT_SEPARATOR = '/'; 227 private static final char QUERY_SEPARATOR = '?'; 228 private static final char FRAGMENT_SEPARATOR = '#'; 229 private static final char USER_INFO_SEPARATOR = '@'; 230 private static final char PORT_SEPARATOR = ':'; 231 private static final char FILE_EXTENSION_SEPARATOR = '.'; 232 private static final char ARCHIVE_IDENTIFIER = '!'; 233 private static final String ARCHIVE_SEPARATOR = "!/"; 234 235 // Characters to use in escaping. 236 private static final char ESCAPE = '%'; 237 private static final char[] HEX_DIGITS = { 238 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; 239 240 // Some character classes, as defined in RFC 2396's BNF for URI. 241 // These are 128-bit bitmasks, stored as two longs, where the Nth bit is set 242 // iff the ASCII character with value N is included in the set. These are 243 // created with the highBitmask() and lowBitmask() methods defined below, 244 // and a character is tested against them using matches(). 
245 // 246 private static final long ALPHA_HI = highBitmask('a', 'z') | highBitmask('A', 'Z'); 247 private static final long ALPHA_LO = lowBitmask('a', 'z') | lowBitmask('A', 'Z'); 248 private static final long DIGIT_HI = highBitmask('0', '9'); 249 private static final long DIGIT_LO = lowBitmask('0', '9'); 250 private static final long ALPHANUM_HI = ALPHA_HI | DIGIT_HI; 251 private static final long ALPHANUM_LO = ALPHA_LO | DIGIT_LO; 252 private static final long HEX_HI = DIGIT_HI | highBitmask('A', 'F') | highBitmask('a', 'f'); 253 private static final long HEX_LO = DIGIT_LO | lowBitmask('A', 'F') | lowBitmask('a', 'f'); 254 private static final long UNRESERVED_HI = ALPHANUM_HI | highBitmask("-_.!~*'()"); 255 private static final long UNRESERVED_LO = ALPHANUM_LO | lowBitmask("-_.!~*'()"); 256 private static final long RESERVED_HI = highBitmask(";/?:@&=+$,"); 257 private static final long RESERVED_LO = lowBitmask(";/?:@&=+$,"); 258 private static final long URIC_HI = RESERVED_HI | UNRESERVED_HI; // | ucschar | escaped 259 private static final long URIC_LO = RESERVED_LO | UNRESERVED_LO; 260 261 // Additional useful character classes, including characters valid in certain 262 // URI components and separators used in parsing them out of a string. 
263 // 264 private static final long SEGMENT_CHAR_HI = UNRESERVED_HI | highBitmask(";:@&=+$,"); // | ucschar | escaped 265 private static final long SEGMENT_CHAR_LO = UNRESERVED_LO | lowBitmask(";:@&=+$,"); 266 private static final long PATH_CHAR_HI = SEGMENT_CHAR_HI | highBitmask('/'); // | ucschar | escaped 267 private static final long PATH_CHAR_LO = SEGMENT_CHAR_LO | lowBitmask('/'); 268 // private static final long SCHEME_CHAR_HI = ALPHANUM_HI | highBitmask("+-."); 269 // private static final long SCHEME_CHAR_LO = ALPHANUM_LO | lowBitmask("+-."); 270 private static final long MAJOR_SEPARATOR_HI = highBitmask(":/?#"); 271 private static final long MAJOR_SEPARATOR_LO = lowBitmask(":/?#"); 272 private static final long SEGMENT_END_HI = highBitmask("/?#"); 273 private static final long SEGMENT_END_LO = lowBitmask("/?#"); 274 275 // The intent of this was to switch over to encoding platform resource URIs 276 // by default, but allow people to use a system property to avoid this. 277 // However, that caused problems for people and we had to go back to not 278 // encoding and introduce yet another factory method that explicitly enables 279 // encoding. 280 // 281 private static final boolean ENCODE_PLATFORM_RESOURCE_URIS = 282 System.getProperty("org.eclipse.emf.common.util.URI.encodePlatformResourceURIs") != null && 283 !"false".equalsIgnoreCase(System.getProperty("org.eclipse.emf.common.util.URI.encodePlatformResourceURIs")); 284 285 // Static initializer for archiveSchemes. 
286 static 287 { 288 Set<String> set = new HashSet<>(); 289 String propertyValue = System.getProperty("org.eclipse.emf.common.util.URI.archiveSchemes"); 290 291 if (propertyValue == null) 292 { 293 set.add(SCHEME_JAR); 294 set.add(SCHEME_ZIP); 295 set.add(SCHEME_ARCHIVE); 296 } 297 else 298 { 299 for (StringTokenizer t = new StringTokenizer(propertyValue); t.hasMoreTokens(); ) 300 { 301 set.add(t.nextToken().toLowerCase()); 302 } 303 } 304 305 archiveSchemes = Collections.unmodifiableSet(set); 306 } 307 308 // Returns the lower half bitmask for the given ASCII character. lowBitmask(char c)309 private static long lowBitmask(char c) 310 { 311 return c < 64 ? 1L << c : 0L; 312 } 313 314 // Returns the upper half bitmask for the given ACSII character. highBitmask(char c)315 private static long highBitmask(char c) 316 { 317 return c >= 64 && c < 128 ? 1L << (c - 64) : 0L; 318 } 319 320 // Returns the lower half bitmask for all ASCII characters between the two 321 // given characters, inclusive. lowBitmask(char from, char to)322 private static long lowBitmask(char from, char to) 323 { 324 long result = 0L; 325 if (from < 64 && from <= to) 326 { 327 to = to < 64 ? to : 63; 328 for (char c = from; c <= to; c++) 329 { 330 result |= (1L << c); 331 } 332 } 333 return result; 334 } 335 336 // Returns the upper half bitmask for all AsCII characters between the two 337 // given characters, inclusive. 338 private static long highBitmask(char from, char to) 339 { 340 return to < 64 ? 0 : lowBitmask((char)(from < 64 ? 0 : from - 64), (char)(to - 64)); 341 } 342 343 // Returns the lower half bitmask for all the ASCII characters in the given 344 // string. 
345 private static long lowBitmask(String chars) 346 { 347 long result = 0L; 348 for (int i = 0, len = chars.length(); i < len; i++) 349 { 350 char c = chars.charAt(i); 351 if (c < 64) { 352 result |= (1L << c); 353 } 354 } 355 return result; 356 } 357 358 // Returns the upper half bitmask for all the ASCII characters in the given 359 // string. 360 private static long highBitmask(String chars) 361 { 362 long result = 0L; 363 for (int i = 0, len = chars.length(); i < len; i++) 364 { 365 char c = chars.charAt(i); 366 if (c >= 64 && c < 128) { 367 result |= (1L << (c - 64)); 368 } 369 } 370 return result; 371 } 372 373 // Returns whether the given character is in the set specified by the given 374 // bitmask. 375 private static boolean matches(char c, long highBitmask, long lowBitmask) 376 { 377 if (c >= 128) { 378 return false; 379 } 380 return c < 64 ? 381 ((1L << c) & lowBitmask) != 0 : 382 ((1L << (c - 64)) & highBitmask) != 0; 383 } 384 385 // Debugging method: converts the given long to a string of binary digits. 386 /* 387 private static String toBits(long l) 388 { 389 StringBuilder result = new StringBuilder(); 390 for (int i = 0; i < 64; i++) 391 { 392 boolean b = (l & 1L) != 0; 393 result.insert(0, b ? '1' : '0'); 394 l >>= 1; 395 } 396 return result.toString(); 397 } 398 */ 399 400 /** 401 * Static factory method for a generic, non-hierarchical URI. There is no 402 * concept of a relative non-hierarchical URI; such an object cannot be 403 * created. 404 * 405 * @exception java.lang.IllegalArgumentException if <code>scheme</code> is 406 * null, if <code>scheme</code> is an <a href="#archive_explanation">archive 407 * URI</a> scheme, or if <code>scheme</code>, <code>opaquePart</code>, or 408 * <code>fragment</code> is not valid according to {@link #validScheme 409 * validScheme}, {@link #validOpaquePart validOpaquePart}, or {@link 410 * #validFragment validFragment}, respectively. 
411 */ 412 public static URI createGenericURI(String scheme, String opaquePart, 413 String fragment) 414 { 415 if (scheme == null) 416 { 417 throw new IllegalArgumentException("relative non-hierarchical URI"); 418 } 419 420 if (isArchiveScheme(scheme)) 421 { 422 throw new IllegalArgumentException("non-hierarchical archive URI"); 423 } 424 425 validateURI(false, scheme, opaquePart, null, false, NO_SEGMENTS, null, fragment); 426 return new URI(false, scheme, opaquePart, null, false, NO_SEGMENTS, null, fragment); 427 } 428 429 /** 430 * Static factory method for a hierarchical URI with no path. The 431 * URI will be relative if <code>scheme</code> is non-null, and absolute 432 * otherwise. An absolute URI with no path requires a non-null 433 * <code>authority</code> and/or <code>device</code>. 434 * 435 * @exception java.lang.IllegalArgumentException if <code>scheme</code> is 436 * non-null while <code>authority</code> and <code>device</code> are null, 437 * if <code>scheme</code> is an <a href="#archive_explanation">archive 438 * URI</a> scheme, or if <code>scheme</code>, <code>authority</code>, 439 * <code>device</code>, <code>query</code>, or <code>fragment</code> is not 440 * valid according to {@link #validScheme validSheme}, {@link 441 * #validAuthority validAuthority}, {@link #validDevice validDevice}, 442 * {@link #validQuery validQuery}, or {@link #validFragment validFragment}, 443 * respectively. 
444 */ 445 public static URI createHierarchicalURI(String scheme, String authority, 446 String device, String query, 447 String fragment) 448 { 449 if (scheme != null && authority == null && device == null) 450 { 451 throw new IllegalArgumentException( 452 "absolute hierarchical URI without authority, device, path"); 453 } 454 455 if (isArchiveScheme(scheme)) 456 { 457 throw new IllegalArgumentException("archive URI with no path"); 458 } 459 460 validateURI(true, scheme, authority, device, false, NO_SEGMENTS, query, fragment); 461 return new URI(true, scheme, authority, device, false, NO_SEGMENTS, query, fragment); 462 } 463 464 /** 465 * Static factory method for a hierarchical URI with absolute path. 466 * The URI will be relative if <code>scheme</code> is non-null, and 467 * absolute otherwise. 468 * 469 * @param segments an array of non-null strings, each representing one 470 * segment of the path. As an absolute path, it is automatically 471 * preceded by a <code>/</code> separator. If desired, a trailing 472 * separator should be represented by an empty-string segment as the last 473 * element of the array. 474 * 475 * @exception java.lang.IllegalArgumentException if <code>scheme</code> is 476 * an <a href="#archive_explanation">archive URI</a> scheme and 477 * <code>device</code> is non-null, or if <code>scheme</code>, 478 * <code>authority</code>, <code>device</code>, <code>segments</code>, 479 * <code>query</code>, or <code>fragment</code> is not valid according to 480 * {@link #validScheme validScheme}, {@link #validAuthority validAuthority} 481 * or {@link #validArchiveAuthority validArchiveAuthority}, {@link 482 * #validDevice validDevice}, {@link #validSegments validSegments}, {@link 483 * #validQuery validQuery}, or {@link #validFragment validFragment}, as 484 * appropriate. 
485 */ 486 public static URI createHierarchicalURI(String scheme, String authority, 487 String device, String[] segments, 488 String query, String fragment) 489 { 490 if (isArchiveScheme(scheme) && device != null) 491 { 492 throw new IllegalArgumentException("archive URI with device"); 493 } 494 495 segments = fix(segments); 496 validateURI(true, scheme, authority, device, true, segments, query, fragment); 497 return new URI(true, scheme, authority, device, true, segments, query, fragment); 498 } 499 500 /** 501 * Static factory method for a relative hierarchical URI with relative 502 * path. 503 * 504 * @param segments an array of non-null strings, each representing one 505 * segment of the path. A trailing separator is represented by an 506 * empty-string segment at the end of the array. 507 * 508 * @exception java.lang.IllegalArgumentException if <code>segments</code>, 509 * <code>query</code>, or <code>fragment</code> is not valid according to 510 * {@link #validSegments validSegments}, {@link #validQuery validQuery}, or 511 * {@link #validFragment validFragment}, respectively. 512 */ 513 public static URI createHierarchicalURI(String[] segments, String query, 514 String fragment) 515 { 516 segments = fix(segments); 517 validateURI(true, null, null, null, false, segments, query, fragment); 518 return new URI(true, null, null, null, false, segments, query, fragment); 519 } 520 521 // Converts null to length-zero array, and clones array to ensure 522 // immutability. 523 private static String[] fix(String[] segments) 524 { 525 return segments == null ? NO_SEGMENTS : (String[])segments.clone(); 526 } 527 528 /** 529 * Static factory method based on parsing a URI string, with 530 * <a href="#device_explanation">explicit device support</a> and handling 531 * for <a href="#archive_explanation">archive URIs</a> enabled. 
The 532 * specified string is parsed as described in <a 533 * href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>, and an 534 * appropriate <code>URI</code> is created and returned. Note that 535 * validity testing is not as strict as in the RFC; essentially, only 536 * separator characters are considered. This method also does not perform 537 * encoding of invalid characters, so it should only be used when the URI 538 * string is known to have already been encoded, so as to avoid double 539 * encoding. 540 * 541 * @exception java.lang.IllegalArgumentException if any component parsed 542 * from <code>uri</code> is not valid according to {@link #validScheme 543 * validScheme}, {@link #validOpaquePart validOpaquePart}, {@link 544 * #validAuthority validAuthority}, {@link #validArchiveAuthority 545 * validArchiveAuthority}, {@link #validDevice validDevice}, {@link 546 * #validSegments validSegments}, {@link #validQuery validQuery}, or {@link 547 * #validFragment validFragment}, as appropriate. 548 */ 549 public static URI createURI(String uri) 550 { 551 return createURIWithCache(uri); 552 } 553 554 /** 555 * Static factory method that encodes and parses the given URI string. 556 * Appropriate encoding is performed for each component of the URI. 557 * If more than one <code>#</code> is in the string, the last one is 558 * assumed to be the fragment's separator, and any others are encoded. 559 * This method is the simplest way to safely parse an arbitrary URI string. 560 * 561 * @param ignoreEscaped <code>true</code> to leave <code>%</code> characters 562 * unescaped if they already begin a valid three-character escape sequence; 563 * <code>false</code> to encode all <code>%</code> characters. This 564 * capability is provided to allow partially encoded URIs to be "fixed", 565 * while avoiding adding double encoding; however, it is usual just to 566 * specify <code>false</code> to perform ordinary encoding. 
567 * 568 * @exception java.lang.IllegalArgumentException if any component parsed 569 * from <code>uri</code> is not valid according to {@link #validScheme 570 * validScheme}, {@link #validOpaquePart validOpaquePart}, {@link 571 * #validAuthority validAuthority}, {@link #validArchiveAuthority 572 * validArchiveAuthority}, {@link #validDevice validDevice}, {@link 573 * #validSegments validSegments}, {@link #validQuery validQuery}, or {@link 574 * #validFragment validFragment}, as appropriate. 575 */ 576 public static URI createURI(String uri, boolean ignoreEscaped) 577 { 578 return createURIWithCache(encodeURI(uri, ignoreEscaped, FRAGMENT_LAST_SEPARATOR)); 579 } 580 581 /** 582 * When specified as the last argument to {@link #createURI(String, boolean, int) 583 * createURI}, indicates that there is no fragment, so any <code>#</code> characters 584 * should be encoded. 585 * @see #createURI(String, boolean, int) 586 */ 587 public static final int FRAGMENT_NONE = 0; 588 589 /** 590 * When specified as the last argument to {@link #createURI(String, boolean, int) 591 * createURI}, indicates that the first <code>#</code> character should be taken as 592 * the fragment separator, and any others should be encoded. 593 * @see #createURI(String, boolean, int) 594 */ 595 public static final int FRAGMENT_FIRST_SEPARATOR = 1; 596 597 /** 598 * When specified as the last argument to {@link #createURI(String, boolean, int) 599 * createURI}, indicates that the last <code>#</code> character should be taken as 600 * the fragment separator, and any others should be encoded. 601 * @see #createURI(String, boolean, int) 602 */ 603 public static final int FRAGMENT_LAST_SEPARATOR = 2; 604 605 /** 606 * Static factory method that encodes and parses the given URI string. 607 * Appropriate encoding is performed for each component of the URI. 608 * Control is provided over which, if any, <code>#</code> should be 609 * taken as the fragment separator and which should be encoded. 
610 * This method is the preferred way to safely parse an arbitrary URI string 611 * that is known to contain <code>#</code> characters in the fragment or to 612 * have no fragment at all. 613 * 614 * @param ignoreEscaped <code>true</code> to leave <code>%</code> characters 615 * unescaped if they already begin a valid three-character escape sequence; 616 * <code>false</code> to encode all <code>%</code> characters. This 617 * capability is provided to allow partially encoded URIs to be "fixed", 618 * while avoiding adding double encoding; however, it is usual just to 619 * specify <code>false</code> to perform ordinary encoding. 620 * 621 * @param fragmentLocationStyle one of {@link #FRAGMENT_NONE}, 622 * {@link #FRAGMENT_FIRST_SEPARATOR}, or {@link #FRAGMENT_LAST_SEPARATOR}, 623 * indicating which, if any, of the <code>#</code> characters should be 624 * considered the fragment separator. Any others will be encoded. 625 * 626 * @exception java.lang.IllegalArgumentException if any component parsed 627 * from <code>uri</code> is not valid according to {@link #validScheme 628 * validScheme}, {@link #validOpaquePart validOpaquePart}, {@link 629 * #validAuthority validAuthority}, {@link #validArchiveAuthority 630 * validArchiveAuthority}, {@link #validDevice validDevice}, {@link 631 * #validSegments validSegments}, {@link #validQuery validQuery}, or {@link 632 * #validFragment validFragment}, as appropriate. 633 */ 634 public static URI createURI(String uri, boolean ignoreEscaped, int fragmentLocationStyle) 635 { 636 return createURIWithCache(encodeURI(uri, ignoreEscaped, fragmentLocationStyle)); 637 } 638 639 /** 640 * Static factory method based on parsing a URI string, with 641 * <a href="#device_explanation">explicit device support</a> enabled. 642 * Note that validity testing is not a strict as in the RFC; essentially, 643 * only separator characters are considered. 
So, for example, non-Latin 644 * alphabet characters appearing in the scheme would not be considered an 645 * error. 646 * 647 * @exception java.lang.IllegalArgumentException if any component parsed 648 * from <code>uri</code> is not valid according to {@link #validScheme 649 * validScheme}, {@link #validOpaquePart validOpaquePart}, {@link 650 * #validAuthority validAuthority}, {@link #validArchiveAuthority 651 * validArchiveAuthority}, {@link #validDevice validDevice}, {@link 652 * #validSegments validSegments}, {@link #validQuery validQuery}, or {@link 653 * #validFragment validFragment}, as appropriate. 654 * 655 * @deprecated Use {@link #createURI(String) createURI}, which now has explicit 656 * device support enabled. The two methods now operate identically. 657 */ 658 @Deprecated 659 public static URI createDeviceURI(String uri) 660 { 661 return createURIWithCache(uri); 662 } 663 664 // Uses a cache to speed up creation of a URI from a string. The cache 665 // is consulted to see if the URI, less any fragment, has already been 666 // created. If needed, the fragment is re-appended to the cached URI, 667 // which is considerably more efficient than creating the whole URI from 668 // scratch. If the URI wasn't found in the cache, it is created using 669 // parseIntoURI() and then cached. This method should always be used 670 // by string-parsing factory methods, instead of parseIntoURI() directly. 671 /** 672 * This method was included in the public API by mistake. 673 * 674 * @deprecated Please use {@link #createURI(String) createURI} instead. 675 */ 676 @Deprecated 677 public static URI createURIWithCache(String uri) 678 { 679 int i = uri.indexOf(FRAGMENT_SEPARATOR); 680 String base = i == -1 ? uri : uri.substring(0, i); 681 String fragment = i == -1 ? 
null : uri.substring(i + 1);

  // Look the fragment-less base up in the cache; parse and cache it on a miss.
  URI result = uriCache.get(base);

  if (result == null)
  {
    result = parseIntoURI(base);
    uriCache.put(base, result);
  }

  // The fragment is not part of the cached value; re-attach it to the result.
  if (fragment != null)
  {
    result = result.appendFragment(fragment);
  }
  return result;
}

// String-parsing implementation.  Splits the given string into scheme,
// authority (or opaque part), device, path segments, query and fragment,
// validates the components with validateURI and builds the URI from them.
// Throws IllegalArgumentException if an archive-scheme string contains no
// archive separator, or if validateURI rejects a component.
private static URI parseIntoURI(String uri)
{
  boolean hierarchical = true;
  String scheme = null;
  String authority = null;
  String device = null;
  boolean absolutePath = false;
  String[] segments = NO_SEGMENTS;
  String query = null;
  String fragment = null;

  // i is the start of the not-yet-consumed remainder; j is a scratch end index.
  int i = 0;
  int j = find(uri, i, MAJOR_SEPARATOR_HI, MAJOR_SEPARATOR_LO);

  // If the first major separator is the scheme separator, what precedes it
  // is the scheme.
  if (j < uri.length() && uri.charAt(j) == SCHEME_SEPARATOR)
  {
    scheme = uri.substring(i, j);
    i = j + 1;
  }

  boolean archiveScheme = isArchiveScheme(scheme);
  if (archiveScheme)
  {
    // Archive URI: the authority is everything after the scheme up to and
    // including the first character of the LAST archive separator.
    j = uri.lastIndexOf(ARCHIVE_SEPARATOR);
    if (j == -1)
    {
      throw new IllegalArgumentException("no archive separator");
    }
    hierarchical = true;
    authority = uri.substring(i, ++j);
    i = j;
  }
  else if (uri.startsWith(AUTHORITY_SEPARATOR, i))
  {
    // Explicit authority: it runs to the next segment-end character.
    i += AUTHORITY_SEPARATOR.length();
    j = find(uri, i, SEGMENT_END_HI, SEGMENT_END_LO);
    authority = uri.substring(i, j);
    i = j;
  }
  else if (scheme != null &&
           (i == uri.length() || uri.charAt(i) != SEGMENT_SEPARATOR))
  {
    // A scheme not followed by a segment separator: non-hierarchical URI.
    // The opaque part (up to any fragment separator) is kept in "authority".
    hierarchical = false;
    j = uri.indexOf(FRAGMENT_SEPARATOR, i);
    if (j == -1) {
      j = uri.length();
    }
    authority = uri.substring(i, j);
    i = j;
  }

  // Device detection: the first segment after a separator is taken as the
  // device when it ends with the device identifier character.
  if (!archiveScheme && i < uri.length() && uri.charAt(i) == SEGMENT_SEPARATOR)
  {
    j = find(uri, i + 1, SEGMENT_END_HI, SEGMENT_END_LO);
    String s = uri.substring(i + 1, j);

    if (s.length() > 0 && s.charAt(s.length() - 1) == DEVICE_IDENTIFIER)
    {
      device = s;
      i = j;
    }
  }

  // A separator at this point marks the path as absolute.
  if (i < uri.length() && uri.charAt(i) == SEGMENT_SEPARATOR)
  {
    i++;
    absolutePath = true;
  }

  if (segmentsRemain(uri, i))
  {
    List<String> segmentList = new ArrayList<>();

    while (segmentsRemain(uri, i))
    {
      j = find(uri, i, SEGMENT_END_HI, SEGMENT_END_LO);
      segmentList.add(uri.substring(i, j));
      i = j;

      if (i < uri.length() && uri.charAt(i) == SEGMENT_SEPARATOR)
      {
        // Skip the separator; if nothing follows it, the trailing separator
        // is represented by an empty final segment.
        if (!segmentsRemain(uri, ++i)) {
          segmentList.add(SEGMENT_EMPTY);
        }
      }
    }
    segments = new String[segmentList.size()];
    segmentList.toArray(segments);
  }

  // Query: everything after the query separator up to any fragment separator.
  if (i < uri.length() && uri.charAt(i) == QUERY_SEPARATOR)
  {
    j = uri.indexOf(FRAGMENT_SEPARATOR, ++i);
    if (j == -1) {
      j = uri.length();
    }
    query = uri.substring(i, j);
    i = j;
  }

  // Whatever remains is the fragment; the character at i is skipped.
  if (i < uri.length()) // && uri.charAt(i) == FRAGMENT_SEPARATOR (implied)
  {
    fragment = uri.substring(++i);
  }

  validateURI(hierarchical, scheme, authority, device, absolutePath, segments, query, fragment);
  return new URI(hierarchical, scheme, authority, device, absolutePath, segments, query, fragment);
}

// Checks whether the string contains any more segments after the one that
// starts at position i; that is, whether position i is inside the string and
// is neither the query separator nor the fragment separator.
private static boolean segmentsRemain(String uri, int i)
{
  return i < uri.length() && uri.charAt(i) != QUERY_SEPARATOR &&
    uri.charAt(i) != FRAGMENT_SEPARATOR;
}

// Finds the next occurrence of one of the characters in the set represented
// by the given bitmask in the given string, beginning at index i. The index
// of the first found character, or s.length() if there is none, is
// returned. Before searching, i is limited to the range [0, s.length()].
//
private static int find(String s, int i, long highBitmask, long lowBitmask)
{
  int len = s.length();
  if (i >= len) {
    return len;
  }

  // A negative start index is clamped to 0.
  for (i = i > 0 ? i : 0; i < len; i++)
  {
    if (matches(s.charAt(i), highBitmask, lowBitmask)) {
      break;
    }
  }
  return i;
}

/**
 * Static factory method based on parsing a {@link java.io.File} path
 * string.  The <code>pathName</code> is converted into an appropriate
 * form, as follows: platform specific path separators are converted to
 * <code>/</code>; the path is encoded; and a "file" scheme and, if missing,
 * a leading <code>/</code>, are added to an absolute path.  The result
 * is then parsed using {@link #createURI(String) createURI}.
 *
 * <p>The encoding step escapes all spaces, <code>#</code> characters, and
 * other characters disallowed in URIs, as well as <code>?</code>, which
 * would delimit a path from a query.  Decoding is automatically performed
 * by {@link #toFileString toFileString}, and can be applied to the values
 * returned by other accessors via the static {@link #decode(String)
 * decode} method.
 *
 * <p>A relative path with a specified device (something like
 * <code>C:myfile.txt</code>) cannot be expressed as a valid URI.
 *
 * @exception java.lang.IllegalArgumentException if <code>pathName</code>
 * specifies a device and a relative path, or if any component of the path
 * is not valid according to {@link #validAuthority validAuthority}, {@link
 * #validDevice validDevice}, or {@link #validSegments validSegments},
 * {@link #validQuery validQuery}, or {@link #validFragment validFragment}.
 */
public static URI createFileURI(String pathName)
{
  File file = new File(pathName);
  String uri = File.separatorChar != '/' ? pathName.replace(File.separatorChar, SEGMENT_SEPARATOR) : pathName;
  uri = encode(uri, PATH_CHAR_HI, PATH_CHAR_LO, false);
  if (file.isAbsolute())
  {
    // Add the "file" scheme, plus a leading separator when the path lacks one.
    return createURI((uri.charAt(0) == SEGMENT_SEPARATOR ? "file:" : "file:/") + uri);
  }
  else
  {
    // A relative path must not parse with a scheme (e.g. "C:myfile.txt").
    URI result = createURI(uri);
    if (result.scheme() != null)
    {
      throw new IllegalArgumentException("invalid relative pathName: " + pathName);
    }
    return result;
  }
}

/**
 * Static factory method based on parsing a workspace-relative path string.
 *
 * <p>The <code>pathName</code> must be of the form:
 * <pre>
 *   /project-name/path</pre>
 *
 * <p>Platform-specific path separators will be converted to slashes.
 * If not included, the leading path separator will be added.  The
 * result will be of this form, which is parsed using {@link #createURI(String)
 * createURI}:
 * <pre>
 *   platform:/resource/project-name/path</pre>
 *
 * <p>This scheme supports relocatable projects in Eclipse and in
 * stand-alone EMF.
 *
 * <p>Path encoding is performed only if the
 * <code>org.eclipse.emf.common.util.URI.encodePlatformResourceURIs</code>
 * system property is set to "true". Decoding can be performed with the
 * static {@link #decode(String) decode} method.
 *
 * @exception java.lang.IllegalArgumentException if any component parsed
 * from the path is not valid according to {@link #validDevice validDevice},
 * {@link #validSegments validSegments}, {@link #validQuery validQuery}, or
 * {@link #validFragment validFragment}.
 *
 * @see org.eclipse.core.runtime.Platform#resolve
 * @see #createPlatformResourceURI(String, boolean)
 * @deprecated Use {@link #createPlatformResourceURI(String, boolean)} instead.
 */
@Deprecated
public static URI createPlatformResourceURI(String pathName)
{
  return createPlatformResourceURI(pathName, ENCODE_PLATFORM_RESOURCE_URIS);
}

/**
 * Static factory method based on parsing a workspace-relative path string,
 * with an option to encode the created URI.
921 * 922 * <p>The <code>pathName</code> must be of the form: 923 * <pre> 924 * /project-name/path</pre> 925 * 926 * <p>Platform-specific path separators will be converted to slashes. 927 * If not included, the leading path separator will be added. The 928 * result will be of this form, which is parsed using {@link #createURI(String) 929 * createURI}: 930 * <pre> 931 * platform:/resource/project-name/path</pre> 932 * 933 * <p>This scheme supports relocatable projects in Eclipse and in 934 * stand-alone EMF. 935 * 936 * <p>Depending on the <code>encode</code> argument, the path may be 937 * automatically encoded to escape all spaces, <code>#</code> characters, 938 * and other characters disallowed in URIs, as well as <code>?</code>, 939 * which would delimit a path from a query. Decoding can be performed with 940 * the static {@link #decode(String) decode} method. It is strongly 941 * recommended to specify <code>true</code> to enable encoding, unless the 942 * path string has already been encoded. 943 * 944 * @exception java.lang.IllegalArgumentException if any component parsed 945 * from the path is not valid according to {@link #validDevice validDevice}, 946 * {@link #validSegments validSegments}, {@link #validQuery validQuery}, or 947 * {@link #validFragment validFragment}. 948 * 949 * @see org.eclipse.core.runtime.Platform#resolve 950 */ 951 public static URI createPlatformResourceURI(String pathName, boolean encode) 952 { 953 return createPlatformURI("platform:/resource", "platform:/resource/", pathName, encode); 954 } 955 956 /** 957 * Static factory method based on parsing a plug-in-based path string, 958 * with an option to encode the created URI. 959 * 960 * <p>The <code>pathName</code> must be of the form: 961 * <pre> 962 * /plugin-id/path</pre> 963 * 964 * <p>Platform-specific path separators will be converted to slashes. 965 * If not included, the leading path separator will be added. 
The 966 * result will be of this form, which is parsed using {@link #createURI(String) 967 * createURI}: 968 * <pre> 969 * platform:/plugin/plugin-id/path</pre> 970 * 971 * <p>This scheme supports relocatable plug-in content in Eclipse. 972 * 973 * <p>Depending on the <code>encode</code> argument, the path may be 974 * automatically encoded to escape all spaces, <code>#</code> characters, 975 * and other characters disallowed in URIs, as well as <code>?</code>, 976 * which would delimit a path from a query. Decoding can be performed with 977 * the static {@link #decode(String) decode} method. It is strongly 978 * recommended to specify <code>true</code> to enable encoding, unless the 979 * path string has already been encoded. 980 * 981 * @exception java.lang.IllegalArgumentException if any component parsed 982 * from the path is not valid according to {@link #validDevice validDevice}, 983 * {@link #validSegments validSegments}, {@link #validQuery validQuery}, or 984 * {@link #validFragment validFragment}. 985 * 986 * @see org.eclipse.core.runtime.Platform#resolve 987 * @since org.eclipse.emf.common 2.3 988 */ 989 public static URI createPlatformPluginURI(String pathName, boolean encode) 990 { 991 return createPlatformURI("platform:/plugin", "platform:/plugin/", pathName, encode); 992 } 993 994 // Private constructor for use of platform factory methods. 995 private static URI createPlatformURI(String unrootedBase, String rootedBase, String pathName, boolean encode) 996 { 997 if (File.separatorChar != SEGMENT_SEPARATOR) 998 { 999 pathName = pathName.replace(File.separatorChar, SEGMENT_SEPARATOR); 1000 } 1001 1002 if (encode) 1003 { 1004 pathName = encode(pathName, PATH_CHAR_HI, PATH_CHAR_LO, false); 1005 } 1006 return createURI((pathName.charAt(0) == SEGMENT_SEPARATOR ? unrootedBase : rootedBase) + pathName); 1007 } 1008 1009 // Private constructor for use of static factory methods. 
private URI(boolean hierarchical, String scheme, String authority,
            String device, boolean absolutePath, String[] segments,
            String query, String fragment)
{
  // The hash code is computed once here.  Two of its bits are then
  // overwritten to record the hierarchical and absolute-path flags, which
  // isHierarchical() and hasAbsolutePath() read back from the stored value.
  int tmpHashCode = 0;
  if (scheme != null)
  {
    // Schemes compare case-insensitively in equals(), so hash a lower-cased
    // form.  Locale.ROOT keeps the result independent of the default locale:
    // with a Turkish default, "FILE".toLowerCase() would not equal "file".
    tmpHashCode ^= scheme.toLowerCase(java.util.Locale.ROOT).hashCode();
  }
  tmpHashCode ^= Objects.hashCode(authority);
  tmpHashCode ^= Objects.hashCode(device);
  tmpHashCode ^= Objects.hashCode(query);
  tmpHashCode ^= Objects.hashCode(fragment);
  for (String segment : segments) {
    tmpHashCode ^= segment.hashCode();
  }

  if (hierarchical)
  {
    tmpHashCode |= HIERARICHICAL_FLAG;
  }
  else
  {
    tmpHashCode &= ~HIERARICHICAL_FLAG;
  }
  if (absolutePath)
  {
    tmpHashCode |= ABSOLUTE_PATH_FLAG;
  }
  else
  {
    tmpHashCode &= ~ABSOLUTE_PATH_FLAG;
  }
  this.hashCode = tmpHashCode;
  this.scheme = scheme == null ? null : scheme.intern();
  this.authority = authority;
  this.device = device;
  this.segments = segments;
  this.query = query;
  this.fragment = fragment;
}

// Validates all of the URI components.  Factory methods should call this
// before using the constructor, though they must ensure that the
// inter-component requirements described in their own Javadocs are all
// satisfied, themselves.  If a new URI is being constructed out of
// an existing URI, this need not be called.  Instead, just the new
// components may be validated individually.
1058 private static void validateURI(boolean hierarchical, String scheme, 1059 String authority, String device, 1060 boolean absolutePath, String[] segments, 1061 String query, String fragment) 1062 { 1063 if (!validScheme(scheme)) 1064 { 1065 throw new IllegalArgumentException("invalid scheme: " + scheme); 1066 } 1067 if (!hierarchical && !validOpaquePart(authority)) 1068 { 1069 throw new IllegalArgumentException("invalid opaquePart: " + authority); 1070 } 1071 if (hierarchical && !isArchiveScheme(scheme) && !validAuthority(authority)) 1072 { 1073 throw new IllegalArgumentException("invalid authority: " + authority); 1074 } 1075 if (hierarchical && isArchiveScheme(scheme) && !validArchiveAuthority(authority)) 1076 { 1077 throw new IllegalArgumentException("invalid authority: " + authority); 1078 } 1079 if (!validDevice(device)) 1080 { 1081 throw new IllegalArgumentException("invalid device: " + device); 1082 } 1083 if (!validSegments(segments)) 1084 { 1085 String s = segments == null ? "invalid segments: null" : 1086 "invalid segment: " + firstInvalidSegment(segments); 1087 throw new IllegalArgumentException(s); 1088 } 1089 if (!validQuery(query)) 1090 { 1091 throw new IllegalArgumentException("invalid query: " + query); 1092 } 1093 if (!validFragment(fragment)) 1094 { 1095 throw new IllegalArgumentException("invalid fragment: " + fragment); 1096 } 1097 } 1098 1099 // Alternate, stricter implementations of the following validation methods 1100 // are provided, commented out, for possible future use... 1101 1102 /** 1103 * Returns <code>true</code> if the specified <code>value</code> would be 1104 * valid as the scheme component of a URI; <code>false</code> otherwise. 1105 * 1106 * <p>A valid scheme may be null or contain any characters except for the 1107 * following: <code>: / ? 
#</code> 1108 */ 1109 public static boolean validScheme(String value) 1110 { 1111 return value == null || !contains(value, MAJOR_SEPARATOR_HI, MAJOR_SEPARATOR_LO); 1112 1113 // <p>A valid scheme may be null, or consist of a single letter followed 1114 // by any number of letters, numbers, and the following characters: 1115 // <code>+ - .</code> 1116 1117 //if (value == null) return true; 1118 //return value.length() != 0 && 1119 // matches(value.charAt(0), ALPHA_HI, ALPHA_LO) && 1120 // validate(value, SCHEME_CHAR_HI, SCHEME_CHAR_LO, false, false); 1121 } 1122 1123 /** 1124 * Returns <code>true</code> if the specified <code>value</code> would be 1125 * valid as the opaque part component of a URI; <code>false</code> 1126 * otherwise. 1127 * 1128 * <p>A valid opaque part must be non-null, non-empty, and not contain the 1129 * <code>#</code> character. In addition, its first character must not be 1130 * <code>/</code> 1131 */ 1132 public static boolean validOpaquePart(String value) 1133 { 1134 return value != null && value.indexOf(FRAGMENT_SEPARATOR) == -1 && 1135 value.length() > 0 && value.charAt(0) != SEGMENT_SEPARATOR; 1136 1137 // <p>A valid opaque part must be non-null and non-empty. It may contain 1138 // any allowed URI characters, but its first character may not be 1139 // <code>/</code> 1140 1141 //return value != null && value.length() != 0 && 1142 // value.charAt(0) != SEGMENT_SEPARATOR && 1143 // validate(value, URIC_HI, URIC_LO, true, true); 1144 } 1145 1146 /** 1147 * Returns <code>true</code> if the specified <code>value</code> would be 1148 * valid as the authority component of a URI; <code>false</code> otherwise. 1149 * 1150 * <p>A valid authority may be null or contain any characters except for 1151 * the following: <code>/ ? 
#</code> 1152 */ 1153 public static boolean validAuthority(String value) 1154 { 1155 return value == null || !contains(value, SEGMENT_END_HI, SEGMENT_END_LO); 1156 1157 // A valid authority may be null or contain any allowed URI characters except 1158 // for the following: <code>/ ?</code> 1159 1160 //return value == null || validate(value, SEGMENT_CHAR_HI, SEGMENT_CHAR_LO, true, true); 1161 } 1162 1163 /** 1164 * Returns <code>true</code> if the specified <code>value</code> would be 1165 * valid as the authority component of an <a 1166 * href="#archive_explanation">archive URI</a>; <code>false</code> 1167 * otherwise. 1168 * 1169 * <p>To be valid, the authority, itself, must be a URI with no fragment, 1170 * followed by the character <code>!</code>. 1171 */ 1172 public static boolean validArchiveAuthority(String value) 1173 { 1174 if (value != null && value.length() > 0 && 1175 value.charAt(value.length() - 1) == ARCHIVE_IDENTIFIER) 1176 { 1177 try 1178 { 1179 URI archiveURI = createURI(value.substring(0, value.length() - 1)); 1180 return !archiveURI.hasFragment(); 1181 } 1182 catch (IllegalArgumentException e) 1183 { 1184 // Ignore the exception and return false. 1185 } 1186 } 1187 return false; 1188 } 1189 1190 /** 1191 * Tests whether the specified <code>value</code> would be valid as the 1192 * authority component of an <a href="#archive_explanation">archive 1193 * URI</a>. This method has been replaced by {@link #validArchiveAuthority 1194 * validArchiveAuthority} since the same form of URI is now supported 1195 * for schemes other than "jar". This now simply calls that method. 1196 * 1197 * @deprecated As of EMF 2.0, replaced by {@link #validArchiveAuthority 1198 * validArchiveAuthority}. 
1199 */ 1200 @Deprecated 1201 public static boolean validJarAuthority(String value) 1202 { 1203 return validArchiveAuthority(value); 1204 } 1205 1206 /** 1207 * Returns <code>true</code> if the specified <code>value</code> would be 1208 * valid as the device component of a URI; <code>false</code> otherwise. 1209 * 1210 * <p>A valid device may be null or non-empty, containing any characters 1211 * except for the following: <code>/ ? #</code> In addition, its last 1212 * character must be <code>:</code> 1213 */ 1214 public static boolean validDevice(String value) 1215 { 1216 if (value == null) { 1217 return true; 1218 } 1219 int len = value.length(); 1220 return len > 0 && value.charAt(len - 1) == DEVICE_IDENTIFIER && 1221 !contains(value, SEGMENT_END_HI, SEGMENT_END_LO); 1222 } 1223 1224 /** 1225 * Returns <code>true</code> if the specified <code>value</code> would be 1226 * a valid path segment of a URI; <code>false</code> otherwise. 1227 * 1228 * <p>A valid path segment must be non-null and not contain any of the 1229 * following characters: <code>/ ? #</code> 1230 */ 1231 public static boolean validSegment(String value) 1232 { 1233 return value != null && !contains(value, SEGMENT_END_HI, SEGMENT_END_LO); 1234 1235 // <p>A valid path segment must be non-null and may contain any allowed URI 1236 // characters except for the following: <code>/ ?</code> 1237 1238 //return value != null && validate(value, SEGMENT_CHAR_HI, SEGMENT_CHAR_LO, true, true); 1239 } 1240 1241 /** 1242 * Returns <code>true</code> if the specified <code>value</code> would be 1243 * a valid path segment array of a URI; <code>false</code> otherwise. 1244 * 1245 * <p>A valid path segment array must be non-null and contain only path 1246 * segments that are valid according to {@link #validSegment validSegment}. 
1247 */ 1248 public static boolean validSegments(String[] value) 1249 { 1250 if (value == null) { 1251 return false; 1252 } 1253 for (String segment : value) { 1254 if (!validSegment(segment)) { 1255 return false; 1256 } 1257 } 1258 return true; 1259 } 1260 1261 // Returns null if the specified value is null or would be a valid path 1262 // segment array of a URI; otherwise, the value of the first invalid 1263 // segment. 1264 private static String firstInvalidSegment(String[] value) 1265 { 1266 if (value == null) { 1267 return null; 1268 } 1269 for (String segment : value) { 1270 if (!validSegment(segment)) { 1271 return segment; 1272 } 1273 } 1274 return null; 1275 } 1276 1277 /** 1278 * Returns <code>true</code> if the specified <code>value</code> would be 1279 * valid as the query component of a URI; <code>false</code> otherwise. 1280 * 1281 * <p>A valid query may be null or contain any characters except for 1282 * <code>#</code> 1283 */ 1284 public static boolean validQuery(String value) 1285 { 1286 return value == null || value.indexOf(FRAGMENT_SEPARATOR) == -1; 1287 1288 // <p>A valid query may be null or contain any allowed URI characters. 1289 1290 //return value == null || validate(value, URIC_HI, URIC_LO, true, true); 1291 } 1292 1293 /** 1294 * Returns <code>true</code> if the specified <code>value</code> would be 1295 * valid as the fragment component of a URI; <code>false</code> otherwise. 1296 * 1297 * <p>A fragment is taken to be unconditionally valid. 1298 */ 1299 public static boolean validFragment(String value) 1300 { 1301 return true; 1302 1303 // <p>A valid fragment may be null or contain any allowed URI characters. 1304 1305 //return value == null || validate(value, URIC_HI, URIC_LO, true, true); 1306 } 1307 1308 // Searches the specified string for any characters in the set represented 1309 // by the 128-bit bitmask. Returns true if any occur, or false otherwise. 
1310 private static boolean contains(String s, long highBitmask, long lowBitmask) 1311 { 1312 for (int i = 0, len = s.length(); i < len; i++) 1313 { 1314 if (matches(s.charAt(i), highBitmask, lowBitmask)) { 1315 return true; 1316 } 1317 } 1318 return false; 1319 } 1320 1321 // Tests the non-null string value to see if it contains only ASCII 1322 // characters in the set represented by the specified 128-bit bitmask, 1323 // as well as, optionally, non-ASCII characters 0xA0 and above, and, 1324 // also optionally, escape sequences of % followed by two hex digits. 1325 // This method is used for the new, strict URI validation that is not 1326 // not currently in place. 1327 /* 1328 private static boolean validate(String value, long highBitmask, long lowBitmask, 1329 boolean allowNonASCII, boolean allowEscaped) 1330 { 1331 for (int i = 0, length = value.length(); i < length; i++) 1332 { 1333 char c = value.charAt(i); 1334 1335 if (matches(c, highBitmask, lowBitmask)) continue; 1336 if (allowNonASCII && c >= 160) continue; 1337 if (allowEscaped && isEscaped(value, i)) 1338 { 1339 i += 2; 1340 continue; 1341 } 1342 return false; 1343 } 1344 return true; 1345 } 1346 */ 1347 1348 /** 1349 * Returns <code>true</code> if this is a relative URI, or 1350 * <code>false</code> if it is an absolute URI. 1351 */ 1352 public boolean isRelative() 1353 { 1354 return scheme == null; 1355 } 1356 1357 /** 1358 * Returns <code>true</code> if this a a hierarchical URI, or 1359 * <code>false</code> if it is of the generic form. 1360 */ 1361 public boolean isHierarchical() 1362 { 1363 return (hashCode & HIERARICHICAL_FLAG) != 0; 1364 } 1365 1366 /** 1367 * Returns <code>true</code> if this is a hierarchical URI with an authority 1368 * component; <code>false</code> otherwise. 
1369 */ 1370 public boolean hasAuthority() 1371 { 1372 return isHierarchical() && authority != null; 1373 } 1374 1375 /** 1376 * Returns <code>true</code> if this is a non-hierarchical URI with an 1377 * opaque part component; <code>false</code> otherwise. 1378 */ 1379 public boolean hasOpaquePart() 1380 { 1381 // note: hierarchical -> authority != null 1382 return !isHierarchical(); 1383 } 1384 1385 /** 1386 * Returns <code>true</code> if this is a hierarchical URI with a device 1387 * component; <code>false</code> otherwise. 1388 */ 1389 public boolean hasDevice() 1390 { 1391 // note: device != null -> hierarchical 1392 return device != null; 1393 } 1394 1395 /** 1396 * Returns <code>true</code> if this is a hierarchical URI with an 1397 * absolute or relative path; <code>false</code> otherwise. 1398 */ 1399 public boolean hasPath() 1400 { 1401 // note: (absolutePath || authority == null) -> hierarchical 1402 // (authority == null && device == null && !absolutePath) -> scheme == null 1403 return hasAbsolutePath() || (authority == null && device == null); 1404 } 1405 1406 /** 1407 * Returns <code>true</code> if this is a hierarchical URI with an 1408 * absolute path, or <code>false</code> if it is non-hierarchical, has no 1409 * path, or has a relative path. 1410 */ 1411 public boolean hasAbsolutePath() 1412 { 1413 // note: absolutePath -> hierarchical 1414 return (hashCode & ABSOLUTE_PATH_FLAG) != 0; 1415 } 1416 1417 /** 1418 * Returns <code>true</code> if this is a hierarchical URI with a relative 1419 * path, or <code>false</code> if it is non-hierarchical, has no path, or 1420 * has an absolute path. 
1421 */ 1422 public boolean hasRelativePath() 1423 { 1424 // note: authority == null -> hierarchical 1425 // (authority == null && device == null && !absolutePath) -> scheme == null 1426 return authority == null && device == null && !hasAbsolutePath(); 1427 } 1428 1429 /** 1430 * Returns <code>true</code> if this is a hierarchical URI with an empty 1431 * relative path; <code>false</code> otherwise. 1432 * 1433 * <p>Note that <code>!hasEmpty()</code> does <em>not</em> imply that this 1434 * URI has any path segments; however, <code>hasRelativePath && 1435 * !hasEmptyPath()</code> does. 1436 */ 1437 public boolean hasEmptyPath() 1438 { 1439 // note: authority == null -> hierarchical 1440 // (authority == null && device == null && !absolutePath) -> scheme == null 1441 return authority == null && device == null && !hasAbsolutePath() && 1442 segments.length == 0; 1443 } 1444 1445 /** 1446 * Returns <code>true</code> if this is a hierarchical URI with a query 1447 * component; <code>false</code> otherwise. 1448 */ 1449 public boolean hasQuery() 1450 { 1451 // note: query != null -> hierarchical 1452 return query != null; 1453 } 1454 1455 /** 1456 * Returns <code>true</code> if this URI has a fragment component; 1457 * <code>false</code> otherwise. 1458 */ 1459 public boolean hasFragment() 1460 { 1461 return fragment != null; 1462 } 1463 1464 /** 1465 * Returns <code>true</code> if this is a current document reference; that 1466 * is, if it is a relative hierarchical URI with no authority, device or 1467 * query components, and no path segments; <code>false</code> is returned 1468 * otherwise. 
1469 */ 1470 public boolean isCurrentDocumentReference() 1471 { 1472 // note: authority == null -> hierarchical 1473 // (authority == null && device == null && !absolutePath) -> scheme == null 1474 return authority == null && device == null && !hasAbsolutePath() && 1475 segments.length == 0 && query == null; 1476 } 1477 1478 /** 1479 * Returns <code>true</code> if this is a {@link 1480 * #isCurrentDocumentReference() current document reference} with no 1481 * fragment component; <code>false</code> otherwise. 1482 * 1483 * @see #isCurrentDocumentReference() 1484 */ 1485 public boolean isEmpty() 1486 { 1487 // note: authority == null -> hierarchical 1488 // (authority == null && device == null && !absolutePath) -> scheme == null 1489 return authority == null && device == null && !hasAbsolutePath() && 1490 segments.length == 0 && query == null && fragment == null; 1491 } 1492 1493 /** 1494 * Returns <code>true</code> if this is a hierarchical URI that may refer 1495 * directly to a locally accessible file. This is considered to be the 1496 * case for a file-scheme absolute URI, or for a relative URI with no query; 1497 * <code>false</code> is returned otherwise. 1498 */ 1499 public boolean isFile() 1500 { 1501 return isHierarchical() && 1502 ((isRelative() && !hasQuery()) || SCHEME_FILE.equalsIgnoreCase(scheme)); 1503 } 1504 1505 /** 1506 * Returns <code>true</code> if this is a platform URI, that is, an absolute, 1507 * hierarchical URI, with "platform" scheme, no authority, and at least two 1508 * segments; <code>false</code> is returned otherwise. 
1509 * @since org.eclipse.emf.common 2.3 1510 */ 1511 public boolean isPlatform() 1512 { 1513 return isHierarchical() && !hasAuthority() && segmentCount() >= 2 && 1514 SCHEME_PLATFORM.equalsIgnoreCase(scheme); 1515 } 1516 1517 /** 1518 * Returns <code>true</code> if this is a platform resource URI, that is, 1519 * a {@link #isPlatform platform URI} whose first segment is "resource"; 1520 * <code>false</code> is returned otherwise. 1521 * @see #isPlatform 1522 * @since org.eclipse.emf.common 2.3 1523 */ 1524 public boolean isPlatformResource() 1525 { 1526 return isPlatform() && "resource".equals(segments[0]); 1527 } 1528 1529 /** 1530 * Returns <code>true</code> if this is a platform plug-in URI, that is, 1531 * a {@link #isPlatform platform URI} whose first segment is "plugin"; 1532 * <code>false</code> is returned otherwise. 1533 * @see #isPlatform 1534 * @since org.eclipse.emf.common 2.3 1535 */ 1536 public boolean isPlatformPlugin() 1537 { 1538 return isPlatform() && "plugin".equals(segments[0]); 1539 } 1540 1541 /** 1542 * Returns <code>true</code> if this is an archive URI. If so, it is also 1543 * hierarchical, with an authority (consisting of an absolute URI followed 1544 * by "!"), no device, and an absolute path. 1545 */ 1546 public boolean isArchive() 1547 { 1548 return isArchiveScheme(scheme); 1549 } 1550 1551 /** 1552 * Returns <code>true</code> if the specified <code>value</code> would be 1553 * valid as the scheme of an <a 1554 * href="#archive_explanation">archive URI</a>; <code>false</code> 1555 * otherwise. 1556 */ 1557 public static boolean isArchiveScheme(String value) 1558 { 1559 // Returns true if the given value is an archive scheme, as defined by 1560 // the org.eclipse.emf.common.util.URI.archiveSchemes system property. 1561 // By default, "jar", "zip", and "archive" are considered archives. 1562 return value != null && archiveSchemes.contains(value.toLowerCase()); 1563 } 1564 1565 /** 1566 * Returns the hash code. 
1567 */ 1568 @Override 1569 public int hashCode() 1570 { 1571 return hashCode; 1572 } 1573 1574 /** 1575 * Returns <code>true</code> if <code>object</code> is an instance of 1576 * <code>URI</code> equal to this one; <code>false</code> otherwise. 1577 * 1578 * <p>Equality is determined strictly by comparing components, not by 1579 * attempting to interpret what resource is being identified. The 1580 * comparison of schemes is case-insensitive. 1581 */ 1582 @Override 1583 public boolean equals(Object object) 1584 { 1585 if (this == object) { 1586 return true; 1587 } 1588 if (!(object instanceof URI)) { 1589 return false; 1590 } 1591 URI uri = (URI) object; 1592 1593 return hashCode == uri.hashCode() && 1594 equals(scheme, uri.scheme(), true) && 1595 equals(authority, isHierarchical() ? uri.authority() : uri.opaquePart()) && 1596 equals(device, uri.device()) && 1597 equals(query, uri.query()) && 1598 equals(fragment, uri.fragment()) && 1599 segmentsEqual(uri); 1600 } 1601 1602 // Tests whether this URI's path segment array is equal to that of the 1603 // given uri. 1604 private boolean segmentsEqual(URI uri) 1605 { 1606 if (segments.length != uri.segmentCount()) { 1607 return false; 1608 } 1609 for (int i = 0, len = segments.length; i < len; i++) 1610 { 1611 if (!segments[i].equals(uri.segment(i))) { 1612 return false; 1613 } 1614 } 1615 return true; 1616 } 1617 1618 // Tests two objects for equality, tolerating nulls; null is considered 1619 // to be a valid value that is only equal to itself. 1620 private static boolean equals(Object o1, Object o2) 1621 { 1622 return o1 == null ? o2 == null : o1.equals(o2); 1623 } 1624 1625 // Tests two strings for equality, tolerating nulls and optionally 1626 // ignoring case. 1627 private static boolean equals(String s1, String s2, boolean ignoreCase) 1628 { 1629 return s1 == null ? s2 == null : 1630 ignoreCase ? 
s1.equalsIgnoreCase(s2) : s1.equals(s2); 1631 } 1632 1633 /** 1634 * If this is an absolute URI, returns the scheme component; 1635 * <code>null</code> otherwise. 1636 */ 1637 public String scheme() 1638 { 1639 return scheme; 1640 } 1641 1642 /** 1643 * If this is a non-hierarchical URI, returns the opaque part component; 1644 * <code>null</code> otherwise. 1645 */ 1646 public String opaquePart() 1647 { 1648 return isHierarchical() ? null : authority; 1649 } 1650 1651 /** 1652 * If this is a hierarchical URI with an authority component, returns it; 1653 * <code>null</code> otherwise. 1654 */ 1655 public String authority() 1656 { 1657 return isHierarchical() ? authority : null; 1658 } 1659 1660 /** 1661 * If this is a hierarchical URI with an authority component that has a 1662 * user info portion, returns it; <code>null</code> otherwise. 1663 */ 1664 public String userInfo() 1665 { 1666 if (!hasAuthority()) { 1667 return null; 1668 } 1669 1670 int i = authority.indexOf(USER_INFO_SEPARATOR); 1671 return i < 0 ? null : authority.substring(0, i); 1672 } 1673 1674 /** 1675 * If this is a hierarchical URI with an authority component that has a 1676 * host portion, returns it; <code>null</code> otherwise. 1677 */ 1678 public String host() 1679 { 1680 if (!hasAuthority()) { 1681 return null; 1682 } 1683 1684 int i = authority.indexOf(USER_INFO_SEPARATOR); 1685 int j = authority.indexOf(PORT_SEPARATOR); 1686 return j < 0 ? authority.substring(i + 1) : authority.substring(i + 1, j); 1687 } 1688 1689 /** 1690 * If this is a hierarchical URI with an authority component that has a 1691 * port portion, returns it; <code>null</code> otherwise. 1692 */ 1693 public String port() 1694 { 1695 if (!hasAuthority()) { 1696 return null; 1697 } 1698 1699 int i = authority.indexOf(PORT_SEPARATOR); 1700 return i < 0 ? null : authority.substring(i + 1); 1701 } 1702 1703 /** 1704 * If this is a hierarchical URI with a device component, returns it; 1705 * <code>null</code> otherwise. 
1706 */ 1707 public String device() 1708 { 1709 return device; 1710 } 1711 1712 /** 1713 * If this is a hierarchical URI with a path, returns an array containing 1714 * the segments of the path; an empty array otherwise. The leading 1715 * separator in an absolute path is not represented in this array, but a 1716 * trailing separator is represented by an empty-string segment as the 1717 * final element. 1718 */ 1719 public String[] segments() 1720 { 1721 return segments.clone(); 1722 } 1723 1724 /** 1725 * Returns an unmodifiable list containing the same segments as the array 1726 * returned by {@link #segments segments}. 1727 */ 1728 public List<String> segmentsList() 1729 { 1730 return Collections.unmodifiableList(Arrays.asList(segments)); 1731 } 1732 1733 /** 1734 * Returns the number of elements in the segment array that would be 1735 * returned by {@link #segments segments}. 1736 */ 1737 public int segmentCount() 1738 { 1739 return segments.length; 1740 } 1741 1742 /** 1743 * Provides fast, indexed access to individual segments in the path 1744 * segment array. 1745 * 1746 * @exception java.lang.IndexOutOfBoundsException if <code>i < 0</code> or 1747 * <code>i >= segmentCount()</code>. 1748 */ 1749 public String segment(int i) 1750 { 1751 return segments[i]; 1752 } 1753 1754 /** 1755 * Returns the last segment in the segment array, or <code>null</code>. 1756 */ 1757 public String lastSegment() 1758 { 1759 int len = segments.length; 1760 if (len == 0) { 1761 return null; 1762 } 1763 return segments[len - 1]; 1764 } 1765 1766 /** 1767 * If this is a hierarchical URI with a path, returns a string 1768 * representation of the path; <code>null</code> otherwise. The path 1769 * consists of a leading segment separator character (a slash), if the 1770 * path is absolute, followed by the slash-separated path segments. If 1771 * this URI has a separate <a href="#device_explanation">device 1772 * component</a>, it is <em>not</em> included in the path. 
1773 */ 1774 public String path() 1775 { 1776 if (!hasPath()) { 1777 return null; 1778 } 1779 1780 StringBuilder result = new StringBuilder(); 1781 if (hasAbsolutePath()) { 1782 result.append(SEGMENT_SEPARATOR); 1783 } 1784 1785 for (int i = 0, len = segments.length; i < len; i++) 1786 { 1787 if (i != 0) { 1788 result.append(SEGMENT_SEPARATOR); 1789 } 1790 result.append(segments[i]); 1791 } 1792 return result.toString(); 1793 } 1794 1795 /** 1796 * If this is a hierarchical URI with a path, returns a string 1797 * representation of the path, including the authority and the 1798 * <a href="#device_explanation">device component</a>; 1799 * <code>null</code> otherwise. 1800 * 1801 * <p>If there is no authority, the format of this string is: 1802 * <pre> 1803 * device/pathSegment1/pathSegment2...</pre> 1804 * 1805 * <p>If there is an authority, it is: 1806 * <pre> 1807 * //authority/device/pathSegment1/pathSegment2...</pre> 1808 * 1809 * <p>For an <a href="#archive_explanation">archive URI</a>, it's just: 1810 * <pre> 1811 * authority/pathSegment1/pathSegment2...</pre> 1812 */ 1813 public String devicePath() 1814 { 1815 if (!hasPath()) { 1816 return null; 1817 } 1818 1819 StringBuilder result = new StringBuilder(); 1820 1821 if (hasAuthority()) 1822 { 1823 if (!isArchive()) { 1824 result.append(AUTHORITY_SEPARATOR); 1825 } 1826 result.append(authority); 1827 1828 if (hasDevice()) { 1829 result.append(SEGMENT_SEPARATOR); 1830 } 1831 } 1832 1833 if (hasDevice()) { 1834 result.append(device); 1835 } 1836 if (hasAbsolutePath()) { 1837 result.append(SEGMENT_SEPARATOR); 1838 } 1839 1840 for (int i = 0, len = segments.length; i < len; i++) 1841 { 1842 if (i != 0) { 1843 result.append(SEGMENT_SEPARATOR); 1844 } 1845 result.append(segments[i]); 1846 } 1847 return result.toString(); 1848 } 1849 1850 /** 1851 * If this is a hierarchical URI with a query component, returns it; 1852 * <code>null</code> otherwise. 
1853 */ 1854 public String query() 1855 { 1856 return query; 1857 } 1858 1859 1860 /** 1861 * Returns the URI formed from this URI and the given query. 1862 * 1863 * @exception java.lang.IllegalArgumentException if 1864 * <code>query</code> is not a valid query (portion) according 1865 * to {@link #validQuery validQuery}. 1866 */ 1867 public URI appendQuery(String query) 1868 { 1869 if (!validQuery(query)) 1870 { 1871 throw new IllegalArgumentException( 1872 "invalid query portion: " + query); 1873 } 1874 return new URI(isHierarchical(), scheme, authority, device, hasAbsolutePath(), segments, query, fragment); 1875 } 1876 1877 /** 1878 * If this URI has a non-null {@link #query query}, returns the URI 1879 * formed by removing it; this URI unchanged, otherwise. 1880 */ 1881 public URI trimQuery() 1882 { 1883 if (query == null) 1884 { 1885 return this; 1886 } 1887 else 1888 { 1889 return new URI(isHierarchical(), scheme, authority, device, hasAbsolutePath(), segments, null, fragment); 1890 } 1891 } 1892 1893 /** 1894 * If this URI has a fragment component, returns it; <code>null</code> 1895 * otherwise. 1896 */ 1897 public String fragment() 1898 { 1899 return fragment; 1900 } 1901 1902 /** 1903 * Returns the URI formed from this URI and the given fragment. 1904 * 1905 * @exception java.lang.IllegalArgumentException if 1906 * <code>fragment</code> is not a valid fragment (portion) according 1907 * to {@link #validFragment validFragment}. 
1908 */ 1909 public URI appendFragment(String fragment) 1910 { 1911 if (!validFragment(fragment)) 1912 { 1913 throw new IllegalArgumentException( 1914 "invalid fragment portion: " + fragment); 1915 } 1916 URI result = new URI(isHierarchical(), scheme, authority, device, hasAbsolutePath(), segments, query, fragment); 1917 1918 if (!hasFragment()) 1919 { 1920 result.cachedTrimFragment = this; 1921 } 1922 return result; 1923 } 1924 1925 /** 1926 * If this URI has a non-null {@link #fragment fragment}, returns the URI 1927 * formed by removing it; this URI unchanged, otherwise. 1928 */ 1929 public URI trimFragment() 1930 { 1931 if (fragment == null) 1932 { 1933 return this; 1934 } 1935 else if (cachedTrimFragment == null) 1936 { 1937 cachedTrimFragment = new URI(isHierarchical(), scheme, authority, device, hasAbsolutePath(), segments, query, null); 1938 } 1939 1940 return cachedTrimFragment; 1941 } 1942 1943 /** 1944 * Resolves this URI reference against a <code>base</code> absolute 1945 * hierarchical URI, returning the resulting absolute URI. If already 1946 * absolute, the URI itself is returned. URI resolution is described in 1947 * detail in section 5.2 of <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 1948 * 2396</a>, "Resolving Relative References to Absolute Form." 1949 * 1950 * <p>During resolution, empty segments, self references ("."), and parent 1951 * references ("..") are interpreted, so that they can be removed from the 1952 * path. Step 6(g) gives a choice of how to handle the case where parent 1953 * references point to a path above the root: the offending segments can 1954 * be preserved or discarded. This method preserves them. To have them 1955 * discarded, please use the two-parameter form of {@link 1956 * #resolve(URI, boolean) resolve}. 1957 * 1958 * @exception java.lang.IllegalArgumentException if <code>base</code> is 1959 * non-hierarchical or is relative. 
1960 */ 1961 public URI resolve(URI base) 1962 { 1963 return resolve(base, true); 1964 } 1965 1966 /** 1967 * Resolves this URI reference against a <code>base</code> absolute 1968 * hierarchical URI, returning the resulting absolute URI. If already 1969 * absolute, the URI itself is returned. URI resolution is described in 1970 * detail in section 5.2 of <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 1971 * 2396</a>, "Resolving Relative References to Absolute Form." 1972 * 1973 * <p>During resolution, empty segments, self references ("."), and parent 1974 * references ("..") are interpreted, so that they can be removed from the 1975 * path. Step 6(g) gives a choice of how to handle the case where parent 1976 * references point to a path above the root: the offending segments can 1977 * be preserved or discarded. This method can do either. 1978 * 1979 * @param preserveRootParents <code>true</code> if segments referring to the 1980 * parent of the root path are to be preserved; <code>false</code> if they 1981 * are to be discarded. 1982 * 1983 * @exception java.lang.IllegalArgumentException if <code>base</code> is 1984 * non-hierarchical or is relative. 
1985 */ 1986 public URI resolve(URI base, boolean preserveRootParents) 1987 { 1988 if (!base.isHierarchical() || base.isRelative()) 1989 { 1990 throw new IllegalArgumentException( 1991 "resolve against non-hierarchical or relative base"); 1992 } 1993 1994 // an absolute URI needs no resolving 1995 if (!isRelative()) { 1996 return this; 1997 } 1998 1999 // note: isRelative() -> hierarchical 2000 2001 String newAuthority = authority; 2002 String newDevice = device; 2003 boolean newAbsolutePath = hasAbsolutePath(); 2004 String[] newSegments = segments; 2005 String newQuery = query; 2006 // note: it's okay for two URIs to share a segments array, since 2007 // neither will ever modify it 2008 2009 if (authority == null) 2010 { 2011 // no authority: use base's 2012 newAuthority = base.authority(); 2013 2014 if (device == null) 2015 { 2016 // no device: use base's 2017 newDevice = base.device(); 2018 2019 if (hasEmptyPath() && query == null) 2020 { 2021 // current document reference: use base path and query 2022 newAbsolutePath = base.hasAbsolutePath(); 2023 newSegments = base.segments(); 2024 newQuery = base.query(); 2025 } 2026 else if (hasRelativePath()) 2027 { 2028 // relative path: merge with base and keep query (note: if the 2029 // base has no path and this a non-empty relative path, there is 2030 // an implied root in the resulting path) 2031 newAbsolutePath = base.hasAbsolutePath() || !hasEmptyPath(); 2032 newSegments = newAbsolutePath ? 
mergePath(base, preserveRootParents) 2033 : NO_SEGMENTS; 2034 } 2035 // else absolute path: keep it and query 2036 } 2037 // else keep device, path, and query 2038 } 2039 // else keep authority, device, path, and query 2040 2041 // always keep fragment, even if null, and use scheme from base; 2042 // no validation needed since all components are from existing URIs 2043 return new URI(true, base.scheme(), newAuthority, newDevice, 2044 newAbsolutePath, newSegments, newQuery, fragment); 2045 } 2046 2047 // Merges this URI's relative path with the base non-relative path. If 2048 // base has no path, treat it as the root absolute path, unless this has 2049 // no path either. 2050 private String[] mergePath(URI base, boolean preserveRootParents) 2051 { 2052 if (base.hasRelativePath()) 2053 { 2054 throw new IllegalArgumentException("merge against relative path"); 2055 } 2056 if (!hasRelativePath()) 2057 { 2058 throw new IllegalStateException("merge non-relative path"); 2059 } 2060 2061 int baseSegmentCount = base.segmentCount(); 2062 int segmentCount = segments.length; 2063 String[] stack = new String[baseSegmentCount + segmentCount]; 2064 int sp = 0; 2065 2066 // use a stack to accumulate segments of base, except for the last 2067 // (i.e. 
skip trailing separator and anything following it), and of 2068 // relative path 2069 for (int i = 0; i < baseSegmentCount - 1; i++) 2070 { 2071 sp = accumulate(stack, sp, base.segment(i), preserveRootParents); 2072 } 2073 2074 for (int i = 0; i < segmentCount; i++) 2075 { 2076 sp = accumulate(stack, sp, segments[i], preserveRootParents); 2077 } 2078 2079 // if the relative path is empty or ends in an empty segment, a parent 2080 // reference, or a self reference, add a trailing separator to a 2081 // non-empty path 2082 if (sp > 0 && (segmentCount == 0 || 2083 SEGMENT_EMPTY.equals(segments[segmentCount - 1]) || 2084 SEGMENT_PARENT.equals(segments[segmentCount - 1]) || 2085 SEGMENT_SELF.equals(segments[segmentCount - 1]))) 2086 { 2087 stack[sp++] = SEGMENT_EMPTY; 2088 } 2089 2090 // return a correctly sized result 2091 String[] result = new String[sp]; 2092 System.arraycopy(stack, 0, result, 0, sp); 2093 return result; 2094 } 2095 2096 // Adds a segment to a stack, skipping empty segments and self references, 2097 // and interpreting parent references. 
2098 private static int accumulate(String[] stack, int sp, String segment, 2099 boolean preserveRootParents) 2100 { 2101 if (SEGMENT_PARENT.equals(segment)) 2102 { 2103 if (sp == 0) 2104 { 2105 // special care must be taken for a root's parent reference: it is 2106 // either ignored or the symbolic reference itself is pushed 2107 if (preserveRootParents) { 2108 stack[sp++] = segment; 2109 } 2110 } 2111 else 2112 { 2113 // unless we're already accumulating root parent references, 2114 // parent references simply pop the last segment descended 2115 if (SEGMENT_PARENT.equals(stack[sp - 1])) { 2116 stack[sp++] = segment; 2117 } else { 2118 sp--; 2119 } 2120 } 2121 } 2122 else if (!SEGMENT_EMPTY.equals(segment) && !SEGMENT_SELF.equals(segment)) 2123 { 2124 // skip empty segments and self references; push everything else 2125 stack[sp++] = segment; 2126 } 2127 return sp; 2128 } 2129 2130 /** 2131 * Finds the shortest relative or, if necessary, the absolute URI that, 2132 * when resolved against the given <code>base</code> absolute hierarchical 2133 * URI using {@link #resolve(URI) resolve}, will yield this absolute URI. 2134 * If <code>base</code> is non-hierarchical or is relative, 2135 * or <code>this</code> is non-hierarchical or is relative, 2136 * <code>this</code> will be returned. 2137 */ 2138 public URI deresolve(URI base) 2139 { 2140 return deresolve(base, true, false, true); 2141 } 2142 2143 /** 2144 * Finds an absolute URI that, when resolved against the given 2145 * <code>base</code> absolute hierarchical URI using {@link 2146 * #resolve(URI, boolean) resolve}, will yield this absolute URI. 2147 * If <code>base</code> is non-hierarchical or is relative, 2148 * or <code>this</code> is non-hierarchical or is relative, 2149 * <code>this</code> will be returned. 2150 * 2151 * @param preserveRootParents the boolean argument to <code>resolve(URI, 2152 * boolean)</code> for which the returned URI should resolve to this URI. 
2153 * @param anyRelPath if <code>true</code>, the returned URI's path (if 2154 * any) will be relative, if possible. If <code>false</code>, the form of 2155 * the result's path will depend upon the next parameter. 2156 * @param shorterRelPath if <code>anyRelPath</code> is <code>false</code> 2157 * and this parameter is <code>true</code>, the returned URI's path (if 2158 * any) will be relative, if one can be found that is no longer (by number 2159 * of segments) than the absolute path. If both <code>anyRelPath</code> 2160 * and this parameter are <code>false</code>, it will be absolute. 2161 */ 2162 public URI deresolve(URI base, boolean preserveRootParents, 2163 boolean anyRelPath, boolean shorterRelPath) 2164 { 2165 if (!base.isHierarchical() || base.isRelative()) { 2166 return this; 2167 } 2168 2169 if (isRelative()) { 2170 return this; 2171 } 2172 2173 // note: these assertions imply that neither this nor the base URI has a 2174 // relative path; thus, both have either an absolute path or no path 2175 2176 // different scheme: need complete, absolute URI 2177 if (!scheme.equalsIgnoreCase(base.scheme())) { 2178 return this; 2179 } 2180 2181 // since base must be hierarchical, and since a non-hierarchical URI 2182 // must have both scheme and opaque part, the complete absolute URI is 2183 // needed to resolve to a non-hierarchical URI 2184 if (!isHierarchical()) { 2185 return this; 2186 } 2187 2188 String newAuthority = authority; 2189 String newDevice = device; 2190 boolean newAbsolutePath = hasAbsolutePath(); 2191 String[] newSegments = segments; 2192 String newQuery = query; 2193 2194 if (equals(authority, base.authority()) && 2195 (hasDevice() || hasPath() || (!base.hasDevice() && !base.hasPath()))) 2196 { 2197 // matching authorities and no device or path removal 2198 newAuthority = null; 2199 2200 if (equals(device, base.device()) && (hasPath() || !base.hasPath())) 2201 { 2202 // matching devices and no path removal 2203 newDevice = null; 2204 2205 // 
exception if (!hasPath() && base.hasPath()) 2206 2207 if (!anyRelPath && !shorterRelPath) 2208 { 2209 // user rejects a relative path: keep absolute or no path 2210 } 2211 else if (hasPath() == base.hasPath() && segmentsEqual(base) && 2212 equals(query, base.query())) 2213 { 2214 // current document reference: keep no path or query 2215 newAbsolutePath = false; 2216 newSegments = NO_SEGMENTS; 2217 newQuery = null; 2218 } 2219 else if (!hasPath() && !base.hasPath()) 2220 { 2221 // no paths: keep query only 2222 newAbsolutePath = false; 2223 newSegments = NO_SEGMENTS; 2224 } 2225 // exception if (!hasAbsolutePath()) 2226 else if (hasCollapsableSegments(preserveRootParents)) 2227 { 2228 // path form demands an absolute path: keep it and query 2229 } 2230 else 2231 { 2232 // keep query and select relative or absolute path based on length 2233 String[] rel = findRelativePath(base, preserveRootParents); 2234 if (anyRelPath || segments.length > rel.length) 2235 { 2236 // user demands a relative path or the absolute path is longer 2237 newAbsolutePath = false; 2238 newSegments = rel; 2239 } 2240 // else keep shorter absolute path 2241 } 2242 } 2243 // else keep device, path, and query 2244 } 2245 // else keep authority, device, path, and query 2246 2247 // always include fragment, even if null; 2248 // no validation needed since all components are from existing URIs 2249 return new URI(true, null, newAuthority, newDevice, newAbsolutePath, 2250 newSegments, newQuery, fragment); 2251 } 2252 2253 // Returns true if the non-relative path includes segments that would be 2254 // collapsed when resolving; false otherwise. If preserveRootParents is 2255 // true, collapsible segments include any empty segments, except for the 2256 // last segment, as well as and parent and self references. If 2257 // preserveRootsParents is false, parent references are not collapsible if 2258 // they are the first segment or preceded only by other parent 2259 // references. 
2260 private boolean hasCollapsableSegments(boolean preserveRootParents) 2261 { 2262 if (hasRelativePath()) 2263 { 2264 throw new IllegalStateException("test collapsability of relative path"); 2265 } 2266 2267 for (int i = 0, len = segments.length; i < len; i++) 2268 { 2269 String segment = segments[i]; 2270 if ((i < len - 1 && SEGMENT_EMPTY.equals(segment)) || 2271 SEGMENT_SELF.equals(segment) || 2272 SEGMENT_PARENT.equals(segment) && ( 2273 !preserveRootParents || ( 2274 i != 0 && !SEGMENT_PARENT.equals(segments[i - 1])))) 2275 { 2276 return true; 2277 } 2278 } 2279 return false; 2280 } 2281 2282 // Returns the shortest relative path between the the non-relative path of 2283 // the given base and this absolute path. If the base has no path, it is 2284 // treated as the root absolute path. 2285 private String[] findRelativePath(URI base, boolean preserveRootParents) 2286 { 2287 if (base.hasRelativePath()) 2288 { 2289 throw new IllegalArgumentException( 2290 "find relative path against base with relative path"); 2291 } 2292 if (!hasAbsolutePath()) 2293 { 2294 throw new IllegalArgumentException( 2295 "find relative path of non-absolute path"); 2296 } 2297 2298 // treat an empty base path as the root absolute path 2299 String[] startPath = base.collapseSegments(preserveRootParents); 2300 String[] endPath = segments; 2301 2302 // drop last segment from base, as in resolving 2303 int startCount = startPath.length > 0 ? 
startPath.length - 1 : 0; 2304 int endCount = endPath.length; 2305 2306 // index of first segment that is different between endPath and startPath 2307 int diff = 0; 2308 2309 // if endPath is shorter than startPath, the last segment of endPath may 2310 // not be compared: because startPath has been collapsed and had its 2311 // last segment removed, all preceding segments can be considered non- 2312 // empty and followed by a separator, while the last segment of endPath 2313 // will either be non-empty and not followed by a separator, or just empty 2314 for (int count = startCount < endCount ? startCount : endCount - 1; 2315 diff < count && startPath[diff].equals(endPath[diff]); diff++) 2316 { 2317 // Empty statement. 2318 } 2319 2320 int upCount = startCount - diff; 2321 int downCount = endCount - diff; 2322 2323 // a single separator, possibly preceded by some parent reference 2324 // segments, is redundant 2325 if (downCount == 1 && SEGMENT_EMPTY.equals(endPath[endCount - 1])) 2326 { 2327 downCount = 0; 2328 } 2329 2330 // an empty path needs to be replaced by a single "." if there is no 2331 // query, to distinguish it from a current document reference 2332 if (upCount + downCount == 0) 2333 { 2334 if (query == null) { 2335 return new String[] { SEGMENT_SELF }; 2336 } 2337 return NO_SEGMENTS; 2338 } 2339 2340 // return a correctly sized result 2341 String[] result = new String[upCount + downCount]; 2342 Arrays.fill(result, 0, upCount, SEGMENT_PARENT); 2343 System.arraycopy(endPath, diff, result, upCount, downCount); 2344 return result; 2345 } 2346 2347 // Collapses non-ending empty segments, parent references, and self 2348 // references in a non-relative path, returning the same path that would 2349 // be produced from the base hierarchical URI as part of a resolve. 
2350 String[] collapseSegments(boolean preserveRootParents) 2351 { 2352 if (hasRelativePath()) 2353 { 2354 throw new IllegalStateException("collapse relative path"); 2355 } 2356 2357 if (!hasCollapsableSegments(preserveRootParents)) { 2358 return segments(); 2359 } 2360 2361 // use a stack to accumulate segments 2362 int segmentCount = segments.length; 2363 String[] stack = new String[segmentCount]; 2364 int sp = 0; 2365 2366 for (int i = 0; i < segmentCount; i++) 2367 { 2368 sp = accumulate(stack, sp, segments[i], preserveRootParents); 2369 } 2370 2371 // if the path is non-empty and originally ended in an empty segment, a 2372 // parent reference, or a self reference, add a trailing separator 2373 if (sp > 0 && (SEGMENT_EMPTY.equals(segments[segmentCount - 1]) || 2374 SEGMENT_PARENT.equals(segments[segmentCount - 1]) || 2375 SEGMENT_SELF.equals(segments[segmentCount - 1]))) 2376 { 2377 stack[sp++] = SEGMENT_EMPTY; 2378 } 2379 2380 // return a correctly sized result 2381 String[] result = new String[sp]; 2382 System.arraycopy(stack, 0, result, 0, sp); 2383 return result; 2384 } 2385 2386 /** 2387 * Returns the string representation of this URI. For a generic, 2388 * non-hierarchical URI, this looks like: 2389 * <pre> 2390 * scheme:opaquePart#fragment</pre> 2391 * 2392 * <p>For a hierarchical URI, it looks like: 2393 * <pre> 2394 * scheme://authority/device/pathSegment1/pathSegment2...?query#fragment</pre> 2395 * 2396 * <p>For an <a href="#archive_explanation">archive URI</a>, it's just: 2397 * <pre> 2398 * scheme:authority/pathSegment1/pathSegment2...?query#fragment</pre> 2399 * <p>Of course, absent components and their separators will be omitted. 
2400 */ 2401 @Override 2402 public String toString() 2403 { 2404 if (cachedToString == null) 2405 { 2406 StringBuilder result = new StringBuilder(); 2407 if (!isRelative()) 2408 { 2409 result.append(scheme); 2410 result.append(SCHEME_SEPARATOR); 2411 } 2412 2413 if (isHierarchical()) 2414 { 2415 if (hasAuthority()) 2416 { 2417 if (!isArchive()) { 2418 result.append(AUTHORITY_SEPARATOR); 2419 } 2420 result.append(authority); 2421 } 2422 2423 if (hasDevice()) 2424 { 2425 result.append(SEGMENT_SEPARATOR); 2426 result.append(device); 2427 } 2428 2429 if (hasAbsolutePath()) { 2430 result.append(SEGMENT_SEPARATOR); 2431 } 2432 2433 for (int i = 0, len = segments.length; i < len; i++) 2434 { 2435 if (i != 0) { 2436 result.append(SEGMENT_SEPARATOR); 2437 } 2438 result.append(segments[i]); 2439 } 2440 2441 if (hasQuery()) 2442 { 2443 result.append(QUERY_SEPARATOR); 2444 result.append(query); 2445 } 2446 } 2447 else 2448 { 2449 result.append(authority); 2450 } 2451 2452 if (hasFragment()) 2453 { 2454 result.append(FRAGMENT_SEPARATOR); 2455 result.append(fragment); 2456 } 2457 cachedToString = result.toString(); 2458 } 2459 return cachedToString; 2460 } 2461 2462 // Returns a string representation of this URI for debugging, explicitly 2463 // showing each of the components. 
2464 String toString(boolean includeSimpleForm) 2465 { 2466 StringBuilder result = new StringBuilder(); 2467 if (includeSimpleForm) { 2468 result.append(toString()); 2469 } 2470 result.append("\n hierarchical: "); 2471 result.append(isHierarchical()); 2472 result.append("\n scheme: "); 2473 result.append(scheme); 2474 result.append("\n authority: "); 2475 result.append(authority); 2476 result.append("\n device: "); 2477 result.append(device); 2478 result.append("\n absolutePath: "); 2479 result.append(hasAbsolutePath()); 2480 result.append("\n segments: "); 2481 if (segments.length == 0) { 2482 result.append("<empty>"); 2483 } 2484 for (int i = 0, len = segments.length; i < len; i++) 2485 { 2486 if (i > 0) { 2487 result.append("\n "); 2488 } 2489 result.append(segments[i]); 2490 } 2491 result.append("\n query: "); 2492 result.append(query); 2493 result.append("\n fragment: "); 2494 result.append(fragment); 2495 return result.toString(); 2496 } 2497 2498 /** 2499 * If this URI may refer directly to a locally accessible file, as 2500 * determined by {@link #isFile isFile}, {@link #decode decodes} and formats 2501 * the URI as a pathname to that file; returns null otherwise. 2502 * 2503 * <p>If there is no authority, the format of this string is: 2504 * <pre> 2505 * device/pathSegment1/pathSegment2...</pre> 2506 * 2507 * <p>If there is an authority, it is: 2508 * <pre> 2509 * //authority/device/pathSegment1/pathSegment2...</pre> 2510 * 2511 * <p>However, the character used as a separator is system-dependent and 2512 * obtained from {@link java.io.File#separatorChar}. 
2513 */ 2514 public String toFileString() 2515 { 2516 if (!isFile()) { 2517 return null; 2518 } 2519 2520 StringBuilder result = new StringBuilder(); 2521 char separator = File.separatorChar; 2522 2523 if (hasAuthority()) 2524 { 2525 result.append(separator); 2526 result.append(separator); 2527 result.append(authority); 2528 2529 if (hasDevice()) { 2530 result.append(separator); 2531 } 2532 } 2533 2534 if (hasDevice()) { 2535 result.append(device); 2536 } 2537 if (hasAbsolutePath()) { 2538 result.append(separator); 2539 } 2540 2541 for (int i = 0, len = segments.length; i < len; i++) 2542 { 2543 if (i != 0) { 2544 result.append(separator); 2545 } 2546 result.append(segments[i]); 2547 } 2548 2549 return decode(result.toString()); 2550 } 2551 2552 /** 2553 * If this is a platform URI, as determined by {@link #isPlatform}, returns 2554 * the workspace-relative or plug-in-based path to the resource, optionally 2555 * {@link #decode decoding} the segments in the process. 2556 * @see #createPlatformResourceURI(String, boolean) 2557 * @see #createPlatformPluginURI 2558 * @since org.eclipse.emf.common 2.3 2559 */ 2560 public String toPlatformString(boolean decode) 2561 { 2562 if (isPlatform()) 2563 { 2564 StringBuilder result = new StringBuilder(); 2565 for (int i = 1, len = segments.length; i < len; i++) 2566 { 2567 result.append('/').append(decode ? URI.decode(segments[i]) : segments[i]); 2568 } 2569 return result.toString(); 2570 } 2571 return null; 2572 } 2573 2574 /** 2575 * Returns the URI formed by appending the specified segment on to the end 2576 * of the path of this URI, if hierarchical; this URI unchanged, 2577 * otherwise. If this URI has an authority and/or device, but no path, 2578 * the segment becomes the first under the root in an absolute path. 2579 * 2580 * @exception java.lang.IllegalArgumentException if <code>segment</code> 2581 * is not a valid segment according to {@link #validSegment}. 
2582 */ 2583 public URI appendSegment(String segment) 2584 { 2585 if (!validSegment(segment)) 2586 { 2587 throw new IllegalArgumentException("invalid segment: " + segment); 2588 } 2589 2590 if (!isHierarchical()) { 2591 return this; 2592 } 2593 2594 // absolute path or no path -> absolute path 2595 boolean newAbsolutePath = !hasRelativePath(); 2596 2597 int len = segments.length; 2598 String[] newSegments = new String[len + 1]; 2599 System.arraycopy(segments, 0, newSegments, 0, len); 2600 newSegments[len] = segment; 2601 2602 return new URI(true, scheme, authority, device, newAbsolutePath, 2603 newSegments, query, fragment); 2604 } 2605 2606 /** 2607 * Returns the URI formed by appending the specified segments on to the 2608 * end of the path of this URI, if hierarchical; this URI unchanged, 2609 * otherwise. If this URI has an authority and/or device, but no path, 2610 * the segments are made to form an absolute path. 2611 * 2612 * @param segments an array of non-null strings, each representing one 2613 * segment of the path. If desired, a trailing separator should be 2614 * represented by an empty-string segment as the last element of the 2615 * array. 2616 * 2617 * @exception java.lang.IllegalArgumentException if <code>segments</code> 2618 * is not a valid segment array according to {@link #validSegments}. 2619 */ 2620 public URI appendSegments(String[] segments) 2621 { 2622 if (!validSegments(segments)) 2623 { 2624 String s = segments == null ? 
"invalid segments: null" : 2625 "invalid segment: " + firstInvalidSegment(segments); 2626 throw new IllegalArgumentException(s); 2627 } 2628 2629 if (!isHierarchical()) { 2630 return this; 2631 } 2632 2633 // absolute path or no path -> absolute path 2634 boolean newAbsolutePath = !hasRelativePath(); 2635 2636 int len = this.segments.length; 2637 int segmentsCount = segments.length; 2638 String[] newSegments = new String[len + segmentsCount]; 2639 System.arraycopy(this.segments, 0, newSegments, 0, len); 2640 System.arraycopy(segments, 0, newSegments, len, segmentsCount); 2641 2642 return new URI(true, scheme, authority, device, newAbsolutePath, 2643 newSegments, query, fragment); 2644 } 2645 2646 /** 2647 * Returns the URI formed by trimming the specified number of segments 2648 * (including empty segments, such as one representing a trailing 2649 * separator) from the end of the path of this URI, if hierarchical; 2650 * otherwise, this URI is returned unchanged. 2651 * 2652 * <p>Note that if all segments are trimmed from an absolute path, the 2653 * root absolute path remains. 2654 * 2655 * @param i the number of segments to be trimmed in the returned URI. If 2656 * less than 1, this URI is returned unchanged; if equal to or greater 2657 * than the number of segments in this URI's path, all segments are 2658 * trimmed. 2659 */ 2660 public URI trimSegments(int i) 2661 { 2662 if (!isHierarchical() || i < 1) { 2663 return this; 2664 } 2665 2666 String[] newSegments = NO_SEGMENTS; 2667 int len = segments.length - i; 2668 if (len > 0) 2669 { 2670 newSegments = new String[len]; 2671 System.arraycopy(segments, 0, newSegments, 0, len); 2672 } 2673 return new URI(true, scheme, authority, device, hasAbsolutePath(), 2674 newSegments, query, fragment); 2675 } 2676 2677 /** 2678 * Returns <code>true</code> if this is a hierarchical URI that has a path 2679 * that ends with a trailing separator; <code>false</code> otherwise. 
2680 * 2681 * <p>A trailing separator is represented as an empty segment as the 2682 * last segment in the path; note that this definition does <em>not</em> 2683 * include the lone separator in the root absolute path. 2684 */ 2685 public boolean hasTrailingPathSeparator() 2686 { 2687 return segments.length > 0 && 2688 SEGMENT_EMPTY.equals(segments[segments.length - 1]); 2689 } 2690 2691 /** 2692 * If this is a hierarchical URI whose path includes a file extension, 2693 * that file extension is returned; null otherwise. We define a file 2694 * extension as any string following the last period (".") in the final 2695 * path segment. If there is no path, the path ends in a trailing 2696 * separator, or the final segment contains no period, then we consider 2697 * there to be no file extension. If the final segment ends in a period, 2698 * then the file extension is an empty string. 2699 */ 2700 public String fileExtension() 2701 { 2702 int len = segments.length; 2703 if (len == 0) { 2704 return null; 2705 } 2706 2707 String lastSegment = segments[len - 1]; 2708 int i = lastSegment.lastIndexOf(FILE_EXTENSION_SEPARATOR); 2709 return i < 0 ? null : lastSegment.substring(i + 1); 2710 } 2711 2712 /** 2713 * Returns the URI formed by appending a period (".") followed by the 2714 * specified file extension to the last path segment of this URI, if it is 2715 * hierarchical with a non-empty path ending in a non-empty segment; 2716 * otherwise, this URI is returned unchanged. 2717 2718 * <p>The extension is appended regardless of whether the segment already 2719 * contains an extension. 2720 * 2721 * @exception java.lang.IllegalArgumentException if 2722 * <code>fileExtension</code> is not a valid segment (portion) according 2723 * to {@link #validSegment}. 
2724 */ 2725 public URI appendFileExtension(String fileExtension) 2726 { 2727 if (!validSegment(fileExtension)) 2728 { 2729 throw new IllegalArgumentException( 2730 "invalid segment portion: " + fileExtension); 2731 } 2732 2733 int len = segments.length; 2734 if (len == 0) { 2735 return this; 2736 } 2737 2738 String lastSegment = segments[len - 1]; 2739 if (SEGMENT_EMPTY.equals(lastSegment)) { 2740 return this; 2741 } 2742 StringBuilder newLastSegment = new StringBuilder(lastSegment); 2743 newLastSegment.append(FILE_EXTENSION_SEPARATOR); 2744 newLastSegment.append(fileExtension); 2745 2746 String[] newSegments = new String[len]; 2747 System.arraycopy(segments, 0, newSegments, 0, len - 1); 2748 newSegments[len - 1] = newLastSegment.toString(); 2749 2750 // note: segments.length > 0 -> hierarchical 2751 return new URI(true, scheme, authority, device, hasAbsolutePath(), 2752 newSegments, query, fragment); 2753 } 2754 2755 /** 2756 * If this URI has a non-null {@link #fileExtension fileExtension}, 2757 * returns the URI formed by removing it; this URI unchanged, otherwise. 
2758 */ 2759 public URI trimFileExtension() 2760 { 2761 int len = segments.length; 2762 if (len == 0) { 2763 return this; 2764 } 2765 2766 String lastSegment = segments[len - 1]; 2767 int i = lastSegment.lastIndexOf(FILE_EXTENSION_SEPARATOR); 2768 if (i < 0) { 2769 return this; 2770 } 2771 2772 String newLastSegment = lastSegment.substring(0, i); 2773 String[] newSegments = new String[len]; 2774 System.arraycopy(segments, 0, newSegments, 0, len - 1); 2775 newSegments[len - 1] = newLastSegment; 2776 2777 // note: segments.length > 0 -> hierarchical 2778 return new URI(true, scheme, authority, device, hasAbsolutePath(), 2779 newSegments, query, fragment); 2780 } 2781 2782 /** 2783 * Returns <code>true</code> if this is a hierarchical URI that ends in a 2784 * slash; that is, it has a trailing path separator or is the root 2785 * absolute path, and has no query and no fragment; <code>false</code> 2786 * is returned otherwise. 2787 */ 2788 public boolean isPrefix() 2789 { 2790 return isHierarchical() && query == null && fragment == null && 2791 (hasTrailingPathSeparator() || (hasAbsolutePath() && segments.length == 0)); 2792 } 2793 2794 /** 2795 * If this is a hierarchical URI reference and <code>oldPrefix</code> is a 2796 * prefix of it, this returns the URI formed by replacing it by 2797 * <code>newPrefix</code>; <code>null</code> otherwise. 2798 * 2799 * <p>In order to be a prefix, the <code>oldPrefix</code>'s 2800 * {@link #isPrefix isPrefix} must return <code>true</code>, and it must 2801 * match this URI's scheme, authority, and device. Also, the paths must 2802 * match, up to prefix's end. 2803 * 2804 * @exception java.lang.IllegalArgumentException if either 2805 * <code>oldPrefix</code> or <code>newPrefix</code> is not a prefix URI 2806 * according to {@link #isPrefix}. 2807 */ 2808 public URI replacePrefix(URI oldPrefix, URI newPrefix) 2809 { 2810 if (!oldPrefix.isPrefix() || !newPrefix.isPrefix()) 2811 { 2812 String which = oldPrefix.isPrefix() ? 
"new" : "old"; 2813 throw new IllegalArgumentException("non-prefix " + which + " value"); 2814 } 2815 2816 // Get what's left of the segments after trimming the prefix. 2817 String[] tailSegments = getTailSegments(oldPrefix); 2818 if (tailSegments == null) { 2819 return null; 2820 } 2821 2822 // If the new prefix has segments, it is not the root absolute path, 2823 // and we need to drop the trailing empty segment and append the tail 2824 // segments. 2825 String[] mergedSegments = tailSegments; 2826 if (newPrefix.segmentCount() != 0) 2827 { 2828 int segmentsToKeep = newPrefix.segmentCount() - 1; 2829 mergedSegments = new String[segmentsToKeep + tailSegments.length]; 2830 System.arraycopy(newPrefix.segments(), 0, mergedSegments, 0, 2831 segmentsToKeep); 2832 2833 if (tailSegments.length != 0) 2834 { 2835 System.arraycopy(tailSegments, 0, mergedSegments, segmentsToKeep, 2836 tailSegments.length); 2837 } 2838 } 2839 2840 // no validation needed since all components are from existing URIs 2841 return new URI(true, newPrefix.scheme(), newPrefix.authority(), 2842 newPrefix.device(), newPrefix.hasAbsolutePath(), 2843 mergedSegments, query, fragment); 2844 } 2845 2846 // If this is a hierarchical URI reference and prefix is a prefix of it, 2847 // returns the portion of the path remaining after that prefix has been 2848 // trimmed; null otherwise. 2849 private String[] getTailSegments(URI prefix) 2850 { 2851 if (!prefix.isPrefix()) 2852 { 2853 throw new IllegalArgumentException("non-prefix trim"); 2854 } 2855 2856 // Don't even consider it unless this is hierarchical and has scheme, 2857 // authority, device and path absoluteness equal to those of the prefix. 
2858 if (!isHierarchical() || 2859 !equals(scheme, prefix.scheme(), true) || 2860 !equals(authority, prefix.authority()) || 2861 !equals(device, prefix.device()) || 2862 hasAbsolutePath() != prefix.hasAbsolutePath()) 2863 { 2864 return null; 2865 } 2866 2867 // If the prefix has no segments, then it is the root absolute path, and 2868 // we know this is an absolute path, too. 2869 if (prefix.segmentCount() == 0) { 2870 return segments; 2871 } 2872 2873 // This must have no fewer segments than the prefix. Since the prefix 2874 // is not the root absolute path, its last segment is empty; all others 2875 // must match. 2876 int i = 0; 2877 int segmentsToCompare = prefix.segmentCount() - 1; 2878 if (segments.length <= segmentsToCompare) { 2879 return null; 2880 } 2881 2882 for (; i < segmentsToCompare; i++) 2883 { 2884 if (!segments[i].equals(prefix.segment(i))) { 2885 return null; 2886 } 2887 } 2888 2889 // The prefix really is a prefix of this. If this has just one more, 2890 // empty segment, the paths are the same. 2891 if (i == segments.length - 1 && SEGMENT_EMPTY.equals(segments[i])) 2892 { 2893 return NO_SEGMENTS; 2894 } 2895 2896 // Otherwise, the path needs only the remaining segments. 2897 String[] newSegments = new String[segments.length - i]; 2898 System.arraycopy(segments, i, newSegments, 0, newSegments.length); 2899 return newSegments; 2900 } 2901 2902 /** 2903 * Encodes a string so as to produce a valid opaque part value, as defined 2904 * by the RFC. All excluded characters, such as space and <code>#</code>, 2905 * are escaped, as is <code>/</code> if it is the first character. 2906 * 2907 * @param ignoreEscaped <code>true</code> to leave <code>%</code> characters 2908 * unescaped if they already begin a valid three-character escape sequence; 2909 * <code>false</code> to encode all <code>%</code> characters. Note that 2910 * if a <code>%</code> is not followed by 2 hex digits, it will always be 2911 * escaped. 
2912 */ 2913 public static String encodeOpaquePart(String value, boolean ignoreEscaped) 2914 { 2915 String result = encode(value, URIC_HI, URIC_LO, ignoreEscaped); 2916 return result != null && result.length() > 0 && result.charAt(0) == SEGMENT_SEPARATOR ? 2917 "%2F" + result.substring(1) : 2918 result; 2919 } 2920 2921 /** 2922 * Encodes a string so as to produce a valid authority, as defined by the 2923 * RFC. All excluded characters, such as space and <code>#</code>, 2924 * are escaped, as are <code>/</code> and <code>?</code> 2925 * 2926 * @param ignoreEscaped <code>true</code> to leave <code>%</code> characters 2927 * unescaped if they already begin a valid three-character escape sequence; 2928 * <code>false</code> to encode all <code>%</code> characters. Note that 2929 * if a <code>%</code> is not followed by 2 hex digits, it will always be 2930 * escaped. 2931 */ 2932 public static String encodeAuthority(String value, boolean ignoreEscaped) 2933 { 2934 return encode(value, SEGMENT_CHAR_HI, SEGMENT_CHAR_LO, ignoreEscaped); 2935 } 2936 2937 /** 2938 * Encodes a string so as to produce a valid segment, as defined by the 2939 * RFC. All excluded characters, such as space and <code>#</code>, 2940 * are escaped, as are <code>/</code> and <code>?</code> 2941 * 2942 * @param ignoreEscaped <code>true</code> to leave <code>%</code> characters 2943 * unescaped if they already begin a valid three-character escape sequence; 2944 * <code>false</code> to encode all <code>%</code> characters. Note that 2945 * if a <code>%</code> is not followed by 2 hex digits, it will always be 2946 * escaped. 2947 */ 2948 public static String encodeSegment(String value, boolean ignoreEscaped) 2949 { 2950 return encode(value, SEGMENT_CHAR_HI, SEGMENT_CHAR_LO, ignoreEscaped); 2951 } 2952 2953 /** 2954 * Encodes a string so as to produce a valid query, as defined by the RFC. 2955 * Only excluded characters, such as space and <code>#</code>, are escaped. 
2956 * 2957 * @param ignoreEscaped <code>true</code> to leave <code>%</code> characters 2958 * unescaped if they already begin a valid three-character escape sequence; 2959 * <code>false</code> to encode all <code>%</code> characters. Note that 2960 * if a <code>%</code> is not followed by 2 hex digits, it will always be 2961 * escaped. 2962 */ 2963 public static String encodeQuery(String value, boolean ignoreEscaped) 2964 { 2965 return encode(value, URIC_HI, URIC_LO, ignoreEscaped); 2966 } 2967 2968 /** 2969 * Encodes a string so as to produce a valid fragment, as defined by the 2970 * RFC. Only excluded characters, such as space and <code>#</code>, are 2971 * escaped. 2972 * 2973 * @param ignoreEscaped <code>true</code> to leave <code>%</code> characters 2974 * unescaped if they already begin a valid three-character escape sequence; 2975 * <code>false</code> to encode all <code>%</code> characters. Note that 2976 * if a <code>%</code> is not followed by 2 hex digits, it will always be 2977 * escaped. 2978 */ 2979 public static String encodeFragment(String value, boolean ignoreEscaped) 2980 { 2981 return encode(value, URIC_HI, URIC_LO, ignoreEscaped); 2982 } 2983 2984 // Encodes a complete URI, optionally leaving % characters unescaped when 2985 // beginning a valid three-character escape sequence. We can either treat 2986 // the first or # as a fragment separator, or encode them all. 2987 private static String encodeURI(String uri, boolean ignoreEscaped, int fragmentLocationStyle) 2988 { 2989 if (uri == null) { 2990 return null; 2991 } 2992 2993 StringBuilder result = new StringBuilder(); 2994 2995 int i = uri.indexOf(SCHEME_SEPARATOR); 2996 if (i != -1) 2997 { 2998 String scheme = uri.substring(0, i); 2999 result.append(scheme); 3000 result.append(SCHEME_SEPARATOR); 3001 } 3002 3003 int j = 3004 fragmentLocationStyle == FRAGMENT_FIRST_SEPARATOR ? uri.indexOf(FRAGMENT_SEPARATOR) : 3005 fragmentLocationStyle == FRAGMENT_LAST_SEPARATOR ? 
uri.lastIndexOf(FRAGMENT_SEPARATOR) : -1; 3006 3007 if (j != -1) 3008 { 3009 String sspart = uri.substring(++i, j); 3010 result.append(encode(sspart, URIC_HI, URIC_LO, ignoreEscaped)); 3011 result.append(FRAGMENT_SEPARATOR); 3012 3013 String fragment = uri.substring(++j); 3014 result.append(encode(fragment, URIC_HI, URIC_LO, ignoreEscaped)); 3015 } 3016 else 3017 { 3018 String sspart = uri.substring(++i); 3019 result.append(encode(sspart, URIC_HI, URIC_LO, ignoreEscaped)); 3020 } 3021 3022 return result.toString(); 3023 } 3024 3025 // Encodes the given string, replacing each ASCII character that is not in 3026 // the set specified by the 128-bit bitmask and each non-ASCII character 3027 // below 0xA0 by an escape sequence of % followed by two hex digits. If 3028 // % is not in the set but ignoreEscaped is true, then % will not be encoded 3029 // iff it already begins a valid escape sequence. 3030 private static String encode(String value, long highBitmask, long lowBitmask, boolean ignoreEscaped) 3031 { 3032 if (value == null) { 3033 return null; 3034 } 3035 3036 StringBuilder result = null; 3037 3038 for (int i = 0, len = value.length(); i < len; i++) 3039 { 3040 char c = value.charAt(i); 3041 3042 if (!matches(c, highBitmask, lowBitmask) && c < 160 && 3043 (!ignoreEscaped || !isEscaped(value, i))) 3044 { 3045 if (result == null) 3046 { 3047 result = new StringBuilder(value.substring(0, i)); 3048 } 3049 appendEscaped(result, (byte)c); 3050 } 3051 else if (result != null) 3052 { 3053 result.append(c); 3054 } 3055 } 3056 return result == null ? value : result.toString(); 3057 } 3058 3059 // Tests whether an escape occurs in the given string, starting at index i. 3060 // An escape sequence is a % followed by two hex digits. 
3061 private static boolean isEscaped(String s, int i) 3062 { 3063 return s.charAt(i) == ESCAPE && s.length() > i + 2 && 3064 matches(s.charAt(i + 1), HEX_HI, HEX_LO) && 3065 matches(s.charAt(i + 2), HEX_HI, HEX_LO); 3066 } 3067 3068 // Computes a three-character escape sequence for the byte, appending 3069 // it to the StringBuilder. Only characters up to 0xFF should be escaped; 3070 // all but the least significant byte will be ignored. 3071 private static void appendEscaped(StringBuilder result, byte b) 3072 { 3073 result.append(ESCAPE); 3074 3075 // The byte is automatically widened into an int, with sign extension, 3076 // for shifting. This can introduce 1's to the left of the byte, which 3077 // must be cleared by masking before looking up the hex digit. 3078 // 3079 result.append(HEX_DIGITS[(b >> 4) & 0x0F]); 3080 result.append(HEX_DIGITS[b & 0x0F]); 3081 } 3082 3083 /** 3084 * Decodes the given string by interpreting three-digit escape sequences as the bytes of a UTF-8 encoded character 3085 * and replacing them with the characters they represent. 3086 * Incomplete escape sequences are ignored and invalid UTF-8 encoded bytes are treated as extended ASCII characters. 
3087 */ 3088 public static String decode(String value) 3089 { 3090 if (value == null) { 3091 return null; 3092 } 3093 3094 int i = value.indexOf('%'); 3095 if (i < 0) 3096 { 3097 return value; 3098 } 3099 else 3100 { 3101 StringBuilder result = new StringBuilder(value.substring(0, i)); 3102 byte [] bytes = new byte[4]; 3103 int receivedBytes = 0; 3104 int expectedBytes = 0; 3105 for (int len = value.length(); i < len; i++) 3106 { 3107 if (isEscaped(value, i)) 3108 { 3109 char character = unescape(value.charAt(i + 1), value.charAt(i + 2)); 3110 i += 2; 3111 3112 if (expectedBytes > 0) 3113 { 3114 if ((character & 0xC0) == 0x80) 3115 { 3116 bytes[receivedBytes++] = (byte)character; 3117 } 3118 else 3119 { 3120 expectedBytes = 0; 3121 } 3122 } 3123 else if (character >= 0x80) 3124 { 3125 if ((character & 0xE0) == 0xC0) 3126 { 3127 bytes[receivedBytes++] = (byte)character; 3128 expectedBytes = 2; 3129 } 3130 else if ((character & 0xF0) == 0xE0) 3131 { 3132 bytes[receivedBytes++] = (byte)character; 3133 expectedBytes = 3; 3134 } 3135 else if ((character & 0xF8) == 0xF0) 3136 { 3137 bytes[receivedBytes++] = (byte)character; 3138 expectedBytes = 4; 3139 } 3140 } 3141 3142 if (expectedBytes > 0) 3143 { 3144 if (receivedBytes == expectedBytes) 3145 { 3146 switch (receivedBytes) 3147 { 3148 case 2: 3149 { 3150 result.append((char)((bytes[0] & 0x1F) << 6 | bytes[1] & 0x3F)); 3151 break; 3152 } 3153 case 3: 3154 { 3155 result.append((char)((bytes[0] & 0xF) << 12 | (bytes[1] & 0X3F) << 6 | bytes[2] & 0x3F)); 3156 break; 3157 } 3158 case 4: 3159 { 3160 result.appendCodePoint(((bytes[0] & 0x7) << 18 | (bytes[1] & 0X3F) << 12 | (bytes[2] & 0X3F) << 6 | bytes[3] & 0x3F)); 3161 break; 3162 } 3163 } 3164 receivedBytes = 0; 3165 expectedBytes = 0; 3166 } 3167 } 3168 else 3169 { 3170 for (int j = 0; j < receivedBytes; ++j) 3171 { 3172 result.append((char)bytes[j]); 3173 } 3174 receivedBytes = 0; 3175 result.append(character); 3176 } 3177 } 3178 else 3179 { 3180 for (int j = 0; j < 
receivedBytes; ++j) 3181 { 3182 result.append((char)bytes[j]); 3183 } 3184 receivedBytes = 0; 3185 result.append(value.charAt(i)); 3186 } 3187 } 3188 return result.toString(); 3189 } 3190 } 3191 3192 // Returns the character encoded by % followed by the two given hex digits, 3193 // which is always 0xFF or less, so can safely be casted to a byte. If 3194 // either character is not a hex digit, a bogus result will be returned. 3195 private static char unescape(char highHexDigit, char lowHexDigit) 3196 { 3197 return (char)((valueOf(highHexDigit) << 4) | valueOf(lowHexDigit)); 3198 } 3199 3200 // Returns the int value of the given hex digit. 3201 private static int valueOf(char hexDigit) 3202 { 3203 if (hexDigit >= 'A' && hexDigit <= 'F') 3204 { 3205 return hexDigit - 'A' + 10; 3206 } 3207 if (hexDigit >= 'a' && hexDigit <= 'f') 3208 { 3209 return hexDigit - 'a' + 10; 3210 } 3211 if (hexDigit >= '0' && hexDigit <= '9') 3212 { 3213 return hexDigit - '0'; 3214 } 3215 return 0; 3216 } 3217 3218 /* 3219 * Returns <code>true</code> if this URI contains non-ASCII characters; 3220 * <code>false</code> otherwise. 3221 * 3222 * This unused code is included for possible future use... 3223 */ 3224 /* 3225 public boolean isIRI() 3226 { 3227 return iri; 3228 } 3229 3230 // Returns true if the given string contains any non-ASCII characters; 3231 // false otherwise. 3232 private static boolean containsNonASCII(String value) 3233 { 3234 for (int i = 0, length = value.length(); i < length; i++) 3235 { 3236 if (value.charAt(i) > 127) return true; 3237 } 3238 return false; 3239 } 3240 */ 3241 3242 /* 3243 * If this is an {@link #isIRI IRI}, converts it to a strict ASCII URI, 3244 * using the procedure described in Section 3.1 of the 3245 * <a href="http://www.w3.org/International/iri-edit/draft-duerst-iri-09.txt">IRI 3246 * Draft RFC</a>. Otherwise, this URI, itself, is returned. 3247 * 3248 * This unused code is included for possible future use... 
3249 */ 3250 /* 3251 public URI toASCIIURI() 3252 { 3253 if (!iri) return this; 3254 3255 if (cachedASCIIURI == null) 3256 { 3257 String eAuthority = encodeAsASCII(authority); 3258 String eDevice = encodeAsASCII(device); 3259 String eQuery = encodeAsASCII(query); 3260 String eFragment = encodeAsASCII(fragment); 3261 String[] eSegments = new String[segments.length]; 3262 for (int i = 0; i < segments.length; i++) 3263 { 3264 eSegments[i] = encodeAsASCII(segments[i]); 3265 } 3266 cachedASCIIURI = new URI(hierarchical, scheme, eAuthority, eDevice, absolutePath, eSegments, eQuery, eFragment); 3267 3268 } 3269 return cachedASCIIURI; 3270 } 3271 3272 // Returns a strict ASCII encoding of the given value. Each non-ASCII 3273 // character is converted to bytes using UTF-8 encoding, which are then 3274 // represented using % escaping. 3275 private String encodeAsASCII(String value) 3276 { 3277 if (value == null) return null; 3278 3279 StringBuilder result = null; 3280 3281 for (int i = 0, length = value.length(); i < length; i++) 3282 { 3283 char c = value.charAt(i); 3284 3285 if (c >= 128) 3286 { 3287 if (result == null) 3288 { 3289 result = new StringBuilder(value.substring(0, i)); 3290 } 3291 3292 try 3293 { 3294 byte[] encoded = (new String(new char[] { c })).getBytes("UTF-8"); 3295 for (int j = 0, encLen = encoded.length; j < encLen; j++) 3296 { 3297 appendEscaped(result, encoded[j]); 3298 } 3299 } 3300 catch (UnsupportedEncodingException e) 3301 { 3302 throw new WrappedException(e); 3303 } 3304 } 3305 else if (result != null) 3306 { 3307 result.append(c); 3308 } 3309 3310 } 3311 return result == null ? value : result.toString(); 3312 } 3313 3314 // Returns the number of valid, consecutive, three-character escape 3315 // sequences in the given string, starting at index i. 
3316 private static int countEscaped(String s, int i) 3317 { 3318 int result = 0; 3319 3320 for (int length = s.length(); i < length; i += 3) 3321 { 3322 if (isEscaped(s, i)) result++; 3323 } 3324 return result; 3325 } 3326 */ 3327 } 3328