1 /* 2 * Copyright (c) 1996, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package sun.net; 27 28 /** 29 * Helper class to map URL "abbreviations" to real URLs. 30 * The default implementation supports the following mappings: 31 * <pre>{@code 32 * ftp.mumble.bar/... => ftp://ftp.mumble.bar/... 33 * gopher.mumble.bar/... => gopher://gopher.mumble.bar/... 34 * other.name.dom/... => http://other.name.dom/... 35 * /foo/... => file:/foo/... 36 * }</pre> 37 * 38 * Full URLs (those including a protocol name) are passed through unchanged. 39 * 40 * Subclassers can override or extend this behavior to support different 41 * or additional canonicalization policies. 42 * 43 * @author Steve Byrne 44 */ 45 46 public class URLCanonicalizer { 47 /** 48 * Creates the default canonicalizer instance. 49 */ URLCanonicalizer()50 public URLCanonicalizer() { } 51 52 /** 53 * Given a possibly abbreviated URL (missing a protocol name, typically), 54 * this method's job is to transform that URL into a canonical form, 55 * by including a protocol name and additional syntax, if necessary. 56 * 57 * For a correctly formed URL, this method should just return its argument. 58 */ canonicalize(String simpleURL)59 public String canonicalize(String simpleURL) { 60 String resultURL = simpleURL; 61 if (simpleURL.startsWith("ftp.")) { 62 resultURL = "ftp://" + simpleURL; 63 } else if (simpleURL.startsWith("gopher.")) { 64 resultURL = "gopher://" + simpleURL; 65 } else if (simpleURL.startsWith("/")) { 66 resultURL = "file:" + simpleURL; 67 } else if (!hasProtocolName(simpleURL)) { 68 if (isSimpleHostName(simpleURL)) { 69 simpleURL = "www." + simpleURL + ".com"; 70 } 71 resultURL = "http://" + simpleURL; 72 } 73 74 return resultURL; 75 } 76 77 /** 78 * Given a possibly abbreviated URL, this predicate function returns 79 * true if it appears that the URL contains a protocol name 80 */ hasProtocolName(String url)81 public boolean hasProtocolName(String url) { 82 int index = url.indexOf(':'); 83 if (index <= 0) { // treat ":foo" as not having a protocol spec 84 return false; 85 } 86 87 for (int i = 0; i < index; i++) { 88 char c = url.charAt(i); 89 90 // REMIND: this is a guess at legal characters in a protocol -- 91 // need to be verified 92 if ((c >= 'A' && c <= 'Z') 93 || (c >= 'a' && c <= 'z') 94 || (c == '-')) { 95 continue; 96 } 97 98 // found an illegal character 99 return false; 100 } 101 102 return true; 103 } 104 105 /** 106 * Returns true if the URL is just a single name, no periods or 107 * slashes, false otherwise 108 **/ isSimpleHostName(String url)109 protected boolean isSimpleHostName(String url) { 110 111 for (int i = 0; i < url.length(); i++) { 112 char c = url.charAt(i); 113 114 // REMIND: this is a guess at legal characters in a protocol -- 115 // need to be verified 116 if ((c >= 'A' && c <= 'Z') 117 || (c >= 'a' && c <= 'z') 118 || (c >= '0' && c <= '9') 119 || (c == '-')) { 120 continue; 121 } 122 123 // found an illegal character 124 return false; 125 } 126 127 return true; 128 } 129 } 130