1 /*
2  * Copyright (c) 1996, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package sun.net;
27 
/**
 * Helper class to map URL "abbreviations" to real URLs.
 * The default implementation supports the following mappings:
 * <pre>{@code
 *   ftp.mumble.bar/... => ftp://ftp.mumble.bar/...
 *   gopher.mumble.bar/... => gopher://gopher.mumble.bar/...
 *   other.name.dom/... => http://other.name.dom/...
 *   name => http://www.name.com
 *   /foo/... => file:/foo/...
 * }</pre>
 *
 * Full URLs (those including a protocol name) are passed through unchanged,
 * as is the empty string.
 *
 * Subclassers can override or extend this behavior to support different
 * or additional canonicalization policies.
 *
 * @author      Steve Byrne
 */

public class URLCanonicalizer {
    /**
     * Creates the default canonicalizer instance.
     */
    public URLCanonicalizer() { }

    /**
     * Given a possibly abbreviated URL (missing a protocol name, typically),
     * this method's job is to transform that URL into a canonical form,
     * by including a protocol name and additional syntax, if necessary.
     *
     * For a correctly formed URL, this method should just return its argument.
     * The empty string is also returned as is: there is nothing to expand,
     * and treating it as a bare host name would yield "http://www..com".
     *
     * @param  simpleURL the possibly abbreviated URL; must not be null
     * @return the canonical form of {@code simpleURL}
     * @throws NullPointerException if {@code simpleURL} is null
     */
    public String canonicalize(String simpleURL) {
        if (simpleURL.isEmpty()) {
            return simpleURL;
        }

        // The prefix shortcuts are checked before the protocol test, so
        // they win even when the string also contains a ':'.
        if (simpleURL.startsWith("ftp.")) {
            return "ftp://" + simpleURL;
        }
        if (simpleURL.startsWith("gopher.")) {
            return "gopher://" + simpleURL;
        }
        if (simpleURL.startsWith("/")) {
            return "file:" + simpleURL;
        }
        if (hasProtocolName(simpleURL)) {
            return simpleURL;
        }

        // A bare name such as "foo" is expanded to "www.foo.com"; anything
        // else is taken to be a host (and optional path) as written.
        String hostAndPath = isSimpleHostName(simpleURL)
            ? "www." + simpleURL + ".com"
            : simpleURL;
        return "http://" + hostAndPath;
    }

    /**
     * Given a possibly abbreviated URL, this predicate function returns
     * true if it appears that the URL contains a protocol name, that is,
     * if the text before the first ':' is non-empty and consists only of
     * ASCII letters and '-'.
     *
     * Note that a bare "host:port" whose host part is made up solely of
     * those characters (for example "localhost:8080") also satisfies
     * this test.
     *
     * @param  url the URL to examine; must not be null
     * @return true if {@code url} appears to start with a protocol name
     */
    public boolean hasProtocolName(String url) {
        int colon = url.indexOf(':');
        if (colon <= 0) {       // treat ":foo" as not having a protocol spec
            return false;
        }

        // REMIND: this is a guess at legal characters in a protocol --
        // need to be verified
        for (int i = 0; i < colon; i++) {
            char c = url.charAt(i);
            if (!isAsciiLetter(c) && c != '-') {
                return false;   // found an illegal character
            }
        }

        return true;
    }

    /**
     * Returns true if the URL is just a single name, no periods or
     * slashes, false otherwise. A single name is a non-empty run of
     * ASCII letters, digits and '-'.
     *
     * @param  url the URL to examine; must not be null
     * @return true if {@code url} is a single, non-empty name
     **/
    protected boolean isSimpleHostName(String url) {
        if (url.isEmpty()) {
            return false;       // no characters at all is not a name
        }

        // REMIND: this is a guess at legal characters in a host name --
        // need to be verified
        for (int i = 0; i < url.length(); i++) {
            char c = url.charAt(i);
            boolean digit = c >= '0' && c <= '9';
            if (!isAsciiLetter(c) && !digit && c != '-') {
                return false;   // found an illegal character
            }
        }

        return true;
    }

    /**
     * Returns true if {@code c} is one of 'A'-'Z' or 'a'-'z'.
     */
    private static boolean isAsciiLetter(char c) {
        return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
    }
}
130