1 /*
2  * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3  *           (C) 1999 Antti Koivisto (koivisto@kde.org)
4  *           (C) 2001 Dirk Mueller (mueller@kde.org)
5  *           (C) 2006 Alexey Proskuryakov (ap@webkit.org)
6  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
7  *
8  * This library is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Library General Public
10  * License as published by the Free Software Foundation; either
11  * version 2 of the License, or (at your option) any later version.
12  *
13  * This library is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * Library General Public License for more details.
17  *
18  * You should have received a copy of the GNU Library General Public License
19  * along with this library; see the file COPYING.LIB.  If not, write to
20  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21  * Boston, MA 02110-1301, USA.
22  */
23 
24 #include "config.h"
25 #include "KURL.h"
26 #include "LinkHash.h"
27 #include "PlatformString.h"
28 #include <wtf/text/AtomicString.h>
29 #include <wtf/text/StringHash.h>
30 #include <wtf/text/StringImpl.h>
31 
32 namespace WebCore {
33 
findSlashDotDotSlash(const UChar * characters,size_t length,size_t position)34 static inline size_t findSlashDotDotSlash(const UChar* characters, size_t length, size_t position)
35 {
36     if (length < 4)
37         return notFound;
38     size_t loopLimit = length - 3;
39     for (size_t i = position; i < loopLimit; ++i) {
40         if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '.' && characters[i + 3] == '/')
41             return i;
42     }
43     return notFound;
44 }
45 
findSlashSlash(const UChar * characters,size_t length,size_t position)46 static inline size_t findSlashSlash(const UChar* characters, size_t length, size_t position)
47 {
48     if (length < 2)
49         return notFound;
50     size_t loopLimit = length - 1;
51     for (size_t i = position; i < loopLimit; ++i) {
52         if (characters[i] == '/' && characters[i + 1] == '/')
53             return i;
54     }
55     return notFound;
56 }
57 
findSlashDotSlash(const UChar * characters,size_t length,size_t position)58 static inline size_t findSlashDotSlash(const UChar* characters, size_t length, size_t position)
59 {
60     if (length < 3)
61         return notFound;
62     size_t loopLimit = length - 2;
63     for (size_t i = position; i < loopLimit; ++i) {
64         if (characters[i] == '/' && characters[i + 1] == '.' && characters[i + 2] == '/')
65             return i;
66     }
67     return notFound;
68 }
69 
containsColonSlashSlash(const UChar * characters,unsigned length)70 static inline bool containsColonSlashSlash(const UChar* characters, unsigned length)
71 {
72     if (length < 3)
73         return false;
74     unsigned loopLimit = length - 2;
75     for (unsigned i = 0; i < loopLimit; ++i) {
76         if (characters[i] == ':' && characters[i + 1] == '/' && characters[i + 2] == '/')
77             return true;
78     }
79     return false;
80 }
81 
squeezeOutNullCharacters(Vector<UChar,512> & string)82 static inline void squeezeOutNullCharacters(Vector<UChar, 512>& string)
83 {
84     size_t size = string.size();
85     size_t i = 0;
86     for (i = 0; i < size; ++i) {
87         if (!string[i])
88             break;
89     }
90     if (i == size)
91         return;
92     size_t j = i;
93     for (++i; i < size; ++i) {
94         if (UChar character = string[i])
95             string[j++] = character;
96     }
97     ASSERT(j < size);
98     string.shrink(j);
99 }
100 
cleanSlashDotDotSlashes(Vector<UChar,512> & path,size_t firstSlash)101 static void cleanSlashDotDotSlashes(Vector<UChar, 512>& path, size_t firstSlash)
102 {
103     size_t slash = firstSlash;
104     do {
105         size_t previousSlash = slash ? reverseFind(path.data(), path.size(), '/', slash - 1) : notFound;
106         // Don't remove the host, i.e. http://foo.org/../foo.html
107         if (previousSlash == notFound || (previousSlash > 3 && path[previousSlash - 2] == ':' && path[previousSlash - 1] == '/')) {
108             path[slash] = 0;
109             path[slash + 1] = 0;
110             path[slash + 2] = 0;
111         } else {
112             for (size_t i = previousSlash; i < slash + 3; ++i)
113                 path[i] = 0;
114         }
115         slash += 3;
116     } while ((slash = findSlashDotDotSlash(path.data(), path.size(), slash)) != notFound);
117     squeezeOutNullCharacters(path);
118 }
119 
mergeDoubleSlashes(Vector<UChar,512> & path,size_t firstSlash)120 static void mergeDoubleSlashes(Vector<UChar, 512>& path, size_t firstSlash)
121 {
122     size_t refPos = find(path.data(), path.size(), '#');
123     if (!refPos || refPos == notFound)
124         refPos = path.size();
125 
126     size_t slash = firstSlash;
127     while (slash < refPos) {
128         if (!slash || path[slash - 1] != ':')
129             path[slash++] = 0;
130         else
131             slash += 2;
132         if ((slash = findSlashSlash(path.data(), path.size(), slash)) == notFound)
133             break;
134     }
135     squeezeOutNullCharacters(path);
136 }
137 
cleanSlashDotSlashes(Vector<UChar,512> & path,size_t firstSlash)138 static void cleanSlashDotSlashes(Vector<UChar, 512>& path, size_t firstSlash)
139 {
140     size_t slash = firstSlash;
141     do {
142         path[slash] = 0;
143         path[slash + 1] = 0;
144         slash += 2;
145     } while ((slash = findSlashDotSlash(path.data(), path.size(), slash)) != notFound);
146     squeezeOutNullCharacters(path);
147 }
148 
cleanPath(Vector<UChar,512> & path)149 static inline void cleanPath(Vector<UChar, 512>& path)
150 {
151     // FIXME: Should not do this in the query or anchor part of the URL.
152     size_t firstSlash = findSlashDotDotSlash(path.data(), path.size(), 0);
153     if (firstSlash != notFound)
154         cleanSlashDotDotSlashes(path, firstSlash);
155 
156     // FIXME: Should not do this in the query part.
157     firstSlash = findSlashSlash(path.data(), path.size(), 0);
158     if (firstSlash != notFound)
159         mergeDoubleSlashes(path, firstSlash);
160 
161     // FIXME: Should not do this in the query or anchor part.
162     firstSlash = findSlashDotSlash(path.data(), path.size(), 0);
163     if (firstSlash != notFound)
164         cleanSlashDotSlashes(path, firstSlash);
165 }
166 
matchLetter(UChar c,UChar lowercaseLetter)167 static inline bool matchLetter(UChar c, UChar lowercaseLetter)
168 {
169     return (c | 0x20) == lowercaseLetter;
170 }
171 
needsTrailingSlash(const UChar * characters,unsigned length)172 static inline bool needsTrailingSlash(const UChar* characters, unsigned length)
173 {
174     if (length < 6)
175         return false;
176     if (!matchLetter(characters[0], 'h')
177             || !matchLetter(characters[1], 't')
178             || !matchLetter(characters[2], 't')
179             || !matchLetter(characters[3], 'p'))
180         return false;
181     if (!(characters[4] == ':'
182             || (matchLetter(characters[4], 's') && characters[5] == ':')))
183         return false;
184 
185     unsigned pos = characters[4] == ':' ? 5 : 6;
186 
187     // Skip initial two slashes if present.
188     if (pos + 1 < length && characters[pos] == '/' && characters[pos + 1] == '/')
189         pos += 2;
190 
191     // Find next slash.
192     while (pos < length && characters[pos] != '/')
193         ++pos;
194 
195     return pos == length;
196 }
197 
visitedLinkHashInline(const UChar * url,unsigned length)198 static ALWAYS_INLINE LinkHash visitedLinkHashInline(const UChar* url, unsigned length)
199 {
200     return AlreadyHashed::avoidDeletedValue(StringHasher::computeHash(url, length));
201 }
202 
visitedLinkHash(const UChar * url,unsigned length)203 LinkHash visitedLinkHash(const UChar* url, unsigned length)
204 {
205     return visitedLinkHashInline(url, length);
206 }
207 
visitedURLInline(const KURL & base,const AtomicString & attributeURL,Vector<UChar,512> & buffer)208 static ALWAYS_INLINE void visitedURLInline(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
209 {
210     if (attributeURL.isNull())
211         return;
212 
213     const UChar* characters = attributeURL.characters();
214     unsigned length = attributeURL.length();
215 
216     // This is a poor man's completeURL. Faster with less memory allocation.
217     // FIXME: It's missing a lot of what completeURL does and a lot of what KURL does.
218     // For example, it does not handle international domain names properly.
219 
220     // FIXME: It is wrong that we do not do further processing on strings that have "://" in them:
221     //    1) The "://" could be in the query or anchor.
222     //    2) The URL's path could have a "/./" or a "/../" or a "//" sequence in it.
223 
224     // FIXME: needsTrailingSlash does not properly return true for a URL that has no path, but does
225     // have a query or anchor.
226 
227     bool hasColonSlashSlash = containsColonSlashSlash(characters, length);
228 
229     if (hasColonSlashSlash && !needsTrailingSlash(characters, length)) {
230         buffer.append(attributeURL.characters(), attributeURL.length());
231         return;
232     }
233 
234 
235     if (hasColonSlashSlash) {
236         // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
237         // end of the path, *before* the query or anchor.
238         buffer.append(characters, length);
239         buffer.append('/');
240         return;
241     }
242 
243     if (!length)
244         buffer.append(base.string().characters(), base.string().length());
245     else {
246         switch (characters[0]) {
247             case '/':
248                 buffer.append(base.string().characters(), base.pathStart());
249                 break;
250             case '#':
251                 buffer.append(base.string().characters(), base.pathEnd());
252                 break;
253             default:
254                 buffer.append(base.string().characters(), base.pathAfterLastSlash());
255                 break;
256         }
257     }
258     buffer.append(characters, length);
259     cleanPath(buffer);
260     if (needsTrailingSlash(buffer.data(), buffer.size())) {
261         // FIXME: This is incorrect for URLs that have a query or anchor; the "/" needs to go at the
262         // end of the path, *before* the query or anchor.
263         buffer.append('/');
264     }
265 
266     return;
267 }
268 
visitedURL(const KURL & base,const AtomicString & attributeURL,Vector<UChar,512> & buffer)269 void visitedURL(const KURL& base, const AtomicString& attributeURL, Vector<UChar, 512>& buffer)
270 {
271     return visitedURLInline(base, attributeURL, buffer);
272 }
273 
visitedLinkHash(const KURL & base,const AtomicString & attributeURL)274 LinkHash visitedLinkHash(const KURL& base, const AtomicString& attributeURL)
275 {
276     Vector<UChar, 512> url;
277     visitedURLInline(base, attributeURL, url);
278     if (url.isEmpty())
279         return 0;
280 
281     return visitedLinkHashInline(url.data(), url.size());
282 }
283 
284 }  // namespace WebCore
285