1 /*
2  * Copyright (C) 2009, 2010 Apple Inc. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "UserContentURLPattern.h"
28 #include "KURL.h"
29 #include <wtf/StdLibExtras.h>
30 
31 namespace WebCore {
32 
matchesPatterns(const KURL & url,const Vector<String> * whitelist,const Vector<String> * blacklist)33 bool UserContentURLPattern::matchesPatterns(const KURL& url, const Vector<String>* whitelist, const Vector<String>* blacklist)
34 {
35     // In order for a URL to be a match it has to be present in the whitelist and not present in the blacklist.
36     // If there is no whitelist at all, then all URLs are assumed to be in the whitelist.
37     bool matchesWhitelist = !whitelist || whitelist->isEmpty();
38     if (!matchesWhitelist) {
39         for (unsigned i = 0; i < whitelist->size(); ++i) {
40             UserContentURLPattern contentPattern(whitelist->at(i));
41             if (contentPattern.matches(url)) {
42                 matchesWhitelist = true;
43                 break;
44             }
45         }
46     }
47 
48     bool matchesBlacklist = false;
49     if (blacklist) {
50         for (unsigned i = 0; i < blacklist->size(); ++i) {
51             UserContentURLPattern contentPattern(blacklist->at(i));
52             if (contentPattern.matches(url)) {
53                 matchesBlacklist = true;
54                 break;
55             }
56         }
57     }
58 
59     return matchesWhitelist && !matchesBlacklist;
60 }
61 
parse(const String & pattern)62 bool UserContentURLPattern::parse(const String& pattern)
63 {
64     DEFINE_STATIC_LOCAL(const String, schemeSeparator, ("://"));
65 
66     size_t schemeEndPos = pattern.find(schemeSeparator);
67     if (schemeEndPos == notFound)
68         return false;
69 
70     m_scheme = pattern.left(schemeEndPos);
71 
72     unsigned hostStartPos = schemeEndPos + schemeSeparator.length();
73     if (hostStartPos >= pattern.length())
74         return false;
75 
76     int pathStartPos = 0;
77 
78     if (equalIgnoringCase(m_scheme, "file"))
79         pathStartPos = hostStartPos;
80     else {
81         size_t hostEndPos = pattern.find("/", hostStartPos);
82         if (hostEndPos == notFound)
83             return false;
84 
85         m_host = pattern.substring(hostStartPos, hostEndPos - hostStartPos);
86         m_matchSubdomains = false;
87 
88         if (m_host == "*") {
89             // The pattern can be just '*', which means match all domains.
90             m_host = "";
91             m_matchSubdomains = true;
92         } else if (m_host.startsWith("*.")) {
93             // The first component can be '*', which means to match all subdomains.
94             m_host = m_host.substring(2); // Length of "*."
95             m_matchSubdomains = true;
96         }
97 
98         // No other '*' can occur in the host.
99         if (m_host.find("*") != notFound)
100             return false;
101 
102         pathStartPos = hostEndPos;
103     }
104 
105     m_path = pattern.right(pattern.length() - pathStartPos);
106 
107     return true;
108 }
109 
matches(const KURL & test) const110 bool UserContentURLPattern::matches(const KURL& test) const
111 {
112     if (m_invalid)
113         return false;
114 
115     if (!equalIgnoringCase(test.protocol(), m_scheme))
116         return false;
117 
118     if (!equalIgnoringCase(m_scheme, "file") && !matchesHost(test))
119         return false;
120 
121     return matchesPath(test);
122 }
123 
matchesHost(const KURL & test) const124 bool UserContentURLPattern::matchesHost(const KURL& test) const
125 {
126     const String& host = test.host();
127     if (equalIgnoringCase(host, m_host))
128         return true;
129 
130     if (!m_matchSubdomains)
131         return false;
132 
133     // If we're matching subdomains, and we have no host, that means the pattern
134     // was <scheme>://*/<whatever>, so we match anything.
135     if (!m_host.length())
136         return true;
137 
138     // Check if the domain is a subdomain of our host.
139     if (!host.endsWith(m_host, false))
140         return false;
141 
142     ASSERT(host.length() > m_host.length());
143 
144     // Check that the character before the suffix is a period.
145     return host[host.length() - m_host.length() - 1] == '.';
146 }
147 
148 struct MatchTester
149 {
150     const String m_pattern;
151     unsigned m_patternIndex;
152 
153     const String m_test;
154     unsigned m_testIndex;
155 
MatchTesterWebCore::MatchTester156     MatchTester(const String& pattern, const String& test)
157     : m_pattern(pattern)
158     , m_patternIndex(0)
159     , m_test(test)
160     , m_testIndex(0)
161     {
162     }
163 
testStringFinishedWebCore::MatchTester164     bool testStringFinished() const { return m_testIndex >= m_test.length(); }
patternStringFinishedWebCore::MatchTester165     bool patternStringFinished() const { return m_patternIndex >= m_pattern.length(); }
166 
eatWildcardWebCore::MatchTester167     void eatWildcard()
168     {
169         while (!patternStringFinished()) {
170             if (m_pattern[m_patternIndex] != '*')
171                 return;
172             m_patternIndex++;
173         }
174     }
175 
eatSameCharsWebCore::MatchTester176     void eatSameChars()
177     {
178         while (!patternStringFinished() && !testStringFinished()) {
179             if (m_pattern[m_patternIndex] == '*')
180                 return;
181             if (m_pattern[m_patternIndex] != m_test[m_testIndex])
182                 return;
183             m_patternIndex++;
184             m_testIndex++;
185         }
186     }
187 
testWebCore::MatchTester188     bool test()
189     {
190         // Eat all the matching chars.
191         eatSameChars();
192 
193         // If the string is finished, then the pattern must be empty too, or contains
194         // only wildcards.
195         if (testStringFinished()) {
196             eatWildcard();
197             if (patternStringFinished())
198                 return true;
199             return false;
200         }
201 
202         // Pattern is empty but not string, this is not a match.
203         if (patternStringFinished())
204             return false;
205 
206         // If we don't encounter a *, then we're hosed.
207         if (m_pattern[m_patternIndex] != '*')
208             return false;
209 
210         while (!testStringFinished()) {
211             MatchTester nextMatch(*this);
212             nextMatch.m_patternIndex++;
213             if (nextMatch.test())
214                 return true;
215             m_testIndex++;
216         }
217 
218         // We reached the end of the string.  Let's see if the pattern contains only
219         // wildcards.
220         eatWildcard();
221         return patternStringFinished();
222     }
223 };
224 
matchesPath(const KURL & test) const225 bool UserContentURLPattern::matchesPath(const KURL& test) const
226 {
227     MatchTester match(m_path, test.path());
228     return match.test();
229 }
230 
231 } // namespace WebCore
232