1 /* ============================================================
2 * Falkon - Qt web browser
3 * Copyright (C) 2010-2017 David Rosca <nowrep@gmail.com>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
17 * ============================================================ */
18 /**
19  * Copyright (c) 2009, Benjamin C. Meyer <ben@meyerhome.net>
20  *
21  * Redistribution and use in source and binary forms, with or without
22  * modification, are permitted provided that the following conditions
23  * are met:
24  * 1. Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  * 2. Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in the
28  *    documentation and/or other materials provided with the distribution.
29  * 3. Neither the name of the Benjamin Meyer nor the names of its contributors
30  *    may be used to endorse or promote products derived from this software
31  *    without specific prior written permission.
32  *
33  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
34  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
37  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43  * SUCH DAMAGE.
44  */
45 
46 #ifndef ADBLOCKRULE_H
47 #define ADBLOCKRULE_H
48 
49 #include <QObject>
50 #include <QStringList>
51 #include <QStringMatcher>
52 #include <QRegularExpression>
53 
54 #include "qzcommon.h"
55 
56 class QUrl;
57 class QWebEngineUrlRequestInfo;
58 
59 class AdBlockSubscription;
60 
61 class FALKON_EXPORT AdBlockRule
62 {
63     Q_DISABLE_COPY(AdBlockRule)
64 
65 public:
66     AdBlockRule(const QString &filter = QString(), AdBlockSubscription* subscription = nullptr);
67     ~AdBlockRule();
68 
69     AdBlockRule* copy() const;
70 
71     AdBlockSubscription* subscription() const;
72     void setSubscription(AdBlockSubscription* subscription);
73 
74     QString filter() const;
75     void setFilter(const QString &filter);
76 
77     bool isCssRule() const;
78     QString cssSelector() const;
79 
80     bool isDocument() const;
81     bool isElemhide() const;
82 
83     bool isDomainRestricted() const;
84     bool isException() const;
85 
86     bool isComment() const;
87     bool isEnabled() const;
88     void setEnabled(bool enabled);
89 
90     bool isSlow() const;
91     bool isInternalDisabled() const;
92 
93     bool urlMatch(const QUrl &url) const;
94     bool networkMatch(const QWebEngineUrlRequestInfo &request, const QString &domain, const QString &encodedUrl) const;
95 
96     bool matchDomain(const QString &domain) const;
97     bool matchThirdParty(const QWebEngineUrlRequestInfo &request) const;
98     bool matchObject(const QWebEngineUrlRequestInfo &request) const;
99     bool matchSubdocument(const QWebEngineUrlRequestInfo &request) const;
100     bool matchXmlHttpRequest(const QWebEngineUrlRequestInfo &request) const;
101     bool matchImage(const QWebEngineUrlRequestInfo &request) const;
102     bool matchScript(const QWebEngineUrlRequestInfo &request) const;
103     bool matchStyleSheet(const QWebEngineUrlRequestInfo &request) const;
104     bool matchObjectSubrequest(const QWebEngineUrlRequestInfo &request) const;
105     bool matchPing(const QWebEngineUrlRequestInfo &request) const;
106     bool matchMedia(const QWebEngineUrlRequestInfo &request) const;
107     bool matchFont(const QWebEngineUrlRequestInfo &request) const;
108     bool matchOther(const QWebEngineUrlRequestInfo &request) const;
109 
110 protected:
111     bool stringMatch(const QString &domain, const QString &encodedUrl) const;
112     bool isMatchingDomain(const QString &domain, const QString &filter) const;
113     bool isMatchingRegExpStrings(const QString &url) const;
114     QStringList parseRegExpFilter(const QString &filter) const;
115 
116 private:
117     enum RuleType {
118         CssRule = 0,
119         DomainMatchRule = 1,
120         RegExpMatchRule = 2,
121         StringEndsMatchRule = 3,
122         StringContainsMatchRule = 4,
123         MatchAllUrlsRule = 5,
124         Invalid = 6
125     };
126 
127     enum RuleOption {
128         NoOption = 0,
129         DomainRestrictedOption = 1,
130         ThirdPartyOption = 2,
131         ObjectOption = 4,
132         SubdocumentOption = 8,
133         XMLHttpRequestOption = 16,
134         ImageOption = 32,
135         ScriptOption = 64,
136         StyleSheetOption = 128,
137         ObjectSubrequestOption = 256,
138         PingOption = 512,
139         MediaOption = 1024,
140         FontOption = 2048,
141         OtherOption = 4096,
142 
143         // Exception only options
144         DocumentOption = 8192,
145         ElementHideOption = 16384
146     };
147 
148     Q_DECLARE_FLAGS(RuleOptions, RuleOption)
149 
150     inline bool hasOption(const RuleOption &opt) const;
151     inline bool hasException(const RuleOption &opt) const;
152 
153     inline void setOption(const RuleOption &opt);
154     inline void setException(const RuleOption &opt, bool on);
155 
156     void parseFilter();
157     void parseDomains(const QString &domains, const QChar &separator);
158     bool filterIsOnlyDomain(const QString &filter) const;
159     bool filterIsOnlyEndsMatch(const QString &filter) const;
160     QString createRegExpFromFilter(const QString &filter) const;
161     QList<QStringMatcher> createStringMatchers(const QStringList &filters) const;
162 
163     AdBlockSubscription* m_subscription;
164 
165     RuleType m_type;
166     RuleOptions m_options;
167     RuleOptions m_exceptions;
168 
169     // Original rule filter
170     QString m_filter;
171     // Parsed rule for string matching (CSS Selector for CSS rules)
172     QString m_matchString;
173     // Case sensitivity for string matching
174     Qt::CaseSensitivity m_caseSensitivity;
175 
176     bool m_isEnabled;
177     bool m_isException;
178     bool m_isInternalDisabled;
179 
180     QStringList m_allowedDomains;
181     QStringList m_blockedDomains;
182 
183     struct RegExp {
184         QRegularExpression regExp;
185         QList<QStringMatcher> matchers;
186     };
187 
188     // Use dynamic allocation to save memory
189     RegExp* m_regExp;
190 
191     friend class AdBlockMatcher;
192     friend class AdBlockSearchTree;
193     friend class AdBlockSubscription;
194 };
195 
196 #endif // ADBLOCKRULE_H
197 
198