1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include <qglobal.h>
41 
42 #if QT_CONFIG(topleveldomain)
43 
44 #include "qplatformdefs.h"
45 #include "qurl.h"
46 #include "private/qurltlds_p.h"
47 #include "private/qtldurl_p.h"
48 #include "QtCore/qstring.h"
49 #include "QtCore/qvector.h"
50 
51 QT_BEGIN_NAMESPACE
52 
// Kind of rule to look for in the TLD table. The numeric values are used
// directly as indices into the table of rule-prefix symbols in
// containsTLDEntry(), so they must stay dense and start at zero.
enum TLDMatchType {
    ExactMatch = 0,     // plain rule, stored without a prefix
    SuffixMatch = 1,    // wildcard rule, stored with a '*' prefix
    ExceptionMatch = 2, // exception rule, stored with a '!' prefix
};
58 
59 // Scan the auto-generated table of TLDs for an entry. For more details
60 // see comments in file:  util/corelib/qurl-generateTLDs/main.cpp
containsTLDEntry(QStringView entry,TLDMatchType match)61 static bool containsTLDEntry(QStringView entry, TLDMatchType match)
62 {
63     const QStringView matchSymbols[] = {
64         u"",
65         u"*",
66         u"!",
67     };
68     const auto symbol = matchSymbols[match];
69     const int index = qt_hash(entry, qt_hash(symbol)) % tldCount;
70 
71     // select the right chunk from the big table
72     short chunk = 0;
73     uint chunkIndex = tldIndices[index], offset = 0;
74 
75     // The offset in the big string, of the group that our entry hashes into.
76     const auto tldGroupOffset = tldIndices[index];
77 
78     // It should always be inside all chunks' total size.
79     Q_ASSERT(tldGroupOffset < tldChunks[tldChunkCount - 1]);
80     // All offsets are stored in non-decreasing order.
81     // This check is within bounds as tldIndices has length tldCount+1.
82     Q_ASSERT(tldGroupOffset <= tldIndices[index + 1]);
83     // The last extra entry in tldIndices
84     // should be equal to the total of all chunks' lengths.
85     Q_ASSERT(tldIndices[tldCount] == tldChunks[tldChunkCount - 1]);
86 
87     // Find which chunk contains the tldGroupOffset
88     while (tldGroupOffset >= tldChunks[chunk]) {
89         chunkIndex -= tldChunks[chunk];
90         offset += tldChunks[chunk];
91         chunk++;
92 
93         // We can not go above the number of chunks we have, since all our
94         // indices are less than the total chunks' size (see asserts above).
95         Q_ASSERT(chunk < tldChunkCount);
96     }
97 
98     // check all the entries from the given offset
99     while (chunkIndex < tldIndices[index+1] - offset) {
100         const auto utf8 = tldData[chunk] + chunkIndex;
101         if ((symbol.isEmpty() || QLatin1Char(*utf8) == symbol) && entry == QString::fromUtf8(utf8 + symbol.size()))
102             return true;
103         chunkIndex += qstrlen(utf8) + 1; // +1 for the ending \0
104     }
105     return false;
106 }
107 
108 /*!
109     \internal
110 
111     Return the top-level-domain per Qt's copy of the Mozilla public suffix list of
112     \a domain.
113 */
114 
qTopLevelDomain(const QString & domain)115 Q_CORE_EXPORT QString qTopLevelDomain(const QString &domain)
116 {
117     const QString domainLower = domain.toLower();
118     QVector<QStringRef> sections = domainLower.splitRef(QLatin1Char('.'), Qt::SkipEmptyParts);
119     if (sections.isEmpty())
120         return QString();
121 
122     QString level, tld;
123     for (int j = sections.count() - 1; j >= 0; --j) {
124         level.prepend(QLatin1Char('.') + sections.at(j));
125         if (qIsEffectiveTLD(level.rightRef(level.size() - 1)))
126             tld = level;
127     }
128     return tld;
129 }
130 
131 /*!
132     \internal
133 
134     Return true if \a domain is a top-level-domain per Qt's copy of the Mozilla public suffix list.
135 */
136 
qIsEffectiveTLD(const QStringRef & domain)137 Q_CORE_EXPORT bool qIsEffectiveTLD(const QStringRef &domain)
138 {
139     // for domain 'foo.bar.com':
140     // 1. return if TLD table contains 'foo.bar.com'
141     // 2. else if table contains '*.bar.com',
142     // 3. test that table does not contain '!foo.bar.com'
143 
144     if (containsTLDEntry(domain, ExactMatch)) // 1
145         return true;
146 
147     const int dot = domain.indexOf(QLatin1Char('.'));
148     if (dot < 0) // Actual TLD: may be effective if the subject of a wildcard rule:
149         return containsTLDEntry(QString(QLatin1Char('.') + domain), SuffixMatch);
150     if (containsTLDEntry(domain.mid(dot), SuffixMatch))   // 2
151         return !containsTLDEntry(domain, ExceptionMatch); // 3
152     return false;
153 }
154 
155 QT_END_NAMESPACE
156 
157 #endif
158