1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 
41 #define QT_NO_CAST_FROM_ASCII
42 
43 #include "qmimemagicrule_p.h"
44 
45 #include "qmimetypeparser_p.h"
46 #include <QtCore/QList>
47 #include <QtCore/QDebug>
48 #include <qendian.h>
49 
50 QT_BEGIN_NAMESPACE
51 
// Names of the magic rule types, packed into one NUL-separated string literal.
// The entries are in the same order as Type!
static const char magicRuleTypes_string[] =
    "invalid\0"
    "string\0"
    "host16\0"
    "host32\0"
    "big16\0"
    "big32\0"
    "little16\0"
    "little32\0"
    "byte\0"
    "\0";

// Byte offset of each name inside magicRuleTypes_string, indexed by Type value.
// Each offset is the previous offset plus the previous name's length and its NUL.
static const int magicRuleTypes_indices[] = {
    0, 8, 15, 22, 29, 35, 41, 50, 59, 64, 0
};
68 
type(const QByteArray & theTypeName)69 QMimeMagicRule::Type QMimeMagicRule::type(const QByteArray &theTypeName)
70 {
71     for (int i = String; i <= Byte; ++i) {
72         if (theTypeName == magicRuleTypes_string + magicRuleTypes_indices[i])
73             return Type(i);
74     }
75     return Invalid;
76 }
77 
typeName(QMimeMagicRule::Type theType)78 QByteArray QMimeMagicRule::typeName(QMimeMagicRule::Type theType)
79 {
80     return magicRuleTypes_string + magicRuleTypes_indices[theType];
81 }
82 
operator ==(const QMimeMagicRule & other) const83 bool QMimeMagicRule::operator==(const QMimeMagicRule &other) const
84 {
85     return m_type == other.m_type &&
86            m_value == other.m_value &&
87            m_startPos == other.m_startPos &&
88            m_endPos == other.m_endPos &&
89            m_mask == other.m_mask &&
90            m_pattern == other.m_pattern &&
91            m_number == other.m_number &&
92            m_numberMask == other.m_numberMask &&
93            m_matchFunction == other.m_matchFunction;
94 }
95 
96 // Used by both providers
matchSubstring(const char * dataPtr,int dataSize,int rangeStart,int rangeLength,int valueLength,const char * valueData,const char * mask)97 bool QMimeMagicRule::matchSubstring(const char *dataPtr, int dataSize, int rangeStart, int rangeLength,
98                                     int valueLength, const char *valueData, const char *mask)
99 {
100     // Size of searched data.
101     // Example: value="ABC", rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match)
102     const int dataNeeded = qMin(rangeLength + valueLength - 1, dataSize - rangeStart);
103 
104     if (!mask) {
105         // callgrind says QByteArray::indexOf is much slower, since our strings are typically too
106         // short for be worth Boyer-Moore matching (1 to 71 bytes, 11 bytes on average).
107         bool found = false;
108         for (int i = rangeStart; i < rangeStart + rangeLength; ++i) {
109             if (i + valueLength > dataSize)
110                 break;
111 
112             if (memcmp(valueData, dataPtr + i, valueLength) == 0) {
113                 found = true;
114                 break;
115             }
116         }
117         if (!found)
118             return false;
119     } else {
120         bool found = false;
121         const char *readDataBase = dataPtr + rangeStart;
122         // Example (continued from above):
123         // deviceSize is 4, so dataNeeded was max'ed to 4.
124         // maxStartPos = 4 - 3 + 1 = 2, and indeed
125         // we need to check for a match a positions 0 and 1 (ABCx and xABC).
126         const int maxStartPos = dataNeeded - valueLength + 1;
127         for (int i = 0; i < maxStartPos; ++i) {
128             const char *d = readDataBase + i;
129             bool valid = true;
130             for (int idx = 0; idx < valueLength; ++idx) {
131                 if (((*d++) & mask[idx]) != (valueData[idx] & mask[idx])) {
132                     valid = false;
133                     break;
134                 }
135             }
136             if (valid)
137                 found = true;
138         }
139         if (!found)
140             return false;
141     }
142     //qDebug() << "Found" << value << "in" << searchedData;
143     return true;
144 }
145 
matchString(const QByteArray & data) const146 bool QMimeMagicRule::matchString(const QByteArray &data) const
147 {
148     const int rangeLength = m_endPos - m_startPos + 1;
149     return QMimeMagicRule::matchSubstring(data.constData(), data.size(), m_startPos, rangeLength, m_pattern.size(), m_pattern.constData(), m_mask.constData());
150 }
151 
152 template <typename T>
matchNumber(const QByteArray & data) const153 bool QMimeMagicRule::matchNumber(const QByteArray &data) const
154 {
155     const T value(m_number);
156     const T mask(m_numberMask);
157 
158     //qDebug() << "matchNumber" << "0x" << QString::number(m_number, 16) << "size" << sizeof(T);
159     //qDebug() << "mask" << QString::number(m_numberMask, 16);
160 
161     const char *p = data.constData() + m_startPos;
162     const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), m_endPos);
163     for ( ; p <= e; ++p) {
164         if ((qFromUnaligned<T>(p) & mask) == (value & mask))
165             return true;
166     }
167 
168     return false;
169 }
170 
// Converts the textual value of a string rule into the raw bytes to search for
// by resolving backslash escapes:
//   \xH, \xHH        up to two hex digits
//   \O, \OO, \OOO    up to three octal digits (three only if the first is <= 3)
//   \n, \r, \t       newline, carriage return, tab
//   \<other>         the character itself
// A lone backslash at the very end of value is kept as a literal backslash.
// A \x escape stops at the first character that is not a hex digit; that
// character is not part of the escape and is processed normally afterwards.
static inline QByteArray makePattern(const QByteArray &value)
{
    // Every escape consumes at least as many input bytes as it produces,
    // so the result never needs more room than the input.
    QByteArray pattern(value.size(), Qt::Uninitialized);
    char *data = pattern.data();

    const char *p = value.constData();
    const char *e = p + value.size();
    for ( ; p < e; ++p) {
        if (*p != '\\' || p + 1 == e) {
            // Ordinary byte, or a backslash with nothing after it: copy verbatim.
            // (Never dereference e itself; the value may not be NUL-terminated.)
            *data++ = *p;
            continue;
        }

        ++p; // skip the backslash; *p now selects the kind of escape
        if (*p == 'x') { // hex (\\xff)
            char c = 0;
            for (int i = 0; i < 2 && p + 1 < e; ++i) {
                // Peek before consuming, so that a non-hex character is not swallowed.
                const char digit = p[1];
                int nibble;
                if (digit >= '0' && digit <= '9')
                    nibble = digit - '0';
                else if (digit >= 'a' && digit <= 'f')
                    nibble = digit - 'a' + 10;
                else if (digit >= 'A' && digit <= 'F')
                    nibble = digit - 'A' + 10;
                else
                    break;
                c = char((c << 4) + nibble);
                ++p;
            }
            *data++ = c;
        } else if (*p >= '0' && *p <= '7') { // oct (\\7, or \\77, or \\377)
            const char firstDigit = *p;
            char c = firstDigit - '0';
            if (p + 1 < e && p[1] >= '0' && p[1] <= '7') {
                c = (c << 3) + *(++p) - '0';
                // A third digit is only accepted if the result still fits into one byte.
                if (p + 1 < e && p[1] >= '0' && p[1] <= '7' && firstDigit <= '3')
                    c = (c << 3) + *(++p) - '0';
            }
            *data++ = c;
        } else if (*p == 'n') {
            *data++ = '\n';
        } else if (*p == 'r') {
            *data++ = '\r';
        } else if (*p == 't') {
            *data++ = '\t';
        } else { // escaped
            *data++ = *p;
        }
    }
    pattern.truncate(data - pattern.data());

    return pattern;
}
219 
220 // Evaluate a magic match rule like
221 //  <match value="must be converted with BinHex" type="string" offset="11"/>
222 //  <match value="0x9501" type="big16" offset="0:64"/>
223 
QMimeMagicRule(const QString & type,const QByteArray & value,const QString & offsets,const QByteArray & mask,QString * errorString)224 QMimeMagicRule::QMimeMagicRule(const QString &type,
225                                const QByteArray &value,
226                                const QString &offsets,
227                                const QByteArray &mask,
228                                QString *errorString)
229     : m_type(QMimeMagicRule::type(type.toLatin1())),
230       m_value(value),
231       m_mask(mask),
232       m_matchFunction(nullptr)
233 {
234     if (Q_UNLIKELY(m_type == Invalid))
235         *errorString = QLatin1String("Type ") + type + QLatin1String(" is not supported");
236 
237     // Parse for offset as "1" or "1:10"
238     const int colonIndex = offsets.indexOf(QLatin1Char(':'));
239     const QStringRef startPosStr = offsets.midRef(0, colonIndex); // \ These decay to returning 'offsets'
240     const QStringRef endPosStr   = offsets.midRef(colonIndex + 1);// / unchanged when colonIndex == -1
241     if (Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(startPosStr, &m_startPos, errorString)) ||
242         Q_UNLIKELY(!QMimeTypeParserBase::parseNumber(endPosStr, &m_endPos, errorString))) {
243         m_type = Invalid;
244         return;
245     }
246 
247     if (Q_UNLIKELY(m_value.isEmpty())) {
248         m_type = Invalid;
249         if (errorString)
250             *errorString = QStringLiteral("Invalid empty magic rule value");
251         return;
252     }
253 
254     if (m_type >= Host16 && m_type <= Byte) {
255         bool ok;
256         m_number = m_value.toUInt(&ok, 0); // autodetect base
257         if (Q_UNLIKELY(!ok)) {
258             m_type = Invalid;
259             if (errorString)
260                 *errorString = QLatin1String("Invalid magic rule value \"") + QLatin1String(m_value) + QLatin1Char('"');
261             return;
262         }
263         m_numberMask = !m_mask.isEmpty() ? m_mask.toUInt(&ok, 0) : 0; // autodetect base
264     }
265 
266     switch (m_type) {
267     case String:
268         m_pattern = makePattern(m_value);
269         m_pattern.squeeze();
270         if (!m_mask.isEmpty()) {
271             if (Q_UNLIKELY(m_mask.size() < 4 || !m_mask.startsWith("0x"))) {
272                 m_type = Invalid;
273                 if (errorString)
274                     *errorString = QLatin1String("Invalid magic rule mask \"") + QLatin1String(m_mask) + QLatin1Char('"');
275                 return;
276             }
277             const QByteArray &tempMask = QByteArray::fromHex(QByteArray::fromRawData(
278                                                      m_mask.constData() + 2, m_mask.size() - 2));
279             if (Q_UNLIKELY(tempMask.size() != m_pattern.size())) {
280                 m_type = Invalid;
281                 if (errorString)
282                     *errorString = QLatin1String("Invalid magic rule mask size \"") + QLatin1String(m_mask) + QLatin1Char('"');
283                 return;
284             }
285             m_mask = tempMask;
286         } else {
287             m_mask.fill(char(-1), m_pattern.size());
288         }
289         m_mask.squeeze();
290         m_matchFunction = &QMimeMagicRule::matchString;
291         break;
292     case Byte:
293         if (m_number <= quint8(-1)) {
294             if (m_numberMask == 0)
295                 m_numberMask = quint8(-1);
296             m_matchFunction = &QMimeMagicRule::matchNumber<quint8>;
297         }
298         break;
299     case Big16:
300     case Little16:
301         if (m_number <= quint16(-1)) {
302             m_number = m_type == Little16 ? qFromLittleEndian<quint16>(m_number) : qFromBigEndian<quint16>(m_number);
303             if (m_numberMask != 0)
304                 m_numberMask = m_type == Little16 ? qFromLittleEndian<quint16>(m_numberMask) : qFromBigEndian<quint16>(m_numberMask);
305         }
306         Q_FALLTHROUGH();
307     case Host16:
308         if (m_number <= quint16(-1)) {
309             if (m_numberMask == 0)
310                 m_numberMask = quint16(-1);
311             m_matchFunction = &QMimeMagicRule::matchNumber<quint16>;
312         }
313         break;
314     case Big32:
315     case Little32:
316         m_number = m_type == Little32 ? qFromLittleEndian<quint32>(m_number) : qFromBigEndian<quint32>(m_number);
317         if (m_numberMask != 0)
318             m_numberMask = m_type == Little32 ? qFromLittleEndian<quint32>(m_numberMask) : qFromBigEndian<quint32>(m_numberMask);
319         Q_FALLTHROUGH();
320     case Host32:
321         if (m_numberMask == 0)
322             m_numberMask = quint32(-1);
323         m_matchFunction = &QMimeMagicRule::matchNumber<quint32>;
324         break;
325     default:
326         break;
327     }
328 }
329 
mask() const330 QByteArray QMimeMagicRule::mask() const
331 {
332     QByteArray result = m_mask;
333     if (m_type == String) {
334         // restore '0x'
335         result = "0x" + result.toHex();
336     }
337     return result;
338 }
339 
matches(const QByteArray & data) const340 bool QMimeMagicRule::matches(const QByteArray &data) const
341 {
342     const bool ok = m_matchFunction && (this->*m_matchFunction)(data);
343     if (!ok)
344         return false;
345 
346     // No submatch? Then we are done.
347     if (m_subMatches.isEmpty())
348         return true;
349 
350     //qDebug() << "Checking" << m_subMatches.count() << "sub-rules";
351     // Check that one of the submatches matches too
352     for ( QList<QMimeMagicRule>::const_iterator it = m_subMatches.begin(), end = m_subMatches.end() ;
353           it != end ; ++it ) {
354         if ((*it).matches(data)) {
355             // One of the hierarchies matched -> mimetype recognized.
356             return true;
357         }
358     }
359     return false;
360 
361 
362 }
363 
364 QT_END_NAMESPACE
365