1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
#include "mimemagicrule_p.h"

#include <QtCore/QList>
#include <QtCore/QRegularExpression>
#include <QtCore/QDebug>
#include <qendian.h>

#include <cstring>
46 
47 using namespace Utils;
48 using namespace Utils::Internal;
49 
// Names of the magic rule types, stored back to back as NUL-terminated strings.
// The entries must stay in the same order as MimeMagicRule::Type, because the
// enum value is used as the index into magicRuleTypes_indices below.
static const char magicRuleTypes_string[] =
    "invalid\0"
    "string\0"
    "regexp\0"
    "host16\0"
    "host32\0"
    "big16\0"
    "big32\0"
    "little16\0"
    "little32\0"
    "byte\0"
    "\0";

// Byte offset of each name inside magicRuleTypes_string.
static const int magicRuleTypes_indices[] = {
    0,  // "invalid"
    8,  // "string"
    15, // "regexp"
    22, // "host16"
    29, // "host32"
    36, // "big16"
    42, // "big32"
    48, // "little16"
    57, // "little32"
    66, // "byte"
    71, // "" (empty terminating entry)
    0
};
67 
type(const QByteArray & theTypeName)68 MimeMagicRule::Type MimeMagicRule::type(const QByteArray &theTypeName)
69 {
70     for (int i = String; i <= Byte; ++i) {
71         if (theTypeName == magicRuleTypes_string + magicRuleTypes_indices[i])
72             return Type(i);
73     }
74     return Invalid;
75 }
76 
typeName(MimeMagicRule::Type theType)77 QByteArray MimeMagicRule::typeName(MimeMagicRule::Type theType)
78 {
79     return magicRuleTypes_string + magicRuleTypes_indices[theType];
80 }
81 
82 namespace Utils {
83 namespace Internal {
84 
85 class MimeMagicRulePrivate
86 {
87 public:
88     bool operator==(const MimeMagicRulePrivate &other) const;
89 
90     MimeMagicRule::Type type;
91     QByteArray value;
92     int startPos;
93     int endPos;
94     QByteArray mask;
95 
96     QRegularExpression regexp;
97     QByteArray pattern;
98     quint32 number;
99     quint32 numberMask;
100 
101     using MatchFunction = bool (*)(const MimeMagicRulePrivate*, const QByteArray&);
102     MatchFunction matchFunction;
103 };
104 
operator ==(const MimeMagicRulePrivate & other) const105 bool MimeMagicRulePrivate::operator==(const MimeMagicRulePrivate &other) const
106 {
107     return type == other.type &&
108            value == other.value &&
109            startPos == other.startPos &&
110            endPos == other.endPos &&
111            mask == other.mask &&
112            pattern == other.pattern &&
113            number == other.number &&
114            numberMask == other.numberMask &&
115            matchFunction == other.matchFunction;
116 }
117 
118 } // Internal
119 } // Utils
120 
121 // Used by both providers
matchSubstring(const char * dataPtr,int dataSize,int rangeStart,int rangeLength,int valueLength,const char * valueData,const char * mask)122 bool MimeMagicRule::matchSubstring(const char *dataPtr, int dataSize, int rangeStart, int rangeLength,
123                                     int valueLength, const char *valueData, const char *mask)
124 {
125     // Size of searched data.
126     // Example: value="ABC", rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match)
127     const int dataNeeded = qMin(rangeLength + valueLength - 1, dataSize - rangeStart);
128 
129     if (!mask) {
130         // callgrind says QByteArray::indexOf is much slower, since our strings are typically too
131         // short for be worth Boyer-Moore matching (1 to 71 bytes, 11 bytes on average).
132         bool found = false;
133         for (int i = rangeStart; i < rangeStart + rangeLength; ++i) {
134             if (i + valueLength > dataSize)
135                 break;
136 
137             if (memcmp(valueData, dataPtr + i, valueLength) == 0) {
138                 found = true;
139                 break;
140             }
141         }
142         if (!found)
143             return false;
144     } else {
145         bool found = false;
146         const char *readDataBase = dataPtr + rangeStart;
147         // Example (continued from above):
148         // deviceSize is 4, so dataNeeded was max'ed to 4.
149         // maxStartPos = 4 - 3 + 1 = 2, and indeed
150         // we need to check for a match a positions 0 and 1 (ABCx and xABC).
151         const int maxStartPos = dataNeeded - valueLength + 1;
152         for (int i = 0; i < maxStartPos; ++i) {
153             const char *d = readDataBase + i;
154             bool valid = true;
155             for (int idx = 0; idx < valueLength; ++idx) {
156                 if (((*d++) & mask[idx]) != (valueData[idx] & mask[idx])) {
157                     valid = false;
158                     break;
159                 }
160             }
161             if (valid)
162                 found = true;
163         }
164         if (!found)
165             return false;
166     }
167     //qDebug() << "Found" << value << "in" << searchedData;
168     return true;
169 }
170 
matchString(const MimeMagicRulePrivate * d,const QByteArray & data)171 static bool matchString(const MimeMagicRulePrivate *d, const QByteArray &data)
172 {
173     const int rangeLength = d->endPos - d->startPos + 1;
174     return MimeMagicRule::matchSubstring(data.constData(), data.size(), d->startPos, rangeLength, d->pattern.size(), d->pattern.constData(), d->mask.constData());
175 }
176 
matchRegExp(const MimeMagicRulePrivate * d,const QByteArray & data)177 static bool matchRegExp(const MimeMagicRulePrivate *d, const QByteArray &data)
178 {
179     const QString str = QString::fromUtf8(data);
180     int length = d->endPos;
181     if (length == d->startPos)
182         length = -1; // from startPos to end of string
183     const QString subStr = str.left(length);
184     return d->regexp.match(subStr, d->startPos).hasMatch();
185 }
186 
187 template <typename T>
matchNumber(const MimeMagicRulePrivate * d,const QByteArray & data)188 static bool matchNumber(const MimeMagicRulePrivate *d, const QByteArray &data)
189 {
190     const T value(d->number);
191     const T mask(d->numberMask);
192 
193     //qDebug() << "matchNumber" << "0x" << QString::number(d->number, 16) << "size" << sizeof(T);
194     //qDebug() << "mask" << QString::number(d->numberMask, 16);
195 
196     const char *p = data.constData() + d->startPos;
197     const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), d->endPos + 1);
198     for ( ; p <= e; ++p) {
199         if ((*reinterpret_cast<const T*>(p) & mask) == (value & mask))
200             return true;
201     }
202 
203     return false;
204 }
205 
// Resolves the backslash escapes in a string rule value and returns the raw
// bytes to search for.
//
// Supported escapes:
//   \xH, \xHH         byte given by up to two hexadecimal digits
//   \O, \OO, \OOO     byte given by up to three octal digits (three digits are
//                     only read when the first one is 0-3)
//   \n, \r            line feed, carriage return
//   \<any other>      that character itself
// A backslash that is the very last character of the value has nothing to
// escape and is copied literally.
static inline QByteArray makePattern(const QByteArray &value)
{
    // Every escape sequence is at least as long as the byte it produces, so the
    // input size is a safe upper bound for the output.
    QByteArray pattern(value.size(), Qt::Uninitialized);
    char *data = pattern.data();

    const char *p = value.constData();
    const char *e = p + value.size();
    for ( ; p < e; ++p) {
        if (*p != '\\' || p + 1 == e) {
            // Ordinary character, or a trailing lone backslash.
            *data++ = *p;
            continue;
        }

        ++p; // now on the character that follows the backslash
        if (*p == 'x') { // hex (\\xff)
            char c = 0;
            for (int i = 0; i < 2 && p + 1 < e; ++i) {
                ++p;
                if (*p >= '0' && *p <= '9')
                    c = (c << 4) + *p - '0';
                else if (*p >= 'a' && *p <= 'f')
                    c = (c << 4) + *p - 'a' + 10;
                else if (*p >= 'A' && *p <= 'F')
                    c = (c << 4) + *p - 'A' + 10;
                // Any other character is consumed without contributing to c.
            }
            *data++ = c;
        } else if (*p >= '0' && *p <= '7') { // oct (\\7, or \\77, or \\377)
            char c = *p - '0';
            if (p + 1 < e && p[1] >= '0' && p[1] <= '7') {
                c = (c << 3) + *(++p) - '0';
                // p[-1] is the first digit here: a third digit only fits into
                // one byte if the first digit is at most 3.
                if (p + 1 < e && p[1] >= '0' && p[1] <= '7' && p[-1] <= '3')
                    c = (c << 3) + *(++p) - '0';
            }
            *data++ = c;
        } else if (*p == 'n') {
            *data++ = '\n';
        } else if (*p == 'r') {
            *data++ = '\r';
        } else { // escaped
            *data++ = *p;
        }
    }
    pattern.truncate(data - pattern.data());

    return pattern;
}
252 
MimeMagicRule(MimeMagicRule::Type theType,const QByteArray & theValue,int theStartPos,int theEndPos,const QByteArray & theMask,QString * errorString)253 MimeMagicRule::MimeMagicRule(MimeMagicRule::Type theType,
254                              const QByteArray &theValue,
255                              int theStartPos,
256                              int theEndPos,
257                              const QByteArray &theMask,
258                              QString *errorString) :
259     d(new MimeMagicRulePrivate)
260 {
261     d->type = theType;
262     d->value = theValue;
263     d->startPos = theStartPos;
264     d->endPos = theEndPos;
265     d->mask = theMask;
266     d->matchFunction = nullptr;
267 
268     if (d->value.isEmpty()) {
269         d->type = Invalid;
270         if (errorString)
271             *errorString = QLatin1String("Invalid empty magic rule value");
272         return;
273     }
274 
275     if (d->type >= Host16 && d->type <= Byte) {
276         bool ok;
277         d->number = d->value.toUInt(&ok, 0); // autodetect
278         if (!ok) {
279             d->type = Invalid;
280             if (errorString)
281                 *errorString = QString::fromLatin1("Invalid magic rule value \"%1\"").arg(
282                         QString::fromLatin1(d->value));
283             return;
284         }
285         d->numberMask = !d->mask.isEmpty() ? d->mask.toUInt(&ok, 0) : 0; // autodetect
286     }
287 
288     switch (d->type) {
289     case String:
290         d->pattern = makePattern(d->value);
291         d->pattern.squeeze();
292         if (!d->mask.isEmpty()) {
293             if (d->mask.size() < 4 || !d->mask.startsWith("0x")) {
294                 d->type = Invalid;
295                 if (errorString)
296                     *errorString = QString::fromLatin1("Invalid magic rule mask \"%1\"").arg(
297                             QString::fromLatin1(d->mask));
298                 return;
299             }
300             const QByteArray &tempMask = QByteArray::fromHex(QByteArray::fromRawData(
301                                                      d->mask.constData() + 2, d->mask.size() - 2));
302             if (tempMask.size() != d->pattern.size()) {
303                 d->type = Invalid;
304                 if (errorString)
305                     *errorString = QString::fromLatin1("Invalid magic rule mask size \"%1\"").arg(
306                             QString::fromLatin1(d->mask));
307                 return;
308             }
309             d->mask = tempMask;
310         } else {
311             d->mask.fill(char(-1), d->pattern.size());
312         }
313         d->mask.squeeze();
314         d->matchFunction = matchString;
315         break;
316     case RegExp:
317         d->regexp.setPatternOptions(QRegularExpression::MultilineOption
318                                     | QRegularExpression::DotMatchesEverythingOption
319                                     );
320         d->regexp.setPattern(QString::fromUtf8(d->value));
321         if (!d->regexp.isValid()) {
322             d->type = Invalid;
323             if (errorString)
324                 *errorString = QString::fromLatin1("Invalid magic rule regexp value \"%1\"").arg(
325                         QString::fromLatin1(d->value));
326             return;
327         }
328         d->matchFunction = matchRegExp;
329         break;
330     case Byte:
331         if (d->number <= quint8(-1)) {
332             if (d->numberMask == 0)
333                 d->numberMask = quint8(-1);
334             d->matchFunction = matchNumber<quint8>;
335         }
336         break;
337     case Big16:
338     case Host16:
339     case Little16:
340         if (d->number <= quint16(-1)) {
341             d->number = d->type == Little16 ? qFromLittleEndian<quint16>(d->number) : qFromBigEndian<quint16>(d->number);
342             if (d->numberMask == 0)
343                 d->numberMask = quint16(-1);
344             d->matchFunction = matchNumber<quint16>;
345         }
346         break;
347     case Big32:
348     case Host32:
349     case Little32:
350         if (d->number <= quint32(-1)) {
351             d->number = d->type == Little32 ? qFromLittleEndian<quint32>(d->number) : qFromBigEndian<quint32>(d->number);
352             if (d->numberMask == 0)
353                 d->numberMask = quint32(-1);
354             d->matchFunction = matchNumber<quint32>;
355         }
356         break;
357     default:
358         break;
359     }
360 }
361 
// Copy constructor: copies the list of sub-rules and gives the new rule its own
// private object, copy-constructed from the one held by other.
MimeMagicRule::MimeMagicRule(const MimeMagicRule &other) :
    m_subMatches(other.m_subMatches),
    d(new MimeMagicRulePrivate(*other.d))
{
}
367 
368 MimeMagicRule::~MimeMagicRule() = default;
369 
// Copy assignment: takes over other's sub-rules and copies other's private data
// into this rule's existing private object.
MimeMagicRule &MimeMagicRule::operator=(const MimeMagicRule &other)
{
    m_subMatches = other.m_subMatches;
    *d = *other.d;
    return *this;
}
376 
operator ==(const MimeMagicRule & other) const377 bool MimeMagicRule::operator==(const MimeMagicRule &other) const
378 {
379     return (d == other.d || *d == *other.d) && m_subMatches == other.m_subMatches;
380 }
381 
type() const382 MimeMagicRule::Type MimeMagicRule::type() const
383 {
384     return d->type;
385 }
386 
value() const387 QByteArray MimeMagicRule::value() const
388 {
389     return d->value;
390 }
391 
startPos() const392 int MimeMagicRule::startPos() const
393 {
394     return d->startPos;
395 }
396 
endPos() const397 int MimeMagicRule::endPos() const
398 {
399     return d->endPos;
400 }
401 
mask() const402 QByteArray MimeMagicRule::mask() const
403 {
404     QByteArray result = d->mask;
405     if (d->type == String) {
406         // restore '0x'
407         result = "0x" + result.toHex();
408     }
409     return result;
410 }
411 
isValid() const412 bool MimeMagicRule::isValid() const
413 {
414     return d->matchFunction;
415 }
416 
matches(const QByteArray & data) const417 bool MimeMagicRule::matches(const QByteArray &data) const
418 {
419     const bool ok = d->matchFunction && d->matchFunction(d.data(), data);
420     if (!ok)
421         return false;
422 
423     // No submatch? Then we are done.
424     if (m_subMatches.isEmpty())
425         return true;
426 
427     //qDebug() << "Checking" << m_subMatches.count() << "sub-rules";
428     // Check that one of the submatches matches too
429     for ( QList<MimeMagicRule>::const_iterator it = m_subMatches.begin(), end = m_subMatches.end() ;
430           it != end ; ++it ) {
431         if ((*it).matches(data)) {
432             // One of the hierarchies matched -> mimetype recognized.
433             return true;
434         }
435     }
436     return false;
437 
438 
439 }
440