1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39
40 #include "mimemagicrule_p.h"
41
42 #include <QtCore/QList>
43 #include <QtCore/QRegularExpression>
44 #include <QtCore/QDebug>
45 #include <qendian.h>
46
47 using namespace Utils;
48 using namespace Utils::Internal;
49
// Textual names of the magic rule types, packed back to back as
// NUL-terminated strings. Keep in the same order as Type!
static const char magicRuleTypes_string[] =
    "invalid\0"
    "string\0"
    "regexp\0"
    "host16\0"
    "host32\0"
    "big16\0"
    "big32\0"
    "little16\0"
    "little32\0"
    "byte\0"
    "\0";

// Byte offset of each name inside magicRuleTypes_string, indexed by Type.
static const int magicRuleTypes_indices[] = {
    0,  // invalid
    8,  // string
    15, // regexp
    22, // host16
    29, // host32
    36, // big16
    42, // big32
    48, // little16
    57, // little32
    66, // byte
    71, // empty terminator entry
    0
};
67
type(const QByteArray & theTypeName)68 MimeMagicRule::Type MimeMagicRule::type(const QByteArray &theTypeName)
69 {
70 for (int i = String; i <= Byte; ++i) {
71 if (theTypeName == magicRuleTypes_string + magicRuleTypes_indices[i])
72 return Type(i);
73 }
74 return Invalid;
75 }
76
typeName(MimeMagicRule::Type theType)77 QByteArray MimeMagicRule::typeName(MimeMagicRule::Type theType)
78 {
79 return magicRuleTypes_string + magicRuleTypes_indices[theType];
80 }
81
82 namespace Utils {
83 namespace Internal {
84
85 class MimeMagicRulePrivate
86 {
87 public:
88 bool operator==(const MimeMagicRulePrivate &other) const;
89
90 MimeMagicRule::Type type;
91 QByteArray value;
92 int startPos;
93 int endPos;
94 QByteArray mask;
95
96 QRegularExpression regexp;
97 QByteArray pattern;
98 quint32 number;
99 quint32 numberMask;
100
101 using MatchFunction = bool (*)(const MimeMagicRulePrivate*, const QByteArray&);
102 MatchFunction matchFunction;
103 };
104
operator ==(const MimeMagicRulePrivate & other) const105 bool MimeMagicRulePrivate::operator==(const MimeMagicRulePrivate &other) const
106 {
107 return type == other.type &&
108 value == other.value &&
109 startPos == other.startPos &&
110 endPos == other.endPos &&
111 mask == other.mask &&
112 pattern == other.pattern &&
113 number == other.number &&
114 numberMask == other.numberMask &&
115 matchFunction == other.matchFunction;
116 }
117
118 } // Internal
119 } // Utils
120
121 // Used by both providers
matchSubstring(const char * dataPtr,int dataSize,int rangeStart,int rangeLength,int valueLength,const char * valueData,const char * mask)122 bool MimeMagicRule::matchSubstring(const char *dataPtr, int dataSize, int rangeStart, int rangeLength,
123 int valueLength, const char *valueData, const char *mask)
124 {
125 // Size of searched data.
126 // Example: value="ABC", rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match)
127 const int dataNeeded = qMin(rangeLength + valueLength - 1, dataSize - rangeStart);
128
129 if (!mask) {
130 // callgrind says QByteArray::indexOf is much slower, since our strings are typically too
131 // short for be worth Boyer-Moore matching (1 to 71 bytes, 11 bytes on average).
132 bool found = false;
133 for (int i = rangeStart; i < rangeStart + rangeLength; ++i) {
134 if (i + valueLength > dataSize)
135 break;
136
137 if (memcmp(valueData, dataPtr + i, valueLength) == 0) {
138 found = true;
139 break;
140 }
141 }
142 if (!found)
143 return false;
144 } else {
145 bool found = false;
146 const char *readDataBase = dataPtr + rangeStart;
147 // Example (continued from above):
148 // deviceSize is 4, so dataNeeded was max'ed to 4.
149 // maxStartPos = 4 - 3 + 1 = 2, and indeed
150 // we need to check for a match a positions 0 and 1 (ABCx and xABC).
151 const int maxStartPos = dataNeeded - valueLength + 1;
152 for (int i = 0; i < maxStartPos; ++i) {
153 const char *d = readDataBase + i;
154 bool valid = true;
155 for (int idx = 0; idx < valueLength; ++idx) {
156 if (((*d++) & mask[idx]) != (valueData[idx] & mask[idx])) {
157 valid = false;
158 break;
159 }
160 }
161 if (valid)
162 found = true;
163 }
164 if (!found)
165 return false;
166 }
167 //qDebug() << "Found" << value << "in" << searchedData;
168 return true;
169 }
170
matchString(const MimeMagicRulePrivate * d,const QByteArray & data)171 static bool matchString(const MimeMagicRulePrivate *d, const QByteArray &data)
172 {
173 const int rangeLength = d->endPos - d->startPos + 1;
174 return MimeMagicRule::matchSubstring(data.constData(), data.size(), d->startPos, rangeLength, d->pattern.size(), d->pattern.constData(), d->mask.constData());
175 }
176
matchRegExp(const MimeMagicRulePrivate * d,const QByteArray & data)177 static bool matchRegExp(const MimeMagicRulePrivate *d, const QByteArray &data)
178 {
179 const QString str = QString::fromUtf8(data);
180 int length = d->endPos;
181 if (length == d->startPos)
182 length = -1; // from startPos to end of string
183 const QString subStr = str.left(length);
184 return d->regexp.match(subStr, d->startPos).hasMatch();
185 }
186
187 template <typename T>
matchNumber(const MimeMagicRulePrivate * d,const QByteArray & data)188 static bool matchNumber(const MimeMagicRulePrivate *d, const QByteArray &data)
189 {
190 const T value(d->number);
191 const T mask(d->numberMask);
192
193 //qDebug() << "matchNumber" << "0x" << QString::number(d->number, 16) << "size" << sizeof(T);
194 //qDebug() << "mask" << QString::number(d->numberMask, 16);
195
196 const char *p = data.constData() + d->startPos;
197 const char *e = data.constData() + qMin(data.size() - int(sizeof(T)), d->endPos + 1);
198 for ( ; p <= e; ++p) {
199 if ((*reinterpret_cast<const T*>(p) & mask) == (value & mask))
200 return true;
201 }
202
203 return false;
204 }
205
// Converts the textual value of a string rule into the raw bytes to search
// for by resolving backslash escapes:
//   \xHH           up to two hex digits
//   \7, \77, \377  one to three octal digits
//   \n, \r         newline, carriage return
//   \<other>       the character itself
// The result is never longer than value.
// NOTE(review): a lone backslash at the very end of value is not treated as
// an escape; the byte at the end position is copied instead (presumably the
// QByteArray terminator) - confirm whether this is intended.
static inline QByteArray makePattern(const QByteArray &value)
{
    QByteArray pattern(value.size(), Qt::Uninitialized);
    char *out = pattern.data();

    const char *src = value.constData();
    const char *const srcEnd = src + value.size();
    while (src < srcEnd) {
        // A backslash only starts an escape when another character follows it.
        const bool escaped = (*src == '\\') && (++src < srcEnd);
        if (!escaped) {
            *out++ = *src;
            ++src;
            continue;
        }

        const char kind = *src;
        if (kind == 'x') { // hex (\\xff)
            char byte = 0;
            for (int digit = 0; digit < 2 && src + 1 < srcEnd; ++digit) {
                ++src;
                const char ch = *src;
                if (ch >= '0' && ch <= '9')
                    byte = (byte << 4) + ch - '0';
                else if (ch >= 'a' && ch <= 'f')
                    byte = (byte << 4) + ch - 'a' + 10;
                else if (ch >= 'A' && ch <= 'F')
                    byte = (byte << 4) + ch - 'A' + 10;
                // any other character is consumed without contributing
            }
            *out++ = byte;
        } else if (kind >= '0' && kind <= '7') { // oct (\\7, or \\77, or \\377)
            char byte = kind - '0';
            if (src + 1 < srcEnd && src[1] >= '0' && src[1] <= '7') {
                byte = (byte << 3) + *(++src) - '0';
                // A third digit is only taken when the first one was <= 3,
                // so that the result still fits into one byte.
                if (src + 1 < srcEnd && src[1] >= '0' && src[1] <= '7' && kind <= '3')
                    byte = (byte << 3) + *(++src) - '0';
            }
            *out++ = byte;
        } else if (kind == 'n') {
            *out++ = '\n';
        } else if (kind == 'r') {
            *out++ = '\r';
        } else { // escaped
            *out++ = kind;
        }
        ++src;
    }
    pattern.truncate(out - pattern.data());

    return pattern;
}
252
MimeMagicRule(MimeMagicRule::Type theType,const QByteArray & theValue,int theStartPos,int theEndPos,const QByteArray & theMask,QString * errorString)253 MimeMagicRule::MimeMagicRule(MimeMagicRule::Type theType,
254 const QByteArray &theValue,
255 int theStartPos,
256 int theEndPos,
257 const QByteArray &theMask,
258 QString *errorString) :
259 d(new MimeMagicRulePrivate)
260 {
261 d->type = theType;
262 d->value = theValue;
263 d->startPos = theStartPos;
264 d->endPos = theEndPos;
265 d->mask = theMask;
266 d->matchFunction = nullptr;
267
268 if (d->value.isEmpty()) {
269 d->type = Invalid;
270 if (errorString)
271 *errorString = QLatin1String("Invalid empty magic rule value");
272 return;
273 }
274
275 if (d->type >= Host16 && d->type <= Byte) {
276 bool ok;
277 d->number = d->value.toUInt(&ok, 0); // autodetect
278 if (!ok) {
279 d->type = Invalid;
280 if (errorString)
281 *errorString = QString::fromLatin1("Invalid magic rule value \"%1\"").arg(
282 QString::fromLatin1(d->value));
283 return;
284 }
285 d->numberMask = !d->mask.isEmpty() ? d->mask.toUInt(&ok, 0) : 0; // autodetect
286 }
287
288 switch (d->type) {
289 case String:
290 d->pattern = makePattern(d->value);
291 d->pattern.squeeze();
292 if (!d->mask.isEmpty()) {
293 if (d->mask.size() < 4 || !d->mask.startsWith("0x")) {
294 d->type = Invalid;
295 if (errorString)
296 *errorString = QString::fromLatin1("Invalid magic rule mask \"%1\"").arg(
297 QString::fromLatin1(d->mask));
298 return;
299 }
300 const QByteArray &tempMask = QByteArray::fromHex(QByteArray::fromRawData(
301 d->mask.constData() + 2, d->mask.size() - 2));
302 if (tempMask.size() != d->pattern.size()) {
303 d->type = Invalid;
304 if (errorString)
305 *errorString = QString::fromLatin1("Invalid magic rule mask size \"%1\"").arg(
306 QString::fromLatin1(d->mask));
307 return;
308 }
309 d->mask = tempMask;
310 } else {
311 d->mask.fill(char(-1), d->pattern.size());
312 }
313 d->mask.squeeze();
314 d->matchFunction = matchString;
315 break;
316 case RegExp:
317 d->regexp.setPatternOptions(QRegularExpression::MultilineOption
318 | QRegularExpression::DotMatchesEverythingOption
319 );
320 d->regexp.setPattern(QString::fromUtf8(d->value));
321 if (!d->regexp.isValid()) {
322 d->type = Invalid;
323 if (errorString)
324 *errorString = QString::fromLatin1("Invalid magic rule regexp value \"%1\"").arg(
325 QString::fromLatin1(d->value));
326 return;
327 }
328 d->matchFunction = matchRegExp;
329 break;
330 case Byte:
331 if (d->number <= quint8(-1)) {
332 if (d->numberMask == 0)
333 d->numberMask = quint8(-1);
334 d->matchFunction = matchNumber<quint8>;
335 }
336 break;
337 case Big16:
338 case Host16:
339 case Little16:
340 if (d->number <= quint16(-1)) {
341 d->number = d->type == Little16 ? qFromLittleEndian<quint16>(d->number) : qFromBigEndian<quint16>(d->number);
342 if (d->numberMask == 0)
343 d->numberMask = quint16(-1);
344 d->matchFunction = matchNumber<quint16>;
345 }
346 break;
347 case Big32:
348 case Host32:
349 case Little32:
350 if (d->number <= quint32(-1)) {
351 d->number = d->type == Little32 ? qFromLittleEndian<quint32>(d->number) : qFromBigEndian<quint32>(d->number);
352 if (d->numberMask == 0)
353 d->numberMask = quint32(-1);
354 d->matchFunction = matchNumber<quint32>;
355 }
356 break;
357 default:
358 break;
359 }
360 }
361
// Copy constructor: copies the sub-rules and allocates a new private object
// that is a copy of other's private data.
MimeMagicRule::MimeMagicRule(const MimeMagicRule &other)
    : m_subMatches(other.m_subMatches)
    , d(new MimeMagicRulePrivate(*other.d))
{
}
367
// Defined here, where MimeMagicRulePrivate is a complete type.
MimeMagicRule::~MimeMagicRule() = default;
369
// Copy assignment: overwrites the existing private object with a copy of
// other's private data and copies the sub-rules.
MimeMagicRule &MimeMagicRule::operator=(const MimeMagicRule &other)
{
    *d = *other.d;
    m_subMatches = other.m_subMatches;
    return *this;
}
376
operator ==(const MimeMagicRule & other) const377 bool MimeMagicRule::operator==(const MimeMagicRule &other) const
378 {
379 return (d == other.d || *d == *other.d) && m_subMatches == other.m_subMatches;
380 }
381
// Returns the rule type; the constructor sets it to Invalid when it rejects
// the value, mask or regexp.
MimeMagicRule::Type MimeMagicRule::type() const
{
    return d->type;
}
386
// Returns the rule value exactly as it was passed to the constructor.
QByteArray MimeMagicRule::value() const
{
    return d->value;
}
391
// Returns the first offset at which the rule is checked.
int MimeMagicRule::startPos() const
{
    return d->startPos;
}
396
// Returns the end offset that was passed to the constructor.
int MimeMagicRule::endPos() const
{
    return d->endPos;
}
401
mask() const402 QByteArray MimeMagicRule::mask() const
403 {
404 QByteArray result = d->mask;
405 if (d->type == String) {
406 // restore '0x'
407 result = "0x" + result.toHex();
408 }
409 return result;
410 }
411
isValid() const412 bool MimeMagicRule::isValid() const
413 {
414 return d->matchFunction;
415 }
416
matches(const QByteArray & data) const417 bool MimeMagicRule::matches(const QByteArray &data) const
418 {
419 const bool ok = d->matchFunction && d->matchFunction(d.data(), data);
420 if (!ok)
421 return false;
422
423 // No submatch? Then we are done.
424 if (m_subMatches.isEmpty())
425 return true;
426
427 //qDebug() << "Checking" << m_subMatches.count() << "sub-rules";
428 // Check that one of the submatches matches too
429 for ( QList<MimeMagicRule>::const_iterator it = m_subMatches.begin(), end = m_subMatches.end() ;
430 it != end ; ++it ) {
431 if ((*it).matches(data)) {
432 // One of the hierarchies matched -> mimetype recognized.
433 return true;
434 }
435 }
436 return false;
437
438
439 }
440