1 /****************************************************************************
2 **
3 ** Copyright (C) 2015 The Qt Company Ltd.
4 ** Contact: http://www.qt.io/licensing/
5 **
6 ** This file is part of the QtCore module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see http://www.qt.io/terms-conditions. For further
15 ** information use the contact form at http://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 2.1 or version 3 as published by the Free
20 ** Software Foundation and appearing in the file LICENSE.LGPLv21 and
21 ** LICENSE.LGPLv3 included in the packaging of this file. Please review the
22 ** following information to ensure the GNU Lesser General Public License
23 ** requirements will be met: https://www.gnu.org/licenses/lgpl.html and
24 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
25 **
26 ** As a special exception, The Qt Company gives you certain additional
27 ** rights. These rights are described in The Qt Company LGPL Exception
28 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
29 **
30 ** GNU General Public License Usage
31 ** Alternatively, this file may be used under the terms of the GNU
32 ** General Public License version 3.0 as published by the Free Software
33 ** Foundation and appearing in the file LICENSE.GPL included in the
34 ** packaging of this file. Please review the following information to
35 ** ensure the GNU General Public License version 3.0 requirements will be
36 ** met: http://www.gnu.org/copyleft/gpl.html.
37 **
38 ** $QT_END_LICENSE$
39 **
40 ****************************************************************************/
41
42 #include "qbytearraymatcher.h"
43
44 #include <limits.h>
45
46 QT_BEGIN_NAMESPACE
47
bm_init_skiptable(const uchar * cc,int len,uchar * skiptable)48 static inline void bm_init_skiptable(const uchar *cc, int len, uchar *skiptable)
49 {
50 int l = qMin(len, 255);
51 memset(skiptable, l, 256*sizeof(uchar));
52 cc += len - l;
53 while (l--)
54 skiptable[*cc++] = l;
55 }
56
bm_find(const uchar * cc,int l,int index,const uchar * puc,uint pl,const uchar * skiptable)57 static inline int bm_find(const uchar *cc, int l, int index, const uchar *puc, uint pl,
58 const uchar *skiptable)
59 {
60 if (pl == 0)
61 return index > l ? -1 : index;
62 const uint pl_minus_one = pl - 1;
63
64 const uchar *current = cc + index + pl_minus_one;
65 const uchar *end = cc + l;
66 while (current < end) {
67 uint skip = skiptable[*current];
68 if (!skip) {
69 // possible match
70 while (skip < pl) {
71 if (*(current - skip) != puc[pl_minus_one - skip])
72 break;
73 skip++;
74 }
75 if (skip > pl_minus_one) // we have a match
76 return (current - cc) - skip + 1;
77
78 // in case we don't have a match we are a bit inefficient as we only skip by one
79 // when we have the non matching char in the string.
80 if (skiptable[*(current - skip)] == pl)
81 skip = pl - skip;
82 else
83 skip = 1;
84 }
85 if (current > end - skip)
86 break;
87 current += skip;
88 }
89 return -1; // not found
90 }
91
92 /*! \class QByteArrayMatcher
93 \brief The QByteArrayMatcher class holds a sequence of bytes that
94 can be quickly matched in a byte array.
95
96 \ingroup tools
97 \ingroup string-processing
98
99 This class is useful when you have a sequence of bytes that you
100 want to repeatedly match against some byte arrays (perhaps in a
101 loop), or when you want to search for the same sequence of bytes
102 multiple times in the same byte array. Using a matcher object and
103 indexIn() is faster than matching a plain QByteArray with
104 QByteArray::indexOf() if repeated matching takes place. This
105 class offers no benefit if you are doing one-off byte array
106 matches.
107
108 Create the QByteArrayMatcher with the QByteArray you want to
109 search for. Then call indexIn() on the QByteArray that you want to
110 search.
111
112 \sa QByteArray, QStringMatcher
113 */
114
115 /*!
116 Constructs an empty byte array matcher that won't match anything.
117 Call setPattern() to give it a pattern to match.
118 */
QByteArrayMatcher()119 QByteArrayMatcher::QByteArrayMatcher()
120 : d(0)
121 {
122 p.p = 0;
123 p.l = 0;
124 qMemSet(p.q_skiptable, 0, sizeof(p.q_skiptable));
125 }
126
127 /*!
128 Constructs a byte array matcher from \a pattern. \a pattern
129 has the given \a length. \a pattern must remain in scope, but
130 the destructor does not delete \a pattern.
131 */
QByteArrayMatcher(const char * pattern,int length)132 QByteArrayMatcher::QByteArrayMatcher(const char *pattern, int length)
133 : d(0)
134 {
135 p.p = reinterpret_cast<const uchar *>(pattern);
136 p.l = length;
137 bm_init_skiptable(p.p, p.l, p.q_skiptable);
138 }
139
140 /*!
141 Constructs a byte array matcher that will search for \a pattern.
142 Call indexIn() to perform a search.
143 */
QByteArrayMatcher(const QByteArray & pattern)144 QByteArrayMatcher::QByteArrayMatcher(const QByteArray &pattern)
145 : d(0), q_pattern(pattern)
146 {
147 p.p = reinterpret_cast<const uchar *>(pattern.constData());
148 p.l = pattern.size();
149 bm_init_skiptable(p.p, p.l, p.q_skiptable);
150 }
151
152 /*!
153 Copies the \a other byte array matcher to this byte array matcher.
154 */
QByteArrayMatcher(const QByteArrayMatcher & other)155 QByteArrayMatcher::QByteArrayMatcher(const QByteArrayMatcher &other)
156 : d(0)
157 {
158 operator=(other);
159 }
160
161 /*!
162 Destroys the byte array matcher.
163 */
~QByteArrayMatcher()164 QByteArrayMatcher::~QByteArrayMatcher()
165 {
166 }
167
168 /*!
169 Assigns the \a other byte array matcher to this byte array matcher.
170 */
operator =(const QByteArrayMatcher & other)171 QByteArrayMatcher &QByteArrayMatcher::operator=(const QByteArrayMatcher &other)
172 {
173 q_pattern = other.q_pattern;
174 memcpy(&p, &other.p, sizeof(p));
175 return *this;
176 }
177
178 /*!
179 Sets the byte array that this byte array matcher will search for
180 to \a pattern.
181
182 \sa pattern(), indexIn()
183 */
setPattern(const QByteArray & pattern)184 void QByteArrayMatcher::setPattern(const QByteArray &pattern)
185 {
186 q_pattern = pattern;
187 p.p = reinterpret_cast<const uchar *>(pattern.constData());
188 p.l = pattern.size();
189 bm_init_skiptable(p.p, p.l, p.q_skiptable);
190 }
191
192 /*!
193 Searches the byte array \a ba, from byte position \a from (default
194 0, i.e. from the first byte), for the byte array pattern() that
195 was set in the constructor or in the most recent call to
196 setPattern(). Returns the position where the pattern() matched in
197 \a ba, or -1 if no match was found.
198 */
indexIn(const QByteArray & ba,int from) const199 int QByteArrayMatcher::indexIn(const QByteArray &ba, int from) const
200 {
201 if (from < 0)
202 from = 0;
203 return bm_find(reinterpret_cast<const uchar *>(ba.constData()), ba.size(), from,
204 p.p, p.l, p.q_skiptable);
205 }
206
207 /*!
208 Searches the char string \a str, which has length \a len, from
209 byte position \a from (default 0, i.e. from the first byte), for
210 the byte array pattern() that was set in the constructor or in the
211 most recent call to setPattern(). Returns the position where the
212 pattern() matched in \a str, or -1 if no match was found.
213 */
indexIn(const char * str,int len,int from) const214 int QByteArrayMatcher::indexIn(const char *str, int len, int from) const
215 {
216 if (from < 0)
217 from = 0;
218 return bm_find(reinterpret_cast<const uchar *>(str), len, from,
219 p.p, p.l, p.q_skiptable);
220 }
221
222 /*!
223 \fn QByteArray QByteArrayMatcher::pattern() const
224
225 Returns the byte array pattern that this byte array matcher will
226 search for.
227
228 \sa setPattern()
229 */
230
231
findChar(const char * str,int len,char ch,int from)232 static int findChar(const char *str, int len, char ch, int from)
233 {
234 const uchar *s = (const uchar *)str;
235 uchar c = (uchar)ch;
236 if (from < 0)
237 from = qMax(from + len, 0);
238 if (from < len) {
239 const uchar *n = s + from - 1;
240 const uchar *e = s + len;
241 while (++n != e)
242 if (*n == c)
243 return n - s;
244 }
245 return -1;
246 }
247
248 /*! \internal
249 */
qFindByteArrayBoyerMoore(const char * haystack,int haystackLen,int haystackOffset,const char * needle,int needleLen)250 static int qFindByteArrayBoyerMoore(
251 const char *haystack, int haystackLen, int haystackOffset,
252 const char *needle, int needleLen)
253 {
254 uchar skiptable[256];
255 bm_init_skiptable((const uchar *)needle, needleLen, skiptable);
256 if (haystackOffset < 0)
257 haystackOffset = 0;
258 return bm_find((const uchar *)haystack, haystackLen, haystackOffset,
259 (const uchar *)needle, needleLen, skiptable);
260 }
261
262 #define REHASH(a) \
263 if (sl_minus_1 < sizeof(uint) * CHAR_BIT) \
264 hashHaystack -= (a) << sl_minus_1; \
265 hashHaystack <<= 1
266
267 /*! \internal
268 */
qFindByteArray(const char * haystack0,int haystackLen,int from,const char * needle,int needleLen)269 int qFindByteArray(
270 const char *haystack0, int haystackLen, int from,
271 const char *needle, int needleLen)
272 {
273 const int l = haystackLen;
274 const int sl = needleLen;
275 if (from < 0)
276 from += l;
277 if (uint(sl + from) > (uint)l)
278 return -1;
279 if (!sl)
280 return from;
281 if (!l)
282 return -1;
283
284 if (sl == 1)
285 return findChar(haystack0, haystackLen, needle[0], from);
286
287 /*
288 We use the Boyer-Moore algorithm in cases where the overhead
289 for the skip table should pay off, otherwise we use a simple
290 hash function.
291 */
292 if (l > 500 && sl > 5)
293 return qFindByteArrayBoyerMoore(haystack0, haystackLen, from,
294 needle, needleLen);
295
296 /*
297 We use some hashing for efficiency's sake. Instead of
298 comparing strings, we compare the hash value of str with that
299 of a part of this QString. Only if that matches, we call memcmp().
300 */
301 const char *haystack = haystack0 + from;
302 const char *end = haystack0 + (l - sl);
303 const uint sl_minus_1 = sl - 1;
304 uint hashNeedle = 0, hashHaystack = 0;
305 int idx;
306 for (idx = 0; idx < sl; ++idx) {
307 hashNeedle = ((hashNeedle<<1) + needle[idx]);
308 hashHaystack = ((hashHaystack<<1) + haystack[idx]);
309 }
310 hashHaystack -= *(haystack + sl_minus_1);
311
312 while (haystack <= end) {
313 hashHaystack += *(haystack + sl_minus_1);
314 if (hashHaystack == hashNeedle && *needle == *haystack
315 && memcmp(needle, haystack, sl) == 0)
316 return haystack - haystack0;
317
318 REHASH(*haystack);
319 ++haystack;
320 }
321 return -1;
322 }
323
324 QT_END_NAMESPACE
325