1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the QtQml module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or (at your option) the GNU General
28 ** Public license version 3 or any later version approved by the KDE Free
29 ** Qt Foundation. The licenses are as published by the Free Software
30 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
31 ** included in the packaging of this file. Please review the following
32 ** information to ensure the GNU General Public License requirements will
33 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
34 ** https://www.gnu.org/licenses/gpl-3.0.html.
35 **
36 ** $QT_END_LICENSE$
37 **
38 ****************************************************************************/
39 
40 #include "qv4regexp_p.h"
41 #include "qv4engine_p.h"
42 #include "qv4scopedvalue_p.h"
43 #include <private/qv4mm_p.h>
44 #include <runtime/VM.h>
45 
46 using namespace QV4;
47 
jscFlags(uint flags)48 static JSC::RegExpFlags jscFlags(uint flags)
49 {
50     JSC::RegExpFlags jscFlags = JSC::NoFlags;
51     if (flags & CompiledData::RegExp::RegExp_Global)
52         jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagGlobal);
53     if (flags & CompiledData::RegExp::RegExp_IgnoreCase)
54         jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagIgnoreCase);
55     if (flags & CompiledData::RegExp::RegExp_Multiline)
56         jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagMultiline);
57     if (flags & CompiledData::RegExp::RegExp_Unicode)
58         jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagUnicode);
59     if (flags & CompiledData::RegExp::RegExp_Sticky)
60         jscFlags = static_cast<JSC::RegExpFlags>(flags | JSC::FlagSticky);
61     return jscFlags;
62 }
63 
~RegExpCache()64 RegExpCache::~RegExpCache()
65 {
66     for (RegExpCache::Iterator it = begin(), e = end(); it != e; ++it) {
67         if (RegExp *re = it.value().as<RegExp>())
68             re->d()->cache = nullptr;
69     }
70 }
71 
// Presumably emits the managed-object vtable definition for QV4::RegExp.
// NOTE(review): the macro is defined outside this file; confirm its expansion there.
DEFINE_MANAGED_VTABLE(RegExp);
73 
match(const QString & string,int start,uint * matchOffsets)74 uint RegExp::match(const QString &string, int start, uint *matchOffsets)
75 {
76     if (!isValid())
77         return JSC::Yarr::offsetNoMatch;
78 
79     WTF::String s(string);
80 
81 #if ENABLE(YARR_JIT)
82     static const uint offsetJITFail = std::numeric_limits<unsigned>::max() - 1;
83     auto *priv = d();
84     if (priv->hasValidJITCode()) {
85         uint ret = JSC::Yarr::offsetNoMatch;
86 #if ENABLE(YARR_JIT_ALL_PARENS_EXPRESSIONS)
87         char buffer[8192];
88         ret = uint(priv->jitCode->execute(s.characters16(), start, s.length(),
89                                           (int*)matchOffsets, buffer, 8192).start);
90 #else
91         ret = uint(priv->jitCode->execute(s.characters16(), start, s.length(),
92                                           (int*)matchOffsets).start);
93 #endif
94         if (ret != offsetJITFail)
95             return ret;
96 
97         // JIT failed. We need byteCode to run the interpreter.
98         if (!priv->byteCode) {
99             JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError;
100             JSC::Yarr::YarrPattern yarrPattern(WTF::String(*priv->pattern), jscFlags(priv->flags),
101                                                error);
102 
103             // As we successfully parsed the pattern before, we should still be able to.
104             Q_ASSERT(error == JSC::Yarr::ErrorCode::NoError);
105 
106             priv->byteCode = JSC::Yarr::byteCompile(
107                                      yarrPattern,
108                                      priv->internalClass->engine->bumperPointerAllocator).release();
109         }
110     }
111 #endif // ENABLE(YARR_JIT)
112 
113     return JSC::Yarr::interpret(byteCode(), s.characters16(), string.length(), start, matchOffsets);
114 }
115 
// Expands the replacement template \a replacement for one match.
//
// Recognised sequences:
//   $$        a literal '$'
//   $&        \a matched
//   $`        the part of \a str before \a position
//   $'        the part of \a str after the match (position + matched.length())
//   $n, $nn   captures[n] when 0 < n <= \a nCaptures; nothing is appended if
//             that capture holds no string; otherwise the sequence is copied
//             through verbatim
// A '$' followed by any other character is copied together with that
// character, and a trailing '$' is copied as is.
QString RegExp::getSubstitution(const QString &matched, const QString &str, int position, const Value *captures, int nCaptures, const QString &replacement)
{
    Q_ASSERT(position >= 0 && position <= str.length());

    const int tailStart = position + matched.length();
    const int templateLength = replacement.length();
    const auto isDecimalDigit = [](QChar c) {
        return c.unicode() >= '0' && c.unicode() <= '9';
    };

    QString output;
    int index = 0;
    while (index < templateLength) {
        const QChar current = replacement.at(index);
        if (current != QLatin1Char('$')) {
            output += current;
            ++index;
            continue;
        }

        const int dollarIndex = index;
        ++index;
        if (index == templateLength) {
            // Nothing follows the '$': keep it literally.
            output += QLatin1Char('$');
            break;
        }

        const QChar selector = replacement.at(index);
        switch (selector.unicode()) {
        case '$':
            output += QLatin1Char('$');
            break;
        case '&':
            output += matched;
            break;
        case '`':
            output += str.left(position);
            break;
        case '\'':
            output += str.mid(tailStart);
            break;
        default: {
            if (!isDecimalDigit(selector)) {
                output += QLatin1Char('$');
                output += selector;
                break;
            }

            // One digit is mandatory; a second one is consumed when present.
            int captureIndex = selector.unicode() - '0';
            if (index + 1 < templateLength && isDecimalDigit(replacement.at(index + 1))) {
                ++index;
                captureIndex = captureIndex * 10 + (replacement.at(index).unicode() - '0');
            }

            if (captureIndex > 0 && captureIndex <= nCaptures) {
                if (String *capture = captures[captureIndex].stringValue())
                    output += capture->toQString();
            } else {
                // Not a usable capture reference: emit "$n" / "$nn" unchanged.
                output += replacement.mid(dollarIndex, index - dollarIndex + 1);
            }
            break;
        }
        }
        ++index;
    }
    return output;
}
169 
flagsAsString() const170 QString Heap::RegExp::flagsAsString() const
171 {
172     QString result;
173     if (flags & CompiledData::RegExp::RegExp_Global)
174         result += QLatin1Char('g');
175     if (flags & CompiledData::RegExp::RegExp_IgnoreCase)
176         result += QLatin1Char('i');
177     if (flags & CompiledData::RegExp::RegExp_Multiline)
178         result += QLatin1Char('m');
179     if (flags & CompiledData::RegExp::RegExp_Unicode)
180         result += QLatin1Char('u');
181     if (flags & CompiledData::RegExp::RegExp_Sticky)
182         result += QLatin1Char('y');
183     return result;
184 }
185 
create(ExecutionEngine * engine,const QString & pattern,uint flags)186 Heap::RegExp *RegExp::create(ExecutionEngine* engine, const QString& pattern, uint flags)
187 {
188     RegExpCacheKey key(pattern, flags);
189 
190     RegExpCache *cache = engine->regExpCache;
191     if (!cache)
192         cache = engine->regExpCache = new RegExpCache;
193 
194     QV4::WeakValue &cachedValue = (*cache)[key];
195     if (QV4::RegExp *result = cachedValue.as<RegExp>())
196         return result->d();
197 
198     Scope scope(engine);
199     Scoped<RegExp> result(scope, engine->memoryManager->alloc<RegExp>(engine, pattern, flags));
200 
201     result->d()->cache = cache;
202     cachedValue.set(engine, result);
203 
204     return result->d();
205 }
206 
init(ExecutionEngine * engine,const QString & pattern,uint flags)207 void Heap::RegExp::init(ExecutionEngine *engine, const QString &pattern, uint flags)
208 {
209     Base::init();
210     this->pattern = new QString(pattern);
211     this->flags = flags;
212 
213     valid = false;
214 
215     JSC::Yarr::ErrorCode error = JSC::Yarr::ErrorCode::NoError;
216     JSC::Yarr::YarrPattern yarrPattern(WTF::String(pattern), jscFlags(flags), error);
217     if (error != JSC::Yarr::ErrorCode::NoError)
218         return;
219     subPatternCount = yarrPattern.m_numSubpatterns;
220 #if ENABLE(YARR_JIT)
221     if (!yarrPattern.m_containsBackreferences && engine->canJIT()) {
222         jitCode = new JSC::Yarr::YarrCodeBlock;
223         JSC::VM *vm = static_cast<JSC::VM *>(engine);
224         JSC::Yarr::jitCompile(yarrPattern, JSC::Yarr::Char16, vm, *jitCode);
225     }
226 #else
227     Q_UNUSED(engine)
228 #endif
229     if (hasValidJITCode()) {
230         valid = true;
231         return;
232     }
233     byteCode = JSC::Yarr::byteCompile(yarrPattern, internalClass->engine->bumperPointerAllocator).release();
234     if (byteCode)
235         valid = true;
236 }
237 
destroy()238 void Heap::RegExp::destroy()
239 {
240     if (cache) {
241         RegExpCacheKey key(this);
242         cache->remove(key);
243     }
244 #if ENABLE(YARR_JIT)
245     delete jitCode;
246 #endif
247     delete byteCode;
248     delete pattern;
249     Base::destroy();
250 }
251