1 /*
2  *  Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
3  *  Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
4  *  Copyright (C) 2009 Torch Mobile, Inc.
5  *  Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
6  *
7  *  This library is free software; you can redistribute it and/or
8  *  modify it under the terms of the GNU Lesser General Public
9  *  License as published by the Free Software Foundation; either
10  *  version 2 of the License, or (at your option) any later version.
11  *
12  *  This library is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  *  Lesser General Public License for more details.
16  *
17  *  You should have received a copy of the GNU Lesser General Public
18  *  License along with this library; if not, write to the Free Software
19  *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
20  *
21  */
22 
23 #include "config.h"
24 #include "RegExp.h"
25 
26 #include "Lexer.h"
27 #include "yarr/Yarr.h"
28 #include "yarr/YarrJIT.h"
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <wtf/Assertions.h>
33 #include <wtf/OwnArrayPtr.h>
34 
35 namespace JSC {
36 
regExpFlags(const UString & string)37 RegExpFlags regExpFlags(const UString& string)
38 {
39     RegExpFlags flags = NoFlags;
40 
41     for (unsigned i = 0; i < string.length(); ++i) {
42         switch (string.characters()[i]) {
43         case 'g':
44             if (flags & FlagGlobal)
45                 return InvalidFlags;
46             flags = static_cast<RegExpFlags>(flags | FlagGlobal);
47             break;
48 
49         case 'i':
50             if (flags & FlagIgnoreCase)
51                 return InvalidFlags;
52             flags = static_cast<RegExpFlags>(flags | FlagIgnoreCase);
53             break;
54 
55         case 'm':
56             if (flags & FlagMultiline)
57                 return InvalidFlags;
58             flags = static_cast<RegExpFlags>(flags | FlagMultiline);
59             break;
60 
61         default:
62             return InvalidFlags;
63         }
64     }
65 
66     return flags;
67 }
68 
69 struct RegExpRepresentation {
70 #if ENABLE(YARR_JIT)
71     Yarr::YarrCodeBlock m_regExpJITCode;
72 #endif
73     OwnPtr<Yarr::BytecodePattern> m_regExpBytecode;
74 };
75 
RegExp(JSGlobalData * globalData,const UString & patternString,RegExpFlags flags)76 inline RegExp::RegExp(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
77     : m_patternString(patternString)
78     , m_flags(flags)
79     , m_constructionError(0)
80     , m_numSubpatterns(0)
81 #if ENABLE(REGEXP_TRACING)
82     , m_rtMatchCallCount(0)
83     , m_rtMatchFoundCount(0)
84 #endif
85     , m_representation(adoptPtr(new RegExpRepresentation))
86 {
87     m_state = compile(globalData);
88 }
89 
~RegExp()90 RegExp::~RegExp()
91 {
92 }
93 
create(JSGlobalData * globalData,const UString & patternString,RegExpFlags flags)94 PassRefPtr<RegExp> RegExp::create(JSGlobalData* globalData, const UString& patternString, RegExpFlags flags)
95 {
96     RefPtr<RegExp> res = adoptRef(new RegExp(globalData, patternString, flags));
97 #if ENABLE(REGEXP_TRACING)
98     globalData->addRegExpToTrace(res);
99 #endif
100     return res.release();
101 }
102 
compile(JSGlobalData * globalData)103 RegExp::RegExpState RegExp::compile(JSGlobalData* globalData)
104 {
105     Yarr::YarrPattern pattern(m_patternString, ignoreCase(), multiline(), &m_constructionError);
106     if (m_constructionError)
107         return ParseError;
108 
109     m_numSubpatterns = pattern.m_numSubpatterns;
110 
111     RegExpState res = ByteCode;
112 
113 #if ENABLE(YARR_JIT)
114     if (!pattern.m_containsBackreferences && globalData->canUseJIT()) {
115         Yarr::jitCompile(pattern, globalData, m_representation->m_regExpJITCode);
116 #if ENABLE(YARR_JIT_DEBUG)
117         if (!m_representation->m_regExpJITCode.isFallBack())
118             res = JITCode;
119         else
120             res = ByteCode;
121 #else
122         if (!m_representation->m_regExpJITCode.isFallBack())
123             return JITCode;
124 #endif
125     }
126 #endif
127 
128     m_representation->m_regExpBytecode = Yarr::byteCompile(pattern, &globalData->m_regExpAllocator);
129 
130     return res;
131 }
132 
match(const UString & s,int startOffset,Vector<int,32> * ovector)133 int RegExp::match(const UString& s, int startOffset, Vector<int, 32>* ovector)
134 {
135     if (startOffset < 0)
136         startOffset = 0;
137 
138 #if ENABLE(REGEXP_TRACING)
139     m_rtMatchCallCount++;
140 #endif
141 
142     if (static_cast<unsigned>(startOffset) > s.length() || s.isNull())
143         return -1;
144 
145     if (m_state != ParseError) {
146         int offsetVectorSize = (m_numSubpatterns + 1) * 2;
147         int* offsetVector;
148         Vector<int, 32> nonReturnedOvector;
149         if (ovector) {
150             ovector->resize(offsetVectorSize);
151             offsetVector = ovector->data();
152         } else {
153             nonReturnedOvector.resize(offsetVectorSize);
154             offsetVector = nonReturnedOvector.data();
155         }
156 
157         ASSERT(offsetVector);
158         // Initialize offsetVector with the return value (index 0) and the
159         // first subpattern start indicies (even index values) set to -1.
160         // No need to init the subpattern end indicies.
161         for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
162             offsetVector[j] = -1;
163 
164         int result;
165 #if ENABLE(YARR_JIT)
166         if (m_state == JITCode) {
167             result = Yarr::execute(m_representation->m_regExpJITCode, s.characters(), startOffset, s.length(), offsetVector);
168 #if ENABLE(YARR_JIT_DEBUG)
169             matchCompareWithInterpreter(s, startOffset, offsetVector, result);
170 #endif
171         } else
172 #endif
173             result = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), offsetVector);
174         ASSERT(result >= -1);
175 
176 #if ENABLE(REGEXP_TRACING)
177         if (result != -1)
178             m_rtMatchFoundCount++;
179 #endif
180 
181         return result;
182     }
183 
184     return -1;
185 }
186 
187 
188 #if ENABLE(YARR_JIT_DEBUG)
matchCompareWithInterpreter(const UString & s,int startOffset,int * offsetVector,int jitResult)189 void RegExp::matchCompareWithInterpreter(const UString& s, int startOffset, int* offsetVector, int jitResult)
190 {
191     int offsetVectorSize = (m_numSubpatterns + 1) * 2;
192     Vector<int, 32> interpreterOvector;
193     interpreterOvector.resize(offsetVectorSize);
194     int* interpreterOffsetVector = interpreterOvector.data();
195     int interpreterResult = 0;
196     int differences = 0;
197 
198     // Initialize interpreterOffsetVector with the return value (index 0) and the
199     // first subpattern start indicies (even index values) set to -1.
200     // No need to init the subpattern end indicies.
201     for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
202         interpreterOffsetVector[j] = -1;
203 
204     interpreterResult = Yarr::interpret(m_representation->m_regExpBytecode.get(), s.characters(), startOffset, s.length(), interpreterOffsetVector);
205 
206     if (jitResult != interpreterResult)
207         differences++;
208 
209     for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++)
210         if ((offsetVector[j] != interpreterOffsetVector[j])
211             || ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1])))
212             differences++;
213 
214     if (differences) {
215         fprintf(stderr, "RegExp Discrepency for /%s/\n    string input ", pattern().utf8().data());
216         unsigned segmentLen = s.length() - static_cast<unsigned>(startOffset);
217 
218         fprintf(stderr, (segmentLen < 150) ? "\"%s\"\n" : "\"%148s...\"\n", s.utf8().data() + startOffset);
219 
220         if (jitResult != interpreterResult) {
221             fprintf(stderr, "    JIT result = %d, blah interpreted result = %d\n", jitResult, interpreterResult);
222             differences--;
223         } else {
224             fprintf(stderr, "    Correct result = %d\n", jitResult);
225         }
226 
227         if (differences) {
228             for (unsigned j = 2, i = 0; i < m_numSubpatterns; j +=2, i++) {
229                 if (offsetVector[j] != interpreterOffsetVector[j])
230                     fprintf(stderr, "    JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j, offsetVector[j], j, interpreterOffsetVector[j]);
231                 if ((offsetVector[j] >= 0) && (offsetVector[j+1] != interpreterOffsetVector[j+1]))
232                     fprintf(stderr, "    JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j+1, offsetVector[j+1], j+1, interpreterOffsetVector[j+1]);
233             }
234         }
235     }
236 }
237 #endif
238 
239 #if ENABLE(REGEXP_TRACING)
printTraceData()240     void RegExp::printTraceData()
241     {
242         char formattedPattern[41];
243         char rawPattern[41];
244 
245         strncpy(rawPattern, pattern().utf8().data(), 40);
246         rawPattern[40]= '\0';
247 
248         int pattLen = strlen(rawPattern);
249 
250         snprintf(formattedPattern, 41, (pattLen <= 38) ? "/%.38s/" : "/%.36s...", rawPattern);
251 
252 #if ENABLE(YARR_JIT)
253         Yarr::YarrCodeBlock& codeBlock = m_representation->m_regExpJITCode;
254 
255         const size_t jitAddrSize = 20;
256         char jitAddr[jitAddrSize];
257         if (m_state == JITCode)
258             snprintf(jitAddr, jitAddrSize, "fallback");
259         else
260             snprintf(jitAddr, jitAddrSize, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock.getAddr()));
261 #else
262         const char* jitAddr = "JIT Off";
263 #endif
264 
265         printf("%-40.40s %16.16s %10d %10d\n", formattedPattern, jitAddr, m_rtMatchCallCount, m_rtMatchFoundCount);
266     }
267 #endif
268 
269 } // namespace JSC
270