1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 // Copyright 2020 the V8 project authors. All rights reserved.
8 // Use of this source code is governed by a BSD-style license that can be
9 // found in the LICENSE file.
10
11 #include "new-regexp/RegExpAPI.h"
12
13 #include "mozilla/ArrayUtils.h"
14 #include "mozilla/Casting.h"
15
16 #include "gc/Zone.h"
17 #include "jit/JitCommon.h"
18 #include "new-regexp/regexp-bytecode-generator.h"
19 #include "new-regexp/regexp-compiler.h"
20 #include "new-regexp/regexp-interpreter.h"
21 #include "new-regexp/regexp-macro-assembler-arch.h"
22 #include "new-regexp/regexp-macro-assembler-tracer.h"
23 #include "new-regexp/regexp-parser.h"
24 #include "new-regexp/regexp-shim.h"
25 #include "new-regexp/regexp.h"
26 #include "util/StringBuffer.h"
27 #include "vm/MatchPairs.h"
28 #include "vm/RegExpShared.h"
29
30 namespace js {
31 namespace irregexp {
32
33 using mozilla::AssertedCast;
34 using mozilla::Maybe;
35 using mozilla::PointerRangeSize;
36
37 using frontend::DummyTokenStream;
38 using frontend::TokenStreamAnyChars;
39
40 using v8::internal::FlatStringReader;
41 using v8::internal::HandleScope;
42 using v8::internal::InputOutputData;
43 using v8::internal::IrregexpInterpreter;
44 using v8::internal::NativeRegExpMacroAssembler;
45 using v8::internal::RegExpBytecodeGenerator;
46 using v8::internal::RegExpCompileData;
47 using v8::internal::RegExpCompiler;
48 using v8::internal::RegExpError;
49 using v8::internal::RegExpMacroAssembler;
50 using v8::internal::RegExpMacroAssemblerTracer;
51 using v8::internal::RegExpNode;
52 using v8::internal::RegExpParser;
53 using v8::internal::SMRegExpMacroAssembler;
54 using v8::internal::Zone;
55
56 using V8HandleString = v8::internal::Handle<v8::internal::String>;
57 using V8HandleRegExp = v8::internal::Handle<v8::internal::JSRegExp>;
58
59 using namespace v8::internal::regexp_compiler_constants;
60
ErrorNumber(RegExpError err)61 static uint32_t ErrorNumber(RegExpError err) {
62 switch (err) {
63 case RegExpError::kNone:
64 return JSMSG_NOT_AN_ERROR;
65 case RegExpError::kStackOverflow:
66 return JSMSG_OVER_RECURSED;
67 case RegExpError::kAnalysisStackOverflow:
68 return JSMSG_OVER_RECURSED;
69 case RegExpError::kTooLarge:
70 return JSMSG_TOO_MANY_PARENS;
71 case RegExpError::kUnterminatedGroup:
72 return JSMSG_MISSING_PAREN;
73 case RegExpError::kUnmatchedParen:
74 return JSMSG_UNMATCHED_RIGHT_PAREN;
75 case RegExpError::kEscapeAtEndOfPattern:
76 return JSMSG_ESCAPE_AT_END_OF_REGEXP;
77 case RegExpError::kInvalidPropertyName:
78 return JSMSG_INVALID_PROPERTY_NAME;
79 case RegExpError::kInvalidEscape:
80 return JSMSG_INVALID_IDENTITY_ESCAPE;
81 case RegExpError::kInvalidDecimalEscape:
82 return JSMSG_INVALID_DECIMAL_ESCAPE;
83 case RegExpError::kInvalidUnicodeEscape:
84 return JSMSG_INVALID_UNICODE_ESCAPE;
85 case RegExpError::kNothingToRepeat:
86 return JSMSG_NOTHING_TO_REPEAT;
87 case RegExpError::kLoneQuantifierBrackets:
88 // Note: V8 reports the same error for both ']' and '}'.
89 return JSMSG_RAW_BRACKET_IN_REGEXP;
90 case RegExpError::kRangeOutOfOrder:
91 return JSMSG_NUMBERS_OUT_OF_ORDER;
92 case RegExpError::kIncompleteQuantifier:
93 return JSMSG_INCOMPLETE_QUANTIFIER;
94 case RegExpError::kInvalidQuantifier:
95 return JSMSG_INVALID_QUANTIFIER;
96 case RegExpError::kInvalidGroup:
97 return JSMSG_INVALID_GROUP;
98 case RegExpError::kMultipleFlagDashes:
99 case RegExpError::kRepeatedFlag:
100 case RegExpError::kInvalidFlagGroup:
101 // V8 contains experimental support for turning regexp flags on
102 // and off in the middle of a regular expression. Unless it
103 // becomes standardized, SM does not support this feature.
104 MOZ_CRASH("Mode modifiers not supported");
105 case RegExpError::kTooManyCaptures:
106 return JSMSG_TOO_MANY_PARENS;
107 case RegExpError::kInvalidCaptureGroupName:
108 return JSMSG_INVALID_CAPTURE_NAME;
109 case RegExpError::kDuplicateCaptureGroupName:
110 return JSMSG_DUPLICATE_CAPTURE_NAME;
111 case RegExpError::kInvalidNamedReference:
112 return JSMSG_INVALID_NAMED_REF;
113 case RegExpError::kInvalidNamedCaptureReference:
114 return JSMSG_INVALID_NAMED_CAPTURE_REF;
115 case RegExpError::kInvalidClassEscape:
116 return JSMSG_RANGE_WITH_CLASS_ESCAPE;
117 case RegExpError::kInvalidClassPropertyName:
118 return JSMSG_INVALID_CLASS_PROPERTY_NAME;
119 case RegExpError::kInvalidCharacterClass:
120 return JSMSG_RANGE_WITH_CLASS_ESCAPE;
121 case RegExpError::kUnterminatedCharacterClass:
122 return JSMSG_UNTERM_CLASS;
123 case RegExpError::kOutOfOrderCharacterClass:
124 return JSMSG_BAD_CLASS_RANGE;
125 case RegExpError::NumErrors:
126 MOZ_CRASH("Unreachable");
127 }
128 MOZ_CRASH("Unreachable");
129 }
130
CreateIsolate(JSContext * cx)131 Isolate* CreateIsolate(JSContext* cx) {
132 auto isolate = MakeUnique<Isolate>(cx);
133 if (!isolate || !isolate->init()) {
134 return nullptr;
135 }
136 return isolate.release();
137 }
138
// Frees |isolate| with js_delete. This is the release path for the raw
// pointer that CreateIsolate hands out.
void DestroyIsolate(Isolate* isolate) {
  js_delete(isolate);
}
142
ComputeColumn(const Latin1Char * begin,const Latin1Char * end)143 static size_t ComputeColumn(const Latin1Char* begin, const Latin1Char* end) {
144 return PointerRangeSize(begin, end);
145 }
146
ComputeColumn(const char16_t * begin,const char16_t * end)147 static size_t ComputeColumn(const char16_t* begin, const char16_t* end) {
148 return unicode::CountCodePoints(begin, end);
149 }
150
151 // This function is varargs purely so it can call ReportCompileErrorLatin1.
152 // We never call it with additional arguments.
153 template <typename CharT>
ReportSyntaxError(TokenStreamAnyChars & ts,RegExpCompileData & result,CharT * start,size_t length,...)154 static void ReportSyntaxError(TokenStreamAnyChars& ts,
155 RegExpCompileData& result, CharT* start,
156 size_t length, ...) {
157 gc::AutoSuppressGC suppressGC(ts.context());
158 uint32_t errorNumber = ErrorNumber(result.error);
159
160 if (errorNumber == JSMSG_OVER_RECURSED) {
161 ReportOverRecursed(ts.context());
162 return;
163 }
164
165 uint32_t offset = std::max(result.error_pos, 0);
166 MOZ_ASSERT(offset <= length);
167
168 ErrorMetadata err;
169
170 // Ordinarily this indicates whether line-of-context information can be
171 // added, but we entirely ignore that here because we create a
172 // a line of context based on the expression source.
173 uint32_t location = ts.currentToken().pos.begin;
174 if (ts.fillExceptingContext(&err, location)) {
175 // Line breaks are not significant in pattern text in the same way as
176 // in source text, so act as though pattern text is a single line, then
177 // compute a column based on "code point" count (treating a lone
178 // surrogate as a "code point" in UTF-16). Gak.
179 err.lineNumber = 1;
180 err.columnNumber =
181 AssertedCast<uint32_t>(ComputeColumn(start, start + offset));
182 }
183
184 // For most error reporting, the line of context derives from the token
185 // stream. So when location information doesn't come from the token
186 // stream, we can't give a line of context. But here the "line of context"
187 // can be (and is) derived from the pattern text, so we can provide it no
188 // matter if the location is derived from the caller.
189
190 const CharT* windowStart =
191 (offset > ErrorMetadata::lineOfContextRadius)
192 ? start + (offset - ErrorMetadata::lineOfContextRadius)
193 : start;
194
195 const CharT* windowEnd =
196 (length - offset > ErrorMetadata::lineOfContextRadius)
197 ? start + offset + ErrorMetadata::lineOfContextRadius
198 : start + length;
199
200 size_t windowLength = PointerRangeSize(windowStart, windowEnd);
201 MOZ_ASSERT(windowLength <= ErrorMetadata::lineOfContextRadius * 2);
202
203 // Create the windowed string, not including the potential line
204 // terminator.
205 StringBuffer windowBuf(ts.context());
206 if (!windowBuf.append(windowStart, windowEnd)) return;
207
208 // The line of context must be null-terminated, and StringBuffer doesn't
209 // make that happen unless we force it to.
210 if (!windowBuf.append('\0')) return;
211
212 err.lineOfContext.reset(windowBuf.stealChars());
213 if (!err.lineOfContext) return;
214
215 err.lineLength = windowLength;
216 err.tokenOffset = offset - (windowStart - start);
217
218 va_list args;
219 va_start(args, length);
220 ReportCompileErrorLatin1(ts.context(), std::move(err), nullptr, errorNumber,
221 &args);
222 va_end(args);
223 }
224
ReportSyntaxError(TokenStreamAnyChars & ts,RegExpCompileData & result,HandleAtom pattern)225 static void ReportSyntaxError(TokenStreamAnyChars& ts,
226 RegExpCompileData& result, HandleAtom pattern) {
227 JS::AutoCheckCannotGC nogc_;
228 if (pattern->hasLatin1Chars()) {
229 ReportSyntaxError(ts, result, pattern->latin1Chars(nogc_),
230 pattern->length());
231 } else {
232 ReportSyntaxError(ts, result, pattern->twoByteChars(nogc_),
233 pattern->length());
234 }
235 }
236
CheckPatternSyntaxImpl(JSContext * cx,FlatStringReader * pattern,JS::RegExpFlags flags,RegExpCompileData * result)237 static bool CheckPatternSyntaxImpl(JSContext* cx, FlatStringReader* pattern,
238 JS::RegExpFlags flags,
239 RegExpCompileData* result) {
240 LifoAllocScope allocScope(&cx->tempLifoAlloc());
241 Zone zone(allocScope.alloc());
242
243 HandleScope handleScope(cx->isolate);
244 return RegExpParser::ParseRegExp(cx->isolate, &zone, pattern, flags, result);
245 }
246
CheckPatternSyntax(JSContext * cx,TokenStreamAnyChars & ts,const mozilla::Range<const char16_t> chars,JS::RegExpFlags flags)247 bool CheckPatternSyntax(JSContext* cx, TokenStreamAnyChars& ts,
248 const mozilla::Range<const char16_t> chars,
249 JS::RegExpFlags flags) {
250 FlatStringReader reader(chars);
251 RegExpCompileData result;
252 if (!CheckPatternSyntaxImpl(cx, &reader, flags, &result)) {
253 ReportSyntaxError(ts, result, chars.begin().get(), chars.length());
254 return false;
255 }
256 return true;
257 }
258
CheckPatternSyntax(JSContext * cx,TokenStreamAnyChars & ts,HandleAtom pattern,JS::RegExpFlags flags)259 bool CheckPatternSyntax(JSContext* cx, TokenStreamAnyChars& ts,
260 HandleAtom pattern, JS::RegExpFlags flags) {
261 FlatStringReader reader(cx, pattern);
262 RegExpCompileData result;
263 if (!CheckPatternSyntaxImpl(cx, &reader, flags, &result)) {
264 ReportSyntaxError(ts, result, pattern);
265 return false;
266 }
267 return true;
268 }
269
270 // A regexp is a good candidate for Boyer-Moore if it has at least 3
271 // times as many characters as it has unique characters. Note that
272 // table lookups in irregexp are done modulo tableSize (128).
273 template <typename CharT>
HasFewDifferentCharacters(const CharT * chars,size_t length)274 static bool HasFewDifferentCharacters(const CharT* chars, size_t length) {
275 const uint32_t tableSize =
276 v8::internal::NativeRegExpMacroAssembler::kTableSize;
277 bool character_found[tableSize];
278 uint32_t different = 0;
279 memset(&character_found[0], 0, sizeof(character_found));
280 for (uint32_t i = 0; i < length; i++) {
281 uint32_t ch = chars[i] % tableSize;
282 if (!character_found[ch]) {
283 character_found[ch] = true;
284 different++;
285 // We declare a regexp low-alphabet if it has at least 3 times as many
286 // characters as it has different characters.
287 if (different * 3 > length) {
288 return false;
289 }
290 }
291 }
292 return true;
293 }
294
295 // Identifies the sort of pattern where Boyer-Moore is faster than string search
UseBoyerMoore(HandleAtom pattern,JS::AutoAssertNoGC & nogc)296 static bool UseBoyerMoore(HandleAtom pattern, JS::AutoAssertNoGC& nogc) {
297 size_t length =
298 std::min(size_t(kMaxLookaheadForBoyerMoore), pattern->length());
299 if (length <= kPatternTooShortForBoyerMoore) {
300 return false;
301 }
302
303 if (pattern->hasLatin1Chars()) {
304 return HasFewDifferentCharacters(pattern->latin1Chars(nogc), length);
305 }
306 MOZ_ASSERT(pattern->hasTwoByteChars());
307 return HasFewDifferentCharacters(pattern->twoByteChars(nogc), length);
308 }
309
310 // Sample character frequency information for use in Boyer-Moore.
SampleCharacters(FlatStringReader * sample_subject,RegExpCompiler & compiler)311 static void SampleCharacters(FlatStringReader* sample_subject,
312 RegExpCompiler& compiler) {
313 static const int kSampleSize = 128;
314 int chars_sampled = 0;
315
316 int length = sample_subject->length();
317
318 int half_way = (length - kSampleSize) / 2;
319 for (int i = std::max(0, half_way); i < length && chars_sampled < kSampleSize;
320 i++, chars_sampled++) {
321 compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
322 }
323 }
324
// Outcome of Assemble(). Assemble() itself must not GC, so it returns one of
// these values and leaves the actual error reporting to its caller.
enum class AssembleResult {
  // Code was generated and installed on the RegExpShared.
  Success,
  // The regexp compiler rejected the pattern as too large.
  TooLarge,
  // An allocation failed while assembling.
  OutOfMemory,
};
330
// Generates executable code for an already parsed and preprocessed regexp and
// installs it on |re|.
//
// |compiler| and |data| hold the compiler state and parse result, |pattern|
// is the source atom and |zone| is the irregexp allocation zone.
// |useNativeCode| selects jitcode (true) or interpreter bytecode (false), and
// |isLatin1| selects the character width the code is specialized for.
//
// Returns Success when code was installed, TooLarge when the regexp compiler
// fails, and OutOfMemory when the macroassembler cannot be allocated, code
// generation yields undefined, or a table cannot be added to |re|. No error
// is reported here; the caller does that.
static MOZ_MUST_USE AssembleResult Assemble(JSContext* cx,
                                            RegExpCompiler* compiler,
                                            RegExpCompileData* data,
                                            MutableHandleRegExpShared re,
                                            HandleAtom pattern, Zone* zone,
                                            bool useNativeCode, bool isLatin1) {
  // Because we create a StackMacroAssembler, this function is not allowed
  // to GC. If needed, we allocate and throw errors in the caller.
  Maybe<jit::JitContext> jctx;
  Maybe<js::jit::StackMacroAssembler> stack_masm;
  UniquePtr<RegExpMacroAssembler> masm;
  if (useNativeCode) {
    NativeRegExpMacroAssembler::Mode mode =
        isLatin1 ? NativeRegExpMacroAssembler::LATIN1
                 : NativeRegExpMacroAssembler::UC16;
    // If we are compiling native code, we need a macroassembler,
    // which needs a jit context.
    jctx.emplace(cx, nullptr);
    stack_masm.emplace();
    // Each match pair needs two registers.
    uint32_t num_capture_registers = re->pairCount() * 2;
    masm = MakeUnique<SMRegExpMacroAssembler>(cx, stack_masm.ref(), zone, mode,
                                              num_capture_registers);
  } else {
    masm = MakeUnique<RegExpBytecodeGenerator>(cx->isolate, zone);
  }
  if (!masm) {
    return AssembleResult::OutOfMemory;
  }

  // Patterns above the size threshold are assembled in slow-safe mode, and
  // optimization is switched off for them if it was on.
  bool isLargePattern =
      pattern->length() > v8::internal::RegExp::kRegExpTooLargeToOptimize;
  masm->set_slow_safe(isLargePattern);
  if (compiler->optimize()) {
    compiler->set_optimize(!isLargePattern);
  }

  // When matching a regexp with known maximum length that is anchored
  // at the end, we may be able to skip the beginning of long input
  // strings. This decision is made here because it depends on
  // information in the AST that isn't replicated in the Node
  // structure used inside the compiler.
  bool is_start_anchored = data->tree->IsAnchoredAtStart();
  bool is_end_anchored = data->tree->IsAnchoredAtEnd();
  int max_length = data->tree->max_match();
  static const int kMaxBacksearchLimit = 1024;
  if (is_end_anchored && !is_start_anchored && !re->sticky() &&
      max_length < kMaxBacksearchLimit) {
    masm->SetCurrentPositionFromEnd(max_length);
  }

  // Global regexps pick a global mode: the zero-length check is skipped when
  // every match consumes at least one character, otherwise unicode regexps
  // use the unicode-aware mode.
  if (re->global()) {
    RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
    if (data->tree->min_match() > 0) {
      mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
    } else if (re->unicode()) {
      mode = RegExpMacroAssembler::GLOBAL_UNICODE;
    }
    masm->set_global_mode(mode);
  }

  // The masm tracer works as a thin wrapper around another macroassembler.
  RegExpMacroAssembler* masm_ptr = masm.get();
#ifdef DEBUG
  UniquePtr<RegExpMacroAssembler> tracer_masm;
  if (jit::JitOptions.traceRegExpAssembler) {
    tracer_masm = MakeUnique<RegExpMacroAssemblerTracer>(cx->isolate, masm_ptr);
    masm_ptr = tracer_masm.get();
  }
#endif

  // Compile the regexp.
  V8HandleString wrappedPattern(v8::internal::String(pattern), cx->isolate);
  RegExpCompiler::CompilationResult result = compiler->Assemble(
      cx->isolate, masm_ptr, data->node, data->capture_count, wrappedPattern);
  if (!result.Succeeded()) {
    // kTooLarge is the only failure expected from the compiler here.
    MOZ_ASSERT(result.error == RegExpError::kTooLarge);
    return AssembleResult::TooLarge;
  }
  if (result.code->value().isUndefined()) {
    // SMRegExpMacroAssembler::GetCode returns undefined on OOM.
    MOZ_ASSERT(useNativeCode);
    return AssembleResult::OutOfMemory;
  }

  re->updateMaxRegisters(result.num_registers);
  if (useNativeCode) {
    // Transfer ownership of the tables from the macroassembler to the
    // RegExpShared.
    SMRegExpMacroAssembler::TableVector& tables =
        static_cast<SMRegExpMacroAssembler*>(masm.get())->tables();
    for (uint32_t i = 0; i < tables.length(); i++) {
      if (!re->addTable(std::move(tables[i]))) {
        return AssembleResult::OutOfMemory;
      }
    }
    re->setJitCode(v8::internal::Code::cast(*result.code).inner(), isLatin1);
  } else {
    // Transfer ownership of the bytecode from the HandleScope to the
    // RegExpShared.
    ByteArray bytecode =
        v8::internal::ByteArray::cast(*result.code).takeOwnership(cx->isolate);
    // Read the length before release() gives up the pointer.
    uint32_t length = bytecode->length;
    re->setByteCode(bytecode.release(), isLatin1);
    js::AddCellMemory(re, length, MemoryUse::RegExpSharedBytecode);
  }

  return AssembleResult::Success;
}
439
CompilePattern(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,RegExpShared::CodeKind codeKind)440 bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
441 HandleLinearString input, RegExpShared::CodeKind codeKind) {
442 RootedAtom pattern(cx, re->getSource());
443 JS::RegExpFlags flags = re->getFlags();
444 LifoAllocScope allocScope(&cx->tempLifoAlloc());
445 Zone zone(allocScope.alloc());
446
447 RegExpCompileData data;
448 {
449 FlatStringReader patternBytes(cx, pattern);
450 if (!RegExpParser::ParseRegExp(cx->isolate, &zone, &patternBytes, flags,
451 &data)) {
452 JS::CompileOptions options(cx);
453 DummyTokenStream dummyTokenStream(cx, options);
454 ReportSyntaxError(dummyTokenStream, data, pattern);
455 return false;
456 }
457 }
458
459 if (re->kind() == RegExpShared::Kind::Unparsed) {
460 // This is the first time we have compiled this regexp.
461 // First, check to see if we should use simple string search
462 // with an atom.
463 if (!flags.ignoreCase() && !flags.sticky()) {
464 RootedAtom searchAtom(cx);
465 if (data.simple) {
466 // The parse-tree is a single atom that is equal to the pattern.
467 searchAtom = re->getSource();
468 } else if (data.tree->IsAtom() && data.capture_count == 0) {
469 // The parse-tree is a single atom that is not equal to the pattern.
470 v8::internal::RegExpAtom* atom = data.tree->AsAtom();
471 const char16_t* twoByteChars = atom->data().begin();
472 searchAtom = AtomizeChars(cx, twoByteChars, atom->length());
473 if (!searchAtom) {
474 return false;
475 }
476 }
477 JS::AutoAssertNoGC nogc(cx);
478 if (searchAtom && !UseBoyerMoore(searchAtom, nogc)) {
479 re->useAtomMatch(searchAtom);
480 return true;
481 }
482 }
483 if (!data.capture_name_map.is_null()) {
484 RootedNativeObject namedCaptures(cx, data.capture_name_map->inner());
485 if (!RegExpShared::initializeNamedCaptures(cx, re, namedCaptures)) {
486 return false;
487 }
488 }
489 // All fallible initialization has succeeded, so we can change state.
490 // Add one to capture_count to account for the whole-match capture.
491 uint32_t pairCount = data.capture_count + 1;
492 re->useRegExpMatch(pairCount);
493 }
494
495 MOZ_ASSERT(re->kind() == RegExpShared::Kind::RegExp);
496
497 HandleScope handleScope(cx->isolate);
498 RegExpCompiler compiler(cx->isolate, &zone, data.capture_count,
499 input->hasLatin1Chars());
500
501 bool isLatin1 = input->hasLatin1Chars();
502
503 FlatStringReader sample_subject(cx, input);
504 SampleCharacters(&sample_subject, compiler);
505 data.node = compiler.PreprocessRegExp(&data, flags, isLatin1);
506 data.error = AnalyzeRegExp(cx->isolate, isLatin1, data.node);
507 if (data.error != RegExpError::kNone) {
508 MOZ_ASSERT(data.error == RegExpError::kAnalysisStackOverflow);
509 ReportOverRecursed(cx);
510 return false;
511 }
512
513 bool useNativeCode = codeKind == RegExpShared::CodeKind::Jitcode;
514 MOZ_ASSERT_IF(useNativeCode, IsNativeRegExpEnabled());
515
516 switch (Assemble(cx, &compiler, &data, re, pattern, &zone, useNativeCode,
517 isLatin1)) {
518 case AssembleResult::TooLarge:
519 JS_ReportErrorASCII(cx, "regexp too big");
520 return false;
521 case AssembleResult::OutOfMemory:
522 ReportOutOfMemory(cx);
523 return false;
524 case AssembleResult::Success:
525 break;
526 }
527 return true;
528 }
529
530 template <typename CharT>
ExecuteRaw(jit::JitCode * code,const CharT * chars,size_t length,size_t startIndex,VectorMatchPairs * matches)531 RegExpRunStatus ExecuteRaw(jit::JitCode* code, const CharT* chars,
532 size_t length, size_t startIndex,
533 VectorMatchPairs* matches) {
534 InputOutputData data(chars, chars + length, startIndex, matches);
535
536 static_assert(RegExpRunStatus_Error ==
537 v8::internal::RegExp::kInternalRegExpException);
538 static_assert(RegExpRunStatus_Success ==
539 v8::internal::RegExp::kInternalRegExpSuccess);
540 static_assert(RegExpRunStatus_Success_NotFound ==
541 v8::internal::RegExp::kInternalRegExpFailure);
542
543 typedef int (*RegExpCodeSignature)(InputOutputData*);
544 auto function = reinterpret_cast<RegExpCodeSignature>(code->raw());
545 {
546 JS::AutoSuppressGCAnalysis nogc;
547 return (RegExpRunStatus) CALL_GENERATED_1(function, &data);
548 }
549 }
550
Interpret(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,size_t startIndex,VectorMatchPairs * matches)551 RegExpRunStatus Interpret(JSContext* cx, MutableHandleRegExpShared re,
552 HandleLinearString input, size_t startIndex,
553 VectorMatchPairs* matches) {
554 HandleScope handleScope(cx->isolate);
555 V8HandleRegExp wrappedRegExp(v8::internal::JSRegExp(re), cx->isolate);
556 V8HandleString wrappedInput(v8::internal::String(input), cx->isolate);
557
558 static_assert(RegExpRunStatus_Error ==
559 v8::internal::RegExp::kInternalRegExpException);
560 static_assert(RegExpRunStatus_Success ==
561 v8::internal::RegExp::kInternalRegExpSuccess);
562 static_assert(RegExpRunStatus_Success_NotFound ==
563 v8::internal::RegExp::kInternalRegExpFailure);
564
565 RegExpRunStatus status =
566 (RegExpRunStatus)IrregexpInterpreter::MatchForCallFromRuntime(
567 cx->isolate, wrappedRegExp, wrappedInput, matches->pairsRaw(),
568 matches->pairCount() * 2, startIndex);
569
570 MOZ_ASSERT(status == RegExpRunStatus_Error ||
571 status == RegExpRunStatus_Success ||
572 status == RegExpRunStatus_Success_NotFound);
573
574 return status;
575 }
576
Execute(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,size_t startIndex,VectorMatchPairs * matches)577 RegExpRunStatus Execute(JSContext* cx, MutableHandleRegExpShared re,
578 HandleLinearString input, size_t startIndex,
579 VectorMatchPairs* matches) {
580 bool latin1 = input->hasLatin1Chars();
581 jit::JitCode* jitCode = re->getJitCode(latin1);
582 bool isCompiled = !!jitCode;
583
584 if (isCompiled) {
585 JS::AutoCheckCannotGC nogc;
586 if (latin1) {
587 return ExecuteRaw(jitCode, input->latin1Chars(nogc), input->length(),
588 startIndex, matches);
589 }
590 return ExecuteRaw(jitCode, input->twoByteChars(nogc), input->length(),
591 startIndex, matches);
592 }
593
594 return Interpret(cx, re, input, startIndex, matches);
595 }
596
ExecuteForFuzzing(JSContext * cx,HandleAtom pattern,HandleLinearString input,JS::RegExpFlags flags,size_t startIndex,VectorMatchPairs * matches,RegExpShared::CodeKind codeKind)597 RegExpRunStatus ExecuteForFuzzing(JSContext* cx, HandleAtom pattern,
598 HandleLinearString input,
599 JS::RegExpFlags flags,
600 size_t startIndex,
601 VectorMatchPairs* matches,
602 RegExpShared::CodeKind codeKind) {
603 RootedRegExpShared re(cx, cx->zone()->regExps().get(cx, pattern, flags));
604 if (!RegExpShared::compileIfNecessary(cx, &re, input, codeKind)) {
605 return RegExpRunStatus_Error;
606 }
607 return RegExpShared::execute(cx, &re, input, startIndex, matches);
608 }
609
610 } // namespace irregexp
611 } // namespace js
612