1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2  * vim: set ts=8 sts=2 et sw=2 tw=80:
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 // Copyright 2020 the V8 project authors. All rights reserved.
8 // Use of this source code is governed by a BSD-style license that can be
9 // found in the LICENSE file.
10 
11 #include "new-regexp/RegExpAPI.h"
12 
13 #include "mozilla/ArrayUtils.h"
14 #include "mozilla/Casting.h"
15 
16 #include "gc/Zone.h"
17 #include "jit/JitCommon.h"
18 #include "new-regexp/regexp-bytecode-generator.h"
19 #include "new-regexp/regexp-compiler.h"
20 #include "new-regexp/regexp-interpreter.h"
21 #include "new-regexp/regexp-macro-assembler-arch.h"
22 #include "new-regexp/regexp-macro-assembler-tracer.h"
23 #include "new-regexp/regexp-parser.h"
24 #include "new-regexp/regexp-shim.h"
25 #include "new-regexp/regexp.h"
26 #include "util/StringBuffer.h"
27 #include "vm/MatchPairs.h"
28 #include "vm/RegExpShared.h"
29 
30 namespace js {
31 namespace irregexp {
32 
33 using mozilla::AssertedCast;
34 using mozilla::Maybe;
35 using mozilla::PointerRangeSize;
36 
37 using frontend::DummyTokenStream;
38 using frontend::TokenStreamAnyChars;
39 
40 using v8::internal::FlatStringReader;
41 using v8::internal::HandleScope;
42 using v8::internal::InputOutputData;
43 using v8::internal::IrregexpInterpreter;
44 using v8::internal::NativeRegExpMacroAssembler;
45 using v8::internal::RegExpBytecodeGenerator;
46 using v8::internal::RegExpCompileData;
47 using v8::internal::RegExpCompiler;
48 using v8::internal::RegExpError;
49 using v8::internal::RegExpMacroAssembler;
50 using v8::internal::RegExpMacroAssemblerTracer;
51 using v8::internal::RegExpNode;
52 using v8::internal::RegExpParser;
53 using v8::internal::SMRegExpMacroAssembler;
54 using v8::internal::Zone;
55 
56 using V8HandleString = v8::internal::Handle<v8::internal::String>;
57 using V8HandleRegExp = v8::internal::Handle<v8::internal::JSRegExp>;
58 
59 using namespace v8::internal::regexp_compiler_constants;
60 
ErrorNumber(RegExpError err)61 static uint32_t ErrorNumber(RegExpError err) {
62   switch (err) {
63     case RegExpError::kNone:
64       return JSMSG_NOT_AN_ERROR;
65     case RegExpError::kStackOverflow:
66       return JSMSG_OVER_RECURSED;
67     case RegExpError::kAnalysisStackOverflow:
68       return JSMSG_OVER_RECURSED;
69     case RegExpError::kTooLarge:
70       return JSMSG_TOO_MANY_PARENS;
71     case RegExpError::kUnterminatedGroup:
72       return JSMSG_MISSING_PAREN;
73     case RegExpError::kUnmatchedParen:
74       return JSMSG_UNMATCHED_RIGHT_PAREN;
75     case RegExpError::kEscapeAtEndOfPattern:
76       return JSMSG_ESCAPE_AT_END_OF_REGEXP;
77     case RegExpError::kInvalidPropertyName:
78       return JSMSG_INVALID_PROPERTY_NAME;
79     case RegExpError::kInvalidEscape:
80       return JSMSG_INVALID_IDENTITY_ESCAPE;
81     case RegExpError::kInvalidDecimalEscape:
82       return JSMSG_INVALID_DECIMAL_ESCAPE;
83     case RegExpError::kInvalidUnicodeEscape:
84       return JSMSG_INVALID_UNICODE_ESCAPE;
85     case RegExpError::kNothingToRepeat:
86       return JSMSG_NOTHING_TO_REPEAT;
87     case RegExpError::kLoneQuantifierBrackets:
88       // Note: V8 reports the same error for both ']' and '}'.
89       return JSMSG_RAW_BRACKET_IN_REGEXP;
90     case RegExpError::kRangeOutOfOrder:
91       return JSMSG_NUMBERS_OUT_OF_ORDER;
92     case RegExpError::kIncompleteQuantifier:
93       return JSMSG_INCOMPLETE_QUANTIFIER;
94     case RegExpError::kInvalidQuantifier:
95       return JSMSG_INVALID_QUANTIFIER;
96     case RegExpError::kInvalidGroup:
97       return JSMSG_INVALID_GROUP;
98     case RegExpError::kMultipleFlagDashes:
99     case RegExpError::kRepeatedFlag:
100     case RegExpError::kInvalidFlagGroup:
101       // V8 contains experimental support for turning regexp flags on
102       // and off in the middle of a regular expression. Unless it
103       // becomes standardized, SM does not support this feature.
104       MOZ_CRASH("Mode modifiers not supported");
105     case RegExpError::kTooManyCaptures:
106       return JSMSG_TOO_MANY_PARENS;
107     case RegExpError::kInvalidCaptureGroupName:
108       return JSMSG_INVALID_CAPTURE_NAME;
109     case RegExpError::kDuplicateCaptureGroupName:
110       return JSMSG_DUPLICATE_CAPTURE_NAME;
111     case RegExpError::kInvalidNamedReference:
112       return JSMSG_INVALID_NAMED_REF;
113     case RegExpError::kInvalidNamedCaptureReference:
114       return JSMSG_INVALID_NAMED_CAPTURE_REF;
115     case RegExpError::kInvalidClassEscape:
116       return JSMSG_RANGE_WITH_CLASS_ESCAPE;
117     case RegExpError::kInvalidClassPropertyName:
118       return JSMSG_INVALID_CLASS_PROPERTY_NAME;
119     case RegExpError::kInvalidCharacterClass:
120       return JSMSG_RANGE_WITH_CLASS_ESCAPE;
121     case RegExpError::kUnterminatedCharacterClass:
122       return JSMSG_UNTERM_CLASS;
123     case RegExpError::kOutOfOrderCharacterClass:
124       return JSMSG_BAD_CLASS_RANGE;
125     case RegExpError::NumErrors:
126       MOZ_CRASH("Unreachable");
127   }
128   MOZ_CRASH("Unreachable");
129 }
130 
CreateIsolate(JSContext * cx)131 Isolate* CreateIsolate(JSContext* cx) {
132   auto isolate = MakeUnique<Isolate>(cx);
133   if (!isolate || !isolate->init()) {
134     return nullptr;
135   }
136   return isolate.release();
137 }
138 
DestroyIsolate(Isolate * isolate)139 void DestroyIsolate(Isolate* isolate) {
140   js_delete(isolate);
141 }
142 
ComputeColumn(const Latin1Char * begin,const Latin1Char * end)143 static size_t ComputeColumn(const Latin1Char* begin, const Latin1Char* end) {
144   return PointerRangeSize(begin, end);
145 }
146 
ComputeColumn(const char16_t * begin,const char16_t * end)147 static size_t ComputeColumn(const char16_t* begin, const char16_t* end) {
148   return unicode::CountCodePoints(begin, end);
149 }
150 
151 // This function is varargs purely so it can call ReportCompileErrorLatin1.
152 // We never call it with additional arguments.
153 template <typename CharT>
ReportSyntaxError(TokenStreamAnyChars & ts,RegExpCompileData & result,CharT * start,size_t length,...)154 static void ReportSyntaxError(TokenStreamAnyChars& ts,
155                               RegExpCompileData& result, CharT* start,
156                               size_t length, ...) {
157   gc::AutoSuppressGC suppressGC(ts.context());
158   uint32_t errorNumber = ErrorNumber(result.error);
159 
160   if (errorNumber == JSMSG_OVER_RECURSED) {
161     ReportOverRecursed(ts.context());
162     return;
163   }
164 
165   uint32_t offset = std::max(result.error_pos, 0);
166   MOZ_ASSERT(offset <= length);
167 
168   ErrorMetadata err;
169 
170   // Ordinarily this indicates whether line-of-context information can be
171   // added, but we entirely ignore that here because we create a
172   // a line of context based on the expression source.
173   uint32_t location = ts.currentToken().pos.begin;
174   if (ts.fillExceptingContext(&err, location)) {
175     // Line breaks are not significant in pattern text in the same way as
176     // in source text, so act as though pattern text is a single line, then
177     // compute a column based on "code point" count (treating a lone
178     // surrogate as a "code point" in UTF-16).  Gak.
179     err.lineNumber = 1;
180     err.columnNumber =
181         AssertedCast<uint32_t>(ComputeColumn(start, start + offset));
182   }
183 
184   // For most error reporting, the line of context derives from the token
185   // stream.  So when location information doesn't come from the token
186   // stream, we can't give a line of context.  But here the "line of context"
187   // can be (and is) derived from the pattern text, so we can provide it no
188   // matter if the location is derived from the caller.
189 
190   const CharT* windowStart =
191       (offset > ErrorMetadata::lineOfContextRadius)
192           ? start + (offset - ErrorMetadata::lineOfContextRadius)
193           : start;
194 
195   const CharT* windowEnd =
196       (length - offset > ErrorMetadata::lineOfContextRadius)
197           ? start + offset + ErrorMetadata::lineOfContextRadius
198           : start + length;
199 
200   size_t windowLength = PointerRangeSize(windowStart, windowEnd);
201   MOZ_ASSERT(windowLength <= ErrorMetadata::lineOfContextRadius * 2);
202 
203   // Create the windowed string, not including the potential line
204   // terminator.
205   StringBuffer windowBuf(ts.context());
206   if (!windowBuf.append(windowStart, windowEnd)) return;
207 
208   // The line of context must be null-terminated, and StringBuffer doesn't
209   // make that happen unless we force it to.
210   if (!windowBuf.append('\0')) return;
211 
212   err.lineOfContext.reset(windowBuf.stealChars());
213   if (!err.lineOfContext) return;
214 
215   err.lineLength = windowLength;
216   err.tokenOffset = offset - (windowStart - start);
217 
218   va_list args;
219   va_start(args, length);
220   ReportCompileErrorLatin1(ts.context(), std::move(err), nullptr, errorNumber,
221                            &args);
222   va_end(args);
223 }
224 
ReportSyntaxError(TokenStreamAnyChars & ts,RegExpCompileData & result,HandleAtom pattern)225 static void ReportSyntaxError(TokenStreamAnyChars& ts,
226                               RegExpCompileData& result, HandleAtom pattern) {
227   JS::AutoCheckCannotGC nogc_;
228   if (pattern->hasLatin1Chars()) {
229     ReportSyntaxError(ts, result, pattern->latin1Chars(nogc_),
230                       pattern->length());
231   } else {
232     ReportSyntaxError(ts, result, pattern->twoByteChars(nogc_),
233                       pattern->length());
234   }
235 }
236 
CheckPatternSyntaxImpl(JSContext * cx,FlatStringReader * pattern,JS::RegExpFlags flags,RegExpCompileData * result)237 static bool CheckPatternSyntaxImpl(JSContext* cx, FlatStringReader* pattern,
238                                    JS::RegExpFlags flags,
239                                    RegExpCompileData* result) {
240   LifoAllocScope allocScope(&cx->tempLifoAlloc());
241   Zone zone(allocScope.alloc());
242 
243   HandleScope handleScope(cx->isolate);
244   return RegExpParser::ParseRegExp(cx->isolate, &zone, pattern, flags, result);
245 }
246 
CheckPatternSyntax(JSContext * cx,TokenStreamAnyChars & ts,const mozilla::Range<const char16_t> chars,JS::RegExpFlags flags)247 bool CheckPatternSyntax(JSContext* cx, TokenStreamAnyChars& ts,
248                         const mozilla::Range<const char16_t> chars,
249                         JS::RegExpFlags flags) {
250   FlatStringReader reader(chars);
251   RegExpCompileData result;
252   if (!CheckPatternSyntaxImpl(cx, &reader, flags, &result)) {
253     ReportSyntaxError(ts, result, chars.begin().get(), chars.length());
254     return false;
255   }
256   return true;
257 }
258 
CheckPatternSyntax(JSContext * cx,TokenStreamAnyChars & ts,HandleAtom pattern,JS::RegExpFlags flags)259 bool CheckPatternSyntax(JSContext* cx, TokenStreamAnyChars& ts,
260                         HandleAtom pattern, JS::RegExpFlags flags) {
261   FlatStringReader reader(cx, pattern);
262   RegExpCompileData result;
263   if (!CheckPatternSyntaxImpl(cx, &reader, flags, &result)) {
264     ReportSyntaxError(ts, result, pattern);
265     return false;
266   }
267   return true;
268 }
269 
270 // A regexp is a good candidate for Boyer-Moore if it has at least 3
271 // times as many characters as it has unique characters. Note that
272 // table lookups in irregexp are done modulo tableSize (128).
273 template <typename CharT>
HasFewDifferentCharacters(const CharT * chars,size_t length)274 static bool HasFewDifferentCharacters(const CharT* chars, size_t length) {
275   const uint32_t tableSize =
276       v8::internal::NativeRegExpMacroAssembler::kTableSize;
277   bool character_found[tableSize];
278   uint32_t different = 0;
279   memset(&character_found[0], 0, sizeof(character_found));
280   for (uint32_t i = 0; i < length; i++) {
281     uint32_t ch = chars[i] % tableSize;
282     if (!character_found[ch]) {
283       character_found[ch] = true;
284       different++;
285       // We declare a regexp low-alphabet if it has at least 3 times as many
286       // characters as it has different characters.
287       if (different * 3 > length) {
288         return false;
289       }
290     }
291   }
292   return true;
293 }
294 
295 // Identifies the sort of pattern where Boyer-Moore is faster than string search
UseBoyerMoore(HandleAtom pattern,JS::AutoAssertNoGC & nogc)296 static bool UseBoyerMoore(HandleAtom pattern, JS::AutoAssertNoGC& nogc) {
297   size_t length =
298       std::min(size_t(kMaxLookaheadForBoyerMoore), pattern->length());
299   if (length <= kPatternTooShortForBoyerMoore) {
300     return false;
301   }
302 
303   if (pattern->hasLatin1Chars()) {
304     return HasFewDifferentCharacters(pattern->latin1Chars(nogc), length);
305   }
306   MOZ_ASSERT(pattern->hasTwoByteChars());
307   return HasFewDifferentCharacters(pattern->twoByteChars(nogc), length);
308 }
309 
310 // Sample character frequency information for use in Boyer-Moore.
SampleCharacters(FlatStringReader * sample_subject,RegExpCompiler & compiler)311 static void SampleCharacters(FlatStringReader* sample_subject,
312                              RegExpCompiler& compiler) {
313   static const int kSampleSize = 128;
314   int chars_sampled = 0;
315 
316   int length = sample_subject->length();
317 
318   int half_way = (length - kSampleSize) / 2;
319   for (int i = std::max(0, half_way); i < length && chars_sampled < kSampleSize;
320        i++, chars_sampled++) {
321     compiler.frequency_collator()->CountCharacter(sample_subject->Get(i));
322   }
323 }
324 
// Outcome of Assemble().
enum class AssembleResult {
  // Code was generated and stored on the RegExpShared.
  Success = 0,
  // The compiler rejected the regexp as too large.
  TooLarge = 1,
  // An allocation failed while assembling.
  OutOfMemory = 2,
};
330 
Assemble(JSContext * cx,RegExpCompiler * compiler,RegExpCompileData * data,MutableHandleRegExpShared re,HandleAtom pattern,Zone * zone,bool useNativeCode,bool isLatin1)331 static MOZ_MUST_USE AssembleResult Assemble(JSContext* cx,
332                                             RegExpCompiler* compiler,
333                                             RegExpCompileData* data,
334                                             MutableHandleRegExpShared re,
335                                             HandleAtom pattern, Zone* zone,
336                                             bool useNativeCode, bool isLatin1) {
337   // Because we create a StackMacroAssembler, this function is not allowed
338   // to GC. If needed, we allocate and throw errors in the caller.
339   Maybe<jit::JitContext> jctx;
340   Maybe<js::jit::StackMacroAssembler> stack_masm;
341   UniquePtr<RegExpMacroAssembler> masm;
342   if (useNativeCode) {
343     NativeRegExpMacroAssembler::Mode mode =
344         isLatin1 ? NativeRegExpMacroAssembler::LATIN1
345                  : NativeRegExpMacroAssembler::UC16;
346     // If we are compiling native code, we need a macroassembler,
347     // which needs a jit context.
348     jctx.emplace(cx, nullptr);
349     stack_masm.emplace();
350     uint32_t num_capture_registers = re->pairCount() * 2;
351     masm = MakeUnique<SMRegExpMacroAssembler>(cx, stack_masm.ref(), zone, mode,
352                                               num_capture_registers);
353   } else {
354     masm = MakeUnique<RegExpBytecodeGenerator>(cx->isolate, zone);
355   }
356   if (!masm) {
357     return AssembleResult::OutOfMemory;
358   }
359 
360   bool isLargePattern =
361       pattern->length() > v8::internal::RegExp::kRegExpTooLargeToOptimize;
362   masm->set_slow_safe(isLargePattern);
363   if (compiler->optimize()) {
364     compiler->set_optimize(!isLargePattern);
365   }
366 
367   // When matching a regexp with known maximum length that is anchored
368   // at the end, we may be able to skip the beginning of long input
369   // strings. This decision is made here because it depends on
370   // information in the AST that isn't replicated in the Node
371   // structure used inside the compiler.
372   bool is_start_anchored = data->tree->IsAnchoredAtStart();
373   bool is_end_anchored = data->tree->IsAnchoredAtEnd();
374   int max_length = data->tree->max_match();
375   static const int kMaxBacksearchLimit = 1024;
376   if (is_end_anchored && !is_start_anchored && !re->sticky() &&
377       max_length < kMaxBacksearchLimit) {
378     masm->SetCurrentPositionFromEnd(max_length);
379   }
380 
381   if (re->global()) {
382     RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
383     if (data->tree->min_match() > 0) {
384       mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
385     } else if (re->unicode()) {
386       mode = RegExpMacroAssembler::GLOBAL_UNICODE;
387     }
388     masm->set_global_mode(mode);
389   }
390 
391   // The masm tracer works as a thin wrapper around another macroassembler.
392   RegExpMacroAssembler* masm_ptr = masm.get();
393 #ifdef DEBUG
394   UniquePtr<RegExpMacroAssembler> tracer_masm;
395   if (jit::JitOptions.traceRegExpAssembler) {
396     tracer_masm = MakeUnique<RegExpMacroAssemblerTracer>(cx->isolate, masm_ptr);
397     masm_ptr = tracer_masm.get();
398   }
399 #endif
400 
401   // Compile the regexp.
402   V8HandleString wrappedPattern(v8::internal::String(pattern), cx->isolate);
403   RegExpCompiler::CompilationResult result = compiler->Assemble(
404       cx->isolate, masm_ptr, data->node, data->capture_count, wrappedPattern);
405   if (!result.Succeeded()) {
406     MOZ_ASSERT(result.error == RegExpError::kTooLarge);
407     return AssembleResult::TooLarge;
408   }
409   if (result.code->value().isUndefined()) {
410     // SMRegExpMacroAssembler::GetCode returns undefined on OOM.
411     MOZ_ASSERT(useNativeCode);
412     return AssembleResult::OutOfMemory;
413   }
414 
415   re->updateMaxRegisters(result.num_registers);
416   if (useNativeCode) {
417     // Transfer ownership of the tables from the macroassembler to the
418     // RegExpShared.
419     SMRegExpMacroAssembler::TableVector& tables =
420         static_cast<SMRegExpMacroAssembler*>(masm.get())->tables();
421     for (uint32_t i = 0; i < tables.length(); i++) {
422       if (!re->addTable(std::move(tables[i]))) {
423         return AssembleResult::OutOfMemory;
424       }
425     }
426     re->setJitCode(v8::internal::Code::cast(*result.code).inner(), isLatin1);
427   } else {
428     // Transfer ownership of the bytecode from the HandleScope to the
429     // RegExpShared.
430     ByteArray bytecode =
431         v8::internal::ByteArray::cast(*result.code).takeOwnership(cx->isolate);
432     uint32_t length = bytecode->length;
433     re->setByteCode(bytecode.release(), isLatin1);
434     js::AddCellMemory(re, length, MemoryUse::RegExpSharedBytecode);
435   }
436 
437   return AssembleResult::Success;
438 }
439 
CompilePattern(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,RegExpShared::CodeKind codeKind)440 bool CompilePattern(JSContext* cx, MutableHandleRegExpShared re,
441                     HandleLinearString input, RegExpShared::CodeKind codeKind) {
442   RootedAtom pattern(cx, re->getSource());
443   JS::RegExpFlags flags = re->getFlags();
444   LifoAllocScope allocScope(&cx->tempLifoAlloc());
445   Zone zone(allocScope.alloc());
446 
447   RegExpCompileData data;
448   {
449     FlatStringReader patternBytes(cx, pattern);
450     if (!RegExpParser::ParseRegExp(cx->isolate, &zone, &patternBytes, flags,
451                                    &data)) {
452       JS::CompileOptions options(cx);
453       DummyTokenStream dummyTokenStream(cx, options);
454       ReportSyntaxError(dummyTokenStream, data, pattern);
455       return false;
456     }
457   }
458 
459   if (re->kind() == RegExpShared::Kind::Unparsed) {
460     // This is the first time we have compiled this regexp.
461     // First, check to see if we should use simple string search
462     // with an atom.
463     if (!flags.ignoreCase() && !flags.sticky()) {
464       RootedAtom searchAtom(cx);
465       if (data.simple) {
466         // The parse-tree is a single atom that is equal to the pattern.
467         searchAtom = re->getSource();
468       } else if (data.tree->IsAtom() && data.capture_count == 0) {
469         // The parse-tree is a single atom that is not equal to the pattern.
470         v8::internal::RegExpAtom* atom = data.tree->AsAtom();
471         const char16_t* twoByteChars = atom->data().begin();
472         searchAtom = AtomizeChars(cx, twoByteChars, atom->length());
473         if (!searchAtom) {
474           return false;
475         }
476       }
477       JS::AutoAssertNoGC nogc(cx);
478       if (searchAtom && !UseBoyerMoore(searchAtom, nogc)) {
479         re->useAtomMatch(searchAtom);
480         return true;
481       }
482     }
483     if (!data.capture_name_map.is_null()) {
484       RootedNativeObject namedCaptures(cx, data.capture_name_map->inner());
485       if (!RegExpShared::initializeNamedCaptures(cx, re, namedCaptures)) {
486         return false;
487       }
488     }
489     // All fallible initialization has succeeded, so we can change state.
490     // Add one to capture_count to account for the whole-match capture.
491     uint32_t pairCount = data.capture_count + 1;
492     re->useRegExpMatch(pairCount);
493   }
494 
495   MOZ_ASSERT(re->kind() == RegExpShared::Kind::RegExp);
496 
497   HandleScope handleScope(cx->isolate);
498   RegExpCompiler compiler(cx->isolate, &zone, data.capture_count,
499                           input->hasLatin1Chars());
500 
501   bool isLatin1 = input->hasLatin1Chars();
502 
503   FlatStringReader sample_subject(cx, input);
504   SampleCharacters(&sample_subject, compiler);
505   data.node = compiler.PreprocessRegExp(&data, flags, isLatin1);
506   data.error = AnalyzeRegExp(cx->isolate, isLatin1, data.node);
507   if (data.error != RegExpError::kNone) {
508     MOZ_ASSERT(data.error == RegExpError::kAnalysisStackOverflow);
509     ReportOverRecursed(cx);
510     return false;
511   }
512 
513   bool useNativeCode = codeKind == RegExpShared::CodeKind::Jitcode;
514   MOZ_ASSERT_IF(useNativeCode, IsNativeRegExpEnabled());
515 
516   switch (Assemble(cx, &compiler, &data, re, pattern, &zone, useNativeCode,
517                    isLatin1)) {
518     case AssembleResult::TooLarge:
519       JS_ReportErrorASCII(cx, "regexp too big");
520       return false;
521     case AssembleResult::OutOfMemory:
522       ReportOutOfMemory(cx);
523       return false;
524     case AssembleResult::Success:
525       break;
526   }
527   return true;
528 }
529 
530 template <typename CharT>
ExecuteRaw(jit::JitCode * code,const CharT * chars,size_t length,size_t startIndex,VectorMatchPairs * matches)531 RegExpRunStatus ExecuteRaw(jit::JitCode* code, const CharT* chars,
532                            size_t length, size_t startIndex,
533                            VectorMatchPairs* matches) {
534   InputOutputData data(chars, chars + length, startIndex, matches);
535 
536   static_assert(RegExpRunStatus_Error ==
537                 v8::internal::RegExp::kInternalRegExpException);
538   static_assert(RegExpRunStatus_Success ==
539                 v8::internal::RegExp::kInternalRegExpSuccess);
540   static_assert(RegExpRunStatus_Success_NotFound ==
541                 v8::internal::RegExp::kInternalRegExpFailure);
542 
543   typedef int (*RegExpCodeSignature)(InputOutputData*);
544   auto function = reinterpret_cast<RegExpCodeSignature>(code->raw());
545   {
546     JS::AutoSuppressGCAnalysis nogc;
547     return (RegExpRunStatus) CALL_GENERATED_1(function, &data);
548   }
549 }
550 
Interpret(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,size_t startIndex,VectorMatchPairs * matches)551 RegExpRunStatus Interpret(JSContext* cx, MutableHandleRegExpShared re,
552                           HandleLinearString input, size_t startIndex,
553                           VectorMatchPairs* matches) {
554   HandleScope handleScope(cx->isolate);
555   V8HandleRegExp wrappedRegExp(v8::internal::JSRegExp(re), cx->isolate);
556   V8HandleString wrappedInput(v8::internal::String(input), cx->isolate);
557 
558   static_assert(RegExpRunStatus_Error ==
559                 v8::internal::RegExp::kInternalRegExpException);
560   static_assert(RegExpRunStatus_Success ==
561                 v8::internal::RegExp::kInternalRegExpSuccess);
562   static_assert(RegExpRunStatus_Success_NotFound ==
563                 v8::internal::RegExp::kInternalRegExpFailure);
564 
565   RegExpRunStatus status =
566       (RegExpRunStatus)IrregexpInterpreter::MatchForCallFromRuntime(
567            cx->isolate, wrappedRegExp, wrappedInput, matches->pairsRaw(),
568            matches->pairCount() * 2, startIndex);
569 
570   MOZ_ASSERT(status == RegExpRunStatus_Error ||
571              status == RegExpRunStatus_Success ||
572              status == RegExpRunStatus_Success_NotFound);
573 
574   return status;
575 }
576 
Execute(JSContext * cx,MutableHandleRegExpShared re,HandleLinearString input,size_t startIndex,VectorMatchPairs * matches)577 RegExpRunStatus Execute(JSContext* cx, MutableHandleRegExpShared re,
578                         HandleLinearString input, size_t startIndex,
579                         VectorMatchPairs* matches) {
580   bool latin1 = input->hasLatin1Chars();
581   jit::JitCode* jitCode = re->getJitCode(latin1);
582   bool isCompiled = !!jitCode;
583 
584   if (isCompiled) {
585     JS::AutoCheckCannotGC nogc;
586     if (latin1) {
587       return ExecuteRaw(jitCode, input->latin1Chars(nogc), input->length(),
588                         startIndex, matches);
589     }
590     return ExecuteRaw(jitCode, input->twoByteChars(nogc), input->length(),
591                       startIndex, matches);
592   }
593 
594   return Interpret(cx, re, input, startIndex, matches);
595 }
596 
ExecuteForFuzzing(JSContext * cx,HandleAtom pattern,HandleLinearString input,JS::RegExpFlags flags,size_t startIndex,VectorMatchPairs * matches,RegExpShared::CodeKind codeKind)597 RegExpRunStatus ExecuteForFuzzing(JSContext* cx, HandleAtom pattern,
598                                   HandleLinearString input,
599                                   JS::RegExpFlags flags,
600                                   size_t startIndex,
601                                   VectorMatchPairs* matches,
602                                   RegExpShared::CodeKind codeKind) {
603   RootedRegExpShared re(cx, cx->zone()->regExps().get(cx, pattern, flags));
604   if (!RegExpShared::compileIfNecessary(cx, &re, input, codeKind)) {
605     return RegExpRunStatus_Error;
606   }
607   return RegExpShared::execute(cx, &re, input, startIndex, matches);
608 }
609 
610 }  // namespace irregexp
611 }  // namespace js
612