1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_REGEXP_H_ 6 #define V8_REGEXP_REGEXP_H_ 7 8 #include "irregexp/imported/regexp-error.h" 9 #include "irregexp/RegExpShim.h" 10 11 namespace v8 { 12 namespace internal { 13 14 class RegExpNode; 15 class RegExpTree; 16 17 enum class RegExpCompilationTarget : int { kBytecode, kNative }; 18 19 // TODO(jgruber): Do not expose in regexp.h. 20 // TODO(jgruber): Consider splitting between ParseData and CompileData. 21 struct RegExpCompileData { 22 // The parsed AST as produced by the RegExpParser. 23 RegExpTree* tree = nullptr; 24 25 // The compiled Node graph as produced by RegExpTree::ToNode methods. 26 RegExpNode* node = nullptr; 27 28 // Either the generated code as produced by the compiler or a trampoline 29 // to the interpreter. 30 Handle<Object> code; 31 32 // True, iff the pattern is a 'simple' atom with zero captures. In other 33 // words, the pattern consists of a string with no metacharacters and special 34 // regexp features, and can be implemented as a standard string search. 35 bool simple = true; 36 37 // True, iff the pattern is anchored at the start of the string with '^'. 38 bool contains_anchor = false; 39 40 // Only use if the pattern contains named captures. If so, this contains a 41 // mapping of capture names to capture indices. 42 Handle<FixedArray> capture_name_map; 43 44 // The error message. Only used if an error occurred during parsing or 45 // compilation. 46 RegExpError error = RegExpError::kNone; 47 48 // The position at which the error was detected. Only used if an 49 // error occurred. 50 int error_pos = 0; 51 52 // The number of capture groups, without the global capture \0. 53 int capture_count = 0; 54 55 // The number of registers used by the generated code. 56 int register_count = 0; 57 58 // The compilation target (bytecode or native code). 59 RegExpCompilationTarget compilation_target; 60 }; 61 62 class RegExp final : public AllStatic { 63 public: 64 // Whether the irregexp engine generates interpreter bytecode. CanGenerateBytecode()65 static bool CanGenerateBytecode() { 66 return FLAG_regexp_interpret_all || FLAG_regexp_tier_up; 67 } 68 69 // Parses the RegExp pattern and prepares the JSRegExp object with 70 // generic data and choice of implementation - as well as what 71 // the implementation wants to store in the data field. 72 // Returns false if compilation fails. 73 V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile( 74 Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern, 75 JSRegExp::Flags flags, uint32_t backtrack_limit); 76 77 // Ensures that a regexp is fully compiled and ready to be executed on a 78 // subject string. Returns true on success. Return false on failure, and 79 // then an exception will be pending. 80 V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate, 81 Handle<JSRegExp> re, 82 Handle<String> subject); 83 84 enum CallOrigin : int { 85 kFromRuntime = 0, 86 kFromJs = 1, 87 }; 88 89 enum class ExecQuirks { 90 kNone, 91 // Used to work around an issue in the RegExpPrototypeSplit fast path, 92 // which diverges from the spec by not creating a sticky copy of the RegExp 93 // instance and calling `exec` in a loop. If called in this context, we 94 // must not update the last_match_info on a successful match at the subject 95 // string end. See crbug.com/1075514 for more information. 96 kTreatMatchAtEndAsFailure, 97 }; 98 99 // See ECMA-262 section 15.10.6.2. 100 // This function calls the garbage collector if necessary. 101 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec( 102 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, 103 int index, Handle<RegExpMatchInfo> last_match_info, 104 ExecQuirks exec_quirks = ExecQuirks::kNone); 105 106 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> 107 ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp, 108 Handle<String> subject, int index, 109 Handle<RegExpMatchInfo> last_match_info, 110 ExecQuirks exec_quirks = ExecQuirks::kNone); 111 112 // Integral return values used throughout regexp code layers. 113 static constexpr int kInternalRegExpFailure = 0; 114 static constexpr int kInternalRegExpSuccess = 1; 115 static constexpr int kInternalRegExpException = -1; 116 static constexpr int kInternalRegExpRetry = -2; 117 static constexpr int kInternalRegExpFallbackToExperimental = -3; 118 static constexpr int kInternalRegExpSmallestResult = -3; 119 120 enum IrregexpResult : int32_t { 121 RE_FAILURE = kInternalRegExpFailure, 122 RE_SUCCESS = kInternalRegExpSuccess, 123 RE_EXCEPTION = kInternalRegExpException, 124 RE_RETRY = kInternalRegExpRetry, 125 RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental, 126 }; 127 128 // Set last match info. If match is nullptr, then setting captures is 129 // omitted. 130 static Handle<RegExpMatchInfo> SetLastMatchInfo( 131 Isolate* isolate, Handle<RegExpMatchInfo> last_match_info, 132 Handle<String> subject, int capture_count, int32_t* match); 133 134 V8_EXPORT_PRIVATE static bool CompileForTesting(Isolate* isolate, Zone* zone, 135 RegExpCompileData* input, 136 JSRegExp::Flags flags, 137 Handle<String> pattern, 138 Handle<String> sample_subject, 139 bool is_one_byte); 140 141 V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label, 142 RegExpNode* node); 143 144 static const int kRegExpTooLargeToOptimize = 20 * KB; 145 146 V8_WARN_UNUSED_RESULT 147 static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate, 148 Handle<JSRegExp> re, 149 Handle<String> pattern, 150 RegExpError error); 151 static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, 152 RegExpError error_text); 153 154 static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp); 155 }; 156 157 // Uses a special global mode of irregexp-generated code to perform a global 158 // search and return multiple results at once. As such, this is essentially an 159 // iterator over multiple results (retrieved batch-wise in advance). 160 class RegExpGlobalCache final { 161 public: 162 RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject, 163 Isolate* isolate); 164 165 ~RegExpGlobalCache(); 166 167 // Fetch the next entry in the cache for global regexp match results. 168 // This does not set the last match info. Upon failure, nullptr is 169 // returned. The cause can be checked with Result(). The previous result is 170 // still in available in memory when a failure happens. 171 int32_t* FetchNext(); 172 173 int32_t* LastSuccessfulMatch(); 174 HasException()175 bool HasException() { return num_matches_ < 0; } 176 177 private: 178 int AdvanceZeroLength(int last_index); 179 180 int num_matches_; 181 int max_matches_; 182 int current_match_index_; 183 int registers_per_match_; 184 // Pointer to the last set of captures. 185 int32_t* register_array_; 186 int register_array_size_; 187 Handle<JSRegExp> regexp_; 188 Handle<String> subject_; 189 Isolate* isolate_; 190 }; 191 192 // Caches results for specific regexp queries on the isolate. At the time of 193 // writing, this is used during global calls to RegExp.prototype.exec and 194 // @@split. 195 class RegExpResultsCache final : public AllStatic { 196 public: 197 enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS }; 198 199 // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi. 200 // On success, the returned result is guaranteed to be a COW-array. 201 static Object Lookup(Heap* heap, String key_string, Object key_pattern, 202 FixedArray* last_match_out, ResultsCacheType type); 203 // Attempt to add value_array to the cache specified by type. On success, 204 // value_array is turned into a COW-array. 205 static void Enter(Isolate* isolate, Handle<String> key_string, 206 Handle<Object> key_pattern, Handle<FixedArray> value_array, 207 Handle<FixedArray> last_match_cache, ResultsCacheType type); 208 static void Clear(FixedArray cache); 209 210 static constexpr int kRegExpResultsCacheSize = 0x100; 211 212 private: 213 static constexpr int kStringOffset = 0; 214 static constexpr int kPatternOffset = 1; 215 static constexpr int kArrayOffset = 2; 216 static constexpr int kLastMatchOffset = 3; 217 static constexpr int kArrayEntriesPerCacheEntry = 4; 218 }; 219 220 } // namespace internal 221 } // namespace v8 222 223 #endif // V8_REGEXP_REGEXP_H_ 224