1 // Copyright 2012 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef V8_REGEXP_REGEXP_H_ 6 #define V8_REGEXP_REGEXP_H_ 7 8 #include "src/objects/js-regexp.h" 9 #include "src/regexp/regexp-error.h" 10 11 namespace v8 { 12 namespace internal { 13 14 class RegExpNode; 15 class RegExpTree; 16 17 enum class RegExpCompilationTarget : int { kBytecode, kNative }; 18 19 // TODO(jgruber): Do not expose in regexp.h. 20 // TODO(jgruber): Consider splitting between ParseData and CompileData. 21 struct RegExpCompileData { 22 // The parsed AST as produced by the RegExpParser. 23 RegExpTree* tree = nullptr; 24 25 // The compiled Node graph as produced by RegExpTree::ToNode methods. 26 RegExpNode* node = nullptr; 27 28 // Either the generated code as produced by the compiler or a trampoline 29 // to the interpreter. 30 Handle<Object> code; 31 32 // True, iff the pattern is a 'simple' atom with zero captures. In other 33 // words, the pattern consists of a string with no metacharacters and special 34 // regexp features, and can be implemented as a standard string search. 35 bool simple = true; 36 37 // True, iff the pattern is anchored at the start of the string with '^'. 38 bool contains_anchor = false; 39 40 // Only use if the pattern contains named captures. If so, this contains a 41 // mapping of capture names to capture indices. 42 Handle<FixedArray> capture_name_map; 43 44 // The error message. Only used if an error occurred during parsing or 45 // compilation. 46 RegExpError error = RegExpError::kNone; 47 48 // The position at which the error was detected. Only used if an 49 // error occurred. 50 int error_pos = 0; 51 52 // The number of capture groups, without the global capture \0. 53 int capture_count = 0; 54 55 // The number of registers used by the generated code. 56 int register_count = 0; 57 58 // The compilation target (bytecode or native code). 59 RegExpCompilationTarget compilation_target; 60 }; 61 62 class RegExp final : public AllStatic { 63 public: 64 // Whether the irregexp engine generates interpreter bytecode. CanGenerateBytecode()65 static bool CanGenerateBytecode() { 66 return FLAG_regexp_interpret_all || FLAG_regexp_tier_up; 67 } 68 69 // Parses the RegExp pattern and prepares the JSRegExp object with 70 // generic data and choice of implementation - as well as what 71 // the implementation wants to store in the data field. 72 // Returns false if compilation fails. 73 V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Compile( 74 Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern, 75 JSRegExp::Flags flags, uint32_t backtrack_limit); 76 77 // Ensures that a regexp is fully compiled and ready to be executed on a 78 // subject string. Returns true on success. Return false on failure, and 79 // then an exception will be pending. 80 V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate, 81 Handle<JSRegExp> re, 82 Handle<String> subject); 83 84 enum CallOrigin : int { 85 kFromRuntime = 0, 86 kFromJs = 1, 87 }; 88 89 // See ECMA-262 section 15.10.6.2. 90 // This function calls the garbage collector if necessary. 91 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec( 92 Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject, 93 int index, Handle<RegExpMatchInfo> last_match_info); 94 95 V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> 96 ExperimentalOneshotExec(Isolate* isolate, Handle<JSRegExp> regexp, 97 Handle<String> subject, int index, 98 Handle<RegExpMatchInfo> last_match_info); 99 100 // Integral return values used throughout regexp code layers. 101 static constexpr int kInternalRegExpFailure = 0; 102 static constexpr int kInternalRegExpSuccess = 1; 103 static constexpr int kInternalRegExpException = -1; 104 static constexpr int kInternalRegExpRetry = -2; 105 static constexpr int kInternalRegExpFallbackToExperimental = -3; 106 static constexpr int kInternalRegExpSmallestResult = -3; 107 108 enum IrregexpResult : int32_t { 109 RE_FAILURE = kInternalRegExpFailure, 110 RE_SUCCESS = kInternalRegExpSuccess, 111 RE_EXCEPTION = kInternalRegExpException, 112 RE_RETRY = kInternalRegExpRetry, 113 RE_FALLBACK_TO_EXPERIMENTAL = kInternalRegExpFallbackToExperimental, 114 }; 115 116 // Set last match info. If match is nullptr, then setting captures is 117 // omitted. 118 static Handle<RegExpMatchInfo> SetLastMatchInfo( 119 Isolate* isolate, Handle<RegExpMatchInfo> last_match_info, 120 Handle<String> subject, int capture_count, int32_t* match); 121 122 V8_EXPORT_PRIVATE static bool CompileForTesting(Isolate* isolate, Zone* zone, 123 RegExpCompileData* input, 124 JSRegExp::Flags flags, 125 Handle<String> pattern, 126 Handle<String> sample_subject, 127 bool is_one_byte); 128 129 V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label, 130 RegExpNode* node); 131 132 static const int kRegExpTooLargeToOptimize = 20 * KB; 133 134 V8_WARN_UNUSED_RESULT 135 static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate, 136 Handle<JSRegExp> re, 137 Handle<String> pattern, 138 RegExpError error); 139 static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, 140 RegExpError error_text); 141 142 static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp); 143 }; 144 145 // Uses a special global mode of irregexp-generated code to perform a global 146 // search and return multiple results at once. As such, this is essentially an 147 // iterator over multiple results (retrieved batch-wise in advance). 148 class RegExpGlobalCache final { 149 public: 150 RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject, 151 Isolate* isolate); 152 153 ~RegExpGlobalCache(); 154 155 // Fetch the next entry in the cache for global regexp match results. 156 // This does not set the last match info. Upon failure, nullptr is 157 // returned. The cause can be checked with Result(). The previous result is 158 // still in available in memory when a failure happens. 159 int32_t* FetchNext(); 160 161 int32_t* LastSuccessfulMatch(); 162 HasException()163 bool HasException() { return num_matches_ < 0; } 164 165 private: 166 int AdvanceZeroLength(int last_index); 167 168 int num_matches_; 169 int max_matches_; 170 int current_match_index_; 171 int registers_per_match_; 172 // Pointer to the last set of captures. 173 int32_t* register_array_; 174 int register_array_size_; 175 Handle<JSRegExp> regexp_; 176 Handle<String> subject_; 177 Isolate* isolate_; 178 }; 179 180 // Caches results for specific regexp queries on the isolate. At the time of 181 // writing, this is used during global calls to RegExp.prototype.exec and 182 // @@split. 183 class RegExpResultsCache final : public AllStatic { 184 public: 185 enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS }; 186 187 // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi. 188 // On success, the returned result is guaranteed to be a COW-array. 189 static Object Lookup(Heap* heap, String key_string, Object key_pattern, 190 FixedArray* last_match_out, ResultsCacheType type); 191 // Attempt to add value_array to the cache specified by type. On success, 192 // value_array is turned into a COW-array. 193 static void Enter(Isolate* isolate, Handle<String> key_string, 194 Handle<Object> key_pattern, Handle<FixedArray> value_array, 195 Handle<FixedArray> last_match_cache, ResultsCacheType type); 196 static void Clear(FixedArray cache); 197 198 static constexpr int kRegExpResultsCacheSize = 0x100; 199 200 private: 201 static constexpr int kStringOffset = 0; 202 static constexpr int kPatternOffset = 1; 203 static constexpr int kArrayOffset = 2; 204 static constexpr int kLastMatchOffset = 3; 205 static constexpr int kArrayEntriesPerCacheEntry = 4; 206 }; 207 208 } // namespace internal 209 } // namespace v8 210 211 #endif // V8_REGEXP_REGEXP_H_ 212