1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- 2 * vim: set ts=8 sts=4 et sw=4 tw=99: */ 3 4 // Copyright 2012 the V8 project authors. All rights reserved. 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following 13 // disclaimer in the documentation and/or other materials provided 14 // with the distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 #ifndef V8_REGEXP_AST_H_ 32 #define V8_REGEXP_AST_H_ 33 34 // Prevent msvc build failures as indicated in bug 1205328 35 #ifdef min 36 # undef min 37 #endif 38 #ifdef max 39 # undef max 40 #endif 41 42 #include "irregexp/RegExpEngine.h" 43 44 namespace js { 45 namespace irregexp { 46 47 class RegExpCompiler; 48 class RegExpNode; 49 50 class RegExpVisitor 51 { 52 public: ~RegExpVisitor()53 virtual ~RegExpVisitor() { } 54 #define MAKE_CASE(Name) \ 55 virtual void* Visit##Name(RegExp##Name*, void* data) = 0; 56 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) 57 #undef MAKE_CASE 58 }; 59 60 class RegExpTree 61 { 62 public: 63 static const int kInfinity = INT32_MAX; ~RegExpTree()64 virtual ~RegExpTree() {} 65 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 66 RegExpNode* on_success) = 0; IsTextElement()67 virtual bool IsTextElement() { return false; } IsAnchoredAtStart()68 virtual bool IsAnchoredAtStart() { return false; } IsAnchoredAtEnd()69 virtual bool IsAnchoredAtEnd() { return false; } 70 virtual int min_match() = 0; 71 virtual int max_match() = 0; 72 // Returns the interval of registers used for captures within this 73 // expression. CaptureRegisters()74 virtual Interval CaptureRegisters() { return Interval::Empty(); } AppendToText(RegExpText * text)75 virtual void AppendToText(RegExpText* text) { 76 MOZ_CRASH("Bad call"); 77 } 78 #define MAKE_ASTYPE(Name) \ 79 virtual RegExp##Name* As##Name(); \ 80 virtual bool Is##Name(); 81 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE) 82 #undef MAKE_ASTYPE 83 }; 84 85 typedef Vector<RegExpTree*, 1, LifoAllocPolicy<Infallible> > RegExpTreeVector; 86 87 class RegExpDisjunction : public RegExpTree 88 { 89 public: 90 explicit RegExpDisjunction(RegExpTreeVector* alternatives); 91 virtual void* Accept(RegExpVisitor* visitor, void* data); 92 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 93 RegExpNode* on_success); 94 virtual RegExpDisjunction* AsDisjunction(); 95 virtual Interval CaptureRegisters(); 96 virtual bool IsDisjunction(); 97 virtual bool IsAnchoredAtStart(); 98 virtual bool IsAnchoredAtEnd(); min_match()99 virtual int min_match() { return min_match_; } max_match()100 virtual int max_match() { return max_match_; } 101 alternatives()102 const RegExpTreeVector& alternatives() { return *alternatives_; } 103 104 private: 105 RegExpTreeVector* alternatives_; 106 int min_match_; 107 int max_match_; 108 }; 109 110 class RegExpAlternative : public RegExpTree 111 { 112 public: 113 explicit RegExpAlternative(RegExpTreeVector* nodes); 114 virtual void* Accept(RegExpVisitor* visitor, void* data); 115 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 116 RegExpNode* on_success); 117 virtual RegExpAlternative* AsAlternative(); 118 virtual Interval CaptureRegisters(); 119 virtual bool IsAlternative(); 120 virtual bool IsAnchoredAtStart(); 121 virtual bool IsAnchoredAtEnd(); min_match()122 virtual int min_match() { return min_match_; } max_match()123 virtual int max_match() { return max_match_; } 124 nodes()125 const RegExpTreeVector& nodes() { return *nodes_; } 126 127 private: 128 RegExpTreeVector* nodes_; 129 int min_match_; 130 int max_match_; 131 }; 132 133 class RegExpAssertion : public RegExpTree { 134 public: 135 enum AssertionType { 136 START_OF_LINE, 137 START_OF_INPUT, 138 END_OF_LINE, 139 END_OF_INPUT, 140 BOUNDARY, 141 NON_BOUNDARY 142 }; RegExpAssertion(AssertionType type)143 explicit RegExpAssertion(AssertionType type) : assertion_type_(type) { } 144 virtual void* Accept(RegExpVisitor* visitor, void* data); 145 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 146 RegExpNode* on_success); 147 virtual RegExpAssertion* AsAssertion(); 148 virtual bool IsAssertion(); 149 virtual bool IsAnchoredAtStart(); 150 virtual bool IsAnchoredAtEnd(); min_match()151 virtual int min_match() { return 0; } max_match()152 virtual int max_match() { return 0; } assertion_type()153 AssertionType assertion_type() { return assertion_type_; } 154 private: 155 AssertionType assertion_type_; 156 }; 157 158 class CharacterSet 159 { 160 public: CharacterSet(char16_t standard_set_type)161 explicit CharacterSet(char16_t standard_set_type) 162 : ranges_(nullptr), 163 standard_set_type_(standard_set_type) 164 {} CharacterSet(CharacterRangeVector * ranges)165 explicit CharacterSet(CharacterRangeVector* ranges) 166 : ranges_(ranges), 167 standard_set_type_(0) 168 {} 169 170 CharacterRangeVector& ranges(LifoAlloc* alloc); standard_set_type()171 char16_t standard_set_type() { return standard_set_type_; } set_standard_set_type(char16_t special_set_type)172 void set_standard_set_type(char16_t special_set_type) { 173 standard_set_type_ = special_set_type; 174 } is_standard()175 bool is_standard() { return standard_set_type_ != 0; } 176 void Canonicalize(); 177 178 private: 179 CharacterRangeVector* ranges_; 180 181 // If non-zero, the value represents a standard set (e.g., all whitespace 182 // characters) without having to expand the ranges. 183 char16_t standard_set_type_; 184 }; 185 186 class RegExpCharacterClass : public RegExpTree 187 { 188 public: RegExpCharacterClass(CharacterRangeVector * ranges,bool is_negated)189 RegExpCharacterClass(CharacterRangeVector* ranges, bool is_negated) 190 : set_(ranges), 191 is_negated_(is_negated) 192 {} 193 RegExpCharacterClass(char16_t type)194 explicit RegExpCharacterClass(char16_t type) 195 : set_(type), 196 is_negated_(false) 197 {} 198 199 virtual void* Accept(RegExpVisitor* visitor, void* data); 200 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 201 RegExpNode* on_success); 202 virtual RegExpCharacterClass* AsCharacterClass(); 203 virtual bool IsCharacterClass(); IsTextElement()204 virtual bool IsTextElement() { return true; } min_match()205 virtual int min_match() { return 1; } max_match()206 virtual int max_match() { return 1; } 207 virtual void AppendToText(RegExpText* text); 208 character_set()209 CharacterSet character_set() { return set_; } 210 211 // TODO(lrn): Remove need for complex version if is_standard that 212 // recognizes a mangled standard set and just do { return set_.is_special(); } 213 bool is_standard(LifoAlloc* alloc); 214 215 // Returns a value representing the standard character set if is_standard() 216 // returns true. 217 // Currently used values are: 218 // s : unicode whitespace 219 // S : unicode non-whitespace 220 // w : ASCII word character (digit, letter, underscore) 221 // W : non-ASCII word character 222 // d : ASCII digit 223 // D : non-ASCII digit 224 // . : non-unicode non-newline 225 // * : All characters standard_type()226 char16_t standard_type() { return set_.standard_set_type(); } 227 ranges(LifoAlloc * alloc)228 CharacterRangeVector& ranges(LifoAlloc* alloc) { return set_.ranges(alloc); } is_negated()229 bool is_negated() { return is_negated_; } 230 231 private: 232 CharacterSet set_; 233 bool is_negated_; 234 }; 235 236 typedef Vector<char16_t, 10, LifoAllocPolicy<Infallible> > CharacterVector; 237 238 class RegExpAtom : public RegExpTree 239 { 240 public: RegExpAtom(CharacterVector * data)241 explicit RegExpAtom(CharacterVector* data) 242 : data_(data) 243 {} 244 245 virtual void* Accept(RegExpVisitor* visitor, void* data); 246 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 247 RegExpNode* on_success); 248 virtual RegExpAtom* AsAtom(); 249 virtual bool IsAtom(); IsTextElement()250 virtual bool IsTextElement() { return true; } min_match()251 virtual int min_match() { return data_->length(); } max_match()252 virtual int max_match() { return data_->length(); } 253 virtual void AppendToText(RegExpText* text); 254 data()255 const CharacterVector& data() { return *data_; } length()256 int length() { return data_->length(); } 257 258 private: 259 CharacterVector* data_; 260 }; 261 262 class RegExpText : public RegExpTree 263 { 264 public: RegExpText(LifoAlloc * alloc)265 explicit RegExpText(LifoAlloc* alloc) 266 : elements_(*alloc), length_(0) 267 {} 268 269 virtual void* Accept(RegExpVisitor* visitor, void* data); 270 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 271 RegExpNode* on_success); 272 virtual RegExpText* AsText(); 273 virtual bool IsText(); IsTextElement()274 virtual bool IsTextElement() { return true; } min_match()275 virtual int min_match() { return length_; } max_match()276 virtual int max_match() { return length_; } 277 virtual void AppendToText(RegExpText* text); 278 AddElement(TextElement elm)279 void AddElement(TextElement elm) { 280 elements_.append(elm); 281 length_ += elm.length(); 282 } elements()283 const TextElementVector& elements() { return elements_; } 284 285 private: 286 TextElementVector elements_; 287 int length_; 288 }; 289 290 class RegExpQuantifier : public RegExpTree 291 { 292 public: 293 enum QuantifierType { GREEDY, NON_GREEDY, POSSESSIVE }; RegExpQuantifier(int min,int max,QuantifierType type,RegExpTree * body)294 RegExpQuantifier(int min, int max, QuantifierType type, RegExpTree* body) 295 : body_(body), 296 min_(min), 297 max_(max), 298 min_match_(min * body->min_match()), 299 quantifier_type_(type) 300 { 301 if (max > 0 && body->max_match() > kInfinity / max) { 302 max_match_ = kInfinity; 303 } else { 304 max_match_ = max * body->max_match(); 305 } 306 } 307 308 virtual void* Accept(RegExpVisitor* visitor, void* data); 309 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 310 RegExpNode* on_success); 311 static RegExpNode* ToNode(int min, 312 int max, 313 bool is_greedy, 314 RegExpTree* body, 315 RegExpCompiler* compiler, 316 RegExpNode* on_success, 317 bool not_at_start = false); 318 virtual RegExpQuantifier* AsQuantifier(); 319 virtual Interval CaptureRegisters(); 320 virtual bool IsQuantifier(); min_match()321 virtual int min_match() { return min_match_; } max_match()322 virtual int max_match() { return max_match_; } min()323 int min() { return min_; } max()324 int max() { return max_; } is_possessive()325 bool is_possessive() { return quantifier_type_ == POSSESSIVE; } is_non_greedy()326 bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } is_greedy()327 bool is_greedy() { return quantifier_type_ == GREEDY; } body()328 RegExpTree* body() { return body_; } 329 330 private: 331 RegExpTree* body_; 332 int min_; 333 int max_; 334 int min_match_; 335 int max_match_; 336 QuantifierType quantifier_type_; 337 }; 338 339 class RegExpCapture : public RegExpTree 340 { 341 public: RegExpCapture(RegExpTree * body,int index)342 explicit RegExpCapture(RegExpTree* body, int index) 343 : body_(body), index_(index) 344 {} 345 346 virtual void* Accept(RegExpVisitor* visitor, void* data); 347 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 348 RegExpNode* on_success); 349 static RegExpNode* ToNode(RegExpTree* body, 350 int index, 351 RegExpCompiler* compiler, 352 RegExpNode* on_success); 353 virtual RegExpCapture* AsCapture(); 354 virtual bool IsAnchoredAtStart(); 355 virtual bool IsAnchoredAtEnd(); 356 virtual Interval CaptureRegisters(); 357 virtual bool IsCapture(); min_match()358 virtual int min_match() { return body_->min_match(); } max_match()359 virtual int max_match() { return body_->max_match(); } body()360 RegExpTree* body() { return body_; } index()361 int index() { return index_; } StartRegister(int index)362 static int StartRegister(int index) { return index * 2; } EndRegister(int index)363 static int EndRegister(int index) { return index * 2 + 1; } 364 365 private: 366 RegExpTree* body_; 367 int index_; 368 }; 369 370 class RegExpLookahead : public RegExpTree 371 { 372 public: RegExpLookahead(RegExpTree * body,bool is_positive,int capture_count,int capture_from)373 RegExpLookahead(RegExpTree* body, 374 bool is_positive, 375 int capture_count, 376 int capture_from) 377 : body_(body), 378 is_positive_(is_positive), 379 capture_count_(capture_count), 380 capture_from_(capture_from) 381 {} 382 383 virtual void* Accept(RegExpVisitor* visitor, void* data); 384 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 385 RegExpNode* on_success); 386 virtual RegExpLookahead* AsLookahead(); 387 virtual Interval CaptureRegisters(); 388 virtual bool IsLookahead(); 389 virtual bool IsAnchoredAtStart(); min_match()390 virtual int min_match() { return 0; } max_match()391 virtual int max_match() { return 0; } body()392 RegExpTree* body() { return body_; } is_positive()393 bool is_positive() { return is_positive_; } capture_count()394 int capture_count() { return capture_count_; } capture_from()395 int capture_from() { return capture_from_; } 396 397 private: 398 RegExpTree* body_; 399 bool is_positive_; 400 int capture_count_; 401 int capture_from_; 402 }; 403 404 typedef Vector<RegExpCapture*, 1, LifoAllocPolicy<Infallible> > RegExpCaptureVector; 405 406 class RegExpBackReference : public RegExpTree 407 { 408 public: RegExpBackReference(RegExpCapture * capture)409 explicit RegExpBackReference(RegExpCapture* capture) 410 : capture_(capture) 411 {} 412 413 virtual void* Accept(RegExpVisitor* visitor, void* data); 414 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 415 RegExpNode* on_success); 416 virtual RegExpBackReference* AsBackReference(); 417 virtual bool IsBackReference(); min_match()418 virtual int min_match() { return 0; } max_match()419 virtual int max_match() { return capture_->max_match(); } index()420 int index() { return capture_->index(); } capture()421 RegExpCapture* capture() { return capture_; } 422 private: 423 RegExpCapture* capture_; 424 }; 425 426 class RegExpEmpty : public RegExpTree 427 { 428 public: RegExpEmpty()429 RegExpEmpty() 430 {} 431 432 virtual void* Accept(RegExpVisitor* visitor, void* data); 433 virtual RegExpNode* ToNode(RegExpCompiler* compiler, 434 RegExpNode* on_success); 435 virtual RegExpEmpty* AsEmpty(); 436 virtual bool IsEmpty(); min_match()437 virtual int min_match() { return 0; } max_match()438 virtual int max_match() { return 0; } GetInstance()439 static RegExpEmpty* GetInstance() { 440 static RegExpEmpty instance; 441 return &instance; 442 } 443 }; 444 445 } } // namespace js::irregexp 446 447 #endif // V8_REGEXP_AST_H_ 448