1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 * vim: set ts=8 sts=4 et sw=4 tw=99: */
3
4 // Copyright 2012 the V8 project authors. All rights reserved.
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 #include "irregexp/RegExpParser.h"
32
33 #include "mozilla/ArrayUtils.h"
34 #include "mozilla/Move.h"
35
36 #include "frontend/TokenStream.h"
37 #include "gc/GC.h"
38 #include "irregexp/RegExpCharacters.h"
39 #include "util/StringBuffer.h"
40 #include "vm/ErrorReporting.h"
41
42 using namespace js;
43 using namespace js::irregexp;
44
45 using mozilla::Move;
46 using mozilla::PointerRangeSize;
47
48 // ----------------------------------------------------------------------------
49 // RegExpBuilder
50
RegExpBuilder(LifoAlloc * alloc)51 RegExpBuilder::RegExpBuilder(LifoAlloc* alloc)
52 : alloc(alloc),
53 pending_empty_(false),
54 characters_(nullptr)
55 #ifdef DEBUG
56 , last_added_(ADD_NONE)
57 #endif
58 {}
59
60 void
FlushCharacters()61 RegExpBuilder::FlushCharacters()
62 {
63 pending_empty_ = false;
64 if (characters_ != nullptr) {
65 RegExpTree* atom = alloc->newInfallible<RegExpAtom>(characters_);
66 characters_ = nullptr;
67 text_.Add(alloc, atom);
68 #ifdef DEBUG
69 last_added_ = ADD_ATOM;
70 #endif
71 }
72 }
73
74 void
FlushText()75 RegExpBuilder::FlushText()
76 {
77 FlushCharacters();
78 int num_text = text_.length();
79 if (num_text == 0)
80 return;
81 if (num_text == 1) {
82 terms_.Add(alloc, text_.last());
83 } else {
84 RegExpText* text = alloc->newInfallible<RegExpText>(alloc);
85 for (int i = 0; i < num_text; i++)
86 text_.Get(i)->AppendToText(text);
87 terms_.Add(alloc, text);
88 }
89 text_.Clear();
90 }
91
92 void
AddCharacter(char16_t c)93 RegExpBuilder::AddCharacter(char16_t c)
94 {
95 pending_empty_ = false;
96 if (characters_ == nullptr)
97 characters_ = alloc->newInfallible<CharacterVector>(*alloc);
98 characters_->append(c);
99 #ifdef DEBUG
100 last_added_ = ADD_CHAR;
101 #endif
102 }
103
104 void
AddEmpty()105 RegExpBuilder::AddEmpty()
106 {
107 pending_empty_ = true;
108 }
109
110 void
AddAtom(RegExpTree * term)111 RegExpBuilder::AddAtom(RegExpTree* term)
112 {
113 if (term->IsEmpty()) {
114 AddEmpty();
115 return;
116 }
117 if (term->IsTextElement()) {
118 FlushCharacters();
119 text_.Add(alloc, term);
120 } else {
121 FlushText();
122 terms_.Add(alloc, term);
123 }
124 #ifdef DEBUG
125 last_added_ = ADD_ATOM;
126 #endif
127 }
128
129 void
AddAssertion(RegExpTree * assert)130 RegExpBuilder::AddAssertion(RegExpTree* assert)
131 {
132 FlushText();
133 if (terms_.length() > 0 && terms_.last()->IsAssertion()) {
134 // Omit repeated assertions of the same type.
135 RegExpAssertion* last = terms_.last()->AsAssertion();
136 RegExpAssertion* next = assert->AsAssertion();
137 if (last->assertion_type() == next->assertion_type()) return;
138 }
139 terms_.Add(alloc, assert);
140 #ifdef DEBUG
141 last_added_ = ADD_ASSERT;
142 #endif
143 }
144
145 void
NewAlternative()146 RegExpBuilder::NewAlternative()
147 {
148 FlushTerms();
149 }
150
151 void
FlushTerms()152 RegExpBuilder::FlushTerms()
153 {
154 FlushText();
155 int num_terms = terms_.length();
156 RegExpTree* alternative;
157 if (num_terms == 0)
158 alternative = RegExpEmpty::GetInstance();
159 else if (num_terms == 1)
160 alternative = terms_.last();
161 else
162 alternative = alloc->newInfallible<RegExpAlternative>(terms_.GetList(alloc));
163 alternatives_.Add(alloc, alternative);
164 terms_.Clear();
165 #ifdef DEBUG
166 last_added_ = ADD_NONE;
167 #endif
168 }
169
170 RegExpTree*
ToRegExp()171 RegExpBuilder::ToRegExp()
172 {
173 FlushTerms();
174 int num_alternatives = alternatives_.length();
175 if (num_alternatives == 0) {
176 return RegExpEmpty::GetInstance();
177 }
178 if (num_alternatives == 1) {
179 return alternatives_.last();
180 }
181 return alloc->newInfallible<RegExpDisjunction>(alternatives_.GetList(alloc));
182 }
183
184 void
AddQuantifierToAtom(int min,int max,RegExpQuantifier::QuantifierType quantifier_type)185 RegExpBuilder::AddQuantifierToAtom(int min, int max,
186 RegExpQuantifier::QuantifierType quantifier_type)
187 {
188 if (pending_empty_) {
189 pending_empty_ = false;
190 return;
191 }
192 RegExpTree* atom;
193 if (characters_ != nullptr) {
194 MOZ_ASSERT(last_added_ == ADD_CHAR);
195 // Last atom was character.
196 CharacterVector* char_vector = characters_;
197 int num_chars = char_vector->length();
198 if (num_chars > 1) {
199 CharacterVector* prefix = alloc->newInfallible<CharacterVector>(*alloc);
200 prefix->append(char_vector->begin(), num_chars - 1);
201 text_.Add(alloc, alloc->newInfallible<RegExpAtom>(prefix));
202 char_vector = alloc->newInfallible<CharacterVector>(*alloc);
203 char_vector->append((*characters_)[num_chars - 1]);
204 }
205 characters_ = nullptr;
206 atom = alloc->newInfallible<RegExpAtom>(char_vector);
207 FlushText();
208 } else if (text_.length() > 0) {
209 MOZ_ASSERT(last_added_ == ADD_ATOM);
210 atom = text_.RemoveLast();
211 FlushText();
212 } else if (terms_.length() > 0) {
213 MOZ_ASSERT(last_added_ == ADD_ATOM);
214 atom = terms_.RemoveLast();
215 if (atom->max_match() == 0) {
216 // Guaranteed to only match an empty string.
217 #ifdef DEBUG
218 last_added_ = ADD_TERM;
219 #endif
220 if (min == 0)
221 return;
222 terms_.Add(alloc, atom);
223 return;
224 }
225 } else {
226 // Only call immediately after adding an atom or character!
227 MOZ_CRASH("Bad call");
228 }
229 terms_.Add(alloc, alloc->newInfallible<RegExpQuantifier>(min, max, quantifier_type, atom));
230 #ifdef DEBUG
231 last_added_ = ADD_TERM;
232 #endif
233 }
234
235 // ----------------------------------------------------------------------------
236 // RegExpParser
237
238 template <typename CharT>
RegExpParser(frontend::TokenStreamAnyChars & ts,LifoAlloc * alloc,const CharT * chars,const CharT * end,bool multiline_mode,bool unicode,bool ignore_case)239 RegExpParser<CharT>::RegExpParser(frontend::TokenStreamAnyChars& ts, LifoAlloc* alloc,
240 const CharT* chars, const CharT* end, bool multiline_mode,
241 bool unicode, bool ignore_case)
242 : ts(ts),
243 alloc(alloc),
244 captures_(nullptr),
245 start_(chars),
246 next_pos_(start_),
247 end_(end),
248 current_(kEndMarker),
249 capture_count_(0),
250 has_more_(true),
251 multiline_(multiline_mode),
252 unicode_(unicode),
253 ignore_case_(ignore_case),
254 simple_(false),
255 contains_anchor_(false),
256 is_scanned_for_captures_(false)
257 {
258 Advance();
259 }
260
261 template <typename CharT>
262 void
SyntaxError(unsigned errorNumber,...)263 RegExpParser<CharT>::SyntaxError(unsigned errorNumber, ...)
264 {
265 ErrorMetadata err;
266
267 ts.fillExcludingContext(&err, ts.currentToken().pos.begin);
268
269 // For most error reporting, the line of context derives from the token
270 // stream. So when location information doesn't come from the token
271 // stream, we can't give a line of context. But here the "line of context"
272 // can be (and is) derived from the pattern text, so we can provide it no
273 // matter if the location is derived from the caller.
274 size_t offset = PointerRangeSize(start_, next_pos_ - 1);
275 size_t end = PointerRangeSize(start_, end_);
276
277 const CharT* windowStart = (offset > ErrorMetadata::lineOfContextRadius)
278 ? start_ + (offset - ErrorMetadata::lineOfContextRadius)
279 : start_;
280
281 const CharT* windowEnd = (end - offset > ErrorMetadata::lineOfContextRadius)
282 ? start_ + offset + ErrorMetadata::lineOfContextRadius
283 : end_;
284
285 size_t windowLength = PointerRangeSize(windowStart, windowEnd);
286 MOZ_ASSERT(windowLength <= ErrorMetadata::lineOfContextRadius * 2);
287
288 // Create the windowed string, not including the potential line
289 // terminator.
290 StringBuffer windowBuf(ts.context());
291 if (!windowBuf.append(windowStart, windowEnd))
292 return;
293
294 // The line of context must be null-terminated, and StringBuffer doesn't
295 // make that happen unless we force it to.
296 if (!windowBuf.append('\0'))
297 return;
298
299 err.lineOfContext.reset(windowBuf.stealChars());
300 if (!err.lineOfContext)
301 return;
302
303 err.lineLength = windowLength;
304 err.tokenOffset = offset - (windowStart - start_);
305
306 va_list args;
307 va_start(args, errorNumber);
308
309 ReportCompileError(ts.context(), Move(err), nullptr, JSREPORT_ERROR, errorNumber, args);
310
311 va_end(args);
312 }
313
314 template <typename CharT>
315 RegExpTree*
ReportError(unsigned errorNumber,const char * param)316 RegExpParser<CharT>::ReportError(unsigned errorNumber, const char* param /* = nullptr */)
317 {
318 gc::AutoSuppressGC suppressGC(ts.context());
319 SyntaxError(errorNumber, param);
320 return nullptr;
321 }
322
323 template <typename CharT>
324 void
Advance()325 RegExpParser<CharT>::Advance()
326 {
327 if (next_pos_ < end_) {
328 current_ = *next_pos_;
329 next_pos_++;
330 } else {
331 current_ = kEndMarker;
332 next_pos_ = end_ + 1;
333 has_more_ = false;
334 }
335 }
336
337 // Returns the value (0 .. 15) of a hexadecimal character c.
338 // If c is not a legal hexadecimal character, returns a value < 0.
339 inline int
HexValue(uint32_t c)340 HexValue(uint32_t c)
341 {
342 c -= '0';
343 if (static_cast<unsigned>(c) <= 9) return c;
344 c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36.
345 if (static_cast<unsigned>(c) <= 5) return c + 10;
346 return -1;
347 }
348
349 template <typename CharT>
350 widechar
ParseOctalLiteral()351 RegExpParser<CharT>::ParseOctalLiteral()
352 {
353 MOZ_ASSERT('0' <= current() && current() <= '7');
354 // For compatibility with some other browsers (not all), we parse
355 // up to three octal digits with a value below 256.
356 widechar value = current() - '0';
357 Advance();
358 if ('0' <= current() && current() <= '7') {
359 value = value * 8 + current() - '0';
360 Advance();
361 if (value < 32 && '0' <= current() && current() <= '7') {
362 value = value * 8 + current() - '0';
363 Advance();
364 }
365 }
366 return value;
367 }
368
369 template <typename CharT>
370 bool
ParseHexEscape(int length,widechar * value)371 RegExpParser<CharT>::ParseHexEscape(int length, widechar* value)
372 {
373 const CharT* start = position();
374 uint32_t val = 0;
375 bool done = false;
376 for (int i = 0; !done; i++) {
377 widechar c = current();
378 int d = HexValue(c);
379 if (d < 0) {
380 Reset(start);
381 return false;
382 }
383 val = val * 16 + d;
384 Advance();
385 if (i == length - 1) {
386 done = true;
387 }
388 }
389 *value = val;
390 return true;
391 }
392
393 template <typename CharT>
394 bool
ParseBracedHexEscape(widechar * value)395 RegExpParser<CharT>::ParseBracedHexEscape(widechar* value)
396 {
397 MOZ_ASSERT(current() == '{');
398 Advance();
399
400 bool first = true;
401 uint32_t code = 0;
402 while (true) {
403 widechar c = current();
404 if (c == kEndMarker) {
405 ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
406 return false;
407 }
408 if (c == '}') {
409 if (first) {
410 ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
411 return false;
412 }
413 Advance();
414 break;
415 }
416
417 int d = HexValue(c);
418 if (d < 0) {
419 ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
420 return false;
421 }
422 code = (code << 4) | d;
423 if (code > unicode::NonBMPMax) {
424 ReportError(JSMSG_UNICODE_OVERFLOW, "regular expression");
425 return false;
426 }
427 Advance();
428 first = false;
429 }
430
431 *value = code;
432 return true;
433 }
434
435 template <typename CharT>
436 bool
ParseTrailSurrogate(widechar * value)437 RegExpParser<CharT>::ParseTrailSurrogate(widechar* value)
438 {
439 if (current() != '\\')
440 return false;
441
442 const CharT* start = position();
443 Advance();
444 if (current() != 'u') {
445 Reset(start);
446 return false;
447 }
448 Advance();
449 if (!ParseHexEscape(4, value)) {
450 Reset(start);
451 return false;
452 }
453 if (!unicode::IsTrailSurrogate(*value)) {
454 Reset(start);
455 return false;
456 }
457 return true;
458 }
459
460 template <typename CharT>
461 bool
ParseRawSurrogatePair(char16_t * lead,char16_t * trail)462 RegExpParser<CharT>::ParseRawSurrogatePair(char16_t* lead, char16_t* trail)
463 {
464 widechar c1 = current();
465 if (!unicode::IsLeadSurrogate(c1))
466 return false;
467
468 const CharT* start = position();
469 Advance();
470 widechar c2 = current();
471 if (!unicode::IsTrailSurrogate(c2)) {
472 Reset(start);
473 return false;
474 }
475 Advance();
476 *lead = c1;
477 *trail = c2;
478 return true;
479 }
480
481 static inline RegExpTree*
RangeAtom(LifoAlloc * alloc,char16_t from,char16_t to)482 RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to)
483 {
484 CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
485 ranges->append(CharacterRange::Range(from, to));
486 return alloc->newInfallible<RegExpCharacterClass>(ranges, false);
487 }
488
489 static inline RegExpTree*
NegativeLookahead(LifoAlloc * alloc,char16_t from,char16_t to)490 NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
491 {
492 return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0);
493 }
494
495 static bool
IsSyntaxCharacter(widechar c)496 IsSyntaxCharacter(widechar c)
497 {
498 switch (c) {
499 case '^':
500 case '$':
501 case '\\':
502 case '.':
503 case '*':
504 case '+':
505 case '?':
506 case '(':
507 case ')':
508 case '[':
509 case ']':
510 case '{':
511 case '}':
512 case '|':
513 case '/':
514 return true;
515 default:
516 return false;
517 }
518 }
519
520 inline bool
IsInRange(int value,int lower_limit,int higher_limit)521 IsInRange(int value, int lower_limit, int higher_limit)
522 {
523 MOZ_ASSERT(lower_limit <= higher_limit);
524 return static_cast<unsigned int>(value - lower_limit) <=
525 static_cast<unsigned int>(higher_limit - lower_limit);
526 }
527
528 inline bool
IsDecimalDigit(widechar c)529 IsDecimalDigit(widechar c)
530 {
531 // ECMA-262, 3rd, 7.8.3 (p 16)
532 return IsInRange(c, '0', '9');
533 }
534
535 #ifdef DEBUG
536 // Currently only used in an assert.kASSERT.
537 static bool
IsSpecialClassEscape(widechar c)538 IsSpecialClassEscape(widechar c)
539 {
540 switch (c) {
541 case 'd': case 'D':
542 case 's': case 'S':
543 case 'w': case 'W':
544 return true;
545 default:
546 return false;
547 }
548 }
549 #endif
550
551 template <typename CharT>
552 bool
ParseClassCharacterEscape(widechar * code)553 RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
554 {
555 MOZ_ASSERT(current() == '\\');
556 MOZ_ASSERT(has_next() && !IsSpecialClassEscape(Next()));
557 Advance();
558 switch (current()) {
559 case 'b':
560 Advance();
561 *code = '\b';
562 return true;
563 // ControlEscape :: one of
564 // f n r t v
565 case 'f':
566 Advance();
567 *code = '\f';
568 return true;
569 case 'n':
570 Advance();
571 *code = '\n';
572 return true;
573 case 'r':
574 Advance();
575 *code = '\r';
576 return true;
577 case 't':
578 Advance();
579 *code = '\t';
580 return true;
581 case 'v':
582 Advance();
583 *code = '\v';
584 return true;
585 case 'c': {
586 widechar controlLetter = Next();
587 widechar letter = controlLetter & ~('A' ^ 'a');
588 // For compatibility with JSC, inside a character class
589 // we also accept digits and underscore as control characters,
590 // but only in non-unicode mode
591 if ((!unicode_ &&
592 ((controlLetter >= '0' && controlLetter <= '9') ||
593 controlLetter == '_')) ||
594 (letter >= 'A' && letter <= 'Z'))
595 {
596 Advance(2);
597 // Control letters mapped to ASCII control characters in the range
598 // 0x00-0x1f.
599 *code = controlLetter & 0x1f;
600 return true;
601 }
602 if (unicode_) {
603 ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
604 return false;
605 }
606 // We match JSC in reading the backslash as a literal
607 // character instead of as starting an escape.
608 *code = '\\';
609 return true;
610 }
611 case '0':
612 if (unicode_) {
613 Advance();
614 if (IsDecimalDigit(current()))
615 return ReportError(JSMSG_INVALID_DECIMAL_ESCAPE);
616 *code = 0;
617 return true;
618 }
619 MOZ_FALLTHROUGH;
620 case '1': case '2': case '3': case '4': case '5': case '6': case '7':
621 if (unicode_) {
622 ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
623 return false;
624 }
625 // For compatibility, outside of unicode mode, we interpret a decimal
626 // escape that isn't a back reference (and therefore either \0 or not
627 // valid according to the specification) as a 1..3 digit octal
628 // character code.
629 *code = ParseOctalLiteral();
630 return true;
631 case 'x': {
632 Advance();
633 widechar value;
634 if (ParseHexEscape(2, &value)) {
635 *code = value;
636 return true;
637 }
638 if (unicode_) {
639 ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
640 return false;
641 }
642 // If \x is not followed by a two-digit hexadecimal, treat it
643 // as an identity escape in non-unicode mode.
644 *code = 'x';
645 return true;
646 }
647 case 'u': {
648 Advance();
649 widechar value;
650 if (unicode_) {
651 if (current() == '{') {
652 if (!ParseBracedHexEscape(&value))
653 return false;
654 *code = value;
655 return true;
656 }
657 if (ParseHexEscape(4, &value)) {
658 if (unicode::IsLeadSurrogate(value)) {
659 widechar trail;
660 if (ParseTrailSurrogate(&trail)) {
661 *code = unicode::UTF16Decode(value, trail);
662 return true;
663 }
664 }
665 *code = value;
666 return true;
667 }
668 ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
669 return false;
670 }
671 if (ParseHexEscape(4, &value)) {
672 *code = value;
673 return true;
674 }
675 // If \u is not followed by a four-digit or braced hexadecimal, treat it
676 // as an identity escape.
677 *code = 'u';
678 return true;
679 }
680 default: {
681 // Extended identity escape (non-unicode only). We accept any character
682 // that hasn't been matched by a more specific case, not just the subset
683 // required by the ECMAScript specification.
684 widechar result = current();
685 if (unicode_ && result != '-' && !IsSyntaxCharacter(result)) {
686 ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
687 return false;
688 }
689 Advance();
690 *code = result;
691 return true;
692 }
693 }
694 return true;
695 }
696
697 class WideCharRange
698 {
699 public:
WideCharRange()700 WideCharRange()
701 : from_(0), to_(0)
702 {}
703
WideCharRange(widechar from,widechar to)704 WideCharRange(widechar from, widechar to)
705 : from_(from), to_(to)
706 {}
707
Singleton(widechar value)708 static inline WideCharRange Singleton(widechar value) {
709 return WideCharRange(value, value);
710 }
Range(widechar from,widechar to)711 static inline WideCharRange Range(widechar from, widechar to) {
712 MOZ_ASSERT(from <= to);
713 return WideCharRange(from, to);
714 }
715
Contains(widechar i) const716 bool Contains(widechar i) const { return from_ <= i && i <= to_; }
from() const717 widechar from() const { return from_; }
to() const718 widechar to() const { return to_; }
719
720 private:
721 widechar from_;
722 widechar to_;
723 };
724
725 typedef InfallibleVector<WideCharRange, 1> WideCharRangeVector;
726
727 static inline CharacterRange
LeadSurrogateRange()728 LeadSurrogateRange()
729 {
730 return CharacterRange::Range(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax);
731 }
732
733 static inline CharacterRange
TrailSurrogateRange()734 TrailSurrogateRange()
735 {
736 return CharacterRange::Range(unicode::TrailSurrogateMin, unicode::TrailSurrogateMax);
737 }
738
739 static inline WideCharRange
NonBMPRange()740 NonBMPRange()
741 {
742 return WideCharRange::Range(unicode::NonBMPMin, unicode::NonBMPMax);
743 }
744
745 static const char16_t kNoCharClass = 0;
746
747 // Adds a character or pre-defined character class to character ranges.
748 // If char_class is not kInvalidClass, it's interpreted as a class
749 // escape (i.e., 's' means whitespace, from '\s').
750 static inline void
AddCharOrEscape(LifoAlloc * alloc,CharacterRangeVector * ranges,char16_t char_class,widechar c)751 AddCharOrEscape(LifoAlloc* alloc,
752 CharacterRangeVector* ranges,
753 char16_t char_class,
754 widechar c)
755 {
756 if (char_class != kNoCharClass)
757 CharacterRange::AddClassEscape(alloc, char_class, ranges);
758 else
759 ranges->append(CharacterRange::Singleton(c));
760 }
761
762 static inline void
AddCharOrEscapeUnicode(LifoAlloc * alloc,CharacterRangeVector * ranges,CharacterRangeVector * lead_ranges,CharacterRangeVector * trail_ranges,WideCharRangeVector * wide_ranges,char16_t char_class,widechar c,bool ignore_case)763 AddCharOrEscapeUnicode(LifoAlloc* alloc,
764 CharacterRangeVector* ranges,
765 CharacterRangeVector* lead_ranges,
766 CharacterRangeVector* trail_ranges,
767 WideCharRangeVector* wide_ranges,
768 char16_t char_class,
769 widechar c,
770 bool ignore_case)
771 {
772 if (char_class != kNoCharClass) {
773 CharacterRange::AddClassEscapeUnicode(alloc, char_class, ranges, ignore_case);
774 switch (char_class) {
775 case 'S':
776 case 'W':
777 case 'D':
778 lead_ranges->append(LeadSurrogateRange());
779 trail_ranges->append(TrailSurrogateRange());
780 wide_ranges->append(NonBMPRange());
781 break;
782 case '.':
783 MOZ_CRASH("Bad char_class!");
784 }
785 return;
786 }
787
788 if (unicode::IsLeadSurrogate(c))
789 lead_ranges->append(CharacterRange::Singleton(c));
790 else if (unicode::IsTrailSurrogate(c))
791 trail_ranges->append(CharacterRange::Singleton(c));
792 else if (c >= unicode::NonBMPMin)
793 wide_ranges->append(WideCharRange::Singleton(c));
794 else
795 ranges->append(CharacterRange::Singleton(c));
796 }
797
798 static inline void
AddUnicodeRange(LifoAlloc * alloc,CharacterRangeVector * ranges,CharacterRangeVector * lead_ranges,CharacterRangeVector * trail_ranges,WideCharRangeVector * wide_ranges,widechar first,widechar next)799 AddUnicodeRange(LifoAlloc* alloc,
800 CharacterRangeVector* ranges,
801 CharacterRangeVector* lead_ranges,
802 CharacterRangeVector* trail_ranges,
803 WideCharRangeVector* wide_ranges,
804 widechar first,
805 widechar next)
806 {
807 MOZ_ASSERT(first <= next);
808 if (first < unicode::LeadSurrogateMin) {
809 if (next < unicode::LeadSurrogateMin) {
810 ranges->append(CharacterRange::Range(first, next));
811 return;
812 }
813 ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMin - 1));
814 first = unicode::LeadSurrogateMin;
815 }
816 if (first <= unicode::LeadSurrogateMax) {
817 if (next <= unicode::LeadSurrogateMax) {
818 lead_ranges->append(CharacterRange::Range(first, next));
819 return;
820 }
821 lead_ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMax));
822 first = unicode::LeadSurrogateMax + 1;
823 }
824 MOZ_ASSERT(unicode::LeadSurrogateMax + 1 == unicode::TrailSurrogateMin);
825 if (first <= unicode::TrailSurrogateMax) {
826 if (next <= unicode::TrailSurrogateMax) {
827 trail_ranges->append(CharacterRange::Range(first, next));
828 return;
829 }
830 trail_ranges->append(CharacterRange::Range(first, unicode::TrailSurrogateMax));
831 first = unicode::TrailSurrogateMax + 1;
832 }
833 if (first <= unicode::UTF16Max) {
834 if (next <= unicode::UTF16Max) {
835 ranges->append(CharacterRange::Range(first, next));
836 return;
837 }
838 ranges->append(CharacterRange::Range(first, unicode::UTF16Max));
839 first = unicode::NonBMPMin;
840 }
841 MOZ_ASSERT(unicode::UTF16Max + 1 == unicode::NonBMPMin);
842 wide_ranges->append(WideCharRange::Range(first, next));
843 }
844
845 // Negate a vector of ranges by subtracting its ranges from a range
846 // encompassing the full range of possible values.
847 template <typename RangeType>
848 static inline void
NegateUnicodeRanges(LifoAlloc * alloc,InfallibleVector<RangeType,1> ** ranges,RangeType full_range)849 NegateUnicodeRanges(LifoAlloc* alloc, InfallibleVector<RangeType, 1>** ranges,
850 RangeType full_range)
851 {
852 typedef InfallibleVector<RangeType, 1> RangeVector;
853 RangeVector* tmp_ranges = alloc->newInfallible<RangeVector>(*alloc);
854 tmp_ranges->append(full_range);
855 RangeVector* result_ranges = alloc->newInfallible<RangeVector>(*alloc);
856
857 // Perform the following calculation:
858 // result_ranges = tmp_ranges - ranges
859 // with the following steps:
860 // result_ranges = tmp_ranges - ranges[0]
861 // SWAP(result_ranges, tmp_ranges)
862 // result_ranges = tmp_ranges - ranges[1]
863 // SWAP(result_ranges, tmp_ranges)
864 // ...
865 // result_ranges = tmp_ranges - ranges[N-1]
866 // SWAP(result_ranges, tmp_ranges)
867 // The last SWAP is just for simplicity of the loop.
868 for (size_t i = 0; i < (*ranges)->length(); i++) {
869 result_ranges->clear();
870
871 const RangeType& range = (**ranges)[i];
872 for (size_t j = 0; j < tmp_ranges->length(); j++) {
873 const RangeType& tmpRange = (*tmp_ranges)[j];
874 auto from1 = tmpRange.from();
875 auto to1 = tmpRange.to();
876 auto from2 = range.from();
877 auto to2 = range.to();
878
879 if (from1 < from2) {
880 if (to1 < from2) {
881 result_ranges->append(tmpRange);
882 } else if (to1 <= to2) {
883 result_ranges->append(RangeType::Range(from1, from2 - 1));
884 } else {
885 result_ranges->append(RangeType::Range(from1, from2 - 1));
886 result_ranges->append(RangeType::Range(to2 + 1, to1));
887 }
888 } else if (from1 <= to2) {
889 if (to1 > to2)
890 result_ranges->append(RangeType::Range(to2 + 1, to1));
891 } else {
892 result_ranges->append(tmpRange);
893 }
894 }
895
896 auto tmp = tmp_ranges;
897 tmp_ranges = result_ranges;
898 result_ranges = tmp;
899 }
900
901 // After the loop, result is pointed at by tmp_ranges, instead of
902 // result_ranges.
903 *ranges = tmp_ranges;
904 }
905
906 static bool
WideCharRangesContain(WideCharRangeVector * wide_ranges,widechar c)907 WideCharRangesContain(WideCharRangeVector* wide_ranges, widechar c)
908 {
909 for (size_t i = 0; i < wide_ranges->length(); i++) {
910 const WideCharRange& range = (*wide_ranges)[i];
911 if (range.Contains(c))
912 return true;
913 }
914 return false;
915 }
916
917 static void
CalculateCaseInsensitiveRanges(LifoAlloc * alloc,widechar from,widechar to,int32_t diff,WideCharRangeVector * wide_ranges,WideCharRangeVector ** tmp_wide_ranges)918 CalculateCaseInsensitiveRanges(LifoAlloc* alloc, widechar from, widechar to, int32_t diff,
919 WideCharRangeVector* wide_ranges,
920 WideCharRangeVector** tmp_wide_ranges)
921 {
922 widechar contains_from = 0;
923 widechar contains_to = 0;
924 for (widechar c = from; c <= to; c++) {
925 if (WideCharRangesContain(wide_ranges, c) &&
926 !WideCharRangesContain(wide_ranges, c + diff))
927 {
928 if (contains_from == 0)
929 contains_from = c;
930 contains_to = c;
931 } else if (contains_from != 0) {
932 if (!*tmp_wide_ranges)
933 *tmp_wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
934
935 (*tmp_wide_ranges)->append(WideCharRange::Range(contains_from + diff,
936 contains_to + diff));
937 contains_from = 0;
938 }
939 }
940
941 if (contains_from != 0) {
942 if (!*tmp_wide_ranges)
943 *tmp_wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
944
945 (*tmp_wide_ranges)->append(WideCharRange::Range(contains_from + diff,
946 contains_to + diff));
947 }
948 }
949
950 static RegExpTree*
UnicodeRangesAtom(LifoAlloc * alloc,CharacterRangeVector * ranges,CharacterRangeVector * lead_ranges,CharacterRangeVector * trail_ranges,WideCharRangeVector * wide_ranges,bool is_negated,bool ignore_case)951 UnicodeRangesAtom(LifoAlloc* alloc,
952 CharacterRangeVector* ranges,
953 CharacterRangeVector* lead_ranges,
954 CharacterRangeVector* trail_ranges,
955 WideCharRangeVector* wide_ranges,
956 bool is_negated,
957 bool ignore_case)
958 {
959 // Calculate case folding for non-BMP first and negate the range if needed.
960 if (ignore_case) {
961 WideCharRangeVector* tmp_wide_ranges = nullptr;
962 #define CALL_CALC(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) \
963 CalculateCaseInsensitiveRanges(alloc, FROM, TO, DIFF, wide_ranges, &tmp_wide_ranges);
964 FOR_EACH_NON_BMP_CASE_FOLDING(CALL_CALC)
965 FOR_EACH_NON_BMP_REV_CASE_FOLDING(CALL_CALC)
966 #undef CALL_CALC
967
968 if (tmp_wide_ranges) {
969 for (size_t i = 0; i < tmp_wide_ranges->length(); i++)
970 wide_ranges->append((*tmp_wide_ranges)[i]);
971 }
972 }
973
974 if (is_negated) {
975 NegateUnicodeRanges(alloc, &lead_ranges, LeadSurrogateRange());
976 NegateUnicodeRanges(alloc, &trail_ranges, TrailSurrogateRange());
977 NegateUnicodeRanges(alloc, &wide_ranges, NonBMPRange());
978 }
979
980 RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
981
982 bool added = false;
983
984 if (is_negated) {
985 ranges->append(LeadSurrogateRange());
986 ranges->append(TrailSurrogateRange());
987 }
988 if (ranges->length() > 0) {
989 builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated));
990 added = true;
991 }
992
993 if (lead_ranges->length() > 0) {
994 if (added)
995 builder->NewAlternative();
996 builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(lead_ranges, false));
997 builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
998 unicode::TrailSurrogateMax));
999 added = true;
1000 }
1001
1002 if (trail_ranges->length() > 0) {
1003 if (added)
1004 builder->NewAlternative();
1005 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
1006 RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
1007 builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(trail_ranges, false));
1008 added = true;
1009 }
1010
1011 for (size_t i = 0; i < wide_ranges->length(); i++) {
1012 if (added)
1013 builder->NewAlternative();
1014
1015 const WideCharRange& range = (*wide_ranges)[i];
1016 widechar from = range.from();
1017 widechar to = range.to();
1018 char16_t from_lead, from_trail;
1019 char16_t to_lead, to_trail;
1020
1021 unicode::UTF16Encode(from, &from_lead, &from_trail);
1022 if (from == to) {
1023 builder->AddCharacter(from_lead);
1024 builder->AddCharacter(from_trail);
1025 } else {
1026 unicode::UTF16Encode(to, &to_lead, &to_trail);
1027 if (from_lead == to_lead) {
1028 MOZ_ASSERT(from_trail != to_trail);
1029 builder->AddCharacter(from_lead);
1030 builder->AddAtom(RangeAtom(alloc, from_trail, to_trail));
1031 } else if (from_trail == unicode::TrailSurrogateMin &&
1032 to_trail == unicode::TrailSurrogateMax)
1033 {
1034 builder->AddAtom(RangeAtom(alloc, from_lead, to_lead));
1035 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
1036 unicode::TrailSurrogateMax));
1037 } else if (from_lead + 1 == to_lead) {
1038 builder->AddCharacter(from_lead);
1039 builder->AddAtom(RangeAtom(alloc, from_trail, unicode::TrailSurrogateMax));
1040
1041 builder->NewAlternative();
1042
1043 builder->AddCharacter(to_lead);
1044 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, to_trail));
1045 } else if (from_lead + 2 == to_lead) {
1046 builder->AddCharacter(from_lead);
1047 builder->AddAtom(RangeAtom(alloc, from_trail, unicode::TrailSurrogateMax));
1048
1049 builder->NewAlternative();
1050
1051 builder->AddCharacter(from_lead + 1);
1052 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
1053 unicode::TrailSurrogateMax));
1054
1055 builder->NewAlternative();
1056
1057 builder->AddCharacter(to_lead);
1058 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, to_trail));
1059 } else {
1060 builder->AddCharacter(from_lead);
1061 builder->AddAtom(RangeAtom(alloc, from_trail, unicode::TrailSurrogateMax));
1062
1063 builder->NewAlternative();
1064
1065 builder->AddAtom(RangeAtom(alloc, from_lead + 1, to_lead - 1));
1066 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
1067 unicode::TrailSurrogateMax));
1068
1069 builder->NewAlternative();
1070
1071 builder->AddCharacter(to_lead);
1072 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, to_trail));
1073 }
1074 }
1075 added = true;
1076 }
1077
1078 return builder->ToRegExp();
1079 }
1080
1081 template <typename CharT>
1082 RegExpTree*
ParseCharacterClass()1083 RegExpParser<CharT>::ParseCharacterClass()
1084 {
1085 MOZ_ASSERT(current() == '[');
1086 Advance();
1087 bool is_negated = false;
1088 if (current() == '^') {
1089 is_negated = true;
1090 Advance();
1091 }
1092 CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
1093 CharacterRangeVector* lead_ranges = nullptr;
1094 CharacterRangeVector* trail_ranges = nullptr;
1095 WideCharRangeVector* wide_ranges = nullptr;
1096
1097 if (unicode_) {
1098 lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
1099 trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
1100 wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
1101 }
1102
1103 while (has_more() && current() != ']') {
1104 char16_t char_class = kNoCharClass;
1105 widechar first = 0;
1106 if (!ParseClassAtom(&char_class, &first))
1107 return nullptr;
1108 if (current() == '-') {
1109 Advance();
1110 if (current() == kEndMarker) {
1111 // If we reach the end we break out of the loop and let the
1112 // following code report an error.
1113 break;
1114 } else if (current() == ']') {
1115 if (unicode_) {
1116 AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
1117 char_class, first, ignore_case_);
1118 } else {
1119 AddCharOrEscape(alloc, ranges, char_class, first);
1120 }
1121 ranges->append(CharacterRange::Singleton('-'));
1122 break;
1123 }
1124 char16_t char_class_2 = kNoCharClass;
1125 widechar next = 0;
1126 if (!ParseClassAtom(&char_class_2, &next))
1127 return nullptr;
1128 if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
1129 if (unicode_)
1130 return ReportError(JSMSG_RANGE_WITH_CLASS_ESCAPE);
1131
1132 // Either end is an escaped character class. Treat the '-' verbatim.
1133 AddCharOrEscape(alloc, ranges, char_class, first);
1134 ranges->append(CharacterRange::Singleton('-'));
1135 AddCharOrEscape(alloc, ranges, char_class_2, next);
1136 continue;
1137 }
1138 if (first > next)
1139 return ReportError(JSMSG_BAD_CLASS_RANGE);
1140 if (unicode_)
1141 AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges,wide_ranges, first, next);
1142 else
1143 ranges->append(CharacterRange::Range(first, next));
1144 } else {
1145 if (unicode_) {
1146 AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
1147 char_class, first, ignore_case_);
1148 } else {
1149 AddCharOrEscape(alloc, ranges, char_class, first);
1150 }
1151 }
1152 }
1153 if (!has_more())
1154 return ReportError(JSMSG_UNTERM_CLASS);
1155 Advance();
1156 if (!unicode_) {
1157 if (ranges->length() == 0) {
1158 ranges->append(CharacterRange::Everything());
1159 is_negated = !is_negated;
1160 }
1161 return alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated);
1162 }
1163
1164 if (!is_negated && ranges->length() == 0 && lead_ranges->length() == 0 &&
1165 trail_ranges->length() == 0 && wide_ranges->length() == 0)
1166 {
1167 ranges->append(CharacterRange::Everything());
1168 return alloc->newInfallible<RegExpCharacterClass>(ranges, true);
1169 }
1170
1171 return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, is_negated,
1172 ignore_case_);
1173 }
1174
1175 template <typename CharT>
1176 bool
ParseClassAtom(char16_t * char_class,widechar * value)1177 RegExpParser<CharT>::ParseClassAtom(char16_t* char_class, widechar* value)
1178 {
1179 MOZ_ASSERT(*char_class == kNoCharClass);
1180 widechar first = current();
1181 if (first == '\\') {
1182 switch (Next()) {
1183 case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
1184 *char_class = Next();
1185 Advance(2);
1186 return true;
1187 }
1188 case kEndMarker:
1189 return ReportError(JSMSG_ESCAPE_AT_END_OF_REGEXP);
1190 default:
1191 if (!ParseClassCharacterEscape(value))
1192 return false;
1193 return true;
1194 }
1195 } else {
1196 if (unicode_) {
1197 char16_t lead, trail;
1198 if (ParseRawSurrogatePair(&lead, &trail)) {
1199 *value = unicode::UTF16Decode(lead, trail);
1200 return true;
1201 }
1202 }
1203 Advance();
1204 *value = first;
1205 return true;
1206 }
1207 }
1208
1209 // In order to know whether an escape is a backreference or not we have to scan
1210 // the entire regexp and find the number of capturing parentheses. However we
1211 // don't want to scan the regexp twice unless it is necessary. This mini-parser
1212 // is called when needed. It can see the difference between capturing and
1213 // noncapturing parentheses and can skip character classes and backslash-escaped
1214 // characters.
1215 template <typename CharT>
1216 void
ScanForCaptures()1217 RegExpParser<CharT>::ScanForCaptures()
1218 {
1219 // Start with captures started previous to current position
1220 int capture_count = captures_started();
1221 // Add count of captures after this position.
1222 widechar n;
1223 while ((n = current()) != kEndMarker) {
1224 Advance();
1225 switch (n) {
1226 case '\\':
1227 Advance();
1228 break;
1229 case '[': {
1230 widechar c;
1231 while ((c = current()) != kEndMarker) {
1232 Advance();
1233 if (c == '\\') {
1234 Advance();
1235 } else {
1236 if (c == ']') break;
1237 }
1238 }
1239 break;
1240 }
1241 case '(':
1242 if (current() != '?') capture_count++;
1243 break;
1244 }
1245 }
1246 capture_count_ = capture_count;
1247 is_scanned_for_captures_ = true;
1248 }
1249
1250 template <typename CharT>
1251 bool
ParseBackReferenceIndex(int * index_out)1252 RegExpParser<CharT>::ParseBackReferenceIndex(int* index_out)
1253 {
1254 MOZ_ASSERT('\\' == current());
1255 MOZ_ASSERT('1' <= Next() && Next() <= '9');
1256
1257 // Try to parse a decimal literal that is no greater than the total number
1258 // of left capturing parentheses in the input.
1259 const CharT* start = position();
1260 int value = Next() - '0';
1261 Advance(2);
1262 while (true) {
1263 widechar c = current();
1264 if (IsDecimalDigit(c)) {
1265 value = 10 * value + (c - '0');
1266 if (value > kMaxCaptures) {
1267 Reset(start);
1268 return false;
1269 }
1270 Advance();
1271 } else {
1272 break;
1273 }
1274 }
1275 if (value > captures_started()) {
1276 if (!is_scanned_for_captures_) {
1277 const CharT* saved_position = position();
1278 ScanForCaptures();
1279 Reset(saved_position);
1280 }
1281 if (value > capture_count_) {
1282 Reset(start);
1283 return false;
1284 }
1285 }
1286 *index_out = value;
1287 return true;
1288 }
1289
1290 // QuantifierPrefix ::
1291 // { DecimalDigits }
1292 // { DecimalDigits , }
1293 // { DecimalDigits , DecimalDigits }
1294 //
1295 // Returns true if parsing succeeds, and set the min_out and max_out
1296 // values. Values are truncated to RegExpTree::kInfinity if they overflow.
1297 template <typename CharT>
1298 bool
ParseIntervalQuantifier(int * min_out,int * max_out)1299 RegExpParser<CharT>::ParseIntervalQuantifier(int* min_out, int* max_out)
1300 {
1301 MOZ_ASSERT(current() == '{');
1302 const CharT* start = position();
1303 Advance();
1304 int min = 0;
1305 if (!IsDecimalDigit(current())) {
1306 Reset(start);
1307 return false;
1308 }
1309 while (IsDecimalDigit(current())) {
1310 int next = current() - '0';
1311 if (min > (RegExpTree::kInfinity - next) / 10) {
1312 // Overflow. Skip past remaining decimal digits and return -1.
1313 do {
1314 Advance();
1315 } while (IsDecimalDigit(current()));
1316 min = RegExpTree::kInfinity;
1317 break;
1318 }
1319 min = 10 * min + next;
1320 Advance();
1321 }
1322 int max = 0;
1323 if (current() == '}') {
1324 max = min;
1325 Advance();
1326 } else if (current() == ',') {
1327 Advance();
1328 if (current() == '}') {
1329 max = RegExpTree::kInfinity;
1330 Advance();
1331 } else {
1332 while (IsDecimalDigit(current())) {
1333 int next = current() - '0';
1334 if (max > (RegExpTree::kInfinity - next) / 10) {
1335 do {
1336 Advance();
1337 } while (IsDecimalDigit(current()));
1338 max = RegExpTree::kInfinity;
1339 break;
1340 }
1341 max = 10 * max + next;
1342 Advance();
1343 }
1344 if (current() != '}') {
1345 Reset(start);
1346 return false;
1347 }
1348 Advance();
1349 }
1350 } else {
1351 Reset(start);
1352 return false;
1353 }
1354 *min_out = min;
1355 *max_out = max;
1356 return true;
1357 }
1358
1359 // Pattern ::
1360 // Disjunction
1361 template <typename CharT>
1362 RegExpTree*
ParsePattern()1363 RegExpParser<CharT>::ParsePattern()
1364 {
1365 RegExpTree* result = ParseDisjunction();
1366 MOZ_ASSERT_IF(result, !has_more());
1367 return result;
1368 }
1369
1370 static inline RegExpTree*
CaseFoldingSurrogatePairAtom(LifoAlloc * alloc,char16_t lead,char16_t trail,int32_t diff)1371 CaseFoldingSurrogatePairAtom(LifoAlloc* alloc, char16_t lead, char16_t trail, int32_t diff)
1372 {
1373 RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
1374
1375 builder->AddCharacter(lead);
1376 CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
1377 ranges->append(CharacterRange::Range(trail, trail));
1378 ranges->append(CharacterRange::Range(trail + diff, trail + diff));
1379 builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false));
1380
1381 return builder->ToRegExp();
1382 }
1383
1384 static inline RegExpTree*
SurrogatePairAtom(LifoAlloc * alloc,char16_t lead,char16_t trail,bool ignore_case)1385 SurrogatePairAtom(LifoAlloc* alloc, char16_t lead, char16_t trail, bool ignore_case)
1386 {
1387 if (ignore_case) {
1388 #define CALL_ATOM(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) \
1389 if (lead == LEAD &&trail >= TRAIL_FROM && trail <= TRAIL_TO) \
1390 return CaseFoldingSurrogatePairAtom(alloc, lead, trail, DIFF);
1391 FOR_EACH_NON_BMP_CASE_FOLDING(CALL_ATOM)
1392 FOR_EACH_NON_BMP_REV_CASE_FOLDING(CALL_ATOM)
1393 #undef CALL_ATOM
1394 }
1395
1396 RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
1397 builder->AddCharacter(lead);
1398 builder->AddCharacter(trail);
1399 return builder->ToRegExp();
1400 }
1401
1402 static inline RegExpTree*
LeadSurrogateAtom(LifoAlloc * alloc,char16_t value)1403 LeadSurrogateAtom(LifoAlloc* alloc, char16_t value)
1404 {
1405 RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
1406 builder->AddCharacter(value);
1407 builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
1408 unicode::TrailSurrogateMax));
1409 return builder->ToRegExp();
1410 }
1411
1412 static inline RegExpTree*
TrailSurrogateAtom(LifoAlloc * alloc,char16_t value)1413 TrailSurrogateAtom(LifoAlloc* alloc, char16_t value)
1414 {
1415 RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
1416 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
1417 RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
1418 builder->AddCharacter(value);
1419 return builder->ToRegExp();
1420 }
1421
1422 static inline RegExpTree*
UnicodeEverythingAtom(LifoAlloc * alloc)1423 UnicodeEverythingAtom(LifoAlloc* alloc)
1424 {
1425 RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
1426
1427 // everything except \x0a, \x0d, \u2028 and \u2029
1428
1429 CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
1430 AddClassNegated(kLineTerminatorAndSurrogateRanges,
1431 kLineTerminatorAndSurrogateRangeCount,
1432 ranges);
1433 builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false));
1434
1435 builder->NewAlternative();
1436
1437 builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
1438 builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
1439 unicode::TrailSurrogateMax));
1440
1441 builder->NewAlternative();
1442
1443 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
1444 RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
1445 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
1446
1447 builder->NewAlternative();
1448
1449 builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
1450 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
1451
1452 return builder->ToRegExp();
1453 }
1454
1455 RegExpTree*
UnicodeCharacterClassEscapeAtom(LifoAlloc * alloc,char16_t char_class,bool ignore_case)1456 UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool ignore_case)
1457 {
1458 CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
1459 CharacterRangeVector* lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
1460 CharacterRangeVector* trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
1461 WideCharRangeVector* wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
1462 AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_class, 0,
1463 ignore_case);
1464
1465 return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, false, false);
1466 }
1467
1468 static inline RegExpTree*
UnicodeBackReferenceAtom(LifoAlloc * alloc,RegExpTree * atom)1469 UnicodeBackReferenceAtom(LifoAlloc* alloc, RegExpTree* atom)
1470 {
1471 // If a back reference has a standalone lead surrogate as its last
1472 // character, then that lead surrogate shouldn't match lead surrogates that
1473 // are paired with a corresponding trail surrogate.
1474 RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
1475
1476 builder->AddAtom(atom);
1477 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
1478 RegExpAssertion::NOT_IN_SURROGATE_PAIR));
1479
1480 return builder->ToRegExp();
1481 }
1482
1483 // Disjunction ::
1484 // Alternative
1485 // Alternative | Disjunction
1486 // Alternative ::
1487 // [empty]
1488 // Term Alternative
1489 // Term ::
1490 // Assertion
1491 // Atom
1492 // Atom Quantifier
1493 template <typename CharT>
1494 RegExpTree*
ParseDisjunction()1495 RegExpParser<CharT>::ParseDisjunction()
1496 {
1497 // Used to store current state while parsing subexpressions.
1498 RegExpParserState initial_state(alloc, nullptr, INITIAL, 0);
1499 RegExpParserState* stored_state = &initial_state;
1500 // Cache the builder in a local variable for quick access.
1501 RegExpBuilder* builder = initial_state.builder();
1502 while (true) {
1503 switch (current()) {
1504 case kEndMarker:
1505 if (stored_state->IsSubexpression()) {
1506 // Inside a parenthesized group when hitting end of input.
1507 return ReportError(JSMSG_MISSING_PAREN);
1508 }
1509 MOZ_ASSERT(INITIAL == stored_state->group_type());
1510 // Parsing completed successfully.
1511 return builder->ToRegExp();
1512 case ')': {
1513 if (!stored_state->IsSubexpression())
1514 return ReportError(JSMSG_UNMATCHED_RIGHT_PAREN);
1515 MOZ_ASSERT(INITIAL != stored_state->group_type());
1516
1517 Advance();
1518 // End disjunction parsing and convert builder content to new single
1519 // regexp atom.
1520 RegExpTree* body = builder->ToRegExp();
1521
1522 int end_capture_index = captures_started();
1523
1524 int capture_index = stored_state->capture_index();
1525 SubexpressionType group_type = stored_state->group_type();
1526
1527 // Restore previous state.
1528 stored_state = stored_state->previous_state();
1529 builder = stored_state->builder();
1530
1531 // Build result of subexpression.
1532 if (group_type == CAPTURE) {
1533 RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(body, capture_index);
1534 (*captures_)[capture_index - 1] = capture;
1535 body = capture;
1536 } else if (group_type != GROUPING) {
1537 MOZ_ASSERT(group_type == POSITIVE_LOOKAHEAD ||
1538 group_type == NEGATIVE_LOOKAHEAD);
1539 bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
1540 body = alloc->newInfallible<RegExpLookahead>(body,
1541 is_positive,
1542 end_capture_index - capture_index,
1543 capture_index);
1544 }
1545 builder->AddAtom(body);
1546 if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD))
1547 continue;
1548 // For compatability with JSC and ES3, we allow quantifiers after
1549 // lookaheads, and break in all cases.
1550 break;
1551 }
1552 case '|': {
1553 Advance();
1554 builder->NewAlternative();
1555 continue;
1556 }
1557 case '*':
1558 case '+':
1559 case '?':
1560 return ReportError(JSMSG_NOTHING_TO_REPEAT);
1561 case '^': {
1562 Advance();
1563 if (multiline_) {
1564 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(RegExpAssertion::START_OF_LINE));
1565 } else {
1566 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(RegExpAssertion::START_OF_INPUT));
1567 set_contains_anchor();
1568 }
1569 continue;
1570 }
1571 case '$': {
1572 Advance();
1573 RegExpAssertion::AssertionType assertion_type =
1574 multiline_ ? RegExpAssertion::END_OF_LINE :
1575 RegExpAssertion::END_OF_INPUT;
1576 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(assertion_type));
1577 continue;
1578 }
1579 case '.': {
1580 Advance();
1581 // everything except \x0a, \x0d, \u2028 and \u2029
1582 if (unicode_) {
1583 builder->AddAtom(UnicodeEverythingAtom(alloc));
1584 break;
1585 }
1586 CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
1587 CharacterRange::AddClassEscape(alloc, '.', ranges);
1588 RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
1589 builder->AddAtom(atom);
1590 break;
1591 }
1592 case '(': {
1593 SubexpressionType subexpr_type = CAPTURE;
1594 Advance();
1595 if (current() == '?') {
1596 switch (Next()) {
1597 case ':':
1598 subexpr_type = GROUPING;
1599 break;
1600 case '=':
1601 subexpr_type = POSITIVE_LOOKAHEAD;
1602 break;
1603 case '!':
1604 subexpr_type = NEGATIVE_LOOKAHEAD;
1605 break;
1606 default:
1607 return ReportError(JSMSG_INVALID_GROUP);
1608 }
1609 Advance(2);
1610 } else {
1611 if (captures_ == nullptr)
1612 captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
1613 if (captures_started() >= kMaxCaptures)
1614 return ReportError(JSMSG_TOO_MANY_PARENS);
1615 captures_->append((RegExpCapture*) nullptr);
1616 }
1617 // Store current state and begin new disjunction parsing.
1618 stored_state = alloc->newInfallible<RegExpParserState>(alloc, stored_state, subexpr_type,
1619 captures_started());
1620 builder = stored_state->builder();
1621 continue;
1622 }
1623 case '[': {
1624 RegExpTree* atom = ParseCharacterClass();
1625 if (!atom)
1626 return nullptr;
1627 builder->AddAtom(atom);
1628 break;
1629 }
1630 // Atom ::
1631 // \ AtomEscape
1632 case '\\':
1633 switch (Next()) {
1634 case kEndMarker:
1635 return ReportError(JSMSG_ESCAPE_AT_END_OF_REGEXP);
1636 case 'b':
1637 Advance(2);
1638 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(RegExpAssertion::BOUNDARY));
1639 continue;
1640 case 'B':
1641 Advance(2);
1642 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(RegExpAssertion::NON_BOUNDARY));
1643 continue;
1644 // AtomEscape ::
1645 // CharacterClassEscape
1646 //
1647 // CharacterClassEscape :: one of
1648 // d D s S w W
1649 case 'D': case 'S': case 'W':
1650 if (unicode_) {
1651 Advance();
1652 builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, current(),
1653 ignore_case_));
1654 Advance();
1655 break;
1656 }
1657 MOZ_FALLTHROUGH;
1658 case 'd': case 's': case 'w': {
1659 widechar c = Next();
1660 Advance(2);
1661 CharacterRangeVector* ranges =
1662 alloc->newInfallible<CharacterRangeVector>(*alloc);
1663 if (unicode_)
1664 CharacterRange::AddClassEscapeUnicode(alloc, c, ranges, ignore_case_);
1665 else
1666 CharacterRange::AddClassEscape(alloc, c, ranges);
1667 RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
1668 builder->AddAtom(atom);
1669 break;
1670 }
1671 case '1': case '2': case '3': case '4': case '5': case '6':
1672 case '7': case '8': case '9': {
1673 int index = 0;
1674 if (ParseBackReferenceIndex(&index)) {
1675 RegExpCapture* capture = nullptr;
1676 if (captures_ != nullptr && index <= (int) captures_->length()) {
1677 capture = (*captures_)[index - 1];
1678 }
1679 if (capture == nullptr) {
1680 builder->AddEmpty();
1681 break;
1682 }
1683 RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
1684 if (unicode_)
1685 builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
1686 else
1687 builder->AddAtom(atom);
1688 break;
1689 }
1690 if (unicode_)
1691 return ReportError(JSMSG_BACK_REF_OUT_OF_RANGE);
1692 widechar first_digit = Next();
1693 if (first_digit == '8' || first_digit == '9') {
1694 // Treat as identity escape
1695 builder->AddCharacter(first_digit);
1696 Advance(2);
1697 break;
1698 }
1699 MOZ_FALLTHROUGH;
1700 }
1701 case '0': {
1702 if (unicode_) {
1703 Advance(2);
1704 if (IsDecimalDigit(current()))
1705 return ReportError(JSMSG_INVALID_DECIMAL_ESCAPE);
1706 builder->AddCharacter(0);
1707 break;
1708 }
1709
1710 Advance();
1711 widechar octal = ParseOctalLiteral();
1712 builder->AddCharacter(octal);
1713 break;
1714 }
1715 // ControlEscape :: one of
1716 // f n r t v
1717 case 'f':
1718 Advance(2);
1719 builder->AddCharacter('\f');
1720 break;
1721 case 'n':
1722 Advance(2);
1723 builder->AddCharacter('\n');
1724 break;
1725 case 'r':
1726 Advance(2);
1727 builder->AddCharacter('\r');
1728 break;
1729 case 't':
1730 Advance(2);
1731 builder->AddCharacter('\t');
1732 break;
1733 case 'v':
1734 Advance(2);
1735 builder->AddCharacter('\v');
1736 break;
1737 case 'c': {
1738 Advance();
1739 widechar controlLetter = Next();
1740 // Special case if it is an ASCII letter.
1741 // Convert lower case letters to uppercase.
1742 widechar letter = controlLetter & ~('a' ^ 'A');
1743 if (letter < 'A' || 'Z' < letter) {
1744 if (unicode_)
1745 return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
1746 // controlLetter is not in range 'A'-'Z' or 'a'-'z'.
1747 // This is outside the specification. We match JSC in
1748 // reading the backslash as a literal character instead
1749 // of as starting an escape.
1750 builder->AddCharacter('\\');
1751 } else {
1752 Advance(2);
1753 builder->AddCharacter(controlLetter & 0x1f);
1754 }
1755 break;
1756 }
1757 case 'x': {
1758 Advance(2);
1759 widechar value;
1760 if (ParseHexEscape(2, &value)) {
1761 builder->AddCharacter(value);
1762 } else {
1763 if (unicode_)
1764 return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
1765 builder->AddCharacter('x');
1766 }
1767 break;
1768 }
1769 case 'u': {
1770 Advance(2);
1771 widechar value;
1772 if (unicode_) {
1773 if (current() == '{') {
1774 if (!ParseBracedHexEscape(&value))
1775 return nullptr;
1776 if (unicode::IsLeadSurrogate(value)) {
1777 builder->AddAtom(LeadSurrogateAtom(alloc, value));
1778 } else if (unicode::IsTrailSurrogate(value)) {
1779 builder->AddAtom(TrailSurrogateAtom(alloc, value));
1780 } else if (value >= unicode::NonBMPMin) {
1781 char16_t lead, trail;
1782 unicode::UTF16Encode(value, &lead, &trail);
1783 builder->AddAtom(SurrogatePairAtom(alloc, lead, trail,
1784 ignore_case_));
1785 } else {
1786 builder->AddCharacter(value);
1787 }
1788 } else if (ParseHexEscape(4, &value)) {
1789 if (unicode::IsLeadSurrogate(value)) {
1790 widechar trail;
1791 if (ParseTrailSurrogate(&trail)) {
1792 builder->AddAtom(SurrogatePairAtom(alloc, value, trail,
1793 ignore_case_));
1794 } else {
1795 builder->AddAtom(LeadSurrogateAtom(alloc, value));
1796 }
1797 } else if (unicode::IsTrailSurrogate(value)) {
1798 builder->AddAtom(TrailSurrogateAtom(alloc, value));
1799 } else {
1800 builder->AddCharacter(value);
1801 }
1802 } else {
1803 return ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
1804 }
1805 break;
1806 }
1807 if (ParseHexEscape(4, &value)) {
1808 builder->AddCharacter(value);
1809 } else {
1810 builder->AddCharacter('u');
1811 }
1812 break;
1813 }
1814 default:
1815 // Identity escape.
1816 if (unicode_ && !IsSyntaxCharacter(Next()))
1817 return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
1818 builder->AddCharacter(Next());
1819 Advance(2);
1820 break;
1821 }
1822 break;
1823 case '{': {
1824 if (unicode_)
1825 return ReportError(JSMSG_RAW_BRACE_IN_REGEP);
1826 int dummy;
1827 if (ParseIntervalQuantifier(&dummy, &dummy))
1828 return ReportError(JSMSG_NOTHING_TO_REPEAT);
1829 MOZ_FALLTHROUGH;
1830 }
1831 default:
1832 if (unicode_) {
1833 char16_t lead, trail;
1834 if (ParseRawSurrogatePair(&lead, &trail)) {
1835 builder->AddAtom(SurrogatePairAtom(alloc, lead, trail, ignore_case_));
1836 } else {
1837 widechar c = current();
1838 if (unicode::IsLeadSurrogate(c))
1839 builder->AddAtom(LeadSurrogateAtom(alloc, c));
1840 else if (unicode::IsTrailSurrogate(c))
1841 builder->AddAtom(TrailSurrogateAtom(alloc, c));
1842 else if (c == ']')
1843 return ReportError(JSMSG_RAW_BRACKET_IN_REGEP);
1844 else if (c == '}')
1845 return ReportError(JSMSG_RAW_BRACE_IN_REGEP);
1846 else
1847 builder->AddCharacter(c);
1848 Advance();
1849 }
1850 break;
1851 }
1852 builder->AddCharacter(current());
1853 Advance();
1854 break;
1855 } // end switch(current())
1856
1857 int min;
1858 int max;
1859 switch (current()) {
1860 // QuantifierPrefix ::
1861 // *
1862 // +
1863 // ?
1864 // {
1865 case '*':
1866 min = 0;
1867 max = RegExpTree::kInfinity;
1868 Advance();
1869 break;
1870 case '+':
1871 min = 1;
1872 max = RegExpTree::kInfinity;
1873 Advance();
1874 break;
1875 case '?':
1876 min = 0;
1877 max = 1;
1878 Advance();
1879 break;
1880 case '{':
1881 if (ParseIntervalQuantifier(&min, &max)) {
1882 if (max < min)
1883 return ReportError(JSMSG_NUMBERS_OUT_OF_ORDER);
1884 break;
1885 } else {
1886 continue;
1887 }
1888 default:
1889 continue;
1890 }
1891 RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;
1892 if (current() == '?') {
1893 quantifier_type = RegExpQuantifier::NON_GREEDY;
1894 Advance();
1895 }
1896 builder->AddQuantifierToAtom(min, max, quantifier_type);
1897 }
1898 }
1899
1900 template class irregexp::RegExpParser<Latin1Char>;
1901 template class irregexp::RegExpParser<char16_t>;
1902
1903 template <typename CharT>
1904 static bool
ParsePattern(frontend::TokenStreamAnyChars & ts,LifoAlloc & alloc,const CharT * chars,size_t length,bool multiline,bool match_only,bool unicode,bool ignore_case,bool global,bool sticky,RegExpCompileData * data)1905 ParsePattern(frontend::TokenStreamAnyChars& ts, LifoAlloc& alloc,
1906 const CharT* chars, size_t length,
1907 bool multiline, bool match_only, bool unicode, bool ignore_case,
1908 bool global, bool sticky, RegExpCompileData* data)
1909 {
1910 // We shouldn't strip pattern for exec, or test with global/sticky,
1911 // to reflect correct match position and lastIndex.
1912 if (match_only && !global && !sticky) {
1913 // Try to strip a leading '.*' from the RegExp, but only if it is not
1914 // followed by a '?' (which will affect how the .* is parsed). This
1915 // pattern will affect the captures produced by the RegExp, but not
1916 // whether there is a match or not.
1917 if (length >= 3 && chars[0] == '.' && chars[1] == '*' && chars[2] != '?') {
1918 chars += 2;
1919 length -= 2;
1920 }
1921
1922 // Try to strip a trailing '.*' from the RegExp, which as above will
1923 // affect the captures but not whether there is a match. Only do this
1924 // when there are no other meta characters in the RegExp, so that we
1925 // are sure this will not affect how the RegExp is parsed.
1926 if (length >= 3 && !HasRegExpMetaChars(chars, length - 2) &&
1927 chars[length - 2] == '.' && chars[length - 1] == '*')
1928 {
1929 length -= 2;
1930 }
1931 }
1932
1933 RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode, ignore_case);
1934 data->tree = parser.ParsePattern();
1935 if (!data->tree)
1936 return false;
1937
1938 data->simple = parser.simple();
1939 data->contains_anchor = parser.contains_anchor();
1940 data->capture_count = parser.captures_started();
1941 return true;
1942 }
1943
1944 bool
ParsePattern(frontend::TokenStreamAnyChars & ts,LifoAlloc & alloc,JSAtom * str,bool multiline,bool match_only,bool unicode,bool ignore_case,bool global,bool sticky,RegExpCompileData * data)1945 irregexp::ParsePattern(frontend::TokenStreamAnyChars& ts, LifoAlloc& alloc, JSAtom* str,
1946 bool multiline, bool match_only, bool unicode, bool ignore_case,
1947 bool global, bool sticky, RegExpCompileData* data)
1948 {
1949 JS::AutoCheckCannotGC nogc;
1950 return str->hasLatin1Chars()
1951 ? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
1952 multiline, match_only, unicode, ignore_case, global, sticky, data)
1953 : ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
1954 multiline, match_only, unicode, ignore_case, global, sticky, data);
1955 }
1956
1957 template <typename CharT>
1958 static bool
ParsePatternSyntax(frontend::TokenStreamAnyChars & ts,LifoAlloc & alloc,const CharT * chars,size_t length,bool unicode)1959 ParsePatternSyntax(frontend::TokenStreamAnyChars& ts, LifoAlloc& alloc,
1960 const CharT* chars, size_t length, bool unicode)
1961 {
1962 LifoAllocScope scope(&alloc);
1963
1964 RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode, false);
1965 return parser.ParsePattern() != nullptr;
1966 }
1967
1968 bool
ParsePatternSyntax(frontend::TokenStreamAnyChars & ts,LifoAlloc & alloc,JSAtom * str,bool unicode)1969 irregexp::ParsePatternSyntax(frontend::TokenStreamAnyChars& ts, LifoAlloc& alloc, JSAtom* str,
1970 bool unicode)
1971 {
1972 JS::AutoCheckCannotGC nogc;
1973 return str->hasLatin1Chars()
1974 ? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode)
1975 : ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode);
1976 }
1977
1978 bool
ParsePatternSyntax(frontend::TokenStreamAnyChars & ts,LifoAlloc & alloc,const mozilla::Range<const char16_t> chars,bool unicode)1979 irregexp::ParsePatternSyntax(frontend::TokenStreamAnyChars& ts, LifoAlloc& alloc,
1980 const mozilla::Range<const char16_t> chars, bool unicode)
1981 {
1982 return ::ParsePatternSyntax(ts, alloc, chars.begin().get(), chars.length(), unicode);
1983 }
1984