1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "Tokenizer.h"
8 
9 #include "nsUnicharUtils.h"
10 #include <algorithm>
11 
12 namespace mozilla {
13 
14 template <>
15 char const TokenizerBase<char>::sWhitespaces[] = {' ', '\t', 0};
16 template <>
17 char16_t const TokenizerBase<char16_t>::sWhitespaces[3] = {' ', '\t', 0};
18 
// Returns true when |needle| occurs in |list|, a zero-terminated array of
// characters.  The scan stops at the terminating zero, so the terminator
// itself is never matched.
template <typename TChar>
static bool contains(TChar const* const list, TChar const needle) {
  TChar const* cursor = list;
  while (*cursor) {
    if (*cursor == needle) {
      return true;
    }
    ++cursor;
  }
  return false;
}
28 
29 template <typename TChar>
TTokenizer(const typename base::TAString & aSource,const TChar * aWhitespaces,const TChar * aAdditionalWordChars)30 TTokenizer<TChar>::TTokenizer(const typename base::TAString& aSource,
31                               const TChar* aWhitespaces,
32                               const TChar* aAdditionalWordChars)
33     : TokenizerBase<TChar>(aWhitespaces, aAdditionalWordChars) {
34   base::mInputFinished = true;
35   aSource.BeginReading(base::mCursor);
36   mRecord = mRollback = base::mCursor;
37   aSource.EndReading(base::mEnd);
38 }
39 
40 template <typename TChar>
TTokenizer(const TChar * aSource,const TChar * aWhitespaces,const TChar * aAdditionalWordChars)41 TTokenizer<TChar>::TTokenizer(const TChar* aSource, const TChar* aWhitespaces,
42                               const TChar* aAdditionalWordChars)
43     : TTokenizer(typename base::TDependentString(aSource), aWhitespaces,
44                  aAdditionalWordChars) {}
45 
46 template <typename TChar>
Next(typename base::Token & aToken)47 bool TTokenizer<TChar>::Next(typename base::Token& aToken) {
48   if (!base::HasInput()) {
49     base::mHasFailed = true;
50     return false;
51   }
52 
53   mRollback = base::mCursor;
54   base::mCursor = base::Parse(aToken);
55 
56   base::AssignFragment(aToken, mRollback, base::mCursor);
57 
58   base::mPastEof = aToken.Type() == base::TOKEN_EOF;
59   base::mHasFailed = false;
60   return true;
61 }
62 
63 template <typename TChar>
Check(const typename base::TokenType aTokenType,typename base::Token & aResult)64 bool TTokenizer<TChar>::Check(const typename base::TokenType aTokenType,
65                               typename base::Token& aResult) {
66   if (!base::HasInput()) {
67     base::mHasFailed = true;
68     return false;
69   }
70 
71   typename base::TAString::const_char_iterator next = base::Parse(aResult);
72   if (aTokenType != aResult.Type()) {
73     base::mHasFailed = true;
74     return false;
75   }
76 
77   mRollback = base::mCursor;
78   base::mCursor = next;
79 
80   base::AssignFragment(aResult, mRollback, base::mCursor);
81 
82   base::mPastEof = aResult.Type() == base::TOKEN_EOF;
83   base::mHasFailed = false;
84   return true;
85 }
86 
87 template <typename TChar>
Check(const typename base::Token & aToken)88 bool TTokenizer<TChar>::Check(const typename base::Token& aToken) {
89 #ifdef DEBUG
90   base::Validate(aToken);
91 #endif
92 
93   if (!base::HasInput()) {
94     base::mHasFailed = true;
95     return false;
96   }
97 
98   typename base::Token parsed;
99   typename base::TAString::const_char_iterator next = base::Parse(parsed);
100   if (!aToken.Equals(parsed)) {
101     base::mHasFailed = true;
102     return false;
103   }
104 
105   mRollback = base::mCursor;
106   base::mCursor = next;
107   base::mPastEof = parsed.Type() == base::TOKEN_EOF;
108   base::mHasFailed = false;
109   return true;
110 }
111 
112 template <typename TChar>
SkipWhites(WhiteSkipping aIncludeNewLines)113 void TTokenizer<TChar>::SkipWhites(WhiteSkipping aIncludeNewLines) {
114   if (!CheckWhite() &&
115       (aIncludeNewLines == DONT_INCLUDE_NEW_LINE || !CheckEOL())) {
116     return;
117   }
118 
119   typename base::TAString::const_char_iterator rollback = mRollback;
120   while (CheckWhite() || (aIncludeNewLines == INCLUDE_NEW_LINE && CheckEOL())) {
121   }
122 
123   base::mHasFailed = false;
124   mRollback = rollback;
125 }
126 
127 template <typename TChar>
SkipUntil(typename base::Token const & aToken)128 void TTokenizer<TChar>::SkipUntil(typename base::Token const& aToken) {
129   typename base::TAString::const_char_iterator rollback = base::mCursor;
130   const typename base::Token eof = base::Token::EndOfFile();
131 
132   typename base::Token t;
133   while (Next(t)) {
134     if (aToken.Equals(t) || eof.Equals(t)) {
135       Rollback();
136       break;
137     }
138   }
139 
140   mRollback = rollback;
141 }
142 
143 template <typename TChar>
CheckChar(bool (* aClassifier)(const TChar aChar))144 bool TTokenizer<TChar>::CheckChar(bool (*aClassifier)(const TChar aChar)) {
145   if (!aClassifier) {
146     MOZ_ASSERT(false);
147     return false;
148   }
149 
150   if (!base::HasInput() || base::mCursor == base::mEnd) {
151     base::mHasFailed = true;
152     return false;
153   }
154 
155   if (!aClassifier(*base::mCursor)) {
156     base::mHasFailed = true;
157     return false;
158   }
159 
160   mRollback = base::mCursor;
161   ++base::mCursor;
162   base::mHasFailed = false;
163   return true;
164 }
165 
166 template <typename TChar>
CheckPhrase(const typename base::TAString & aPhrase)167 bool TTokenizer<TChar>::CheckPhrase(const typename base::TAString& aPhrase) {
168   if (!base::HasInput()) {
169     return false;
170   }
171 
172   typedef typename base::TAString::const_char_iterator Cursor;
173 
174   TTokenizer<TChar> pattern(aPhrase);
175   MOZ_ASSERT(!pattern.CheckEOF(),
176              "This will return true but won't shift the Tokenizer's cursor");
177 
178   return [&](Cursor cursor, Cursor rollback) mutable {
179     while (true) {
180       if (pattern.CheckEOF()) {
181         base::mHasFailed = false;
182         mRollback = cursor;
183         return true;
184       }
185 
186       typename base::Token t1, t2;
187       Unused << Next(t1);
188       Unused << pattern.Next(t2);
189       if (t1.Type() == t2.Type() && t1.Fragment().Equals(t2.Fragment())) {
190         continue;
191       }
192 
193       break;
194     }
195 
196     base::mHasFailed = true;
197     base::mPastEof = false;
198     base::mCursor = cursor;
199     mRollback = rollback;
200     return false;
201   }(base::mCursor, mRollback);
202 }
203 
204 template <typename TChar>
ReadChar(TChar * aValue)205 bool TTokenizer<TChar>::ReadChar(TChar* aValue) {
206   MOZ_RELEASE_ASSERT(aValue);
207 
208   typename base::Token t;
209   if (!Check(base::TOKEN_CHAR, t)) {
210     return false;
211   }
212 
213   *aValue = t.AsChar();
214   return true;
215 }
216 
217 template <typename TChar>
ReadChar(bool (* aClassifier)(const TChar aChar),TChar * aValue)218 bool TTokenizer<TChar>::ReadChar(bool (*aClassifier)(const TChar aChar),
219                                  TChar* aValue) {
220   MOZ_RELEASE_ASSERT(aValue);
221 
222   if (!CheckChar(aClassifier)) {
223     return false;
224   }
225 
226   *aValue = *mRollback;
227   return true;
228 }
229 
230 template <typename TChar>
ReadWord(typename base::TAString & aValue)231 bool TTokenizer<TChar>::ReadWord(typename base::TAString& aValue) {
232   typename base::Token t;
233   if (!Check(base::TOKEN_WORD, t)) {
234     return false;
235   }
236 
237   aValue.Assign(t.AsString());
238   return true;
239 }
240 
241 template <typename TChar>
ReadWord(typename base::TDependentSubstring & aValue)242 bool TTokenizer<TChar>::ReadWord(typename base::TDependentSubstring& aValue) {
243   typename base::Token t;
244   if (!Check(base::TOKEN_WORD, t)) {
245     return false;
246   }
247 
248   aValue.Rebind(t.AsString().BeginReading(), t.AsString().Length());
249   return true;
250 }
251 
252 template <typename TChar>
ReadUntil(typename base::Token const & aToken,typename base::TAString & aResult,ClaimInclusion aInclude)253 bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
254                                   typename base::TAString& aResult,
255                                   ClaimInclusion aInclude) {
256   typename base::TDependentSubstring substring;
257   bool rv = ReadUntil(aToken, substring, aInclude);
258   aResult.Assign(substring);
259   return rv;
260 }
261 
262 template <typename TChar>
ReadUntil(typename base::Token const & aToken,typename base::TDependentSubstring & aResult,ClaimInclusion aInclude)263 bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
264                                   typename base::TDependentSubstring& aResult,
265                                   ClaimInclusion aInclude) {
266   typename base::TAString::const_char_iterator record = mRecord;
267   Record();
268   typename base::TAString::const_char_iterator rollback = mRollback =
269       base::mCursor;
270 
271   bool found = false;
272   typename base::Token t;
273   while (Next(t)) {
274     if (aToken.Equals(t)) {
275       found = true;
276       break;
277     }
278     if (t.Equals(base::Token::EndOfFile())) {
279       // We don't want to eat it.
280       Rollback();
281       break;
282     }
283   }
284 
285   Claim(aResult, aInclude);
286   mRollback = rollback;
287   mRecord = record;
288   return found;
289 }
290 
291 template <typename TChar>
Rollback()292 void TTokenizer<TChar>::Rollback() {
293   MOZ_ASSERT(base::mCursor > mRollback || base::mPastEof, "TODO!!!");
294 
295   base::mPastEof = false;
296   base::mHasFailed = false;
297   base::mCursor = mRollback;
298 }
299 
300 template <typename TChar>
Record(ClaimInclusion aInclude)301 void TTokenizer<TChar>::Record(ClaimInclusion aInclude) {
302   mRecord = aInclude == INCLUDE_LAST ? mRollback : base::mCursor;
303 }
304 
305 template <typename TChar>
Claim(typename base::TAString & aResult,ClaimInclusion aInclusion)306 void TTokenizer<TChar>::Claim(typename base::TAString& aResult,
307                               ClaimInclusion aInclusion) {
308   typename base::TAString::const_char_iterator close =
309       aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
310   aResult.Assign(Substring(mRecord, close));
311 }
312 
313 template <typename TChar>
Claim(typename base::TDependentSubstring & aResult,ClaimInclusion aInclusion)314 void TTokenizer<TChar>::Claim(typename base::TDependentSubstring& aResult,
315                               ClaimInclusion aInclusion) {
316   typename base::TAString::const_char_iterator close =
317       aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
318 
319   MOZ_RELEASE_ASSERT(close >= mRecord, "Overflow!");
320   aResult.Rebind(mRecord, close - mRecord);
321 }
322 
323 // TokenizerBase
324 
325 template <typename TChar>
TokenizerBase(const TChar * aWhitespaces,const TChar * aAdditionalWordChars)326 TokenizerBase<TChar>::TokenizerBase(const TChar* aWhitespaces,
327                                     const TChar* aAdditionalWordChars)
328     : mPastEof(false),
329       mHasFailed(false),
330       mInputFinished(true),
331       mMode(Mode::FULL),
332       mMinRawDelivery(1024),
333       mWhitespaces(aWhitespaces ? aWhitespaces : sWhitespaces),
334       mAdditionalWordChars(aAdditionalWordChars),
335       mCursor(nullptr),
336       mEnd(nullptr),
337       mNextCustomTokenID(TOKEN_CUSTOM0) {}
338 
339 template <typename TChar>
AddCustomToken(const TAString & aValue,ECaseSensitivity aCaseInsensitivity,bool aEnabled)340 auto TokenizerBase<TChar>::AddCustomToken(const TAString& aValue,
341                                           ECaseSensitivity aCaseInsensitivity,
342                                           bool aEnabled) -> Token {
343   MOZ_ASSERT(!aValue.IsEmpty());
344 
345   UniquePtr<Token>& t = *mCustomTokens.AppendElement();
346   t = MakeUnique<Token>();
347 
348   t->mType = static_cast<TokenType>(++mNextCustomTokenID);
349   t->mCustomCaseInsensitivity = aCaseInsensitivity;
350   t->mCustomEnabled = aEnabled;
351   t->mCustom.Assign(aValue);
352   return *t;
353 }
354 
355 template <typename TChar>
RemoveCustomToken(Token & aToken)356 void TokenizerBase<TChar>::RemoveCustomToken(Token& aToken) {
357   if (aToken.mType == TOKEN_UNKNOWN) {
358     // Already removed
359     return;
360   }
361 
362   for (UniquePtr<Token> const& custom : mCustomTokens) {
363     if (custom->mType == aToken.mType) {
364       mCustomTokens.RemoveElement(custom);
365       aToken.mType = TOKEN_UNKNOWN;
366       return;
367     }
368   }
369 
370   MOZ_ASSERT(false, "Token to remove not found");
371 }
372 
373 template <typename TChar>
EnableCustomToken(Token const & aToken,bool aEnabled)374 void TokenizerBase<TChar>::EnableCustomToken(Token const& aToken,
375                                              bool aEnabled) {
376   if (aToken.mType == TOKEN_UNKNOWN) {
377     // Already removed
378     return;
379   }
380 
381   for (UniquePtr<Token> const& custom : mCustomTokens) {
382     if (custom->Type() == aToken.Type()) {
383       // This effectively destroys the token instance.
384       custom->mCustomEnabled = aEnabled;
385       return;
386     }
387   }
388 
389   MOZ_ASSERT(false, "Token to change not found");
390 }
391 
392 template <typename TChar>
SetTokenizingMode(Mode aMode)393 void TokenizerBase<TChar>::SetTokenizingMode(Mode aMode) {
394   mMode = aMode;
395 }
396 
397 template <typename TChar>
HasFailed() const398 bool TokenizerBase<TChar>::HasFailed() const {
399   return mHasFailed;
400 }
401 
402 template <typename TChar>
HasInput() const403 bool TokenizerBase<TChar>::HasInput() const {
404   return !mPastEof;
405 }
406 
407 template <typename TChar>
Parse(Token & aToken) const408 auto TokenizerBase<TChar>::Parse(Token& aToken) const ->
409     typename TAString::const_char_iterator {
410   if (mCursor == mEnd) {
411     if (!mInputFinished) {
412       return mCursor;
413     }
414 
415     aToken = Token::EndOfFile();
416     return mEnd;
417   }
418 
419   MOZ_RELEASE_ASSERT(mEnd >= mCursor, "Overflow!");
420   typename TAString::size_type available = mEnd - mCursor;
421 
422   uint32_t longestCustom = 0;
423   for (UniquePtr<Token> const& custom : mCustomTokens) {
424     if (IsCustom(mCursor, *custom, &longestCustom)) {
425       aToken = *custom;
426       return mCursor + custom->mCustom.Length();
427     }
428   }
429 
430   if (!mInputFinished && available < longestCustom) {
431     // Not enough data to deterministically decide.
432     return mCursor;
433   }
434 
435   typename TAString::const_char_iterator next = mCursor;
436 
437   if (mMode == Mode::CUSTOM_ONLY) {
438     // We have to do a brute-force search for all of the enabled custom
439     // tokens.
440     while (next < mEnd) {
441       ++next;
442       for (UniquePtr<Token> const& custom : mCustomTokens) {
443         if (IsCustom(next, *custom)) {
444           aToken = Token::Raw();
445           return next;
446         }
447       }
448     }
449 
450     if (mInputFinished) {
451       // End of the data reached.
452       aToken = Token::Raw();
453       return next;
454     }
455 
456     if (longestCustom < available && available > mMinRawDelivery) {
457       // We can return some data w/o waiting for either a custom token
458       // or call to FinishData() when we leave the tail where all the
459       // custom tokens potentially fit, so we can't lose only partially
460       // delivered tokens.  This preserves reasonable granularity.
461       aToken = Token::Raw();
462       return mEnd - longestCustom + 1;
463     }
464 
465     // Not enough data to deterministically decide.
466     return mCursor;
467   }
468 
469   enum State {
470     PARSE_INTEGER,
471     PARSE_WORD,
472     PARSE_CRLF,
473     PARSE_LF,
474     PARSE_WS,
475     PARSE_CHAR,
476   } state;
477 
478   if (IsWordFirst(*next)) {
479     state = PARSE_WORD;
480   } else if (IsNumber(*next)) {
481     state = PARSE_INTEGER;
482   } else if (contains(mWhitespaces, *next)) {  // not UTF-8 friendly?
483     state = PARSE_WS;
484   } else if (*next == '\r') {
485     state = PARSE_CRLF;
486   } else if (*next == '\n') {
487     state = PARSE_LF;
488   } else {
489     state = PARSE_CHAR;
490   }
491 
492   mozilla::CheckedUint64 resultingNumber = 0;
493 
494   while (next < mEnd) {
495     switch (state) {
496       case PARSE_INTEGER:
497         // Keep it simple for now
498         resultingNumber *= 10;
499         resultingNumber += static_cast<uint64_t>(*next - '0');
500 
501         ++next;
502         if (IsPending(next)) {
503           break;
504         }
505         if (IsEnd(next) || !IsNumber(*next)) {
506           if (!resultingNumber.isValid()) {
507             aToken = Token::Error();
508           } else {
509             aToken = Token::Number(resultingNumber.value());
510           }
511           return next;
512         }
513         break;
514 
515       case PARSE_WORD:
516         ++next;
517         if (IsPending(next)) {
518           break;
519         }
520         if (IsEnd(next) || !IsWord(*next)) {
521           aToken = Token::Word(Substring(mCursor, next));
522           return next;
523         }
524         break;
525 
526       case PARSE_CRLF:
527         ++next;
528         if (IsPending(next)) {
529           break;
530         }
531         if (!IsEnd(next) && *next == '\n') {  // LF is optional
532           ++next;
533         }
534         aToken = Token::NewLine();
535         return next;
536 
537       case PARSE_LF:
538         ++next;
539         aToken = Token::NewLine();
540         return next;
541 
542       case PARSE_WS:
543         ++next;
544         aToken = Token::Whitespace();
545         return next;
546 
547       case PARSE_CHAR:
548         ++next;
549         aToken = Token::Char(*mCursor);
550         return next;
551     }  // switch (state)
552   }    // while (next < end)
553 
554   MOZ_ASSERT(!mInputFinished);
555   return mCursor;
556 }
557 
558 template <typename TChar>
IsEnd(const typename TAString::const_char_iterator & caret) const559 bool TokenizerBase<TChar>::IsEnd(
560     const typename TAString::const_char_iterator& caret) const {
561   return caret == mEnd;
562 }
563 
564 template <typename TChar>
IsPending(const typename TAString::const_char_iterator & caret) const565 bool TokenizerBase<TChar>::IsPending(
566     const typename TAString::const_char_iterator& caret) const {
567   return IsEnd(caret) && !mInputFinished;
568 }
569 
570 template <typename TChar>
IsWordFirst(const TChar aInput) const571 bool TokenizerBase<TChar>::IsWordFirst(const TChar aInput) const {
572   // TODO: make this fully work with unicode
573   return (ToLowerCase(static_cast<uint32_t>(aInput)) !=
574           ToUpperCase(static_cast<uint32_t>(aInput))) ||
575          '_' == aInput ||
576          (mAdditionalWordChars ? contains(mAdditionalWordChars, aInput)
577                                : false);
578 }
579 
580 template <typename TChar>
IsWord(const TChar aInput) const581 bool TokenizerBase<TChar>::IsWord(const TChar aInput) const {
582   return IsWordFirst(aInput) || IsNumber(aInput);
583 }
584 
585 template <typename TChar>
IsNumber(const TChar aInput) const586 bool TokenizerBase<TChar>::IsNumber(const TChar aInput) const {
587   // TODO: are there unicode numbers?
588   return aInput >= '0' && aInput <= '9';
589 }
590 
591 template <typename TChar>
IsCustom(const typename TAString::const_char_iterator & caret,const Token & aCustomToken,uint32_t * aLongest) const592 bool TokenizerBase<TChar>::IsCustom(
593     const typename TAString::const_char_iterator& caret,
594     const Token& aCustomToken, uint32_t* aLongest) const {
595   MOZ_ASSERT(aCustomToken.mType > TOKEN_CUSTOM0);
596   if (!aCustomToken.mCustomEnabled) {
597     return false;
598   }
599 
600   if (aLongest) {
601     *aLongest = std::max<uint32_t>(*aLongest, aCustomToken.mCustom.Length());
602   }
603 
604   // This is not very likely to happen according to how we call this method
605   // and since it's on a hot path, it's just a diagnostic assert,
606   // not a release assert.
607   MOZ_DIAGNOSTIC_ASSERT(mEnd >= caret, "Overflow?");
608   uint32_t inputLength = mEnd - caret;
609   if (aCustomToken.mCustom.Length() > inputLength) {
610     return false;
611   }
612 
613   TDependentSubstring inputFragment(caret, aCustomToken.mCustom.Length());
614   if (aCustomToken.mCustomCaseInsensitivity == CASE_INSENSITIVE) {
615     if constexpr (std::is_same_v<TChar, char>) {
616       return inputFragment.Equals(aCustomToken.mCustom,
617                                   nsCaseInsensitiveUTF8StringComparator);
618     } else {
619       return inputFragment.Equals(aCustomToken.mCustom,
620                                   nsCaseInsensitiveStringComparator);
621     }
622   }
623   return inputFragment.Equals(aCustomToken.mCustom);
624 }
625 
626 template <typename TChar>
AssignFragment(Token & aToken,typename TAString::const_char_iterator begin,typename TAString::const_char_iterator end)627 void TokenizerBase<TChar>::AssignFragment(
628     Token& aToken, typename TAString::const_char_iterator begin,
629     typename TAString::const_char_iterator end) {
630   aToken.AssignFragment(begin, end);
631 }
632 
633 #ifdef DEBUG
634 
635 template <typename TChar>
Validate(Token const & aToken)636 void TokenizerBase<TChar>::Validate(Token const& aToken) {
637   if (aToken.Type() == TOKEN_WORD) {
638     typename TAString::const_char_iterator c = aToken.AsString().BeginReading();
639     typename TAString::const_char_iterator e = aToken.AsString().EndReading();
640 
641     if (c < e) {
642       MOZ_ASSERT(IsWordFirst(*c));
643       while (++c < e) {
644         MOZ_ASSERT(IsWord(*c));
645       }
646     }
647   }
648 }
649 
650 #endif
651 
652 // TokenizerBase::Token
653 
654 template <typename TChar>
Token()655 TokenizerBase<TChar>::Token::Token()
656     : mType(TOKEN_UNKNOWN),
657       mChar(0),
658       mInteger(0),
659       mCustomCaseInsensitivity(CASE_SENSITIVE),
660       mCustomEnabled(false) {}
661 
662 template <typename TChar>
Token(const Token & aOther)663 TokenizerBase<TChar>::Token::Token(const Token& aOther)
664     : mType(aOther.mType),
665       mCustom(aOther.mCustom),
666       mChar(aOther.mChar),
667       mInteger(aOther.mInteger),
668       mCustomCaseInsensitivity(aOther.mCustomCaseInsensitivity),
669       mCustomEnabled(aOther.mCustomEnabled) {
670   if (mType == TOKEN_WORD || mType > TOKEN_CUSTOM0) {
671     mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
672   }
673 }
674 
675 template <typename TChar>
operator =(const Token & aOther)676 auto TokenizerBase<TChar>::Token::operator=(const Token& aOther) -> Token& {
677   mType = aOther.mType;
678   mCustom = aOther.mCustom;
679   mChar = aOther.mChar;
680   mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
681   mInteger = aOther.mInteger;
682   mCustomCaseInsensitivity = aOther.mCustomCaseInsensitivity;
683   mCustomEnabled = aOther.mCustomEnabled;
684   return *this;
685 }
686 
687 template <typename TChar>
AssignFragment(typename TAString::const_char_iterator begin,typename TAString::const_char_iterator end)688 void TokenizerBase<TChar>::Token::AssignFragment(
689     typename TAString::const_char_iterator begin,
690     typename TAString::const_char_iterator end) {
691   MOZ_RELEASE_ASSERT(end >= begin, "Overflow!");
692   mFragment.Rebind(begin, end - begin);
693 }
694 
695 // static
696 template <typename TChar>
Raw()697 auto TokenizerBase<TChar>::Token::Raw() -> Token {
698   Token t;
699   t.mType = TOKEN_RAW;
700   return t;
701 }
702 
703 // static
704 template <typename TChar>
Word(TAString const & aValue)705 auto TokenizerBase<TChar>::Token::Word(TAString const& aValue) -> Token {
706   Token t;
707   t.mType = TOKEN_WORD;
708   t.mWord.Rebind(aValue.BeginReading(), aValue.Length());
709   return t;
710 }
711 
712 // static
713 template <typename TChar>
Char(TChar const aValue)714 auto TokenizerBase<TChar>::Token::Char(TChar const aValue) -> Token {
715   Token t;
716   t.mType = TOKEN_CHAR;
717   t.mChar = aValue;
718   return t;
719 }
720 
721 // static
722 template <typename TChar>
Number(uint64_t const aValue)723 auto TokenizerBase<TChar>::Token::Number(uint64_t const aValue) -> Token {
724   Token t;
725   t.mType = TOKEN_INTEGER;
726   t.mInteger = aValue;
727   return t;
728 }
729 
730 // static
731 template <typename TChar>
Whitespace()732 auto TokenizerBase<TChar>::Token::Whitespace() -> Token {
733   Token t;
734   t.mType = TOKEN_WS;
735   t.mChar = '\0';
736   return t;
737 }
738 
739 // static
740 template <typename TChar>
NewLine()741 auto TokenizerBase<TChar>::Token::NewLine() -> Token {
742   Token t;
743   t.mType = TOKEN_EOL;
744   return t;
745 }
746 
747 // static
748 template <typename TChar>
EndOfFile()749 auto TokenizerBase<TChar>::Token::EndOfFile() -> Token {
750   Token t;
751   t.mType = TOKEN_EOF;
752   return t;
753 }
754 
755 // static
756 template <typename TChar>
Error()757 auto TokenizerBase<TChar>::Token::Error() -> Token {
758   Token t;
759   t.mType = TOKEN_ERROR;
760   return t;
761 }
762 
763 template <typename TChar>
Equals(const Token & aOther) const764 bool TokenizerBase<TChar>::Token::Equals(const Token& aOther) const {
765   if (mType != aOther.mType) {
766     return false;
767   }
768 
769   switch (mType) {
770     case TOKEN_INTEGER:
771       return AsInteger() == aOther.AsInteger();
772     case TOKEN_WORD:
773       return AsString() == aOther.AsString();
774     case TOKEN_CHAR:
775       return AsChar() == aOther.AsChar();
776     default:
777       return true;
778   }
779 }
780 
781 template <typename TChar>
AsChar() const782 TChar TokenizerBase<TChar>::Token::AsChar() const {
783   MOZ_ASSERT(mType == TOKEN_CHAR || mType == TOKEN_WS);
784   return mChar;
785 }
786 
787 template <typename TChar>
AsString() const788 auto TokenizerBase<TChar>::Token::AsString() const -> TDependentSubstring {
789   MOZ_ASSERT(mType == TOKEN_WORD);
790   return mWord;
791 }
792 
793 template <typename TChar>
AsInteger() const794 uint64_t TokenizerBase<TChar>::Token::AsInteger() const {
795   MOZ_ASSERT(mType == TOKEN_INTEGER);
796   return mInteger;
797 }
798 
799 template class TokenizerBase<char>;
800 template class TokenizerBase<char16_t>;
801 
802 template class TTokenizer<char>;
803 template class TTokenizer<char16_t>;
804 
805 }  // namespace mozilla
806