1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "Tokenizer.h"
8
9 #include "nsUnicharUtils.h"
10 #include <algorithm>
11
12 namespace mozilla {
13
14 template <>
15 char const TokenizerBase<char>::sWhitespaces[] = {' ', '\t', 0};
16 template <>
17 char16_t const TokenizerBase<char16_t>::sWhitespaces[3] = {' ', '\t', 0};
18
// Returns true when |needle| occurs in the zero-terminated array |list|.
// The terminating zero itself is never reported as a match.
template <typename TChar>
static bool contains(TChar const* const list, TChar const needle) {
  TChar const* it = list;
  // Stop either on the terminator or on the first matching character.
  while (*it && needle != *it) {
    ++it;
  }
  // Landing on a non-zero character means the scan stopped on a match.
  return *it ? true : false;
}
28
29 template <typename TChar>
TTokenizer(const typename base::TAString & aSource,const TChar * aWhitespaces,const TChar * aAdditionalWordChars)30 TTokenizer<TChar>::TTokenizer(const typename base::TAString& aSource,
31 const TChar* aWhitespaces,
32 const TChar* aAdditionalWordChars)
33 : TokenizerBase<TChar>(aWhitespaces, aAdditionalWordChars) {
34 base::mInputFinished = true;
35 aSource.BeginReading(base::mCursor);
36 mRecord = mRollback = base::mCursor;
37 aSource.EndReading(base::mEnd);
38 }
39
// Convenience constructor taking a raw character pointer: wraps aSource in a
// TDependentString and delegates to the TAString-based constructor.
// NOTE(review): presumably aSource must be zero-terminated and must outlive
// the tokenizer -- confirm against TDependentString.
template <typename TChar>
TTokenizer<TChar>::TTokenizer(const TChar* aSource, const TChar* aWhitespaces,
                              const TChar* aAdditionalWordChars)
    : TTokenizer(typename base::TDependentString(aSource), aWhitespaces,
                 aAdditionalWordChars) {}
45
46 template <typename TChar>
Next(typename base::Token & aToken)47 bool TTokenizer<TChar>::Next(typename base::Token& aToken) {
48 if (!base::HasInput()) {
49 base::mHasFailed = true;
50 return false;
51 }
52
53 mRollback = base::mCursor;
54 base::mCursor = base::Parse(aToken);
55
56 base::AssignFragment(aToken, mRollback, base::mCursor);
57
58 base::mPastEof = aToken.Type() == base::TOKEN_EOF;
59 base::mHasFailed = false;
60 return true;
61 }
62
63 template <typename TChar>
Check(const typename base::TokenType aTokenType,typename base::Token & aResult)64 bool TTokenizer<TChar>::Check(const typename base::TokenType aTokenType,
65 typename base::Token& aResult) {
66 if (!base::HasInput()) {
67 base::mHasFailed = true;
68 return false;
69 }
70
71 typename base::TAString::const_char_iterator next = base::Parse(aResult);
72 if (aTokenType != aResult.Type()) {
73 base::mHasFailed = true;
74 return false;
75 }
76
77 mRollback = base::mCursor;
78 base::mCursor = next;
79
80 base::AssignFragment(aResult, mRollback, base::mCursor);
81
82 base::mPastEof = aResult.Type() == base::TOKEN_EOF;
83 base::mHasFailed = false;
84 return true;
85 }
86
87 template <typename TChar>
Check(const typename base::Token & aToken)88 bool TTokenizer<TChar>::Check(const typename base::Token& aToken) {
89 #ifdef DEBUG
90 base::Validate(aToken);
91 #endif
92
93 if (!base::HasInput()) {
94 base::mHasFailed = true;
95 return false;
96 }
97
98 typename base::Token parsed;
99 typename base::TAString::const_char_iterator next = base::Parse(parsed);
100 if (!aToken.Equals(parsed)) {
101 base::mHasFailed = true;
102 return false;
103 }
104
105 mRollback = base::mCursor;
106 base::mCursor = next;
107 base::mPastEof = parsed.Type() == base::TOKEN_EOF;
108 base::mHasFailed = false;
109 return true;
110 }
111
// Skips a run of whitespace tokens and, when aIncludeNewLines is
// INCLUDE_NEW_LINE, end-of-line tokens as well. CheckWhite()/CheckEOL() are
// declared elsewhere; presumably each consumes one matching token and returns
// whether it did -- confirm against the header.
template <typename TChar>
void TTokenizer<TChar>::SkipWhites(WhiteSkipping aIncludeNewLines) {
  // First probe: when neither check succeeds nothing was skipped, so return
  // right away without touching mRollback or mHasFailed here.
  if (!CheckWhite() &&
      (aIncludeNewLines == DONT_INCLUDE_NEW_LINE || !CheckEOL())) {
    return;
  }

  // Save mRollback as it stands after the first successful check; the loop
  // below keeps changing it, and it is restored once the whole run is skipped.
  typename base::TAString::const_char_iterator rollback = mRollback;
  while (CheckWhite() || (aIncludeNewLines == INCLUDE_NEW_LINE && CheckEOL())) {
  }

  // The loop always ends on an unsuccessful check; since something has been
  // skipped, the operation as a whole is reported as not failed.
  base::mHasFailed = false;
  mRollback = rollback;
}
126
127 template <typename TChar>
SkipUntil(typename base::Token const & aToken)128 void TTokenizer<TChar>::SkipUntil(typename base::Token const& aToken) {
129 typename base::TAString::const_char_iterator rollback = base::mCursor;
130 const typename base::Token eof = base::Token::EndOfFile();
131
132 typename base::Token t;
133 while (Next(t)) {
134 if (aToken.Equals(t) || eof.Equals(t)) {
135 Rollback();
136 break;
137 }
138 }
139
140 mRollback = rollback;
141 }
142
143 template <typename TChar>
CheckChar(bool (* aClassifier)(const TChar aChar))144 bool TTokenizer<TChar>::CheckChar(bool (*aClassifier)(const TChar aChar)) {
145 if (!aClassifier) {
146 MOZ_ASSERT(false);
147 return false;
148 }
149
150 if (!base::HasInput() || base::mCursor == base::mEnd) {
151 base::mHasFailed = true;
152 return false;
153 }
154
155 if (!aClassifier(*base::mCursor)) {
156 base::mHasFailed = true;
157 return false;
158 }
159
160 mRollback = base::mCursor;
161 ++base::mCursor;
162 base::mHasFailed = false;
163 return true;
164 }
165
// Checks whether the upcoming input matches aPhrase token by token. aPhrase
// is tokenized by a second, temporary tokenizer; tokens from both are compared
// by type and by their source fragment. On a full match the cursor is left
// past the phrase and true is returned; on any mismatch the cursor and the
// rollback position are restored to their values on entry.
// NOTE(review): unlike Next()/Check(), the no-input early return below does
// not set mHasFailed -- confirm this is intended.
template <typename TChar>
bool TTokenizer<TChar>::CheckPhrase(const typename base::TAString& aPhrase) {
  if (!base::HasInput()) {
    return false;
  }

  typedef typename base::TAString::const_char_iterator Cursor;

  TTokenizer<TChar> pattern(aPhrase);
  MOZ_ASSERT(!pattern.CheckEOF(),
             "This will return true but won't shift the Tokenizer's cursor");

  // The lambda is invoked immediately; its arguments capture the cursor and
  // rollback positions as they are before any token is consumed.
  return [&](Cursor cursor, Cursor rollback) mutable {
    while (true) {
      if (pattern.CheckEOF()) {
        // The whole phrase has been matched; a Rollback() from here returns
        // to the position in front of the phrase.
        base::mHasFailed = false;
        mRollback = cursor;
        return true;
      }

      // Advance both tokenizers by one token and compare the results.
      typename base::Token t1, t2;
      Unused << Next(t1);
      Unused << pattern.Next(t2);
      if (t1.Type() == t2.Type() && t1.Fragment().Equals(t2.Fragment())) {
        continue;
      }

      break;
    }

    // Mismatch: undo everything consumed so far.
    base::mHasFailed = true;
    base::mPastEof = false;
    base::mCursor = cursor;
    mRollback = rollback;
    return false;
  }(base::mCursor, mRollback);
}
203
204 template <typename TChar>
ReadChar(TChar * aValue)205 bool TTokenizer<TChar>::ReadChar(TChar* aValue) {
206 MOZ_RELEASE_ASSERT(aValue);
207
208 typename base::Token t;
209 if (!Check(base::TOKEN_CHAR, t)) {
210 return false;
211 }
212
213 *aValue = t.AsChar();
214 return true;
215 }
216
217 template <typename TChar>
ReadChar(bool (* aClassifier)(const TChar aChar),TChar * aValue)218 bool TTokenizer<TChar>::ReadChar(bool (*aClassifier)(const TChar aChar),
219 TChar* aValue) {
220 MOZ_RELEASE_ASSERT(aValue);
221
222 if (!CheckChar(aClassifier)) {
223 return false;
224 }
225
226 *aValue = *mRollback;
227 return true;
228 }
229
230 template <typename TChar>
ReadWord(typename base::TAString & aValue)231 bool TTokenizer<TChar>::ReadWord(typename base::TAString& aValue) {
232 typename base::Token t;
233 if (!Check(base::TOKEN_WORD, t)) {
234 return false;
235 }
236
237 aValue.Assign(t.AsString());
238 return true;
239 }
240
241 template <typename TChar>
ReadWord(typename base::TDependentSubstring & aValue)242 bool TTokenizer<TChar>::ReadWord(typename base::TDependentSubstring& aValue) {
243 typename base::Token t;
244 if (!Check(base::TOKEN_WORD, t)) {
245 return false;
246 }
247
248 aValue.Rebind(t.AsString().BeginReading(), t.AsString().Length());
249 return true;
250 }
251
252 template <typename TChar>
ReadUntil(typename base::Token const & aToken,typename base::TAString & aResult,ClaimInclusion aInclude)253 bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
254 typename base::TAString& aResult,
255 ClaimInclusion aInclude) {
256 typename base::TDependentSubstring substring;
257 bool rv = ReadUntil(aToken, substring, aInclude);
258 aResult.Assign(substring);
259 return rv;
260 }
261
262 template <typename TChar>
ReadUntil(typename base::Token const & aToken,typename base::TDependentSubstring & aResult,ClaimInclusion aInclude)263 bool TTokenizer<TChar>::ReadUntil(typename base::Token const& aToken,
264 typename base::TDependentSubstring& aResult,
265 ClaimInclusion aInclude) {
266 typename base::TAString::const_char_iterator record = mRecord;
267 Record();
268 typename base::TAString::const_char_iterator rollback = mRollback =
269 base::mCursor;
270
271 bool found = false;
272 typename base::Token t;
273 while (Next(t)) {
274 if (aToken.Equals(t)) {
275 found = true;
276 break;
277 }
278 if (t.Equals(base::Token::EndOfFile())) {
279 // We don't want to eat it.
280 Rollback();
281 break;
282 }
283 }
284
285 Claim(aResult, aInclude);
286 mRollback = rollback;
287 mRecord = record;
288 return found;
289 }
290
291 template <typename TChar>
Rollback()292 void TTokenizer<TChar>::Rollback() {
293 MOZ_ASSERT(base::mCursor > mRollback || base::mPastEof, "TODO!!!");
294
295 base::mPastEof = false;
296 base::mHasFailed = false;
297 base::mCursor = mRollback;
298 }
299
300 template <typename TChar>
Record(ClaimInclusion aInclude)301 void TTokenizer<TChar>::Record(ClaimInclusion aInclude) {
302 mRecord = aInclude == INCLUDE_LAST ? mRollback : base::mCursor;
303 }
304
305 template <typename TChar>
Claim(typename base::TAString & aResult,ClaimInclusion aInclusion)306 void TTokenizer<TChar>::Claim(typename base::TAString& aResult,
307 ClaimInclusion aInclusion) {
308 typename base::TAString::const_char_iterator close =
309 aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
310 aResult.Assign(Substring(mRecord, close));
311 }
312
313 template <typename TChar>
Claim(typename base::TDependentSubstring & aResult,ClaimInclusion aInclusion)314 void TTokenizer<TChar>::Claim(typename base::TDependentSubstring& aResult,
315 ClaimInclusion aInclusion) {
316 typename base::TAString::const_char_iterator close =
317 aInclusion == EXCLUDE_LAST ? mRollback : base::mCursor;
318
319 MOZ_RELEASE_ASSERT(close >= mRecord, "Overflow!");
320 aResult.Rebind(mRecord, close - mRecord);
321 }
322
323 // TokenizerBase
324
// Initializes the shared tokenizer state.
//   aWhitespaces         - zero-terminated list of whitespace characters;
//                          when null, the default sWhitespaces list is used.
//   aAdditionalWordChars - optional zero-terminated list of extra characters
//                          accepted inside words (may be null).
// The cursor and end pointers start out null; the tokenizer begins in FULL
// mode with input marked as finished, and custom token IDs are allocated
// starting after TOKEN_CUSTOM0.
template <typename TChar>
TokenizerBase<TChar>::TokenizerBase(const TChar* aWhitespaces,
                                    const TChar* aAdditionalWordChars)
    : mPastEof(false),
      mHasFailed(false),
      mInputFinished(true),
      mMode(Mode::FULL),
      // Threshold used by Parse() in CUSTOM_ONLY mode before it delivers
      // partial raw data.
      mMinRawDelivery(1024),
      mWhitespaces(aWhitespaces ? aWhitespaces : sWhitespaces),
      mAdditionalWordChars(aAdditionalWordChars),
      mCursor(nullptr),
      mEnd(nullptr),
      mNextCustomTokenID(TOKEN_CUSTOM0) {}
338
339 template <typename TChar>
AddCustomToken(const TAString & aValue,ECaseSensitivity aCaseInsensitivity,bool aEnabled)340 auto TokenizerBase<TChar>::AddCustomToken(const TAString& aValue,
341 ECaseSensitivity aCaseInsensitivity,
342 bool aEnabled) -> Token {
343 MOZ_ASSERT(!aValue.IsEmpty());
344
345 UniquePtr<Token>& t = *mCustomTokens.AppendElement();
346 t = MakeUnique<Token>();
347
348 t->mType = static_cast<TokenType>(++mNextCustomTokenID);
349 t->mCustomCaseInsensitivity = aCaseInsensitivity;
350 t->mCustomEnabled = aEnabled;
351 t->mCustom.Assign(aValue);
352 return *t;
353 }
354
355 template <typename TChar>
RemoveCustomToken(Token & aToken)356 void TokenizerBase<TChar>::RemoveCustomToken(Token& aToken) {
357 if (aToken.mType == TOKEN_UNKNOWN) {
358 // Already removed
359 return;
360 }
361
362 for (UniquePtr<Token> const& custom : mCustomTokens) {
363 if (custom->mType == aToken.mType) {
364 mCustomTokens.RemoveElement(custom);
365 aToken.mType = TOKEN_UNKNOWN;
366 return;
367 }
368 }
369
370 MOZ_ASSERT(false, "Token to remove not found");
371 }
372
373 template <typename TChar>
EnableCustomToken(Token const & aToken,bool aEnabled)374 void TokenizerBase<TChar>::EnableCustomToken(Token const& aToken,
375 bool aEnabled) {
376 if (aToken.mType == TOKEN_UNKNOWN) {
377 // Already removed
378 return;
379 }
380
381 for (UniquePtr<Token> const& custom : mCustomTokens) {
382 if (custom->Type() == aToken.Type()) {
383 // This effectively destroys the token instance.
384 custom->mCustomEnabled = aEnabled;
385 return;
386 }
387 }
388
389 MOZ_ASSERT(false, "Token to change not found");
390 }
391
// Selects the tokenizing mode consulted by Parse(): in Mode::CUSTOM_ONLY only
// custom tokens are recognized and everything in between is returned as raw
// data.
template <typename TChar>
void TokenizerBase<TChar>::SetTokenizingMode(Mode aMode) {
  mMode = aMode;
}
396
// Returns the failure flag as left by the most recent operation that set it.
template <typename TChar>
bool TokenizerBase<TChar>::HasFailed() const {
  return mHasFailed;
}
401
// Returns true until the end-of-file token has been consumed (mPastEof set).
template <typename TChar>
bool TokenizerBase<TChar>::HasInput() const {
  return !mPastEof;
}
406
// Recognizes the single token that starts at mCursor. The method is const: it
// does not move the cursor, it only fills aToken and returns the position just
// past the recognized token.
//
// When input is not finished (mInputFinished is false) and there is not enough
// data to decide, mCursor itself is returned and aToken is not assigned by
// this method.
//
// Recognition order: enabled custom tokens first; then, in CUSTOM_ONLY mode,
// raw data up to the next custom token; otherwise one of word, integer,
// whitespace, new line or single character.
template <typename TChar>
auto TokenizerBase<TChar>::Parse(Token& aToken) const ->
    typename TAString::const_char_iterator {
  if (mCursor == mEnd) {
    if (!mInputFinished) {
      // More data may still arrive; nothing can be decided yet.
      return mCursor;
    }

    aToken = Token::EndOfFile();
    return mEnd;
  }

  MOZ_RELEASE_ASSERT(mEnd >= mCursor, "Overflow!");
  typename TAString::size_type available = mEnd - mCursor;

  // Try every custom token at the cursor; IsCustom() also accumulates the
  // length of the longest enabled custom token among those visited.
  uint32_t longestCustom = 0;
  for (UniquePtr<Token> const& custom : mCustomTokens) {
    if (IsCustom(mCursor, *custom, &longestCustom)) {
      aToken = *custom;
      return mCursor + custom->mCustom.Length();
    }
  }

  if (!mInputFinished && available < longestCustom) {
    // Not enough data to deterministically decide.
    return mCursor;
  }

  typename TAString::const_char_iterator next = mCursor;

  if (mMode == Mode::CUSTOM_ONLY) {
    // We have to do a brute-force search for all of the enabled custom
    // tokens.
    while (next < mEnd) {
      ++next;
      for (UniquePtr<Token> const& custom : mCustomTokens) {
        if (IsCustom(next, *custom)) {
          // Everything before the custom token found at |next| is raw data.
          aToken = Token::Raw();
          return next;
        }
      }
    }

    if (mInputFinished) {
      // End of the data reached.
      aToken = Token::Raw();
      return next;
    }

    if (longestCustom < available && available > mMinRawDelivery) {
      // We can return some data w/o waiting for either a custom token
      // or call to FinishData() when we leave the tail where all the
      // custom tokens potentially fit, so we can't lose only partially
      // delivered tokens.  This preserves reasonable granularity.
      aToken = Token::Raw();
      return mEnd - longestCustom + 1;
    }

    // Not enough data to deterministically decide.
    return mCursor;
  }

  enum State {
    PARSE_INTEGER,
    PARSE_WORD,
    PARSE_CRLF,
    PARSE_LF,
    PARSE_WS,
    PARSE_CHAR,
  } state;

  // The first character alone selects which kind of token is being parsed.
  if (IsWordFirst(*next)) {
    state = PARSE_WORD;
  } else if (IsNumber(*next)) {
    state = PARSE_INTEGER;
  } else if (contains(mWhitespaces, *next)) {  // not UTF-8 friendly?
    state = PARSE_WS;
  } else if (*next == '\r') {
    state = PARSE_CRLF;
  } else if (*next == '\n') {
    state = PARSE_LF;
  } else {
    state = PARSE_CHAR;
  }

  // Checked arithmetic: an overflowing integer literal yields an error token.
  mozilla::CheckedUint64 resultingNumber = 0;

  while (next < mEnd) {
    switch (state) {
      case PARSE_INTEGER:
        // Keep it simple for now
        resultingNumber *= 10;
        resultingNumber += static_cast<uint64_t>(*next - '0');

        ++next;
        if (IsPending(next)) {
          // Ran out of data while more may come; the loop ends and the
          // function reports "cannot decide yet" below.
          break;
        }
        if (IsEnd(next) || !IsNumber(*next)) {
          if (!resultingNumber.isValid()) {
            aToken = Token::Error();
          } else {
            aToken = Token::Number(resultingNumber.value());
          }
          return next;
        }
        break;

      case PARSE_WORD:
        ++next;
        if (IsPending(next)) {
          break;
        }
        if (IsEnd(next) || !IsWord(*next)) {
          aToken = Token::Word(Substring(mCursor, next));
          return next;
        }
        break;

      case PARSE_CRLF:
        ++next;
        if (IsPending(next)) {
          // The optional LF might still arrive with the next chunk of data.
          break;
        }
        if (!IsEnd(next) && *next == '\n') {  // LF is optional
          ++next;
        }
        aToken = Token::NewLine();
        return next;

      case PARSE_LF:
        ++next;
        aToken = Token::NewLine();
        return next;

      case PARSE_WS:
        ++next;
        aToken = Token::Whitespace();
        return next;

      case PARSE_CHAR:
        ++next;
        aToken = Token::Char(*mCursor);
        return next;
    }  // switch (state)
  }    // while (next < end)

  // Only reachable when a multi-character token hit the end of a buffer that
  // is still expected to grow.
  MOZ_ASSERT(!mInputFinished);
  return mCursor;
}
557
558 template <typename TChar>
IsEnd(const typename TAString::const_char_iterator & caret) const559 bool TokenizerBase<TChar>::IsEnd(
560 const typename TAString::const_char_iterator& caret) const {
561 return caret == mEnd;
562 }
563
564 template <typename TChar>
IsPending(const typename TAString::const_char_iterator & caret) const565 bool TokenizerBase<TChar>::IsPending(
566 const typename TAString::const_char_iterator& caret) const {
567 return IsEnd(caret) && !mInputFinished;
568 }
569
570 template <typename TChar>
IsWordFirst(const TChar aInput) const571 bool TokenizerBase<TChar>::IsWordFirst(const TChar aInput) const {
572 // TODO: make this fully work with unicode
573 return (ToLowerCase(static_cast<uint32_t>(aInput)) !=
574 ToUpperCase(static_cast<uint32_t>(aInput))) ||
575 '_' == aInput ||
576 (mAdditionalWordChars ? contains(mAdditionalWordChars, aInput)
577 : false);
578 }
579
580 template <typename TChar>
IsWord(const TChar aInput) const581 bool TokenizerBase<TChar>::IsWord(const TChar aInput) const {
582 return IsWordFirst(aInput) || IsNumber(aInput);
583 }
584
585 template <typename TChar>
IsNumber(const TChar aInput) const586 bool TokenizerBase<TChar>::IsNumber(const TChar aInput) const {
587 // TODO: are there unicode numbers?
588 return aInput >= '0' && aInput <= '9';
589 }
590
591 template <typename TChar>
IsCustom(const typename TAString::const_char_iterator & caret,const Token & aCustomToken,uint32_t * aLongest) const592 bool TokenizerBase<TChar>::IsCustom(
593 const typename TAString::const_char_iterator& caret,
594 const Token& aCustomToken, uint32_t* aLongest) const {
595 MOZ_ASSERT(aCustomToken.mType > TOKEN_CUSTOM0);
596 if (!aCustomToken.mCustomEnabled) {
597 return false;
598 }
599
600 if (aLongest) {
601 *aLongest = std::max<uint32_t>(*aLongest, aCustomToken.mCustom.Length());
602 }
603
604 // This is not very likely to happen according to how we call this method
605 // and since it's on a hot path, it's just a diagnostic assert,
606 // not a release assert.
607 MOZ_DIAGNOSTIC_ASSERT(mEnd >= caret, "Overflow?");
608 uint32_t inputLength = mEnd - caret;
609 if (aCustomToken.mCustom.Length() > inputLength) {
610 return false;
611 }
612
613 TDependentSubstring inputFragment(caret, aCustomToken.mCustom.Length());
614 if (aCustomToken.mCustomCaseInsensitivity == CASE_INSENSITIVE) {
615 if constexpr (std::is_same_v<TChar, char>) {
616 return inputFragment.Equals(aCustomToken.mCustom,
617 nsCaseInsensitiveUTF8StringComparator);
618 } else {
619 return inputFragment.Equals(aCustomToken.mCustom,
620 nsCaseInsensitiveStringComparator);
621 }
622 }
623 return inputFragment.Equals(aCustomToken.mCustom);
624 }
625
// Forwards to Token::AssignFragment() to bind aToken's fragment to the input
// range [begin, end).
template <typename TChar>
void TokenizerBase<TChar>::AssignFragment(
    Token& aToken, typename TAString::const_char_iterator begin,
    typename TAString::const_char_iterator end) {
  aToken.AssignFragment(begin, end);
}
632
633 #ifdef DEBUG
634
635 template <typename TChar>
Validate(Token const & aToken)636 void TokenizerBase<TChar>::Validate(Token const& aToken) {
637 if (aToken.Type() == TOKEN_WORD) {
638 typename TAString::const_char_iterator c = aToken.AsString().BeginReading();
639 typename TAString::const_char_iterator e = aToken.AsString().EndReading();
640
641 if (c < e) {
642 MOZ_ASSERT(IsWordFirst(*c));
643 while (++c < e) {
644 MOZ_ASSERT(IsWord(*c));
645 }
646 }
647 }
648 }
649
650 #endif
651
652 // TokenizerBase::Token
653
// Creates a token of type TOKEN_UNKNOWN with zeroed character and integer
// values and with custom-token matching disabled and case-sensitive.
template <typename TChar>
TokenizerBase<TChar>::Token::Token()
    : mType(TOKEN_UNKNOWN),
      mChar(0),
      mInteger(0),
      mCustomCaseInsensitivity(CASE_SENSITIVE),
      mCustomEnabled(false) {}
661
// Copy constructor. mWord is a dependent view, so it is rebound to aOther's
// characters rather than copied, and only for word and custom tokens.
// NOTE(review): mFragment is not copied here -- confirm that is intended.
template <typename TChar>
TokenizerBase<TChar>::Token::Token(const Token& aOther)
    : mType(aOther.mType),
      mCustom(aOther.mCustom),
      mChar(aOther.mChar),
      mInteger(aOther.mInteger),
      mCustomCaseInsensitivity(aOther.mCustomCaseInsensitivity),
      mCustomEnabled(aOther.mCustomEnabled) {
  if (mType == TOKEN_WORD || mType > TOKEN_CUSTOM0) {
    mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
  }
}
674
675 template <typename TChar>
operator =(const Token & aOther)676 auto TokenizerBase<TChar>::Token::operator=(const Token& aOther) -> Token& {
677 mType = aOther.mType;
678 mCustom = aOther.mCustom;
679 mChar = aOther.mChar;
680 mWord.Rebind(aOther.mWord.BeginReading(), aOther.mWord.Length());
681 mInteger = aOther.mInteger;
682 mCustomCaseInsensitivity = aOther.mCustomCaseInsensitivity;
683 mCustomEnabled = aOther.mCustomEnabled;
684 return *this;
685 }
686
687 template <typename TChar>
AssignFragment(typename TAString::const_char_iterator begin,typename TAString::const_char_iterator end)688 void TokenizerBase<TChar>::Token::AssignFragment(
689 typename TAString::const_char_iterator begin,
690 typename TAString::const_char_iterator end) {
691 MOZ_RELEASE_ASSERT(end >= begin, "Overflow!");
692 mFragment.Rebind(begin, end - begin);
693 }
694
695 // static
696 template <typename TChar>
Raw()697 auto TokenizerBase<TChar>::Token::Raw() -> Token {
698 Token t;
699 t.mType = TOKEN_RAW;
700 return t;
701 }
702
703 // static
704 template <typename TChar>
Word(TAString const & aValue)705 auto TokenizerBase<TChar>::Token::Word(TAString const& aValue) -> Token {
706 Token t;
707 t.mType = TOKEN_WORD;
708 t.mWord.Rebind(aValue.BeginReading(), aValue.Length());
709 return t;
710 }
711
712 // static
713 template <typename TChar>
Char(TChar const aValue)714 auto TokenizerBase<TChar>::Token::Char(TChar const aValue) -> Token {
715 Token t;
716 t.mType = TOKEN_CHAR;
717 t.mChar = aValue;
718 return t;
719 }
720
721 // static
722 template <typename TChar>
Number(uint64_t const aValue)723 auto TokenizerBase<TChar>::Token::Number(uint64_t const aValue) -> Token {
724 Token t;
725 t.mType = TOKEN_INTEGER;
726 t.mInteger = aValue;
727 return t;
728 }
729
730 // static
731 template <typename TChar>
Whitespace()732 auto TokenizerBase<TChar>::Token::Whitespace() -> Token {
733 Token t;
734 t.mType = TOKEN_WS;
735 t.mChar = '\0';
736 return t;
737 }
738
739 // static
740 template <typename TChar>
NewLine()741 auto TokenizerBase<TChar>::Token::NewLine() -> Token {
742 Token t;
743 t.mType = TOKEN_EOL;
744 return t;
745 }
746
747 // static
748 template <typename TChar>
EndOfFile()749 auto TokenizerBase<TChar>::Token::EndOfFile() -> Token {
750 Token t;
751 t.mType = TOKEN_EOF;
752 return t;
753 }
754
755 // static
756 template <typename TChar>
Error()757 auto TokenizerBase<TChar>::Token::Error() -> Token {
758 Token t;
759 t.mType = TOKEN_ERROR;
760 return t;
761 }
762
763 template <typename TChar>
Equals(const Token & aOther) const764 bool TokenizerBase<TChar>::Token::Equals(const Token& aOther) const {
765 if (mType != aOther.mType) {
766 return false;
767 }
768
769 switch (mType) {
770 case TOKEN_INTEGER:
771 return AsInteger() == aOther.AsInteger();
772 case TOKEN_WORD:
773 return AsString() == aOther.AsString();
774 case TOKEN_CHAR:
775 return AsChar() == aOther.AsChar();
776 default:
777 return true;
778 }
779 }
780
// Returns the character value of the token. Only meaningful for TOKEN_CHAR
// and TOKEN_WS tokens (debug assert).
template <typename TChar>
TChar TokenizerBase<TChar>::Token::AsChar() const {
  MOZ_ASSERT(mType == TOKEN_CHAR || mType == TOKEN_WS);
  return mChar;
}
786
// Returns the word of a TOKEN_WORD token (debug assert on any other type) as
// a dependent substring, i.e. a view of the characters, not a copy of them.
template <typename TChar>
auto TokenizerBase<TChar>::Token::AsString() const -> TDependentSubstring {
  MOZ_ASSERT(mType == TOKEN_WORD);
  return mWord;
}
792
// Returns the numeric value of a TOKEN_INTEGER token (debug assert on any
// other type).
template <typename TChar>
uint64_t TokenizerBase<TChar>::Token::AsInteger() const {
  MOZ_ASSERT(mType == TOKEN_INTEGER);
  return mInteger;
}
798
// Explicit instantiations for the two character types used above (char and
// char16_t); the member definitions in this file are compiled for them here.
template class TokenizerBase<char>;
template class TokenizerBase<char16_t>;

template class TTokenizer<char>;
template class TTokenizer<char16_t>;
804
805 } // namespace mozilla
806