1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef INCREMENTAL_TOKENIZER_H__ 8 #define INCREMENTAL_TOKENIZER_H__ 9 10 #include "mozilla/Tokenizer.h" 11 12 #include "nsError.h" 13 #include <functional> 14 15 class nsIInputStream; 16 17 namespace mozilla { 18 19 class IncrementalTokenizer : public TokenizerBase { 20 public: 21 /** 22 * The consumer callback. The function is called for every single token 23 * as found in the input. Failure result returned by this callback stops 24 * the tokenization immediately and bubbles to result of Feed/FinishInput. 25 * 26 * Fragment()s of consumed tokens are ensured to remain valid until next call 27 * to Feed/FinishInput and are pointing to a single linear buffer. Hence, 28 * those can be safely used to accumulate the data for processing after 29 * Feed/FinishInput returned. 30 */ 31 typedef std::function<nsresult(Token const&, IncrementalTokenizer& i)> 32 Consumer; 33 34 /** 35 * For aWhitespaces and aAdditionalWordChars arguments see TokenizerBase. 36 * 37 * @param aConsumer 38 * A mandatory non-null argument, a function that consumes the tokens as 39 * they come when the tokenizer is fed. 40 * @param aRawMinBuffered 41 * When we have buffered at least aRawMinBuffered data, but there was no 42 * custom token found so far because of too small incremental feed chunks, 43 * deliver the raw data to preserve streaming and to save memory. This 44 * only has effect in OnlyCustomTokenizing mode. 45 */ 46 explicit IncrementalTokenizer(Consumer&& aConsumer, 47 const char* aWhitespaces = nullptr, 48 const char* aAdditionalWordChars = nullptr, 49 uint32_t aRawMinBuffered = 1024); 50 51 /** 52 * Pushes the input to be tokenized. These directly call the Consumer 53 * callback on every found token. Result of the Consumer callback is returned 54 * here. 55 * 56 * The tokenizer must be initialized with a valid consumer prior call to these 57 * methods. It's not allowed to call Feed/FinishInput from inside the 58 * Consumer callback. 59 */ 60 nsresult FeedInput(const nsACString& aInput); 61 nsresult FeedInput(nsIInputStream* aInput, uint32_t aCount); 62 nsresult FinishInput(); 63 64 /** 65 * Can only be called from inside the consumer callback. 66 * 67 * When there is still anything to read from the input, tokenize it, store 68 * the token type and value to aToken result and shift the cursor past this 69 * just parsed token. Each call to Next() reads another token from 70 * the input and shifts the cursor. 71 * 72 * Returns false if there is not enough data to deterministically recognize 73 * tokens or when the last returned token was EOF. 74 */ 75 MOZ_MUST_USE 76 bool Next(Token& aToken); 77 78 /** 79 * Can only be called from inside the consumer callback. 80 * 81 * Tells the tokenizer to revert the cursor and stop the async parsing until 82 * next feed of the input. This is useful when more than one token is needed 83 * to decide on the syntax but there is not enough input to get a next token 84 * (Next() returned false.) 85 */ 86 void NeedMoreInput(); 87 88 /** 89 * Can only be called from inside the consumer callback. 90 * 91 * This makes the consumer callback be called again while parsing 92 * the input at the previous cursor position again. This is useful when 93 * the tokenizer state (custom tokens, tokenization mode) has changed and 94 * we want to re-parse the input again. 95 */ 96 void Rollback(); 97 98 private: 99 // Loops over the input with TokenizerBase::Parse and calls the Consumer 100 // callback. 101 nsresult Process(); 102 103 #ifdef DEBUG 104 // True when inside the consumer callback, used only for assertions. 105 bool mConsuming; 106 #endif // DEBUG 107 // Modifyable only from the Consumer callback, tells the parser to break, 108 // rollback and wait for more input. 109 bool mNeedMoreInput; 110 // Modifyable only from the Consumer callback, tells the parser to rollback 111 // and parse the input again, with (if modified) new settings of the 112 // tokenizer. 113 bool mRollback; 114 // The input buffer. Updated with each call to Feed/FinishInput. 115 nsCString mInput; 116 // Numerical index pointing at the current cursor position. We don't keep 117 // direct reference to the string buffer since the buffer gets often 118 // reallocated. 119 nsCString::index_type mInputCursor; 120 // Refernce to the consumer function. 121 Consumer mConsumer; 122 }; 123 124 } // namespace mozilla 125 126 #endif 127