1 #ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 2 #define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 3 4 #if defined(_MSC_VER) || \ 5 (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \ 6 (__GNUC__ >= 4)) // GCC supports "pragma once" correctly since 3.4 7 #pragma once 8 #endif 9 10 #include <cstddef> 11 #include <ios> 12 #include <map> 13 #include <queue> 14 #include <set> 15 #include <stack> 16 #include <string> 17 18 #include "ptr_vector.h" 19 #include "stream.h" 20 #include "token.h" 21 #include "yaml-cpp/mark.h" 22 23 namespace YAML { 24 class Node; 25 class RegEx; 26 27 /** 28 * A scanner transforms a stream of characters into a stream of tokens. 29 */ 30 class Scanner { 31 public: 32 explicit Scanner(std::istream &in); 33 ~Scanner(); 34 35 /** Returns true if there are no more tokens to be read. */ 36 bool empty(); 37 38 /** Removes the next token in the queue. */ 39 void pop(); 40 41 /** Returns, but does not remove, the next token in the queue. */ 42 Token &peek(); 43 44 /** Returns the current mark in the input stream. */ 45 Mark mark() const; 46 47 private: 48 struct IndentMarker { 49 enum INDENT_TYPE { MAP, SEQ, NONE }; 50 enum STATUS { VALID, INVALID, UNKNOWN }; IndentMarkerIndentMarker51 IndentMarker(int column_, INDENT_TYPE type_) 52 : column(column_), type(type_), status(VALID), pStartToken(0) {} 53 54 int column; 55 INDENT_TYPE type; 56 STATUS status; 57 Token *pStartToken; 58 }; 59 60 enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ }; 61 62 private: 63 // scanning 64 65 /** 66 * Scans until there's a valid token at the front of the queue, or the queue 67 * is empty. The state can be checked by {@link #empty}, and the next token 68 * retrieved by {@link #peek}. 69 */ 70 void EnsureTokensInQueue(); 71 72 /** 73 * The main scanning function; this method branches out to scan whatever the 74 * next token should be. 75 */ 76 void ScanNextToken(); 77 78 /** Eats the input stream until it reaches the next token-like thing. */ 79 void ScanToNextToken(); 80 81 /** Sets the initial conditions for starting a stream. */ 82 void StartStream(); 83 84 /** Closes out the stream, finish up, etc. */ 85 void EndStream(); 86 87 Token *PushToken(Token::TYPE type); 88 InFlowContext()89 bool InFlowContext() const { return !m_flows.empty(); } InBlockContext()90 bool InBlockContext() const { return m_flows.empty(); } GetFlowLevel()91 std::size_t GetFlowLevel() const { return m_flows.size(); } 92 93 Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const; 94 95 /** 96 * Pushes an indentation onto the stack, and enqueues the proper token 97 * (sequence start or mapping start). 98 * 99 * @return the indent marker it generates (if any). 100 */ 101 IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type); 102 103 /** 104 * Pops indentations off the stack until it reaches the current indentation 105 * level, and enqueues the proper token each time. Then pops all invalid 106 * indentations off. 107 */ 108 void PopIndentToHere(); 109 110 /** 111 * Pops all indentations (except for the base empty one) off the stack, and 112 * enqueues the proper token each time. 113 */ 114 void PopAllIndents(); 115 116 /** Pops a single indent, pushing the proper token. */ 117 void PopIndent(); 118 int GetTopIndent() const; 119 120 // checking input 121 bool CanInsertPotentialSimpleKey() const; 122 bool ExistsActiveSimpleKey() const; 123 void InsertPotentialSimpleKey(); 124 void InvalidateSimpleKey(); 125 bool VerifySimpleKey(); 126 void PopAllSimpleKeys(); 127 128 /** 129 * Throws a ParserException with the current token location (if available), 130 * and does not parse any more tokens. 131 */ 132 void ThrowParserException(const std::string &msg) const; 133 134 bool IsWhitespaceToBeEaten(char ch); 135 136 /** 137 * Returns the appropriate regex to check if the next token is a value token. 138 */ 139 const RegEx &GetValueRegex() const; 140 141 struct SimpleKey { 142 SimpleKey(const Mark &mark_, std::size_t flowLevel_); 143 144 void Validate(); 145 void Invalidate(); 146 147 Mark mark; 148 std::size_t flowLevel; 149 IndentMarker *pIndent; 150 Token *pMapStart, *pKey; 151 }; 152 153 // and the tokens 154 void ScanDirective(); 155 void ScanDocStart(); 156 void ScanDocEnd(); 157 void ScanBlockSeqStart(); 158 void ScanBlockMapSTart(); 159 void ScanBlockEnd(); 160 void ScanBlockEntry(); 161 void ScanFlowStart(); 162 void ScanFlowEnd(); 163 void ScanFlowEntry(); 164 void ScanKey(); 165 void ScanValue(); 166 void ScanAnchorOrAlias(); 167 void ScanTag(); 168 void ScanPlainScalar(); 169 void ScanQuotedScalar(); 170 void ScanBlockScalar(); 171 172 private: 173 // the stream 174 Stream INPUT; 175 176 // the output (tokens) 177 std::queue<Token> m_tokens; 178 179 // state info 180 bool m_startedStream, m_endedStream; 181 bool m_simpleKeyAllowed; 182 bool m_canBeJSONFlow; 183 std::stack<SimpleKey> m_simpleKeys; 184 std::stack<IndentMarker *> m_indents; 185 ptr_vector<IndentMarker> m_indentRefs; // for "garbage collection" 186 std::stack<FLOW_MARKER> m_flows; 187 }; 188 } 189 190 #endif // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66 191