1 #ifndef SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
2 #define SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
3 
4 #if defined(_MSC_VER) ||                                            \
5     (defined(__GNUC__) && (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
6      (__GNUC__ >= 4))  // GCC supports "pragma once" correctly since 3.4
7 #pragma once
8 #endif
9 
10 #include <cstddef>
11 #include <ios>
12 #include <map>
13 #include <queue>
14 #include <set>
15 #include <stack>
16 #include <string>
17 
18 #include "ptr_vector.h"
19 #include "stream.h"
20 #include "token.h"
21 #include "yaml-cpp/mark.h"
22 
23 namespace YAML {
24 class Node;
25 class RegEx;
26 
27 /**
28  * A scanner transforms a stream of characters into a stream of tokens.
29  */
30 class Scanner {
31  public:
32   explicit Scanner(std::istream &in);
33   ~Scanner();
34 
35   /** Returns true if there are no more tokens to be read. */
36   bool empty();
37 
38   /** Removes the next token in the queue. */
39   void pop();
40 
41   /** Returns, but does not remove, the next token in the queue. */
42   Token &peek();
43 
44   /** Returns the current mark in the input stream. */
45   Mark mark() const;
46 
47  private:
48   struct IndentMarker {
49     enum INDENT_TYPE { MAP, SEQ, NONE };
50     enum STATUS { VALID, INVALID, UNKNOWN };
IndentMarkerIndentMarker51     IndentMarker(int column_, INDENT_TYPE type_)
52         : column(column_), type(type_), status(VALID), pStartToken(0) {}
53 
54     int column;
55     INDENT_TYPE type;
56     STATUS status;
57     Token *pStartToken;
58   };
59 
60   enum FLOW_MARKER { FLOW_MAP, FLOW_SEQ };
61 
62  private:
63   // scanning
64 
65   /**
66    * Scans until there's a valid token at the front of the queue, or the queue
67    * is empty. The state can be checked by {@link #empty}, and the next token
68    * retrieved by {@link #peek}.
69    */
70   void EnsureTokensInQueue();
71 
72   /**
73    * The main scanning function; this method branches out to scan whatever the
74    * next token should be.
75    */
76   void ScanNextToken();
77 
78   /** Eats the input stream until it reaches the next token-like thing. */
79   void ScanToNextToken();
80 
81   /** Sets the initial conditions for starting a stream. */
82   void StartStream();
83 
84   /** Closes out the stream, finish up, etc. */
85   void EndStream();
86 
87   Token *PushToken(Token::TYPE type);
88 
InFlowContext()89   bool InFlowContext() const { return !m_flows.empty(); }
InBlockContext()90   bool InBlockContext() const { return m_flows.empty(); }
GetFlowLevel()91   std::size_t GetFlowLevel() const { return m_flows.size(); }
92 
93   Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const;
94 
95   /**
96    * Pushes an indentation onto the stack, and enqueues the proper token
97    * (sequence start or mapping start).
98    *
99    * @return the indent marker it generates (if any).
100    */
101   IndentMarker *PushIndentTo(int column, IndentMarker::INDENT_TYPE type);
102 
103   /**
104    * Pops indentations off the stack until it reaches the current indentation
105    * level, and enqueues the proper token each time. Then pops all invalid
106    * indentations off.
107    */
108   void PopIndentToHere();
109 
110   /**
111    * Pops all indentations (except for the base empty one) off the stack, and
112    * enqueues the proper token each time.
113    */
114   void PopAllIndents();
115 
116   /** Pops a single indent, pushing the proper token. */
117   void PopIndent();
118   int GetTopIndent() const;
119 
120   // checking input
121   bool CanInsertPotentialSimpleKey() const;
122   bool ExistsActiveSimpleKey() const;
123   void InsertPotentialSimpleKey();
124   void InvalidateSimpleKey();
125   bool VerifySimpleKey();
126   void PopAllSimpleKeys();
127 
128   /**
129    * Throws a ParserException with the current token location (if available),
130    * and does not parse any more tokens.
131    */
132   void ThrowParserException(const std::string &msg) const;
133 
134   bool IsWhitespaceToBeEaten(char ch);
135 
136   /**
137    * Returns the appropriate regex to check if the next token is a value token.
138    */
139   const RegEx &GetValueRegex() const;
140 
141   struct SimpleKey {
142     SimpleKey(const Mark &mark_, std::size_t flowLevel_);
143 
144     void Validate();
145     void Invalidate();
146 
147     Mark mark;
148     std::size_t flowLevel;
149     IndentMarker *pIndent;
150     Token *pMapStart, *pKey;
151   };
152 
153   // and the tokens
154   void ScanDirective();
155   void ScanDocStart();
156   void ScanDocEnd();
157   void ScanBlockSeqStart();
158   void ScanBlockMapSTart();
159   void ScanBlockEnd();
160   void ScanBlockEntry();
161   void ScanFlowStart();
162   void ScanFlowEnd();
163   void ScanFlowEntry();
164   void ScanKey();
165   void ScanValue();
166   void ScanAnchorOrAlias();
167   void ScanTag();
168   void ScanPlainScalar();
169   void ScanQuotedScalar();
170   void ScanBlockScalar();
171 
172  private:
173   // the stream
174   Stream INPUT;
175 
176   // the output (tokens)
177   std::queue<Token> m_tokens;
178 
179   // state info
180   bool m_startedStream, m_endedStream;
181   bool m_simpleKeyAllowed;
182   bool m_canBeJSONFlow;
183   std::stack<SimpleKey> m_simpleKeys;
184   std::stack<IndentMarker *> m_indents;
185   ptr_vector<IndentMarker> m_indentRefs;  // for "garbage collection"
186   std::stack<FLOW_MARKER> m_flows;
187 };
188 }
189 
190 #endif  // SCANNER_H_62B23520_7C8E_11DE_8A39_0800200C9A66
191