1 #include <cassert>
2 #include <memory>
3 
4 #include "exp.h"
5 #include "scanner.h"
6 #include "token.h"
7 #include "yaml-cpp/exceptions.h"  // IWYU pragma: keep
8 
9 namespace YAML {
Scanner(std::istream & in)10 Scanner::Scanner(std::istream& in)
11     : INPUT(in),
12       m_tokens{},
13       m_startedStream(false),
14       m_endedStream(false),
15       m_simpleKeyAllowed(false),
16       m_canBeJSONFlow(false),
17       m_simpleKeys{},
18       m_indents{},
19       m_indentRefs{},
20       m_flows{} {}
21 
~Scanner()22 Scanner::~Scanner() {}
23 
empty()24 bool Scanner::empty() {
25   EnsureTokensInQueue();
26   return m_tokens.empty();
27 }
28 
pop()29 void Scanner::pop() {
30   EnsureTokensInQueue();
31   if (!m_tokens.empty())
32     m_tokens.pop();
33 }
34 
peek()35 Token& Scanner::peek() {
36   EnsureTokensInQueue();
37   assert(!m_tokens.empty());  // should we be asserting here? I mean, we really
38                               // just be checking
39                               // if it's empty before peeking.
40 
41 #if 0
42 		static Token *pLast = 0;
43 		if(pLast != &m_tokens.front())
44 			std::cerr << "peek: " << m_tokens.front() << "\n";
45 		pLast = &m_tokens.front();
46 #endif
47 
48   return m_tokens.front();
49 }
50 
mark() const51 Mark Scanner::mark() const { return INPUT.mark(); }
52 
EnsureTokensInQueue()53 void Scanner::EnsureTokensInQueue() {
54   while (1) {
55     if (!m_tokens.empty()) {
56       Token& token = m_tokens.front();
57 
58       // if this guy's valid, then we're done
59       if (token.status == Token::VALID) {
60         return;
61       }
62 
63       // here's where we clean up the impossible tokens
64       if (token.status == Token::INVALID) {
65         m_tokens.pop();
66         continue;
67       }
68 
69       // note: what's left are the unverified tokens
70     }
71 
72     // no token? maybe we've actually finished
73     if (m_endedStream) {
74       return;
75     }
76 
77     // no? then scan...
78     ScanNextToken();
79   }
80 }
81 
// Scans the next token (or group of tokens) from INPUT.
// . Does nothing once the stream has ended.
// . The very first call only starts the stream (StartStream) and returns.
// . Otherwise it skips to the next token, pops any indents that end here, and
//   then dispatches to the matching Scan* routine. The checks are tried from
//   top to bottom and the first one that matches wins.
// . Throws ParserException (ErrorMsg::UNKNOWN_TOKEN) if nothing matches.
void Scanner::ScanNextToken() {
  if (m_endedStream) {
    return;
  }

  if (!m_startedStream) {
    return StartStream();
  }

  // get rid of whitespace, etc. (in between tokens it should be irrelevant)
  ScanToNextToken();

  // maybe need to end some blocks
  PopIndentToHere();

  // *****
  // And now branch based on the next few characters!
  // *****

  // end of stream
  if (!INPUT) {
    return EndStream();
  }

  // directive (only recognized at the start of a line)
  if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
    return ScanDirective();
  }

  // document token
  // (document start/end markers are likewise only checked in column 0)
  if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
    return ScanDocStart();
  }

  if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
    return ScanDocEnd();
  }

  // flow start/end/entry
  if (INPUT.peek() == Keys::FlowSeqStart ||
      INPUT.peek() == Keys::FlowMapStart) {
    return ScanFlowStart();
  }

  if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
    return ScanFlowEnd();
  }

  if (INPUT.peek() == Keys::FlowEntry) {
    return ScanFlowEntry();
  }

  // block/map stuff
  if (Exp::BlockEntry().Matches(INPUT)) {
    return ScanBlockEntry();
  }

  // the key pattern depends on whether we are in block or flow context
  if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
    return ScanKey();
  }

  // the value pattern is context-dependent as well (see GetValueRegex)
  if (GetValueRegex().Matches(INPUT)) {
    return ScanValue();
  }

  // alias/anchor
  if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
    return ScanAnchorOrAlias();
  }

  // tag
  if (INPUT.peek() == Keys::Tag) {
    return ScanTag();
  }

  // special scalars
  // (literal/folded block scalars are only recognized in block context)
  if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
                           INPUT.peek() == Keys::FoldedScalar)) {
    return ScanBlockScalar();
  }

  // single- or double-quoted scalar
  if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
    return ScanQuotedScalar();
  }

  // plain scalars
  if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
          .Matches(INPUT)) {
    return ScanPlainScalar();
  }

  // don't know what it is!
  throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
}
175 
ScanToNextToken()176 void Scanner::ScanToNextToken() {
177   while (1) {
178     // first eat whitespace
179     while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
180       if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
181         m_simpleKeyAllowed = false;
182       }
183       INPUT.eat(1);
184     }
185 
186     // then eat a comment
187     if (Exp::Comment().Matches(INPUT)) {
188       // eat until line break
189       while (INPUT && !Exp::Break().Matches(INPUT)) {
190         INPUT.eat(1);
191       }
192     }
193 
194     // if it's NOT a line break, then we're done!
195     if (!Exp::Break().Matches(INPUT)) {
196       break;
197     }
198 
199     // otherwise, let's eat the line break and keep going
200     int n = Exp::Break().Match(INPUT);
201     INPUT.eat(n);
202 
203     // oh yeah, and let's get rid of that simple key
204     InvalidateSimpleKey();
205 
206     // new line - we may be able to accept a simple key now
207     if (InBlockContext()) {
208       m_simpleKeyAllowed = true;
209     }
210   }
211 }
212 
213 ///////////////////////////////////////////////////////////////////////
214 // Misc. helpers
215 
216 // IsWhitespaceToBeEaten
217 // . We can eat whitespace if it's a space or tab
218 // . Note: originally tabs in block context couldn't be eaten
219 //         "where a simple key could be allowed
220 //         (i.e., not at the beginning of a line, or following '-', '?', or
221 // ':')"
222 //   I think this is wrong, since tabs can be non-content whitespace; it's just
223 //   that they can't contribute to indentation, so once you've seen a tab in a
224 //   line, you can't start a simple key
IsWhitespaceToBeEaten(char ch)225 bool Scanner::IsWhitespaceToBeEaten(char ch) {
226   if (ch == ' ') {
227     return true;
228   }
229 
230   if (ch == '\t') {
231     return true;
232   }
233 
234   return false;
235 }
236 
GetValueRegex() const237 const RegEx& Scanner::GetValueRegex() const {
238   if (InBlockContext()) {
239     return Exp::Value();
240   }
241 
242   return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
243 }
244 
StartStream()245 void Scanner::StartStream() {
246   m_startedStream = true;
247   m_simpleKeyAllowed = true;
248   std::unique_ptr<IndentMarker> pIndent(
249       new IndentMarker(-1, IndentMarker::NONE));
250   m_indentRefs.push_back(std::move(pIndent));
251   m_indents.push(&m_indentRefs.back());
252 }
253 
EndStream()254 void Scanner::EndStream() {
255   // force newline
256   if (INPUT.column() > 0) {
257     INPUT.ResetColumn();
258   }
259 
260   PopAllIndents();
261   PopAllSimpleKeys();
262 
263   m_simpleKeyAllowed = false;
264   m_endedStream = true;
265 }
266 
PushToken(Token::TYPE type)267 Token* Scanner::PushToken(Token::TYPE type) {
268   m_tokens.push(Token(type, INPUT.mark()));
269   return &m_tokens.back();
270 }
271 
GetStartTokenFor(IndentMarker::INDENT_TYPE type) const272 Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
273   switch (type) {
274     case IndentMarker::SEQ:
275       return Token::BLOCK_SEQ_START;
276     case IndentMarker::MAP:
277       return Token::BLOCK_MAP_START;
278     case IndentMarker::NONE:
279       assert(false);
280       break;
281   }
282   assert(false);
283   throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
284 }
285 
PushIndentTo(int column,IndentMarker::INDENT_TYPE type)286 Scanner::IndentMarker* Scanner::PushIndentTo(int column,
287                                              IndentMarker::INDENT_TYPE type) {
288   // are we in flow?
289   if (InFlowContext()) {
290     return nullptr;
291   }
292 
293   std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
294   IndentMarker& indent = *pIndent;
295   const IndentMarker& lastIndent = *m_indents.top();
296 
297   // is this actually an indentation?
298   if (indent.column < lastIndent.column) {
299     return nullptr;
300   }
301   if (indent.column == lastIndent.column &&
302       !(indent.type == IndentMarker::SEQ &&
303         lastIndent.type == IndentMarker::MAP)) {
304     return nullptr;
305   }
306 
307   // push a start token
308   indent.pStartToken = PushToken(GetStartTokenFor(type));
309 
310   // and then the indent
311   m_indents.push(&indent);
312   m_indentRefs.push_back(std::move(pIndent));
313   return &m_indentRefs.back();
314 }
315 
PopIndentToHere()316 void Scanner::PopIndentToHere() {
317   // are we in flow?
318   if (InFlowContext()) {
319     return;
320   }
321 
322   // now pop away
323   while (!m_indents.empty()) {
324     const IndentMarker& indent = *m_indents.top();
325     if (indent.column < INPUT.column()) {
326       break;
327     }
328     if (indent.column == INPUT.column() &&
329         !(indent.type == IndentMarker::SEQ &&
330           !Exp::BlockEntry().Matches(INPUT))) {
331       break;
332     }
333 
334     PopIndent();
335   }
336 
337   while (!m_indents.empty() &&
338          m_indents.top()->status == IndentMarker::INVALID) {
339     PopIndent();
340   }
341 }
342 
PopAllIndents()343 void Scanner::PopAllIndents() {
344   // are we in flow?
345   if (InFlowContext()) {
346     return;
347   }
348 
349   // now pop away
350   while (!m_indents.empty()) {
351     const IndentMarker& indent = *m_indents.top();
352     if (indent.type == IndentMarker::NONE) {
353       break;
354     }
355 
356     PopIndent();
357   }
358 }
359 
PopIndent()360 void Scanner::PopIndent() {
361   const IndentMarker& indent = *m_indents.top();
362   m_indents.pop();
363 
364   if (indent.status != IndentMarker::VALID) {
365     InvalidateSimpleKey();
366     return;
367   }
368 
369   if (indent.type == IndentMarker::SEQ) {
370     m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
371   } else if (indent.type == IndentMarker::MAP) {
372     m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
373   }
374 }
375 
GetTopIndent() const376 int Scanner::GetTopIndent() const {
377   if (m_indents.empty()) {
378     return 0;
379   }
380   return m_indents.top()->column;
381 }
382 
ThrowParserException(const std::string & msg) const383 void Scanner::ThrowParserException(const std::string& msg) const {
384   Mark mark = Mark::null_mark();
385   if (!m_tokens.empty()) {
386     const Token& token = m_tokens.front();
387     mark = token.mark;
388   }
389   throw ParserException(mark, msg);
390 }
391 }  // namespace YAML
392