1 #include <algorithm>
2 #include <cstdio>
3 #include <sstream>
4 
5 #include "collectionstack.h"  // IWYU pragma: keep
6 #include "scanner.h"
7 #include "singledocparser.h"
8 #include "tag.h"
9 #include "token.h"
10 #include "yaml-cpp/emitterstyle.h"
11 #include "yaml-cpp/eventhandler.h"
12 #include "yaml-cpp/exceptions.h"  // IWYU pragma: keep
13 #include "yaml-cpp/mark.h"
14 #include "yaml-cpp/null.h"
15 
16 namespace YAML {
SingleDocParser(Scanner & scanner,const Directives & directives)17 SingleDocParser::SingleDocParser(Scanner& scanner, const Directives& directives)
18     : m_scanner(scanner),
19       m_directives(directives),
20       m_pCollectionStack(new CollectionStack),
21       m_anchors{},
22       m_curAnchor(0) {}
23 
~SingleDocParser()24 SingleDocParser::~SingleDocParser() {}
25 
26 // HandleDocument
27 // . Handles the next document
28 // . Throws a ParserException on error.
HandleDocument(EventHandler & eventHandler)29 void SingleDocParser::HandleDocument(EventHandler& eventHandler) {
30   assert(!m_scanner.empty());  // guaranteed that there are tokens
31   assert(!m_curAnchor);
32 
33   eventHandler.OnDocumentStart(m_scanner.peek().mark);
34 
35   // eat doc start
36   if (m_scanner.peek().type == Token::DOC_START)
37     m_scanner.pop();
38 
39   // recurse!
40   HandleNode(eventHandler);
41 
42   eventHandler.OnDocumentEnd();
43 
44   // and finally eat any doc ends we see
45   while (!m_scanner.empty() && m_scanner.peek().type == Token::DOC_END)
46     m_scanner.pop();
47 }
48 
HandleNode(EventHandler & eventHandler)49 void SingleDocParser::HandleNode(EventHandler& eventHandler) {
50   // an empty node *is* a possibility
51   if (m_scanner.empty()) {
52     eventHandler.OnNull(m_scanner.mark(), NullAnchor);
53     return;
54   }
55 
56   // save location
57   Mark mark = m_scanner.peek().mark;
58 
59   // special case: a value node by itself must be a map, with no header
60   if (m_scanner.peek().type == Token::VALUE) {
61     eventHandler.OnMapStart(mark, "?", NullAnchor, EmitterStyle::Default);
62     HandleMap(eventHandler);
63     eventHandler.OnMapEnd();
64     return;
65   }
66 
67   // special case: an alias node
68   if (m_scanner.peek().type == Token::ALIAS) {
69     eventHandler.OnAlias(mark, LookupAnchor(mark, m_scanner.peek().value));
70     m_scanner.pop();
71     return;
72   }
73 
74   std::string tag;
75   std::string anchor_name;
76   anchor_t anchor;
77   ParseProperties(tag, anchor, anchor_name);
78 
79   if (!anchor_name.empty())
80     eventHandler.OnAnchor(mark, anchor_name);
81 
82   const Token& token = m_scanner.peek();
83 
84   if (token.type == Token::PLAIN_SCALAR && IsNullString(token.value)) {
85     eventHandler.OnNull(mark, anchor);
86     m_scanner.pop();
87     return;
88   }
89 
90   // add non-specific tags
91   if (tag.empty())
92     tag = (token.type == Token::NON_PLAIN_SCALAR ? "!" : "?");
93 
94   // now split based on what kind of node we should be
95   switch (token.type) {
96     case Token::PLAIN_SCALAR:
97     case Token::NON_PLAIN_SCALAR:
98       eventHandler.OnScalar(mark, tag, anchor, token.value);
99       m_scanner.pop();
100       return;
101     case Token::FLOW_SEQ_START:
102       eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Flow);
103       HandleSequence(eventHandler);
104       eventHandler.OnSequenceEnd();
105       return;
106     case Token::BLOCK_SEQ_START:
107       eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Block);
108       HandleSequence(eventHandler);
109       eventHandler.OnSequenceEnd();
110       return;
111     case Token::FLOW_MAP_START:
112       eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
113       HandleMap(eventHandler);
114       eventHandler.OnMapEnd();
115       return;
116     case Token::BLOCK_MAP_START:
117       eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Block);
118       HandleMap(eventHandler);
119       eventHandler.OnMapEnd();
120       return;
121     case Token::KEY:
122       // compact maps can only go in a flow sequence
123       if (m_pCollectionStack->GetCurCollectionType() ==
124           CollectionType::FlowSeq) {
125         eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
126         HandleMap(eventHandler);
127         eventHandler.OnMapEnd();
128         return;
129       }
130       break;
131     default:
132       break;
133   }
134 
135   if (tag == "?")
136     eventHandler.OnNull(mark, anchor);
137   else
138     eventHandler.OnScalar(mark, tag, anchor, "");
139 }
140 
HandleSequence(EventHandler & eventHandler)141 void SingleDocParser::HandleSequence(EventHandler& eventHandler) {
142   // split based on start token
143   switch (m_scanner.peek().type) {
144     case Token::BLOCK_SEQ_START:
145       HandleBlockSequence(eventHandler);
146       break;
147     case Token::FLOW_SEQ_START:
148       HandleFlowSequence(eventHandler);
149       break;
150     default:
151       break;
152   }
153 }
154 
HandleBlockSequence(EventHandler & eventHandler)155 void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) {
156   // eat start token
157   m_scanner.pop();
158   m_pCollectionStack->PushCollectionType(CollectionType::BlockSeq);
159 
160   while (1) {
161     if (m_scanner.empty())
162       throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ);
163 
164     Token token = m_scanner.peek();
165     if (token.type != Token::BLOCK_ENTRY && token.type != Token::BLOCK_SEQ_END)
166       throw ParserException(token.mark, ErrorMsg::END_OF_SEQ);
167 
168     m_scanner.pop();
169     if (token.type == Token::BLOCK_SEQ_END)
170       break;
171 
172     // check for null
173     if (!m_scanner.empty()) {
174       const Token& nextToken = m_scanner.peek();
175       if (nextToken.type == Token::BLOCK_ENTRY ||
176           nextToken.type == Token::BLOCK_SEQ_END) {
177         eventHandler.OnNull(nextToken.mark, NullAnchor);
178         continue;
179       }
180     }
181 
182     HandleNode(eventHandler);
183   }
184 
185   m_pCollectionStack->PopCollectionType(CollectionType::BlockSeq);
186 }
187 
HandleFlowSequence(EventHandler & eventHandler)188 void SingleDocParser::HandleFlowSequence(EventHandler& eventHandler) {
189   // eat start token
190   m_scanner.pop();
191   m_pCollectionStack->PushCollectionType(CollectionType::FlowSeq);
192 
193   while (1) {
194     if (m_scanner.empty())
195       throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
196 
197     // first check for end
198     if (m_scanner.peek().type == Token::FLOW_SEQ_END) {
199       m_scanner.pop();
200       break;
201     }
202 
203     // then read the node
204     HandleNode(eventHandler);
205 
206     if (m_scanner.empty())
207       throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
208 
209     // now eat the separator (or could be a sequence end, which we ignore - but
210     // if it's neither, then it's a bad node)
211     Token& token = m_scanner.peek();
212     if (token.type == Token::FLOW_ENTRY)
213       m_scanner.pop();
214     else if (token.type != Token::FLOW_SEQ_END)
215       throw ParserException(token.mark, ErrorMsg::END_OF_SEQ_FLOW);
216   }
217 
218   m_pCollectionStack->PopCollectionType(CollectionType::FlowSeq);
219 }
220 
HandleMap(EventHandler & eventHandler)221 void SingleDocParser::HandleMap(EventHandler& eventHandler) {
222   // split based on start token
223   switch (m_scanner.peek().type) {
224     case Token::BLOCK_MAP_START:
225       HandleBlockMap(eventHandler);
226       break;
227     case Token::FLOW_MAP_START:
228       HandleFlowMap(eventHandler);
229       break;
230     case Token::KEY:
231       HandleCompactMap(eventHandler);
232       break;
233     case Token::VALUE:
234       HandleCompactMapWithNoKey(eventHandler);
235       break;
236     default:
237       break;
238   }
239 }
240 
HandleBlockMap(EventHandler & eventHandler)241 void SingleDocParser::HandleBlockMap(EventHandler& eventHandler) {
242   // eat start token
243   m_scanner.pop();
244   m_pCollectionStack->PushCollectionType(CollectionType::BlockMap);
245 
246   while (1) {
247     if (m_scanner.empty())
248       throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP);
249 
250     Token token = m_scanner.peek();
251     if (token.type != Token::KEY && token.type != Token::VALUE &&
252         token.type != Token::BLOCK_MAP_END)
253       throw ParserException(token.mark, ErrorMsg::END_OF_MAP);
254 
255     if (token.type == Token::BLOCK_MAP_END) {
256       m_scanner.pop();
257       break;
258     }
259 
260     // grab key (if non-null)
261     if (token.type == Token::KEY) {
262       m_scanner.pop();
263       HandleNode(eventHandler);
264     } else {
265       eventHandler.OnNull(token.mark, NullAnchor);
266     }
267 
268     // now grab value (optional)
269     if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
270       m_scanner.pop();
271       HandleNode(eventHandler);
272     } else {
273       eventHandler.OnNull(token.mark, NullAnchor);
274     }
275   }
276 
277   m_pCollectionStack->PopCollectionType(CollectionType::BlockMap);
278 }
279 
HandleFlowMap(EventHandler & eventHandler)280 void SingleDocParser::HandleFlowMap(EventHandler& eventHandler) {
281   // eat start token
282   m_scanner.pop();
283   m_pCollectionStack->PushCollectionType(CollectionType::FlowMap);
284 
285   while (1) {
286     if (m_scanner.empty())
287       throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
288 
289     Token& token = m_scanner.peek();
290     const Mark mark = token.mark;
291     // first check for end
292     if (token.type == Token::FLOW_MAP_END) {
293       m_scanner.pop();
294       break;
295     }
296 
297     // grab key (if non-null)
298     if (token.type == Token::KEY) {
299       m_scanner.pop();
300       HandleNode(eventHandler);
301     } else {
302       eventHandler.OnNull(mark, NullAnchor);
303     }
304 
305     // now grab value (optional)
306     if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
307       m_scanner.pop();
308       HandleNode(eventHandler);
309     } else {
310       eventHandler.OnNull(mark, NullAnchor);
311     }
312 
313     if (m_scanner.empty())
314       throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
315 
316     // now eat the separator (or could be a map end, which we ignore - but if
317     // it's neither, then it's a bad node)
318     Token& nextToken = m_scanner.peek();
319     if (nextToken.type == Token::FLOW_ENTRY)
320       m_scanner.pop();
321     else if (nextToken.type != Token::FLOW_MAP_END)
322       throw ParserException(nextToken.mark, ErrorMsg::END_OF_MAP_FLOW);
323   }
324 
325   m_pCollectionStack->PopCollectionType(CollectionType::FlowMap);
326 }
327 
328 // . Single "key: value" pair in a flow sequence
HandleCompactMap(EventHandler & eventHandler)329 void SingleDocParser::HandleCompactMap(EventHandler& eventHandler) {
330   m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
331 
332   // grab key
333   Mark mark = m_scanner.peek().mark;
334   m_scanner.pop();
335   HandleNode(eventHandler);
336 
337   // now grab value (optional)
338   if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
339     m_scanner.pop();
340     HandleNode(eventHandler);
341   } else {
342     eventHandler.OnNull(mark, NullAnchor);
343   }
344 
345   m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
346 }
347 
348 // . Single ": value" pair in a flow sequence
HandleCompactMapWithNoKey(EventHandler & eventHandler)349 void SingleDocParser::HandleCompactMapWithNoKey(EventHandler& eventHandler) {
350   m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
351 
352   // null key
353   eventHandler.OnNull(m_scanner.peek().mark, NullAnchor);
354 
355   // grab value
356   m_scanner.pop();
357   HandleNode(eventHandler);
358 
359   m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
360 }
361 
362 // ParseProperties
363 // . Grabs any tag or anchor tokens and deals with them.
ParseProperties(std::string & tag,anchor_t & anchor,std::string & anchor_name)364 void SingleDocParser::ParseProperties(std::string& tag, anchor_t& anchor,
365                                       std::string& anchor_name) {
366   tag.clear();
367   anchor_name.clear();
368   anchor = NullAnchor;
369 
370   while (1) {
371     if (m_scanner.empty())
372       return;
373 
374     switch (m_scanner.peek().type) {
375       case Token::TAG:
376         ParseTag(tag);
377         break;
378       case Token::ANCHOR:
379         ParseAnchor(anchor, anchor_name);
380         break;
381       default:
382         return;
383     }
384   }
385 }
386 
ParseTag(std::string & tag)387 void SingleDocParser::ParseTag(std::string& tag) {
388   Token& token = m_scanner.peek();
389   if (!tag.empty())
390     throw ParserException(token.mark, ErrorMsg::MULTIPLE_TAGS);
391 
392   Tag tagInfo(token);
393   tag = tagInfo.Translate(m_directives);
394   m_scanner.pop();
395 }
396 
ParseAnchor(anchor_t & anchor,std::string & anchor_name)397 void SingleDocParser::ParseAnchor(anchor_t& anchor, std::string& anchor_name) {
398   Token& token = m_scanner.peek();
399   if (anchor)
400     throw ParserException(token.mark, ErrorMsg::MULTIPLE_ANCHORS);
401 
402   anchor_name = token.value;
403   anchor = RegisterAnchor(token.value);
404   m_scanner.pop();
405 }
406 
RegisterAnchor(const std::string & name)407 anchor_t SingleDocParser::RegisterAnchor(const std::string& name) {
408   if (name.empty())
409     return NullAnchor;
410 
411   return m_anchors[name] = ++m_curAnchor;
412 }
413 
LookupAnchor(const Mark & mark,const std::string & name) const414 anchor_t SingleDocParser::LookupAnchor(const Mark& mark,
415                                        const std::string& name) const {
416   Anchors::const_iterator it = m_anchors.find(name);
417   if (it == m_anchors.end())
418     throw ParserException(mark, ErrorMsg::UNKNOWN_ANCHOR);
419 
420   return it->second;
421 }
422 }  // namespace YAML
423