1 #include <algorithm>
2 #include <cstdio>
3 #include <sstream>
4
5 #include "collectionstack.h" // IWYU pragma: keep
6 #include "scanner.h"
7 #include "singledocparser.h"
8 #include "tag.h"
9 #include "token.h"
10 #include "yaml-cpp/emitterstyle.h"
11 #include "yaml-cpp/eventhandler.h"
12 #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
13 #include "yaml-cpp/mark.h"
14 #include "yaml-cpp/null.h"
15
16 namespace YAML {
SingleDocParser(Scanner & scanner,const Directives & directives)17 SingleDocParser::SingleDocParser(Scanner& scanner, const Directives& directives)
18 : m_scanner(scanner),
19 m_directives(directives),
20 m_pCollectionStack(new CollectionStack),
21 m_anchors{},
22 m_curAnchor(0) {}
23
~SingleDocParser()24 SingleDocParser::~SingleDocParser() {}
25
26 // HandleDocument
27 // . Handles the next document
28 // . Throws a ParserException on error.
HandleDocument(EventHandler & eventHandler)29 void SingleDocParser::HandleDocument(EventHandler& eventHandler) {
30 assert(!m_scanner.empty()); // guaranteed that there are tokens
31 assert(!m_curAnchor);
32
33 eventHandler.OnDocumentStart(m_scanner.peek().mark);
34
35 // eat doc start
36 if (m_scanner.peek().type == Token::DOC_START)
37 m_scanner.pop();
38
39 // recurse!
40 HandleNode(eventHandler);
41
42 eventHandler.OnDocumentEnd();
43
44 // and finally eat any doc ends we see
45 while (!m_scanner.empty() && m_scanner.peek().type == Token::DOC_END)
46 m_scanner.pop();
47 }
48
HandleNode(EventHandler & eventHandler)49 void SingleDocParser::HandleNode(EventHandler& eventHandler) {
50 // an empty node *is* a possibility
51 if (m_scanner.empty()) {
52 eventHandler.OnNull(m_scanner.mark(), NullAnchor);
53 return;
54 }
55
56 // save location
57 Mark mark = m_scanner.peek().mark;
58
59 // special case: a value node by itself must be a map, with no header
60 if (m_scanner.peek().type == Token::VALUE) {
61 eventHandler.OnMapStart(mark, "?", NullAnchor, EmitterStyle::Default);
62 HandleMap(eventHandler);
63 eventHandler.OnMapEnd();
64 return;
65 }
66
67 // special case: an alias node
68 if (m_scanner.peek().type == Token::ALIAS) {
69 eventHandler.OnAlias(mark, LookupAnchor(mark, m_scanner.peek().value));
70 m_scanner.pop();
71 return;
72 }
73
74 std::string tag;
75 std::string anchor_name;
76 anchor_t anchor;
77 ParseProperties(tag, anchor, anchor_name);
78
79 if (!anchor_name.empty())
80 eventHandler.OnAnchor(mark, anchor_name);
81
82 const Token& token = m_scanner.peek();
83
84 if (token.type == Token::PLAIN_SCALAR && IsNullString(token.value)) {
85 eventHandler.OnNull(mark, anchor);
86 m_scanner.pop();
87 return;
88 }
89
90 // add non-specific tags
91 if (tag.empty())
92 tag = (token.type == Token::NON_PLAIN_SCALAR ? "!" : "?");
93
94 // now split based on what kind of node we should be
95 switch (token.type) {
96 case Token::PLAIN_SCALAR:
97 case Token::NON_PLAIN_SCALAR:
98 eventHandler.OnScalar(mark, tag, anchor, token.value);
99 m_scanner.pop();
100 return;
101 case Token::FLOW_SEQ_START:
102 eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Flow);
103 HandleSequence(eventHandler);
104 eventHandler.OnSequenceEnd();
105 return;
106 case Token::BLOCK_SEQ_START:
107 eventHandler.OnSequenceStart(mark, tag, anchor, EmitterStyle::Block);
108 HandleSequence(eventHandler);
109 eventHandler.OnSequenceEnd();
110 return;
111 case Token::FLOW_MAP_START:
112 eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
113 HandleMap(eventHandler);
114 eventHandler.OnMapEnd();
115 return;
116 case Token::BLOCK_MAP_START:
117 eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Block);
118 HandleMap(eventHandler);
119 eventHandler.OnMapEnd();
120 return;
121 case Token::KEY:
122 // compact maps can only go in a flow sequence
123 if (m_pCollectionStack->GetCurCollectionType() ==
124 CollectionType::FlowSeq) {
125 eventHandler.OnMapStart(mark, tag, anchor, EmitterStyle::Flow);
126 HandleMap(eventHandler);
127 eventHandler.OnMapEnd();
128 return;
129 }
130 break;
131 default:
132 break;
133 }
134
135 if (tag == "?")
136 eventHandler.OnNull(mark, anchor);
137 else
138 eventHandler.OnScalar(mark, tag, anchor, "");
139 }
140
HandleSequence(EventHandler & eventHandler)141 void SingleDocParser::HandleSequence(EventHandler& eventHandler) {
142 // split based on start token
143 switch (m_scanner.peek().type) {
144 case Token::BLOCK_SEQ_START:
145 HandleBlockSequence(eventHandler);
146 break;
147 case Token::FLOW_SEQ_START:
148 HandleFlowSequence(eventHandler);
149 break;
150 default:
151 break;
152 }
153 }
154
HandleBlockSequence(EventHandler & eventHandler)155 void SingleDocParser::HandleBlockSequence(EventHandler& eventHandler) {
156 // eat start token
157 m_scanner.pop();
158 m_pCollectionStack->PushCollectionType(CollectionType::BlockSeq);
159
160 while (1) {
161 if (m_scanner.empty())
162 throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ);
163
164 Token token = m_scanner.peek();
165 if (token.type != Token::BLOCK_ENTRY && token.type != Token::BLOCK_SEQ_END)
166 throw ParserException(token.mark, ErrorMsg::END_OF_SEQ);
167
168 m_scanner.pop();
169 if (token.type == Token::BLOCK_SEQ_END)
170 break;
171
172 // check for null
173 if (!m_scanner.empty()) {
174 const Token& nextToken = m_scanner.peek();
175 if (nextToken.type == Token::BLOCK_ENTRY ||
176 nextToken.type == Token::BLOCK_SEQ_END) {
177 eventHandler.OnNull(nextToken.mark, NullAnchor);
178 continue;
179 }
180 }
181
182 HandleNode(eventHandler);
183 }
184
185 m_pCollectionStack->PopCollectionType(CollectionType::BlockSeq);
186 }
187
HandleFlowSequence(EventHandler & eventHandler)188 void SingleDocParser::HandleFlowSequence(EventHandler& eventHandler) {
189 // eat start token
190 m_scanner.pop();
191 m_pCollectionStack->PushCollectionType(CollectionType::FlowSeq);
192
193 while (1) {
194 if (m_scanner.empty())
195 throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
196
197 // first check for end
198 if (m_scanner.peek().type == Token::FLOW_SEQ_END) {
199 m_scanner.pop();
200 break;
201 }
202
203 // then read the node
204 HandleNode(eventHandler);
205
206 if (m_scanner.empty())
207 throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_SEQ_FLOW);
208
209 // now eat the separator (or could be a sequence end, which we ignore - but
210 // if it's neither, then it's a bad node)
211 Token& token = m_scanner.peek();
212 if (token.type == Token::FLOW_ENTRY)
213 m_scanner.pop();
214 else if (token.type != Token::FLOW_SEQ_END)
215 throw ParserException(token.mark, ErrorMsg::END_OF_SEQ_FLOW);
216 }
217
218 m_pCollectionStack->PopCollectionType(CollectionType::FlowSeq);
219 }
220
HandleMap(EventHandler & eventHandler)221 void SingleDocParser::HandleMap(EventHandler& eventHandler) {
222 // split based on start token
223 switch (m_scanner.peek().type) {
224 case Token::BLOCK_MAP_START:
225 HandleBlockMap(eventHandler);
226 break;
227 case Token::FLOW_MAP_START:
228 HandleFlowMap(eventHandler);
229 break;
230 case Token::KEY:
231 HandleCompactMap(eventHandler);
232 break;
233 case Token::VALUE:
234 HandleCompactMapWithNoKey(eventHandler);
235 break;
236 default:
237 break;
238 }
239 }
240
HandleBlockMap(EventHandler & eventHandler)241 void SingleDocParser::HandleBlockMap(EventHandler& eventHandler) {
242 // eat start token
243 m_scanner.pop();
244 m_pCollectionStack->PushCollectionType(CollectionType::BlockMap);
245
246 while (1) {
247 if (m_scanner.empty())
248 throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP);
249
250 Token token = m_scanner.peek();
251 if (token.type != Token::KEY && token.type != Token::VALUE &&
252 token.type != Token::BLOCK_MAP_END)
253 throw ParserException(token.mark, ErrorMsg::END_OF_MAP);
254
255 if (token.type == Token::BLOCK_MAP_END) {
256 m_scanner.pop();
257 break;
258 }
259
260 // grab key (if non-null)
261 if (token.type == Token::KEY) {
262 m_scanner.pop();
263 HandleNode(eventHandler);
264 } else {
265 eventHandler.OnNull(token.mark, NullAnchor);
266 }
267
268 // now grab value (optional)
269 if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
270 m_scanner.pop();
271 HandleNode(eventHandler);
272 } else {
273 eventHandler.OnNull(token.mark, NullAnchor);
274 }
275 }
276
277 m_pCollectionStack->PopCollectionType(CollectionType::BlockMap);
278 }
279
HandleFlowMap(EventHandler & eventHandler)280 void SingleDocParser::HandleFlowMap(EventHandler& eventHandler) {
281 // eat start token
282 m_scanner.pop();
283 m_pCollectionStack->PushCollectionType(CollectionType::FlowMap);
284
285 while (1) {
286 if (m_scanner.empty())
287 throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
288
289 Token& token = m_scanner.peek();
290 const Mark mark = token.mark;
291 // first check for end
292 if (token.type == Token::FLOW_MAP_END) {
293 m_scanner.pop();
294 break;
295 }
296
297 // grab key (if non-null)
298 if (token.type == Token::KEY) {
299 m_scanner.pop();
300 HandleNode(eventHandler);
301 } else {
302 eventHandler.OnNull(mark, NullAnchor);
303 }
304
305 // now grab value (optional)
306 if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
307 m_scanner.pop();
308 HandleNode(eventHandler);
309 } else {
310 eventHandler.OnNull(mark, NullAnchor);
311 }
312
313 if (m_scanner.empty())
314 throw ParserException(m_scanner.mark(), ErrorMsg::END_OF_MAP_FLOW);
315
316 // now eat the separator (or could be a map end, which we ignore - but if
317 // it's neither, then it's a bad node)
318 Token& nextToken = m_scanner.peek();
319 if (nextToken.type == Token::FLOW_ENTRY)
320 m_scanner.pop();
321 else if (nextToken.type != Token::FLOW_MAP_END)
322 throw ParserException(nextToken.mark, ErrorMsg::END_OF_MAP_FLOW);
323 }
324
325 m_pCollectionStack->PopCollectionType(CollectionType::FlowMap);
326 }
327
328 // . Single "key: value" pair in a flow sequence
HandleCompactMap(EventHandler & eventHandler)329 void SingleDocParser::HandleCompactMap(EventHandler& eventHandler) {
330 m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
331
332 // grab key
333 Mark mark = m_scanner.peek().mark;
334 m_scanner.pop();
335 HandleNode(eventHandler);
336
337 // now grab value (optional)
338 if (!m_scanner.empty() && m_scanner.peek().type == Token::VALUE) {
339 m_scanner.pop();
340 HandleNode(eventHandler);
341 } else {
342 eventHandler.OnNull(mark, NullAnchor);
343 }
344
345 m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
346 }
347
348 // . Single ": value" pair in a flow sequence
HandleCompactMapWithNoKey(EventHandler & eventHandler)349 void SingleDocParser::HandleCompactMapWithNoKey(EventHandler& eventHandler) {
350 m_pCollectionStack->PushCollectionType(CollectionType::CompactMap);
351
352 // null key
353 eventHandler.OnNull(m_scanner.peek().mark, NullAnchor);
354
355 // grab value
356 m_scanner.pop();
357 HandleNode(eventHandler);
358
359 m_pCollectionStack->PopCollectionType(CollectionType::CompactMap);
360 }
361
362 // ParseProperties
363 // . Grabs any tag or anchor tokens and deals with them.
ParseProperties(std::string & tag,anchor_t & anchor,std::string & anchor_name)364 void SingleDocParser::ParseProperties(std::string& tag, anchor_t& anchor,
365 std::string& anchor_name) {
366 tag.clear();
367 anchor_name.clear();
368 anchor = NullAnchor;
369
370 while (1) {
371 if (m_scanner.empty())
372 return;
373
374 switch (m_scanner.peek().type) {
375 case Token::TAG:
376 ParseTag(tag);
377 break;
378 case Token::ANCHOR:
379 ParseAnchor(anchor, anchor_name);
380 break;
381 default:
382 return;
383 }
384 }
385 }
386
ParseTag(std::string & tag)387 void SingleDocParser::ParseTag(std::string& tag) {
388 Token& token = m_scanner.peek();
389 if (!tag.empty())
390 throw ParserException(token.mark, ErrorMsg::MULTIPLE_TAGS);
391
392 Tag tagInfo(token);
393 tag = tagInfo.Translate(m_directives);
394 m_scanner.pop();
395 }
396
ParseAnchor(anchor_t & anchor,std::string & anchor_name)397 void SingleDocParser::ParseAnchor(anchor_t& anchor, std::string& anchor_name) {
398 Token& token = m_scanner.peek();
399 if (anchor)
400 throw ParserException(token.mark, ErrorMsg::MULTIPLE_ANCHORS);
401
402 anchor_name = token.value;
403 anchor = RegisterAnchor(token.value);
404 m_scanner.pop();
405 }
406
RegisterAnchor(const std::string & name)407 anchor_t SingleDocParser::RegisterAnchor(const std::string& name) {
408 if (name.empty())
409 return NullAnchor;
410
411 return m_anchors[name] = ++m_curAnchor;
412 }
413
LookupAnchor(const Mark & mark,const std::string & name) const414 anchor_t SingleDocParser::LookupAnchor(const Mark& mark,
415 const std::string& name) const {
416 Anchors::const_iterator it = m_anchors.find(name);
417 if (it == m_anchors.end())
418 throw ParserException(mark, ErrorMsg::UNKNOWN_ANCHOR);
419
420 return it->second;
421 }
422 } // namespace YAML
423