1 #include <sstream>
2 
3 #include "exp.h"
4 #include "regex_yaml.h"
5 #include "regeximpl.h"
6 #include "scanner.h"
7 #include "scanscalar.h"
8 #include "scantag.h"  // IWYU pragma: keep
9 #include "tag.h"      // IWYU pragma: keep
10 #include "token.h"
11 #include "yaml-cpp/exceptions.h"  // IWYU pragma: keep
12 #include "yaml-cpp/mark.h"
13 
14 namespace YAML {
15 ///////////////////////////////////////////////////////////////////////
16 // Specialization for scanning specific tokens
17 
18 // Directive
19 // . Note: no semantic checking is done here (that's for the parser to do)
ScanDirective()20 void Scanner::ScanDirective() {
21   std::string name;
22   std::vector<std::string> params;
23 
24   // pop indents and simple keys
25   PopAllIndents();
26   PopAllSimpleKeys();
27 
28   m_simpleKeyAllowed = false;
29   m_canBeJSONFlow = false;
30 
31   // store pos and eat indicator
32   Token token(Token::DIRECTIVE, INPUT.mark());
33   INPUT.eat(1);
34 
35   // read name
36   while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
37     token.value += INPUT.get();
38 
39   // read parameters
40   while (1) {
41     // first get rid of whitespace
42     while (Exp::Blank().Matches(INPUT))
43       INPUT.eat(1);
44 
45     // break on newline or comment
46     if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
47       break;
48 
49     // now read parameter
50     std::string param;
51     while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
52       param += INPUT.get();
53 
54     token.params.push_back(param);
55   }
56 
57   m_tokens.push(token);
58 }
59 
60 // DocStart
ScanDocStart()61 void Scanner::ScanDocStart() {
62   PopAllIndents();
63   PopAllSimpleKeys();
64   m_simpleKeyAllowed = false;
65   m_canBeJSONFlow = false;
66 
67   // eat
68   Mark mark = INPUT.mark();
69   INPUT.eat(3);
70   m_tokens.push(Token(Token::DOC_START, mark));
71 }
72 
73 // DocEnd
ScanDocEnd()74 void Scanner::ScanDocEnd() {
75   PopAllIndents();
76   PopAllSimpleKeys();
77   m_simpleKeyAllowed = false;
78   m_canBeJSONFlow = false;
79 
80   // eat
81   Mark mark = INPUT.mark();
82   INPUT.eat(3);
83   m_tokens.push(Token(Token::DOC_END, mark));
84 }
85 
86 // FlowStart
ScanFlowStart()87 void Scanner::ScanFlowStart() {
88   // flows can be simple keys
89   InsertPotentialSimpleKey();
90   m_simpleKeyAllowed = true;
91   m_canBeJSONFlow = false;
92 
93   // eat
94   Mark mark = INPUT.mark();
95   char ch = INPUT.get();
96   FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
97   m_flows.push(flowType);
98   Token::TYPE type =
99       (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
100   m_tokens.push(Token(type, mark));
101 }
102 
103 // FlowEnd
ScanFlowEnd()104 void Scanner::ScanFlowEnd() {
105   if (InBlockContext())
106     throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
107 
108   // we might have a solo entry in the flow context
109   if (InFlowContext()) {
110     if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
111       m_tokens.push(Token(Token::VALUE, INPUT.mark()));
112     else if (m_flows.top() == FLOW_SEQ)
113       InvalidateSimpleKey();
114   }
115 
116   m_simpleKeyAllowed = false;
117   m_canBeJSONFlow = true;
118 
119   // eat
120   Mark mark = INPUT.mark();
121   char ch = INPUT.get();
122 
123   // check that it matches the start
124   FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
125   if (m_flows.top() != flowType)
126     throw ParserException(mark, ErrorMsg::FLOW_END);
127   m_flows.pop();
128 
129   Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
130   m_tokens.push(Token(type, mark));
131 }
132 
133 // FlowEntry
ScanFlowEntry()134 void Scanner::ScanFlowEntry() {
135   // we might have a solo entry in the flow context
136   if (InFlowContext()) {
137     if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
138       m_tokens.push(Token(Token::VALUE, INPUT.mark()));
139     else if (m_flows.top() == FLOW_SEQ)
140       InvalidateSimpleKey();
141   }
142 
143   m_simpleKeyAllowed = true;
144   m_canBeJSONFlow = false;
145 
146   // eat
147   Mark mark = INPUT.mark();
148   INPUT.eat(1);
149   m_tokens.push(Token(Token::FLOW_ENTRY, mark));
150 }
151 
152 // BlockEntry
ScanBlockEntry()153 void Scanner::ScanBlockEntry() {
154   // we better be in the block context!
155   if (InFlowContext())
156     throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
157 
158   // can we put it here?
159   if (!m_simpleKeyAllowed)
160     throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
161 
162   PushIndentTo(INPUT.column(), IndentMarker::SEQ);
163   m_simpleKeyAllowed = true;
164   m_canBeJSONFlow = false;
165 
166   // eat
167   Mark mark = INPUT.mark();
168   INPUT.eat(1);
169   m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
170 }
171 
172 // Key
ScanKey()173 void Scanner::ScanKey() {
174   // handle keys diffently in the block context (and manage indents)
175   if (InBlockContext()) {
176     if (!m_simpleKeyAllowed)
177       throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
178 
179     PushIndentTo(INPUT.column(), IndentMarker::MAP);
180   }
181 
182   // can only put a simple key here if we're in block context
183   m_simpleKeyAllowed = InBlockContext();
184 
185   // eat
186   Mark mark = INPUT.mark();
187   INPUT.eat(1);
188   m_tokens.push(Token(Token::KEY, mark));
189 }
190 
191 // Value
ScanValue()192 void Scanner::ScanValue() {
193   // and check that simple key
194   bool isSimpleKey = VerifySimpleKey();
195   m_canBeJSONFlow = false;
196 
197   if (isSimpleKey) {
198     // can't follow a simple key with another simple key (dunno why, though - it
199     // seems fine)
200     m_simpleKeyAllowed = false;
201   } else {
202     // handle values diffently in the block context (and manage indents)
203     if (InBlockContext()) {
204       if (!m_simpleKeyAllowed)
205         throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
206 
207       PushIndentTo(INPUT.column(), IndentMarker::MAP);
208     }
209 
210     // can only put a simple key here if we're in block context
211     m_simpleKeyAllowed = InBlockContext();
212   }
213 
214   // eat
215   Mark mark = INPUT.mark();
216   INPUT.eat(1);
217   m_tokens.push(Token(Token::VALUE, mark));
218 }
219 
220 // AnchorOrAlias
ScanAnchorOrAlias()221 void Scanner::ScanAnchorOrAlias() {
222   bool alias;
223   std::string name;
224 
225   // insert a potential simple key
226   InsertPotentialSimpleKey();
227   m_simpleKeyAllowed = false;
228   m_canBeJSONFlow = false;
229 
230   // eat the indicator
231   Mark mark = INPUT.mark();
232   char indicator = INPUT.get();
233   alias = (indicator == Keys::Alias);
234 
235   // now eat the content
236   while (INPUT && Exp::Anchor().Matches(INPUT))
237     name += INPUT.get();
238 
239   // we need to have read SOMETHING!
240   if (name.empty())
241     throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND
242                                               : ErrorMsg::ANCHOR_NOT_FOUND);
243 
244   // and needs to end correctly
245   if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
246     throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
247                                               : ErrorMsg::CHAR_IN_ANCHOR);
248 
249   // and we're done
250   Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
251   token.value = name;
252   m_tokens.push(token);
253 }
254 
255 // Tag
ScanTag()256 void Scanner::ScanTag() {
257   // insert a potential simple key
258   InsertPotentialSimpleKey();
259   m_simpleKeyAllowed = false;
260   m_canBeJSONFlow = false;
261 
262   Token token(Token::TAG, INPUT.mark());
263 
264   // eat the indicator
265   INPUT.get();
266 
267   if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
268     std::string tag = ScanVerbatimTag(INPUT);
269 
270     token.value = tag;
271     token.data = Tag::VERBATIM;
272   } else {
273     bool canBeHandle;
274     token.value = ScanTagHandle(INPUT, canBeHandle);
275     if (!canBeHandle && token.value.empty())
276       token.data = Tag::NON_SPECIFIC;
277     else if (token.value.empty())
278       token.data = Tag::SECONDARY_HANDLE;
279     else
280       token.data = Tag::PRIMARY_HANDLE;
281 
282     // is there a suffix?
283     if (canBeHandle && INPUT.peek() == Keys::Tag) {
284       // eat the indicator
285       INPUT.get();
286       token.params.push_back(ScanTagSuffix(INPUT));
287       token.data = Tag::NAMED_HANDLE;
288     }
289   }
290 
291   m_tokens.push(token);
292 }
293 
294 // PlainScalar
ScanPlainScalar()295 void Scanner::ScanPlainScalar() {
296   std::string scalar;
297 
298   // set up the scanning parameters
299   ScanScalarParams params;
300   params.end =
301       (InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd());
302   params.eatEnd = false;
303   params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
304   params.fold = FOLD_FLOW;
305   params.eatLeadingWhitespace = true;
306   params.trimTrailingSpaces = true;
307   params.chomp = STRIP;
308   params.onDocIndicator = BREAK;
309   params.onTabInIndentation = THROW;
310 
311   // insert a potential simple key
312   InsertPotentialSimpleKey();
313 
314   Mark mark = INPUT.mark();
315   scalar = ScanScalar(INPUT, params);
316 
317   // can have a simple key only if we ended the scalar by starting a new line
318   m_simpleKeyAllowed = params.leadingSpaces;
319   m_canBeJSONFlow = false;
320 
321   // finally, check and see if we ended on an illegal character
322   // if(Exp::IllegalCharInScalar.Matches(INPUT))
323   //	throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
324 
325   Token token(Token::PLAIN_SCALAR, mark);
326   token.value = scalar;
327   m_tokens.push(token);
328 }
329 
330 // QuotedScalar
ScanQuotedScalar()331 void Scanner::ScanQuotedScalar() {
332   std::string scalar;
333 
334   // peek at single or double quote (don't eat because we need to preserve (for
335   // the time being) the input position)
336   char quote = INPUT.peek();
337   bool single = (quote == '\'');
338 
339   // setup the scanning parameters
340   ScanScalarParams params;
341   RegEx end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
342   params.end = &end;
343   params.eatEnd = true;
344   params.escape = (single ? '\'' : '\\');
345   params.indent = 0;
346   params.fold = FOLD_FLOW;
347   params.eatLeadingWhitespace = true;
348   params.trimTrailingSpaces = false;
349   params.chomp = CLIP;
350   params.onDocIndicator = THROW;
351 
352   // insert a potential simple key
353   InsertPotentialSimpleKey();
354 
355   Mark mark = INPUT.mark();
356 
357   // now eat that opening quote
358   INPUT.get();
359 
360   // and scan
361   scalar = ScanScalar(INPUT, params);
362   m_simpleKeyAllowed = false;
363   m_canBeJSONFlow = true;
364 
365   Token token(Token::NON_PLAIN_SCALAR, mark);
366   token.value = scalar;
367   m_tokens.push(token);
368 }
369 
370 // BlockScalarToken
371 // . These need a little extra processing beforehand.
372 // . We need to scan the line where the indicator is (this doesn't count as part
373 // of the scalar),
374 //   and then we need to figure out what level of indentation we'll be using.
ScanBlockScalar()375 void Scanner::ScanBlockScalar() {
376   std::string scalar;
377 
378   ScanScalarParams params;
379   params.indent = 1;
380   params.detectIndent = true;
381 
382   // eat block indicator ('|' or '>')
383   Mark mark = INPUT.mark();
384   char indicator = INPUT.get();
385   params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
386 
387   // eat chomping/indentation indicators
388   params.chomp = CLIP;
389   int n = Exp::Chomp().Match(INPUT);
390   for (int i = 0; i < n; i++) {
391     char ch = INPUT.get();
392     if (ch == '+')
393       params.chomp = KEEP;
394     else if (ch == '-')
395       params.chomp = STRIP;
396     else if (Exp::Digit().Matches(ch)) {
397       if (ch == '0')
398         throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
399 
400       params.indent = ch - '0';
401       params.detectIndent = false;
402     }
403   }
404 
405   // now eat whitespace
406   while (Exp::Blank().Matches(INPUT))
407     INPUT.eat(1);
408 
409   // and comments to the end of the line
410   if (Exp::Comment().Matches(INPUT))
411     while (INPUT && !Exp::Break().Matches(INPUT))
412       INPUT.eat(1);
413 
414   // if it's not a line break, then we ran into a bad character inline
415   if (INPUT && !Exp::Break().Matches(INPUT))
416     throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
417 
418   // set the initial indentation
419   if (GetTopIndent() >= 0)
420     params.indent += GetTopIndent();
421 
422   params.eatLeadingWhitespace = false;
423   params.trimTrailingSpaces = false;
424   params.onTabInIndentation = THROW;
425 
426   scalar = ScanScalar(INPUT, params);
427 
428   // simple keys always ok after block scalars (since we're gonna start a new
429   // line anyways)
430   m_simpleKeyAllowed = true;
431   m_canBeJSONFlow = false;
432 
433   Token token(Token::NON_PLAIN_SCALAR, mark);
434   token.value = scalar;
435   m_tokens.push(token);
436 }
437 }
438