1 #include <sstream>
2
3 #include "exp.h"
4 #include "regex_yaml.h"
5 #include "regeximpl.h"
6 #include "scanner.h"
7 #include "scanscalar.h"
8 #include "scantag.h" // IWYU pragma: keep
9 #include "tag.h" // IWYU pragma: keep
10 #include "token.h"
11 #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
12 #include "yaml-cpp/mark.h"
13
14 namespace YAML {
15 ///////////////////////////////////////////////////////////////////////
16 // Specialization for scanning specific tokens
17
18 // Directive
19 // . Note: no semantic checking is done here (that's for the parser to do)
ScanDirective()20 void Scanner::ScanDirective() {
21 std::string name;
22 std::vector<std::string> params;
23
24 // pop indents and simple keys
25 PopAllIndents();
26 PopAllSimpleKeys();
27
28 m_simpleKeyAllowed = false;
29 m_canBeJSONFlow = false;
30
31 // store pos and eat indicator
32 Token token(Token::DIRECTIVE, INPUT.mark());
33 INPUT.eat(1);
34
35 // read name
36 while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
37 token.value += INPUT.get();
38
39 // read parameters
40 while (1) {
41 // first get rid of whitespace
42 while (Exp::Blank().Matches(INPUT))
43 INPUT.eat(1);
44
45 // break on newline or comment
46 if (!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
47 break;
48
49 // now read parameter
50 std::string param;
51 while (INPUT && !Exp::BlankOrBreak().Matches(INPUT))
52 param += INPUT.get();
53
54 token.params.push_back(param);
55 }
56
57 m_tokens.push(token);
58 }
59
60 // DocStart
ScanDocStart()61 void Scanner::ScanDocStart() {
62 PopAllIndents();
63 PopAllSimpleKeys();
64 m_simpleKeyAllowed = false;
65 m_canBeJSONFlow = false;
66
67 // eat
68 Mark mark = INPUT.mark();
69 INPUT.eat(3);
70 m_tokens.push(Token(Token::DOC_START, mark));
71 }
72
73 // DocEnd
ScanDocEnd()74 void Scanner::ScanDocEnd() {
75 PopAllIndents();
76 PopAllSimpleKeys();
77 m_simpleKeyAllowed = false;
78 m_canBeJSONFlow = false;
79
80 // eat
81 Mark mark = INPUT.mark();
82 INPUT.eat(3);
83 m_tokens.push(Token(Token::DOC_END, mark));
84 }
85
86 // FlowStart
ScanFlowStart()87 void Scanner::ScanFlowStart() {
88 // flows can be simple keys
89 InsertPotentialSimpleKey();
90 m_simpleKeyAllowed = true;
91 m_canBeJSONFlow = false;
92
93 // eat
94 Mark mark = INPUT.mark();
95 char ch = INPUT.get();
96 FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
97 m_flows.push(flowType);
98 Token::TYPE type =
99 (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
100 m_tokens.push(Token(type, mark));
101 }
102
103 // FlowEnd
ScanFlowEnd()104 void Scanner::ScanFlowEnd() {
105 if (InBlockContext())
106 throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
107
108 // we might have a solo entry in the flow context
109 if (InFlowContext()) {
110 if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
111 m_tokens.push(Token(Token::VALUE, INPUT.mark()));
112 else if (m_flows.top() == FLOW_SEQ)
113 InvalidateSimpleKey();
114 }
115
116 m_simpleKeyAllowed = false;
117 m_canBeJSONFlow = true;
118
119 // eat
120 Mark mark = INPUT.mark();
121 char ch = INPUT.get();
122
123 // check that it matches the start
124 FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
125 if (m_flows.top() != flowType)
126 throw ParserException(mark, ErrorMsg::FLOW_END);
127 m_flows.pop();
128
129 Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
130 m_tokens.push(Token(type, mark));
131 }
132
133 // FlowEntry
ScanFlowEntry()134 void Scanner::ScanFlowEntry() {
135 // we might have a solo entry in the flow context
136 if (InFlowContext()) {
137 if (m_flows.top() == FLOW_MAP && VerifySimpleKey())
138 m_tokens.push(Token(Token::VALUE, INPUT.mark()));
139 else if (m_flows.top() == FLOW_SEQ)
140 InvalidateSimpleKey();
141 }
142
143 m_simpleKeyAllowed = true;
144 m_canBeJSONFlow = false;
145
146 // eat
147 Mark mark = INPUT.mark();
148 INPUT.eat(1);
149 m_tokens.push(Token(Token::FLOW_ENTRY, mark));
150 }
151
152 // BlockEntry
ScanBlockEntry()153 void Scanner::ScanBlockEntry() {
154 // we better be in the block context!
155 if (InFlowContext())
156 throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
157
158 // can we put it here?
159 if (!m_simpleKeyAllowed)
160 throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
161
162 PushIndentTo(INPUT.column(), IndentMarker::SEQ);
163 m_simpleKeyAllowed = true;
164 m_canBeJSONFlow = false;
165
166 // eat
167 Mark mark = INPUT.mark();
168 INPUT.eat(1);
169 m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
170 }
171
172 // Key
ScanKey()173 void Scanner::ScanKey() {
174 // handle keys diffently in the block context (and manage indents)
175 if (InBlockContext()) {
176 if (!m_simpleKeyAllowed)
177 throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
178
179 PushIndentTo(INPUT.column(), IndentMarker::MAP);
180 }
181
182 // can only put a simple key here if we're in block context
183 m_simpleKeyAllowed = InBlockContext();
184
185 // eat
186 Mark mark = INPUT.mark();
187 INPUT.eat(1);
188 m_tokens.push(Token(Token::KEY, mark));
189 }
190
191 // Value
ScanValue()192 void Scanner::ScanValue() {
193 // and check that simple key
194 bool isSimpleKey = VerifySimpleKey();
195 m_canBeJSONFlow = false;
196
197 if (isSimpleKey) {
198 // can't follow a simple key with another simple key (dunno why, though - it
199 // seems fine)
200 m_simpleKeyAllowed = false;
201 } else {
202 // handle values diffently in the block context (and manage indents)
203 if (InBlockContext()) {
204 if (!m_simpleKeyAllowed)
205 throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
206
207 PushIndentTo(INPUT.column(), IndentMarker::MAP);
208 }
209
210 // can only put a simple key here if we're in block context
211 m_simpleKeyAllowed = InBlockContext();
212 }
213
214 // eat
215 Mark mark = INPUT.mark();
216 INPUT.eat(1);
217 m_tokens.push(Token(Token::VALUE, mark));
218 }
219
220 // AnchorOrAlias
ScanAnchorOrAlias()221 void Scanner::ScanAnchorOrAlias() {
222 bool alias;
223 std::string name;
224
225 // insert a potential simple key
226 InsertPotentialSimpleKey();
227 m_simpleKeyAllowed = false;
228 m_canBeJSONFlow = false;
229
230 // eat the indicator
231 Mark mark = INPUT.mark();
232 char indicator = INPUT.get();
233 alias = (indicator == Keys::Alias);
234
235 // now eat the content
236 while (INPUT && Exp::Anchor().Matches(INPUT))
237 name += INPUT.get();
238
239 // we need to have read SOMETHING!
240 if (name.empty())
241 throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND
242 : ErrorMsg::ANCHOR_NOT_FOUND);
243
244 // and needs to end correctly
245 if (INPUT && !Exp::AnchorEnd().Matches(INPUT))
246 throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS
247 : ErrorMsg::CHAR_IN_ANCHOR);
248
249 // and we're done
250 Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
251 token.value = name;
252 m_tokens.push(token);
253 }
254
255 // Tag
ScanTag()256 void Scanner::ScanTag() {
257 // insert a potential simple key
258 InsertPotentialSimpleKey();
259 m_simpleKeyAllowed = false;
260 m_canBeJSONFlow = false;
261
262 Token token(Token::TAG, INPUT.mark());
263
264 // eat the indicator
265 INPUT.get();
266
267 if (INPUT && INPUT.peek() == Keys::VerbatimTagStart) {
268 std::string tag = ScanVerbatimTag(INPUT);
269
270 token.value = tag;
271 token.data = Tag::VERBATIM;
272 } else {
273 bool canBeHandle;
274 token.value = ScanTagHandle(INPUT, canBeHandle);
275 if (!canBeHandle && token.value.empty())
276 token.data = Tag::NON_SPECIFIC;
277 else if (token.value.empty())
278 token.data = Tag::SECONDARY_HANDLE;
279 else
280 token.data = Tag::PRIMARY_HANDLE;
281
282 // is there a suffix?
283 if (canBeHandle && INPUT.peek() == Keys::Tag) {
284 // eat the indicator
285 INPUT.get();
286 token.params.push_back(ScanTagSuffix(INPUT));
287 token.data = Tag::NAMED_HANDLE;
288 }
289 }
290
291 m_tokens.push(token);
292 }
293
294 // PlainScalar
ScanPlainScalar()295 void Scanner::ScanPlainScalar() {
296 std::string scalar;
297
298 // set up the scanning parameters
299 ScanScalarParams params;
300 params.end =
301 (InFlowContext() ? &Exp::ScanScalarEndInFlow() : &Exp::ScanScalarEnd());
302 params.eatEnd = false;
303 params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
304 params.fold = FOLD_FLOW;
305 params.eatLeadingWhitespace = true;
306 params.trimTrailingSpaces = true;
307 params.chomp = STRIP;
308 params.onDocIndicator = BREAK;
309 params.onTabInIndentation = THROW;
310
311 // insert a potential simple key
312 InsertPotentialSimpleKey();
313
314 Mark mark = INPUT.mark();
315 scalar = ScanScalar(INPUT, params);
316
317 // can have a simple key only if we ended the scalar by starting a new line
318 m_simpleKeyAllowed = params.leadingSpaces;
319 m_canBeJSONFlow = false;
320
321 // finally, check and see if we ended on an illegal character
322 // if(Exp::IllegalCharInScalar.Matches(INPUT))
323 // throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
324
325 Token token(Token::PLAIN_SCALAR, mark);
326 token.value = scalar;
327 m_tokens.push(token);
328 }
329
330 // QuotedScalar
ScanQuotedScalar()331 void Scanner::ScanQuotedScalar() {
332 std::string scalar;
333
334 // peek at single or double quote (don't eat because we need to preserve (for
335 // the time being) the input position)
336 char quote = INPUT.peek();
337 bool single = (quote == '\'');
338
339 // setup the scanning parameters
340 ScanScalarParams params;
341 RegEx end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
342 params.end = &end;
343 params.eatEnd = true;
344 params.escape = (single ? '\'' : '\\');
345 params.indent = 0;
346 params.fold = FOLD_FLOW;
347 params.eatLeadingWhitespace = true;
348 params.trimTrailingSpaces = false;
349 params.chomp = CLIP;
350 params.onDocIndicator = THROW;
351
352 // insert a potential simple key
353 InsertPotentialSimpleKey();
354
355 Mark mark = INPUT.mark();
356
357 // now eat that opening quote
358 INPUT.get();
359
360 // and scan
361 scalar = ScanScalar(INPUT, params);
362 m_simpleKeyAllowed = false;
363 m_canBeJSONFlow = true;
364
365 Token token(Token::NON_PLAIN_SCALAR, mark);
366 token.value = scalar;
367 m_tokens.push(token);
368 }
369
370 // BlockScalarToken
371 // . These need a little extra processing beforehand.
372 // . We need to scan the line where the indicator is (this doesn't count as part
373 // of the scalar),
374 // and then we need to figure out what level of indentation we'll be using.
ScanBlockScalar()375 void Scanner::ScanBlockScalar() {
376 std::string scalar;
377
378 ScanScalarParams params;
379 params.indent = 1;
380 params.detectIndent = true;
381
382 // eat block indicator ('|' or '>')
383 Mark mark = INPUT.mark();
384 char indicator = INPUT.get();
385 params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
386
387 // eat chomping/indentation indicators
388 params.chomp = CLIP;
389 int n = Exp::Chomp().Match(INPUT);
390 for (int i = 0; i < n; i++) {
391 char ch = INPUT.get();
392 if (ch == '+')
393 params.chomp = KEEP;
394 else if (ch == '-')
395 params.chomp = STRIP;
396 else if (Exp::Digit().Matches(ch)) {
397 if (ch == '0')
398 throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
399
400 params.indent = ch - '0';
401 params.detectIndent = false;
402 }
403 }
404
405 // now eat whitespace
406 while (Exp::Blank().Matches(INPUT))
407 INPUT.eat(1);
408
409 // and comments to the end of the line
410 if (Exp::Comment().Matches(INPUT))
411 while (INPUT && !Exp::Break().Matches(INPUT))
412 INPUT.eat(1);
413
414 // if it's not a line break, then we ran into a bad character inline
415 if (INPUT && !Exp::Break().Matches(INPUT))
416 throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
417
418 // set the initial indentation
419 if (GetTopIndent() >= 0)
420 params.indent += GetTopIndent();
421
422 params.eatLeadingWhitespace = false;
423 params.trimTrailingSpaces = false;
424 params.onTabInIndentation = THROW;
425
426 scalar = ScanScalar(INPUT, params);
427
428 // simple keys always ok after block scalars (since we're gonna start a new
429 // line anyways)
430 m_simpleKeyAllowed = true;
431 m_canBeJSONFlow = false;
432
433 Token token(Token::NON_PLAIN_SCALAR, mark);
434 token.value = scalar;
435 m_tokens.push(token);
436 }
437 }
438