1 #include <cassert>
2 #include <memory>
3
4 #include "exp.h"
5 #include "scanner.h"
6 #include "token.h"
7 #include "yaml-cpp/exceptions.h" // IWYU pragma: keep
8
9 namespace YAML {
Scanner(std::istream & in)10 Scanner::Scanner(std::istream& in)
11 : INPUT(in),
12 m_tokens{},
13 m_startedStream(false),
14 m_endedStream(false),
15 m_simpleKeyAllowed(false),
16 m_canBeJSONFlow(false),
17 m_simpleKeys{},
18 m_indents{},
19 m_indentRefs{},
20 m_flows{} {}
21
~Scanner()22 Scanner::~Scanner() {}
23
empty()24 bool Scanner::empty() {
25 EnsureTokensInQueue();
26 return m_tokens.empty();
27 }
28
pop()29 void Scanner::pop() {
30 EnsureTokensInQueue();
31 if (!m_tokens.empty())
32 m_tokens.pop();
33 }
34
peek()35 Token& Scanner::peek() {
36 EnsureTokensInQueue();
37 assert(!m_tokens.empty()); // should we be asserting here? I mean, we really
38 // just be checking
39 // if it's empty before peeking.
40
41 #if 0
42 static Token *pLast = 0;
43 if(pLast != &m_tokens.front())
44 std::cerr << "peek: " << m_tokens.front() << "\n";
45 pLast = &m_tokens.front();
46 #endif
47
48 return m_tokens.front();
49 }
50
mark() const51 Mark Scanner::mark() const { return INPUT.mark(); }
52
EnsureTokensInQueue()53 void Scanner::EnsureTokensInQueue() {
54 while (1) {
55 if (!m_tokens.empty()) {
56 Token& token = m_tokens.front();
57
58 // if this guy's valid, then we're done
59 if (token.status == Token::VALID) {
60 return;
61 }
62
63 // here's where we clean up the impossible tokens
64 if (token.status == Token::INVALID) {
65 m_tokens.pop();
66 continue;
67 }
68
69 // note: what's left are the unverified tokens
70 }
71
72 // no token? maybe we've actually finished
73 if (m_endedStream) {
74 return;
75 }
76
77 // no? then scan...
78 ScanNextToken();
79 }
80 }
81
ScanNextToken()82 void Scanner::ScanNextToken() {
83 if (m_endedStream) {
84 return;
85 }
86
87 if (!m_startedStream) {
88 return StartStream();
89 }
90
91 // get rid of whitespace, etc. (in between tokens it should be irrelevent)
92 ScanToNextToken();
93
94 // maybe need to end some blocks
95 PopIndentToHere();
96
97 // *****
98 // And now branch based on the next few characters!
99 // *****
100
101 // end of stream
102 if (!INPUT) {
103 return EndStream();
104 }
105
106 if (INPUT.column() == 0 && INPUT.peek() == Keys::Directive) {
107 return ScanDirective();
108 }
109
110 // document token
111 if (INPUT.column() == 0 && Exp::DocStart().Matches(INPUT)) {
112 return ScanDocStart();
113 }
114
115 if (INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT)) {
116 return ScanDocEnd();
117 }
118
119 // flow start/end/entry
120 if (INPUT.peek() == Keys::FlowSeqStart ||
121 INPUT.peek() == Keys::FlowMapStart) {
122 return ScanFlowStart();
123 }
124
125 if (INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd) {
126 return ScanFlowEnd();
127 }
128
129 if (INPUT.peek() == Keys::FlowEntry) {
130 return ScanFlowEntry();
131 }
132
133 // block/map stuff
134 if (Exp::BlockEntry().Matches(INPUT)) {
135 return ScanBlockEntry();
136 }
137
138 if ((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT)) {
139 return ScanKey();
140 }
141
142 if (GetValueRegex().Matches(INPUT)) {
143 return ScanValue();
144 }
145
146 // alias/anchor
147 if (INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor) {
148 return ScanAnchorOrAlias();
149 }
150
151 // tag
152 if (INPUT.peek() == Keys::Tag) {
153 return ScanTag();
154 }
155
156 // special scalars
157 if (InBlockContext() && (INPUT.peek() == Keys::LiteralScalar ||
158 INPUT.peek() == Keys::FoldedScalar)) {
159 return ScanBlockScalar();
160 }
161
162 if (INPUT.peek() == '\'' || INPUT.peek() == '\"') {
163 return ScanQuotedScalar();
164 }
165
166 // plain scalars
167 if ((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow())
168 .Matches(INPUT)) {
169 return ScanPlainScalar();
170 }
171
172 // don't know what it is!
173 throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
174 }
175
ScanToNextToken()176 void Scanner::ScanToNextToken() {
177 while (1) {
178 // first eat whitespace
179 while (INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
180 if (InBlockContext() && Exp::Tab().Matches(INPUT)) {
181 m_simpleKeyAllowed = false;
182 }
183 INPUT.eat(1);
184 }
185
186 // then eat a comment
187 if (Exp::Comment().Matches(INPUT)) {
188 // eat until line break
189 while (INPUT && !Exp::Break().Matches(INPUT)) {
190 INPUT.eat(1);
191 }
192 }
193
194 // if it's NOT a line break, then we're done!
195 if (!Exp::Break().Matches(INPUT)) {
196 break;
197 }
198
199 // otherwise, let's eat the line break and keep going
200 int n = Exp::Break().Match(INPUT);
201 INPUT.eat(n);
202
203 // oh yeah, and let's get rid of that simple key
204 InvalidateSimpleKey();
205
206 // new line - we may be able to accept a simple key now
207 if (InBlockContext()) {
208 m_simpleKeyAllowed = true;
209 }
210 }
211 }
212
213 ///////////////////////////////////////////////////////////////////////
214 // Misc. helpers
215
216 // IsWhitespaceToBeEaten
217 // . We can eat whitespace if it's a space or tab
218 // . Note: originally tabs in block context couldn't be eaten
219 // "where a simple key could be allowed
220 // (i.e., not at the beginning of a line, or following '-', '?', or
221 // ':')"
222 // I think this is wrong, since tabs can be non-content whitespace; it's just
223 // that they can't contribute to indentation, so once you've seen a tab in a
224 // line, you can't start a simple key
IsWhitespaceToBeEaten(char ch)225 bool Scanner::IsWhitespaceToBeEaten(char ch) {
226 if (ch == ' ') {
227 return true;
228 }
229
230 if (ch == '\t') {
231 return true;
232 }
233
234 return false;
235 }
236
GetValueRegex() const237 const RegEx& Scanner::GetValueRegex() const {
238 if (InBlockContext()) {
239 return Exp::Value();
240 }
241
242 return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
243 }
244
StartStream()245 void Scanner::StartStream() {
246 m_startedStream = true;
247 m_simpleKeyAllowed = true;
248 std::unique_ptr<IndentMarker> pIndent(
249 new IndentMarker(-1, IndentMarker::NONE));
250 m_indentRefs.push_back(std::move(pIndent));
251 m_indents.push(&m_indentRefs.back());
252 }
253
EndStream()254 void Scanner::EndStream() {
255 // force newline
256 if (INPUT.column() > 0) {
257 INPUT.ResetColumn();
258 }
259
260 PopAllIndents();
261 PopAllSimpleKeys();
262
263 m_simpleKeyAllowed = false;
264 m_endedStream = true;
265 }
266
PushToken(Token::TYPE type)267 Token* Scanner::PushToken(Token::TYPE type) {
268 m_tokens.push(Token(type, INPUT.mark()));
269 return &m_tokens.back();
270 }
271
GetStartTokenFor(IndentMarker::INDENT_TYPE type) const272 Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const {
273 switch (type) {
274 case IndentMarker::SEQ:
275 return Token::BLOCK_SEQ_START;
276 case IndentMarker::MAP:
277 return Token::BLOCK_MAP_START;
278 case IndentMarker::NONE:
279 assert(false);
280 break;
281 }
282 assert(false);
283 throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
284 }
285
PushIndentTo(int column,IndentMarker::INDENT_TYPE type)286 Scanner::IndentMarker* Scanner::PushIndentTo(int column,
287 IndentMarker::INDENT_TYPE type) {
288 // are we in flow?
289 if (InFlowContext()) {
290 return nullptr;
291 }
292
293 std::unique_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
294 IndentMarker& indent = *pIndent;
295 const IndentMarker& lastIndent = *m_indents.top();
296
297 // is this actually an indentation?
298 if (indent.column < lastIndent.column) {
299 return nullptr;
300 }
301 if (indent.column == lastIndent.column &&
302 !(indent.type == IndentMarker::SEQ &&
303 lastIndent.type == IndentMarker::MAP)) {
304 return nullptr;
305 }
306
307 // push a start token
308 indent.pStartToken = PushToken(GetStartTokenFor(type));
309
310 // and then the indent
311 m_indents.push(&indent);
312 m_indentRefs.push_back(std::move(pIndent));
313 return &m_indentRefs.back();
314 }
315
PopIndentToHere()316 void Scanner::PopIndentToHere() {
317 // are we in flow?
318 if (InFlowContext()) {
319 return;
320 }
321
322 // now pop away
323 while (!m_indents.empty()) {
324 const IndentMarker& indent = *m_indents.top();
325 if (indent.column < INPUT.column()) {
326 break;
327 }
328 if (indent.column == INPUT.column() &&
329 !(indent.type == IndentMarker::SEQ &&
330 !Exp::BlockEntry().Matches(INPUT))) {
331 break;
332 }
333
334 PopIndent();
335 }
336
337 while (!m_indents.empty() &&
338 m_indents.top()->status == IndentMarker::INVALID) {
339 PopIndent();
340 }
341 }
342
PopAllIndents()343 void Scanner::PopAllIndents() {
344 // are we in flow?
345 if (InFlowContext()) {
346 return;
347 }
348
349 // now pop away
350 while (!m_indents.empty()) {
351 const IndentMarker& indent = *m_indents.top();
352 if (indent.type == IndentMarker::NONE) {
353 break;
354 }
355
356 PopIndent();
357 }
358 }
359
PopIndent()360 void Scanner::PopIndent() {
361 const IndentMarker& indent = *m_indents.top();
362 m_indents.pop();
363
364 if (indent.status != IndentMarker::VALID) {
365 InvalidateSimpleKey();
366 return;
367 }
368
369 if (indent.type == IndentMarker::SEQ) {
370 m_tokens.push(Token(Token::BLOCK_SEQ_END, INPUT.mark()));
371 } else if (indent.type == IndentMarker::MAP) {
372 m_tokens.push(Token(Token::BLOCK_MAP_END, INPUT.mark()));
373 }
374 }
375
GetTopIndent() const376 int Scanner::GetTopIndent() const {
377 if (m_indents.empty()) {
378 return 0;
379 }
380 return m_indents.top()->column;
381 }
382
ThrowParserException(const std::string & msg) const383 void Scanner::ThrowParserException(const std::string& msg) const {
384 Mark mark = Mark::null_mark();
385 if (!m_tokens.empty()) {
386 const Token& token = m_tokens.front();
387 mark = token.mark;
388 }
389 throw ParserException(mark, msg);
390 }
391 } // namespace YAML
392