1 /*
2 * Copyright (c) 2003-2019, John Wiegley. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 * - Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * - Neither the name of New Artisans LLC nor the names of its
16 * contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <system.hh>
33
34 #include "token.h"
35 #include "parser.h"
36
37 namespace ledger {
38
parse_reserved_word(std::istream & in)39 int expr_t::token_t::parse_reserved_word(std::istream& in)
40 {
41 char c = static_cast<char>(in.peek());
42
43 if (c == 'a' || c == 'd' || c == 'e' || c == 'f' ||
44 c == 'i' || c == 'o' || c == 'n' || c == 't') {
45 length = 0;
46
47 char buf[6];
48 READ_INTO_(in, buf, 5, c, length, std::isalpha(c));
49
50 switch (buf[0]) {
51 case 'a':
52 if (std::strcmp(buf, "and") == 0) {
53 symbol[0] = '&';
54 symbol[1] = '\0';
55 kind = KW_AND;
56 return 1;
57 }
58 break;
59
60 case 'd':
61 if (std::strcmp(buf, "div") == 0) {
62 symbol[0] = '/';
63 symbol[1] = '\0';
64 kind = KW_DIV;
65 return 1;
66 }
67 break;
68
69 case 'e':
70 if (std::strcmp(buf, "else") == 0) {
71 std::strcpy(symbol, "else");
72 kind = KW_ELSE;
73 return 1;
74 }
75 break;
76
77 case 'f':
78 if (std::strcmp(buf, "false") == 0) {
79 std::strcpy(symbol, "false");
80 kind = VALUE;
81 value = false;
82 return 1;
83 }
84 break;
85
86 case 'i':
87 if (std::strcmp(buf, "if") == 0) {
88 symbol[0] = 'i';
89 symbol[1] = 'f';
90 symbol[2] = '\0';
91 kind = KW_IF;
92 return 1;
93 }
94 break;
95
96 case 'o':
97 if (std::strcmp(buf, "or") == 0) {
98 symbol[0] = '|';
99 symbol[1] = '\0';
100 kind = KW_OR;
101 return 1;
102 }
103 break;
104
105 case 'n':
106 if (std::strcmp(buf, "not") == 0) {
107 symbol[0] = '!';
108 symbol[1] = '\0';
109 kind = EXCLAM;
110 return 1;
111 }
112 break;
113
114 case 't':
115 if (std::strcmp(buf, "true") == 0) {
116 std::strcpy(symbol, "true");
117 kind = VALUE;
118 value = true;
119 return 1;
120 }
121 break;
122 }
123
124 return 0;
125 }
126 return -1;
127 }
128
parse_ident(std::istream & in)129 void expr_t::token_t::parse_ident(std::istream& in)
130 {
131 kind = IDENT;
132 length = 0;
133
134 char c, buf[256];
135 READ_INTO_(in, buf, 255, c, length, std::isalnum(c) || c == '_');
136
137 value.set_string(buf);
138 }
139
next(std::istream & in,const parse_flags_t & pflags)140 void expr_t::token_t::next(std::istream& in, const parse_flags_t& pflags)
141 {
142 if (in.eof()) {
143 kind = TOK_EOF;
144 return;
145 }
146 if (! in.good())
147 throw_(parse_error, _("Input stream no longer valid"));
148
149 char c = peek_next_nonws(in);
150
151 if (in.eof() || c == -1) {
152 kind = TOK_EOF;
153 return;
154 }
155 if (! in.good())
156 throw_(parse_error, _("Input stream no longer valid"));
157
158 symbol[0] = c;
159 symbol[1] = '\0';
160
161 length = 1;
162
163 switch (c) {
164 case '&':
165 in.get(c);
166 c = static_cast<char>(in.peek());
167 if (c == '&') {
168 in.get(c);
169 kind = KW_AND;
170 length = 2;
171 break;
172 }
173 kind = KW_AND;
174 break;
175 case '|':
176 in.get(c);
177 c = static_cast<char>(in.peek());
178 if (c == '|') {
179 in.get(c);
180 kind = KW_OR;
181 length = 2;
182 break;
183 }
184 kind = KW_OR;
185 break;
186
187 case '(':
188 in.get(c);
189 kind = LPAREN;
190 break;
191 case ')':
192 in.get(c);
193 kind = RPAREN;
194 break;
195
196 case '[': {
197 in.get(c);
198
199 char buf[256];
200 READ_INTO_(in, buf, 255, c, length, c != ']');
201 if (c != ']')
202 expected(']', c);
203
204 in.get(c);
205 length++;
206
207 date_interval_t timespan(buf);
208 optional<date_t> begin = timespan.begin();
209 if (! begin)
210 throw_(parse_error,
211 _("Date specifier does not refer to a starting date"));
212 kind = VALUE;
213 value = *begin;
214 break;
215 }
216
217 case '\'':
218 case '"': {
219 char delim;
220 in.get(delim);
221 char buf[4096];
222 READ_INTO_(in, buf, 4095, c, length, c != delim);
223 if (c != delim)
224 expected(delim, c);
225 in.get(c);
226 length++;
227 kind = VALUE;
228 value.set_string(buf);
229 break;
230 }
231
232 case '{': {
233 in.get(c);
234 amount_t temp;
235 temp.parse(in, PARSE_NO_MIGRATE);
236 in.get(c);
237 if (c != '}')
238 expected('}', c);
239 length++;
240 kind = VALUE;
241 value = temp;
242 break;
243 }
244
245 case '!':
246 in.get(c);
247 c = static_cast<char>(in.peek());
248 if (c == '=') {
249 in.get(c);
250 symbol[1] = c;
251 symbol[2] = '\0';
252 kind = NEQUAL;
253 length = 2;
254 break;
255 }
256 else if (c == '~') {
257 in.get(c);
258 symbol[1] = c;
259 symbol[2] = '\0';
260 kind = NMATCH;
261 length = 2;
262 break;
263 }
264 kind = EXCLAM;
265 break;
266
267 case '-':
268 in.get(c);
269 c = static_cast<char>(in.peek());
270 if (c == '>') {
271 in.get(c);
272 symbol[1] = c;
273 symbol[2] = '\0';
274 kind = ARROW;
275 length = 2;
276 break;
277 }
278 kind = MINUS;
279 break;
280 case '+':
281 in.get(c);
282 kind = PLUS;
283 break;
284
285 case '*':
286 in.get(c);
287 kind = STAR;
288 break;
289
290 case '?':
291 in.get(c);
292 kind = QUERY;
293 break;
294 case ':':
295 in.get(c);
296 c = static_cast<char>(in.peek());
297 kind = COLON;
298 break;
299
300 case '/': {
301 in.get(c);
302 if (pflags.has_flags(PARSE_OP_CONTEXT)) { // operator context
303 kind = SLASH;
304 } else { // terminal context
305 // Read in the regexp
306 char buf[4096];
307 READ_INTO_(in, buf, 4095, c, length, c != '/');
308 if (c != '/')
309 expected('/', c);
310 in.get(c);
311 length++;
312
313 kind = VALUE;
314 value.set_mask(buf);
315 }
316 break;
317 }
318
319 case '=':
320 in.get(c);
321 c = static_cast<char>(in.peek());
322 if (c == '~') {
323 in.get(c);
324 symbol[1] = c;
325 symbol[2] = '\0';
326 kind = MATCH;
327 length = 2;
328 break;
329 }
330 else if (c == '=') {
331 in.get(c);
332 symbol[1] = c;
333 symbol[2] = '\0';
334 kind = EQUAL;
335 length = 2;
336 break;
337 }
338 kind = ASSIGN;
339 break;
340
341 case '<':
342 in.get(c);
343 if (static_cast<char>(in.peek()) == '=') {
344 in.get(c);
345 symbol[1] = c;
346 symbol[2] = '\0';
347 kind = LESSEQ;
348 length = 2;
349 break;
350 }
351 kind = LESS;
352 break;
353
354 case '>':
355 in.get(c);
356 if (static_cast<char>(in.peek()) == '=') {
357 in.get(c);
358 symbol[1] = c;
359 symbol[2] = '\0';
360 kind = GREATEREQ;
361 length = 2;
362 break;
363 }
364 kind = GREATER;
365 break;
366
367 case '.':
368 in.get(c);
369 kind = DOT;
370 break;
371
372 case ',':
373 in.get(c);
374 kind = COMMA;
375 break;
376
377 case ';':
378 in.get(c);
379 kind = SEMI;
380 break;
381
382 default: {
383 std::istream::pos_type pos = in.tellg();
384
385 // First, check to see if it's a reserved word, such as: and or not
386 int result = parse_reserved_word(in);
387 if (std::isalpha(c) && result == 1)
388 break;
389
390 // If not, rewind back to the beginning of the word to scan it
391 // again. If the result was -1, it means no identifier was scanned
392 // so we don't have to rewind.
393 if (result == 0 || ! in.good()) {
394 in.clear();
395 in.seekg(pos, std::ios::beg);
396 if (in.fail())
397 throw_(parse_error, _("Failed to reset input stream"));
398 }
399
400 assert(in.good());
401 assert(! in.eof());
402 assert(static_cast<int>(in.tellg()) != -1);
403
404 // When in relaxed parsing mode, we want to migrate commodity flags
405 // so that any precision specified by the user updates the current
406 // maximum displayed precision.
407 parse_flags_t parse_flags;
408
409 parse_flags.add_flags(PARSE_NO_ANNOT);
410 if (pflags.has_flags(PARSE_NO_MIGRATE))
411 parse_flags.add_flags(PARSE_NO_MIGRATE);
412 if (pflags.has_flags(PARSE_NO_REDUCE))
413 parse_flags.add_flags(PARSE_NO_REDUCE);
414
415 try {
416 amount_t temp;
417 if (! temp.parse(in, parse_flags.plus_flags(PARSE_SOFT_FAIL))) {
418 in.clear();
419 in.seekg(pos, std::ios::beg);
420 if (in.fail() || ! in.good())
421 throw_(parse_error, _("Failed to reset input stream"));
422
423 c = static_cast<char>(in.peek());
424 if (c != -1) {
425 if (! std::isalpha(c) && c != '_')
426 expected('\0', c);
427
428 parse_ident(in);
429 } else {
430 throw_(parse_error, _("Unexpected EOF"));
431 }
432
433 if (! value.is_string() || value.as_string().empty()) {
434 kind = ERROR;
435 symbol[0] = c;
436 symbol[1] = '\0';
437 throw_(parse_error, _("Failed to parse identifier"));
438 }
439 } else {
440 if (! in.good()) {
441 in.clear();
442 in.seekg(0, std::ios::end);
443 if (in.fail())
444 throw_(parse_error, _("Failed to reset input stream"));
445 }
446
447 kind = VALUE;
448 value = temp;
449 length = static_cast<std::size_t>(in.tellg() - pos);
450 }
451 }
452 catch (const std::exception&) {
453 kind = ERROR;
454 length = static_cast<std::size_t>(in.tellg() - pos);
455 throw;
456 }
457 break;
458 }
459 }
460 }
461
rewind(std::istream & in)462 void expr_t::token_t::rewind(std::istream& in)
463 {
464 in.clear();
465 in.seekg(- int(length), std::ios::cur);
466 if (in.fail())
467 throw_(parse_error, _("Failed to rewind input stream"));
468 }
469
470
unexpected(const char wanted)471 void expr_t::token_t::unexpected(const char wanted)
472 {
473 kind_t prev_kind = kind;
474
475 kind = ERROR;
476
477 if (wanted == '\0') {
478 switch (prev_kind) {
479 case TOK_EOF:
480 throw_(parse_error, _("Unexpected end of expression"));
481 case IDENT:
482 throw_(parse_error, _f("Unexpected symbol '%1%'") % value);
483 case VALUE:
484 throw_(parse_error, _f("Unexpected value '%1%'") % value);
485 default:
486 throw_(parse_error, _f("Unexpected expression token '%1%'") % symbol);
487 }
488 } else {
489 switch (prev_kind) {
490 case TOK_EOF:
491 throw_(parse_error,
492 _f("Unexpected end of expression (wanted '%1%')") % wanted);
493 case IDENT:
494 throw_(parse_error,
495 _f("Unexpected symbol '%1%' (wanted '%2%')") % value % wanted);
496 case VALUE:
497 throw_(parse_error,
498 _f("Unexpected value '%1%' (wanted '%2%')") % value % wanted);
499 default:
500 throw_(parse_error, _f("Unexpected expression token '%1%' (wanted '%2%')")
501 % symbol % wanted);
502 }
503 }
504 }
505
expected(const char wanted,char c)506 void expr_t::token_t::expected(const char wanted, char c)
507 {
508 if (c == '\0' || c == -1) {
509 if (wanted == '\0' || wanted == -1)
510 throw_(parse_error, _("Unexpected end"));
511 else
512 throw_(parse_error, _f("Missing '%1%'") % wanted);
513 } else {
514 if (wanted == '\0' || wanted == -1)
515 throw_(parse_error, _f("Invalid char '%1%'") % c);
516 else
517 throw_(parse_error,
518 _f("Invalid char '%1%' (wanted '%2%')") % c % wanted);
519 }
520 }
521
expected(const kind_t wanted)522 void expr_t::token_t::expected(const kind_t wanted)
523 {
524 try {
525 if (wanted == expr_t::token_t::ERROR ||
526 wanted == expr_t::token_t::UNKNOWN)
527 throw_(parse_error, _f("Invalid token '%1%'") % *this);
528 else
529 throw_(parse_error,
530 _f("Invalid token '%1%' (wanted '%2%')") % *this % wanted);
531 }
532 catch (...) {
533 kind = ERROR;
534 throw;
535 }
536 }
537
operator <<(std::ostream & out,const expr_t::token_t::kind_t & kind)538 std::ostream& operator<<(std::ostream& out, const expr_t::token_t::kind_t& kind)
539 {
540 switch (kind) {
541 case expr_t::token_t::ERROR: out << "<error token>"; break;
542 case expr_t::token_t::VALUE: out << "<value>"; break;
543 case expr_t::token_t::IDENT: out << "<identifier>"; break;
544 case expr_t::token_t::MASK: out << "<regex mask>"; break;
545
546 case expr_t::token_t::LPAREN: out << "("; break;
547 case expr_t::token_t::RPAREN: out << ")"; break;
548 case expr_t::token_t::LBRACE: out << "{"; break;
549 case expr_t::token_t::RBRACE: out << "}"; break;
550
551 case expr_t::token_t::EQUAL: out << "=="; break;
552 case expr_t::token_t::NEQUAL: out << "!="; break;
553 case expr_t::token_t::LESS: out << "<"; break;
554 case expr_t::token_t::LESSEQ: out << "<="; break;
555 case expr_t::token_t::GREATER: out << ">"; break;
556 case expr_t::token_t::GREATEREQ: out << ">="; break;
557
558 case expr_t::token_t::ASSIGN: out << "="; break;
559 case expr_t::token_t::MATCH: out << "=~"; break;
560 case expr_t::token_t::NMATCH: out << "!~"; break;
561 case expr_t::token_t::MINUS: out << "-"; break;
562 case expr_t::token_t::PLUS: out << "+"; break;
563 case expr_t::token_t::STAR: out << "*"; break;
564 case expr_t::token_t::SLASH: out << "/"; break;
565 case expr_t::token_t::ARROW: out << "->"; break;
566 case expr_t::token_t::KW_DIV: out << "div"; break;
567
568 case expr_t::token_t::EXCLAM: out << "!"; break;
569 case expr_t::token_t::KW_AND: out << "and"; break;
570 case expr_t::token_t::KW_OR: out << "or"; break;
571 case expr_t::token_t::KW_MOD: out << "mod"; break;
572
573 case expr_t::token_t::KW_IF: out << "if"; break;
574 case expr_t::token_t::KW_ELSE: out << "else"; break;
575
576 case expr_t::token_t::QUERY: out << "?"; break;
577 case expr_t::token_t::COLON: out << ":"; break;
578
579 case expr_t::token_t::DOT: out << "."; break;
580 case expr_t::token_t::COMMA: out << ","; break;
581 case expr_t::token_t::SEMI: out << ";"; break;
582
583 case expr_t::token_t::TOK_EOF: out << "<end of input>"; break;
584 case expr_t::token_t::UNKNOWN: out << "<unknown>"; break;
585 }
586
587 return out;
588 }
589
operator <<(std::ostream & out,const expr_t::token_t & token)590 std::ostream& operator<<(std::ostream& out, const expr_t::token_t& token)
591 {
592 switch (token.kind) {
593 case expr_t::token_t::VALUE:
594 out << "<value '" << token.value << "'>";
595 break;
596 case expr_t::token_t::IDENT:
597 out << "<ident '" << token.value << "'>";
598 break;
599 case expr_t::token_t::MASK:
600 out << "<mask '" << token.value << "'>";
601 break;
602
603 default:
604 out << token.kind;
605 break;
606 }
607
608 return out;
609 }
610
611 } // namespace ledger
612