1 /*
2  * Copyright (c) 2003-2019, John Wiegley.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  * - Redistributions of source code must retain the above copyright
9  *   notice, this list of conditions and the following disclaimer.
10  *
11  * - Redistributions in binary form must reproduce the above copyright
12  *   notice, this list of conditions and the following disclaimer in the
13  *   documentation and/or other materials provided with the distribution.
14  *
15  * - Neither the name of New Artisans LLC nor the names of its
16  *   contributors may be used to endorse or promote products derived from
17  *   this software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include <system.hh>
33 
34 #include "token.h"
35 #include "parser.h"
36 
37 namespace ledger {
38 
parse_reserved_word(std::istream & in)39 int expr_t::token_t::parse_reserved_word(std::istream& in)
40 {
41   char c = static_cast<char>(in.peek());
42 
43   if (c == 'a' || c == 'd' || c == 'e' || c == 'f' ||
44       c == 'i' || c == 'o' || c == 'n' || c == 't') {
45     length = 0;
46 
47     char buf[6];
48     READ_INTO_(in, buf, 5, c, length, std::isalpha(c));
49 
50     switch (buf[0]) {
51     case 'a':
52       if (std::strcmp(buf, "and") == 0) {
53         symbol[0] = '&';
54         symbol[1] = '\0';
55         kind = KW_AND;
56         return 1;
57       }
58       break;
59 
60     case 'd':
61       if (std::strcmp(buf, "div") == 0) {
62         symbol[0] = '/';
63         symbol[1] = '\0';
64         kind = KW_DIV;
65         return 1;
66       }
67       break;
68 
69     case 'e':
70       if (std::strcmp(buf, "else") == 0) {
71         std::strcpy(symbol, "else");
72         kind = KW_ELSE;
73         return 1;
74       }
75       break;
76 
77     case 'f':
78       if (std::strcmp(buf, "false") == 0) {
79         std::strcpy(symbol, "false");
80         kind = VALUE;
81         value = false;
82         return 1;
83       }
84       break;
85 
86     case 'i':
87       if (std::strcmp(buf, "if") == 0) {
88         symbol[0] = 'i';
89         symbol[1] = 'f';
90         symbol[2] = '\0';
91         kind = KW_IF;
92         return 1;
93       }
94       break;
95 
96     case 'o':
97       if (std::strcmp(buf, "or") == 0) {
98         symbol[0] = '|';
99         symbol[1] = '\0';
100         kind = KW_OR;
101         return 1;
102       }
103       break;
104 
105     case 'n':
106       if (std::strcmp(buf, "not") == 0) {
107         symbol[0] = '!';
108         symbol[1] = '\0';
109         kind = EXCLAM;
110         return 1;
111       }
112       break;
113 
114     case 't':
115       if (std::strcmp(buf, "true") == 0) {
116         std::strcpy(symbol, "true");
117         kind = VALUE;
118         value = true;
119         return 1;
120       }
121       break;
122     }
123 
124     return 0;
125   }
126   return -1;
127 }
128 
parse_ident(std::istream & in)129 void expr_t::token_t::parse_ident(std::istream& in)
130 {
131   kind   = IDENT;
132   length = 0;
133 
134   char c, buf[256];
135   READ_INTO_(in, buf, 255, c, length, std::isalnum(c) || c == '_');
136 
137   value.set_string(buf);
138 }
139 
next(std::istream & in,const parse_flags_t & pflags)140 void expr_t::token_t::next(std::istream& in, const parse_flags_t& pflags)
141 {
142   if (in.eof()) {
143     kind = TOK_EOF;
144     return;
145   }
146   if (! in.good())
147     throw_(parse_error, _("Input stream no longer valid"));
148 
149   char c = peek_next_nonws(in);
150 
151   if (in.eof() || c == -1) {
152     kind = TOK_EOF;
153     return;
154   }
155   if (! in.good())
156     throw_(parse_error, _("Input stream no longer valid"));
157 
158   symbol[0] = c;
159   symbol[1] = '\0';
160 
161   length = 1;
162 
163   switch (c) {
164   case '&':
165     in.get(c);
166     c = static_cast<char>(in.peek());
167     if (c == '&') {
168       in.get(c);
169       kind = KW_AND;
170       length = 2;
171       break;
172     }
173     kind = KW_AND;
174     break;
175   case '|':
176     in.get(c);
177     c = static_cast<char>(in.peek());
178     if (c == '|') {
179       in.get(c);
180       kind = KW_OR;
181       length = 2;
182       break;
183     }
184     kind = KW_OR;
185     break;
186 
187   case '(':
188     in.get(c);
189     kind = LPAREN;
190     break;
191   case ')':
192     in.get(c);
193     kind = RPAREN;
194     break;
195 
196   case '[': {
197     in.get(c);
198 
199     char buf[256];
200     READ_INTO_(in, buf, 255, c, length, c != ']');
201     if (c != ']')
202       expected(']', c);
203 
204     in.get(c);
205     length++;
206 
207     date_interval_t timespan(buf);
208     optional<date_t> begin = timespan.begin();
209     if (! begin)
210       throw_(parse_error,
211              _("Date specifier does not refer to a starting date"));
212     kind  = VALUE;
213     value = *begin;
214     break;
215   }
216 
217   case '\'':
218   case '"': {
219     char delim;
220     in.get(delim);
221     char buf[4096];
222     READ_INTO_(in, buf, 4095, c, length, c != delim);
223     if (c != delim)
224       expected(delim, c);
225     in.get(c);
226     length++;
227     kind = VALUE;
228     value.set_string(buf);
229     break;
230   }
231 
232   case '{': {
233     in.get(c);
234     amount_t temp;
235     temp.parse(in, PARSE_NO_MIGRATE);
236     in.get(c);
237     if (c != '}')
238       expected('}', c);
239     length++;
240     kind  = VALUE;
241     value = temp;
242     break;
243   }
244 
245   case '!':
246     in.get(c);
247     c = static_cast<char>(in.peek());
248     if (c == '=') {
249       in.get(c);
250       symbol[1] = c;
251       symbol[2] = '\0';
252       kind = NEQUAL;
253       length = 2;
254       break;
255     }
256     else if (c == '~') {
257       in.get(c);
258       symbol[1] = c;
259       symbol[2] = '\0';
260       kind = NMATCH;
261       length = 2;
262       break;
263     }
264     kind = EXCLAM;
265     break;
266 
267   case '-':
268     in.get(c);
269     c = static_cast<char>(in.peek());
270     if (c == '>') {
271       in.get(c);
272       symbol[1] = c;
273       symbol[2] = '\0';
274       kind = ARROW;
275       length = 2;
276       break;
277     }
278     kind = MINUS;
279     break;
280   case '+':
281     in.get(c);
282     kind = PLUS;
283     break;
284 
285   case '*':
286     in.get(c);
287     kind = STAR;
288     break;
289 
290   case '?':
291     in.get(c);
292     kind = QUERY;
293     break;
294   case ':':
295     in.get(c);
296     c = static_cast<char>(in.peek());
297     kind = COLON;
298     break;
299 
300   case '/': {
301     in.get(c);
302     if (pflags.has_flags(PARSE_OP_CONTEXT)) { // operator context
303       kind = SLASH;
304     } else {                    // terminal context
305       // Read in the regexp
306       char buf[4096];
307       READ_INTO_(in, buf, 4095, c, length, c != '/');
308       if (c != '/')
309         expected('/', c);
310       in.get(c);
311       length++;
312 
313       kind = VALUE;
314       value.set_mask(buf);
315     }
316     break;
317   }
318 
319   case '=':
320     in.get(c);
321     c = static_cast<char>(in.peek());
322     if (c == '~') {
323       in.get(c);
324       symbol[1] = c;
325       symbol[2] = '\0';
326       kind = MATCH;
327       length = 2;
328       break;
329     }
330     else if (c == '=') {
331       in.get(c);
332       symbol[1] = c;
333       symbol[2] = '\0';
334       kind = EQUAL;
335       length = 2;
336       break;
337     }
338     kind = ASSIGN;
339     break;
340 
341   case '<':
342     in.get(c);
343     if (static_cast<char>(in.peek()) == '=') {
344       in.get(c);
345       symbol[1] = c;
346       symbol[2] = '\0';
347       kind = LESSEQ;
348       length = 2;
349       break;
350     }
351     kind = LESS;
352     break;
353 
354   case '>':
355     in.get(c);
356     if (static_cast<char>(in.peek()) == '=') {
357       in.get(c);
358       symbol[1] = c;
359       symbol[2] = '\0';
360       kind = GREATEREQ;
361       length = 2;
362       break;
363     }
364     kind = GREATER;
365     break;
366 
367   case '.':
368     in.get(c);
369     kind = DOT;
370     break;
371 
372   case ',':
373     in.get(c);
374     kind = COMMA;
375     break;
376 
377   case ';':
378     in.get(c);
379     kind = SEMI;
380     break;
381 
382   default: {
383     std::istream::pos_type pos = in.tellg();
384 
385     // First, check to see if it's a reserved word, such as: and or not
386     int result = parse_reserved_word(in);
387     if (std::isalpha(c) && result == 1)
388       break;
389 
390     // If not, rewind back to the beginning of the word to scan it
391     // again.  If the result was -1, it means no identifier was scanned
392     // so we don't have to rewind.
393     if (result == 0 || ! in.good()) {
394       in.clear();
395       in.seekg(pos, std::ios::beg);
396       if (in.fail())
397         throw_(parse_error, _("Failed to reset input stream"));
398     }
399 
400     assert(in.good());
401     assert(! in.eof());
402     assert(static_cast<int>(in.tellg()) != -1);
403 
404     // When in relaxed parsing mode, we want to migrate commodity flags
405     // so that any precision specified by the user updates the current
406     // maximum displayed precision.
407     parse_flags_t parse_flags;
408 
409     parse_flags.add_flags(PARSE_NO_ANNOT);
410     if (pflags.has_flags(PARSE_NO_MIGRATE))
411       parse_flags.add_flags(PARSE_NO_MIGRATE);
412     if (pflags.has_flags(PARSE_NO_REDUCE))
413       parse_flags.add_flags(PARSE_NO_REDUCE);
414 
415     try {
416       amount_t temp;
417       if (! temp.parse(in, parse_flags.plus_flags(PARSE_SOFT_FAIL))) {
418         in.clear();
419         in.seekg(pos, std::ios::beg);
420         if (in.fail() || ! in.good())
421           throw_(parse_error, _("Failed to reset input stream"));
422 
423         c = static_cast<char>(in.peek());
424         if (c != -1) {
425           if (! std::isalpha(c) && c != '_')
426             expected('\0', c);
427 
428           parse_ident(in);
429         } else {
430           throw_(parse_error, _("Unexpected EOF"));
431         }
432 
433         if (! value.is_string() || value.as_string().empty()) {
434           kind = ERROR;
435           symbol[0] = c;
436           symbol[1] = '\0';
437           throw_(parse_error, _("Failed to parse identifier"));
438         }
439       } else {
440         if (! in.good()) {
441           in.clear();
442           in.seekg(0, std::ios::end);
443           if (in.fail())
444             throw_(parse_error, _("Failed to reset input stream"));
445         }
446 
447         kind   = VALUE;
448         value  = temp;
449         length = static_cast<std::size_t>(in.tellg() - pos);
450       }
451     }
452     catch (const std::exception&) {
453       kind   = ERROR;
454       length = static_cast<std::size_t>(in.tellg() - pos);
455       throw;
456     }
457     break;
458   }
459   }
460 }
461 
rewind(std::istream & in)462 void expr_t::token_t::rewind(std::istream& in)
463 {
464   in.clear();
465   in.seekg(- int(length), std::ios::cur);
466   if (in.fail())
467     throw_(parse_error, _("Failed to rewind input stream"));
468 }
469 
470 
unexpected(const char wanted)471 void expr_t::token_t::unexpected(const char wanted)
472 {
473   kind_t prev_kind = kind;
474 
475   kind = ERROR;
476 
477   if (wanted == '\0') {
478     switch (prev_kind) {
479     case TOK_EOF:
480       throw_(parse_error, _("Unexpected end of expression"));
481     case IDENT:
482       throw_(parse_error, _f("Unexpected symbol '%1%'") % value);
483     case VALUE:
484       throw_(parse_error, _f("Unexpected value '%1%'") % value);
485     default:
486       throw_(parse_error, _f("Unexpected expression token '%1%'") % symbol);
487     }
488   } else {
489     switch (prev_kind) {
490     case TOK_EOF:
491       throw_(parse_error,
492              _f("Unexpected end of expression (wanted '%1%')") % wanted);
493     case IDENT:
494       throw_(parse_error,
495              _f("Unexpected symbol '%1%' (wanted '%2%')") % value % wanted);
496     case VALUE:
497       throw_(parse_error,
498              _f("Unexpected value '%1%' (wanted '%2%')") % value % wanted);
499     default:
500       throw_(parse_error, _f("Unexpected expression token '%1%' (wanted '%2%')")
501              % symbol % wanted);
502     }
503   }
504 }
505 
expected(const char wanted,char c)506 void expr_t::token_t::expected(const char wanted, char c)
507 {
508   if (c == '\0' || c == -1) {
509     if (wanted == '\0' || wanted == -1)
510       throw_(parse_error, _("Unexpected end"));
511     else
512       throw_(parse_error, _f("Missing '%1%'") % wanted);
513   } else {
514     if (wanted == '\0' || wanted == -1)
515       throw_(parse_error, _f("Invalid char '%1%'") % c);
516     else
517       throw_(parse_error,
518              _f("Invalid char '%1%' (wanted '%2%')") % c % wanted);
519   }
520 }
521 
expected(const kind_t wanted)522 void expr_t::token_t::expected(const kind_t wanted)
523 {
524   try {
525     if (wanted == expr_t::token_t::ERROR ||
526         wanted == expr_t::token_t::UNKNOWN)
527       throw_(parse_error, _f("Invalid token '%1%'") % *this);
528     else
529       throw_(parse_error,
530              _f("Invalid token '%1%' (wanted '%2%')") % *this % wanted);
531   }
532   catch (...) {
533     kind = ERROR;
534     throw;
535   }
536 }
537 
operator <<(std::ostream & out,const expr_t::token_t::kind_t & kind)538 std::ostream& operator<<(std::ostream& out, const expr_t::token_t::kind_t& kind)
539 {
540   switch (kind) {
541   case expr_t::token_t::ERROR:     out << "<error token>"; break;
542   case expr_t::token_t::VALUE:     out << "<value>"; break;
543   case expr_t::token_t::IDENT:     out << "<identifier>"; break;
544   case expr_t::token_t::MASK:      out << "<regex mask>"; break;
545 
546   case expr_t::token_t::LPAREN:    out << "("; break;
547   case expr_t::token_t::RPAREN:    out << ")"; break;
548   case expr_t::token_t::LBRACE:    out << "{"; break;
549   case expr_t::token_t::RBRACE:    out << "}"; break;
550 
551   case expr_t::token_t::EQUAL:     out << "=="; break;
552   case expr_t::token_t::NEQUAL:    out << "!="; break;
553   case expr_t::token_t::LESS:      out << "<"; break;
554   case expr_t::token_t::LESSEQ:    out << "<="; break;
555   case expr_t::token_t::GREATER:   out << ">"; break;
556   case expr_t::token_t::GREATEREQ: out << ">="; break;
557 
558   case expr_t::token_t::ASSIGN:    out << "="; break;
559   case expr_t::token_t::MATCH:     out << "=~"; break;
560   case expr_t::token_t::NMATCH:    out << "!~"; break;
561   case expr_t::token_t::MINUS:     out << "-"; break;
562   case expr_t::token_t::PLUS:      out << "+"; break;
563   case expr_t::token_t::STAR:      out << "*"; break;
564   case expr_t::token_t::SLASH:     out << "/"; break;
565   case expr_t::token_t::ARROW:     out << "->"; break;
566   case expr_t::token_t::KW_DIV:    out << "div"; break;
567 
568   case expr_t::token_t::EXCLAM:    out << "!"; break;
569   case expr_t::token_t::KW_AND:    out << "and"; break;
570   case expr_t::token_t::KW_OR:     out << "or"; break;
571   case expr_t::token_t::KW_MOD:    out << "mod"; break;
572 
573   case expr_t::token_t::KW_IF:     out << "if"; break;
574   case expr_t::token_t::KW_ELSE:   out << "else"; break;
575 
576   case expr_t::token_t::QUERY:     out << "?"; break;
577   case expr_t::token_t::COLON:     out << ":"; break;
578 
579   case expr_t::token_t::DOT:       out << "."; break;
580   case expr_t::token_t::COMMA:     out << ","; break;
581   case expr_t::token_t::SEMI:      out << ";"; break;
582 
583   case expr_t::token_t::TOK_EOF:   out << "<end of input>"; break;
584   case expr_t::token_t::UNKNOWN:   out << "<unknown>"; break;
585   }
586 
587   return out;
588 }
589 
operator <<(std::ostream & out,const expr_t::token_t & token)590 std::ostream& operator<<(std::ostream& out, const expr_t::token_t& token)
591 {
592   switch (token.kind) {
593   case expr_t::token_t::VALUE:
594     out << "<value '" << token.value << "'>";
595     break;
596   case expr_t::token_t::IDENT:
597     out << "<ident '" << token.value << "'>";
598     break;
599   case expr_t::token_t::MASK:
600     out << "<mask '" << token.value << "'>";
601     break;
602 
603   default:
604     out << token.kind;
605     break;
606   }
607 
608   return out;
609 }
610 
611 } // namespace ledger
612