1 /** @file lexer.cpp
2 *
3 * Implementation of GiNaC's lexer. */
4
5 /*
6 * GiNaC Copyright (C) 1999-2022 Johannes Gutenberg University Mainz, Germany
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 */
22
23 #include "lexer.h"
24 #include "compiler.h"
25
26 #include <iostream>
27 #include <sstream>
28 #include <string>
29 #include <cstdio>
30
31 namespace GiNaC {
32
/// Consume characters up to and including the next line terminator
/// ('\n' or '\r') or end of stream; returns the character that stopped
/// the scan.
static int skipline(std::istream* s);
/// Starting with the look-ahead character @a c, skip whitespace and return
/// the first non-whitespace character; @a line is incremented for every
/// '\n' that is skipped.
static int skipspace(std::istream* s, int c, std::size_t& line);
/// Check whether the identifier is a predefined literal
/// (I, Pi, Euler or Catalan).
static bool literal_p(const std::string& name);
39
40 /// gettok - Return the next token from standard input.
gettok()41 int lexer::gettok()
42 {
43 // Skip any whitespace.
44 c = skipspace(input, c, line_num);
45
46 // identifier: [a-zA-Z][a-zA-Z0-9_]*
47 if (isalpha(c)) {
48 str = c;
49 do {
50 c = input->get();
51 if ( isalnum(c) || c=='_' )
52 str += c;
53 else
54 break;
55 } while (true);
56 if (unlikely(literal_p(str)))
57 return token_type::literal;
58 else
59 return token_type::identifier;
60 }
61
62 // Number: [0-9]+([.][0-9]*(eE[+-][0-9]+)*)*
63 if (isdigit(c) || c == '.') {
64 str = "";
65 do {
66 str += c;
67 c = input->get();
68 } while (isdigit(c) || c == '.');
69 if (c == 'E' || c == 'e') {
70 str += 'E';
71 c = input->get();
72 if (isdigit(c))
73 str += '+';
74 do {
75 str += c;
76 c = input->get();
77 } while (isdigit(c));
78 }
79 return token_type::number;
80 }
81
82 // Comment until end of line.
83 if (c == '#') {
84 c = skipline(input);
85 ++line_num;
86 if (c != EOF)
87 return gettok();
88 }
89
90 // Check for end of file. Don't eat the EOF.
91 if (c == EOF)
92 return token_type::eof;
93
94 // Otherwise, just return the character as its ascii value.
95 int current = c;
96 c = input->get();
97 return current;
98 }
99
/// Discard the remainder of the current line.
///
/// Reads from @a s until a line terminator ('\n' or '\r') or the end of the
/// stream has been consumed.
///
/// @return the character that ended the scan: '\n', '\r' or EOF.
static int skipline(std::istream* s)
{
	for (;;) {
		const int ch = s->get();
		if (ch == EOF || ch == '\n' || ch == '\r')
			return ch;
	}
}
108
/// Advance past whitespace.
///
/// @param s    stream to read further characters from
/// @param c    current look-ahead character (already taken from @a s)
/// @param line line counter; incremented once for every '\n' skipped
/// @return the first non-whitespace character (possibly @a c itself, or
///         EOF when the stream ends first)
static int skipspace(std::istream* s, int c, std::size_t& line)
{
	for (; isspace(c); c = s->get()) {
		if (c == '\n')
			++line;
	}
	return c;
}
118
/// Tell whether @a name is one of the predefined literals.
///
/// The comparison is exact and case-sensitive.
///
/// @return true for "I", "Pi", "Euler" and "Catalan", false otherwise.
static bool literal_p(const std::string& name)
{
	static const char* const predefined[] = { "I", "Pi", "Euler", "Catalan" };
	for (const char* candidate : predefined) {
		if (name == candidate)
			return true;
	}
	return false;
}
132
lexer(std::istream * in,std::ostream * out,std::ostream * err)133 lexer::lexer(std::istream* in, std::ostream* out, std::ostream* err)
134 {
135 if (in)
136 input = in;
137 else
138 in = &std::cin;
139
140 if (out)
141 output = out;
142 else
143 output = &std::cout;
144
145 if (err)
146 error = err;
147 else
148 error = &std::cerr;
149
150 c = ' ';
151 str = "";
152 line_num = 0;
153 column = 0;
154 }
155
~lexer()156 lexer::~lexer() { }
157
switch_input(std::istream * in)158 void lexer::switch_input(std::istream* in)
159 {
160 input = in;
161 line_num = 0;
162 column = 0;
163 c = ' ';
164 }
165
166 /// Symbolic name of current token (for error reporting)
tok2str(const int tok) const167 std::string lexer::tok2str(const int tok) const
168 {
169 switch (tok) {
170 case lexer::token_type::identifier:
171 case lexer::token_type::number:
172 return std::string("\"") + str + "\"";
173 case lexer::token_type::eof:
174 return std::string("EOF");
175 default:
176 return std::string("\"") + char(tok) + "\"";
177 }
178 }
179
180 } // namespace GiNaC
181