1 /*************************************************************************
2 ** InputReader.cpp                                                      **
3 **                                                                      **
4 ** This file is part of dvisvgm -- the DVI to SVG converter             **
5 ** Copyright (C) 2005-2015 Martin Gieseking <martin.gieseking@uos.de>   **
6 **                                                                      **
7 ** This program is free software; you can redistribute it and/or        **
8 ** modify it under the terms of the GNU General Public License as       **
9 ** published by the Free Software Foundation; either version 3 of       **
10 ** the License, or (at your option) any later version.                  **
11 **                                                                      **
12 ** This program is distributed in the hope that it will be useful, but  **
13 ** WITHOUT ANY WARRANTY; without even the implied warranty of           **
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the         **
15 ** GNU General Public License for more details.                         **
16 **                                                                      **
17 ** You should have received a copy of the GNU General Public License    **
18 ** along with this program; if not, see <http://www.gnu.org/licenses/>. **
19 *************************************************************************/
20 
21 #include <config.h>
22 #include <cmath>
23 #include <functional>
24 #include <vector>
25 #include "InputReader.h"
26 
27 using namespace std;
28 
29 
30 /** Skips n characters. */
skip(size_t n)31 void InputReader::skip (size_t n) {
32 	while (n-- > 0)
33 		get();
34 }
35 
36 
37 /** Moves the buffer pointer to the next non-space character. A following call
38  *  of get() returns this character. */
skipSpace()39 void InputReader::skipSpace () {
40 	while (isspace(peek()))
41 		get();
42 }
43 
44 
45 /** Tries to find a given string and skips all characters preceding that string.
46  *  @param[in] s string to look for (must not be longer than the maximal buffer size)
47  *  @param[in] consume if true, the buffer pointer is moved to the first character following string s
48  *  @return true if s was found */
skipUntil(const char * s,bool consume)49 bool InputReader::skipUntil (const char *s, bool consume) {
50 	bool found = false;
51 	while (!eof() && !(found = check(s, consume)))
52 		get();
53 	return found;
54 }
55 
56 
57 /** Looks for the first occurrence of a given character.
58  *  @param[in] c character to lookup
59  *  @return position of character relative to current location, -1 if character was not found */
find(char c) const60 int InputReader::find (char c) const {
61 	int pos = 0;
62 	int cc;
63 	while ((cc = peek(pos)) >= 0 && cc != c)
64 		pos++;
65 	return cc < 0 ? -1 : pos;
66 }
67 
68 
69 /** Checks if the next characters to be read match a given string.
70  *  @param[in] s string to be matched
71  *  @param[in] consume if true, the characters of the matched string are skipped
72  *  @return true if s matches */
check(const char * s,bool consume)73 bool InputReader::check (const char *s, bool consume) {
74 	size_t count = 0;
75 	for (const char *p=s; *p; p++) {
76 		if (peek(count++) != *p)
77 			return false;
78 	}
79 	if (consume)
80 		skip(count);
81 	return true;
82 }
83 
84 
compare(const char * s,bool consume)85 int InputReader::compare (const char *s, bool consume) {
86 	size_t count = 0;
87 	for (const char *p=s; *p; p++) {
88 		int c = peek(count++);
89 		if (c != *p)
90 			return c < *p ? -1 : 1;
91 	}
92 	int c = peek(count);
93 	if (c < 0 || !isspace(c))
94 		return 1;
95 	if (consume)
96 		skip(count);
97 	return 0;
98 }
99 
100 
101 /** Reads an integer from the buffer. All characters that are part of
102  *  the read integer constant are skipped. If this function returns false,
103  *  the buffer pointer points to the same position as before the function call.
104  *  @param[out] val contains the read integer value on success
105  *  @param[in] accept_sign if false, only positive integers (without sign) are accepted
106  *  @return true if integer could be read */
parseInt(int & val,bool accept_sign)107 bool InputReader::parseInt (int &val, bool accept_sign) {
108 	val = 0;
109 	int fac=1;
110 	char sign;    // explicitly given sign
111 	if (accept_sign && ((sign = peek()) == '+' || sign == '-')) {
112 		if (isdigit(peek(1))) {
113 			get();  // skip sign
114 			if (sign == '-')
115 				fac = -1;
116 		}
117 		else
118 			return false;
119 	}
120 	else if (!isdigit(peek()))
121 		return false;
122 
123 	while (isdigit(peek()))
124 		val = val*10 + (get()-'0');
125 	val *= fac;
126 	return true;
127 }
128 
129 
parseUInt(unsigned & val)130 bool InputReader::parseUInt (unsigned &val) {
131 	val = 0;
132 	if (!isdigit(peek()))
133 		return false;
134 	while (isdigit(peek()))
135 		val = val*10 + (get()-'0');
136 	return true;
137 }
138 
139 
parseInt(int base,int & val)140 bool InputReader::parseInt (int base, int &val) {
141 	if (base < 2 || base > 32)
142 		return false;
143 
144 	const char *digits = "0123456789abcdefghijklmnopqrstuvwxyz";
145 	const char maxdigit = digits[base-1];
146 	char c;
147 	if (!isalnum(c = tolower(peek())) || c > maxdigit)
148 		return false;
149 
150 	val = 0;
151 	while (isalnum(c = tolower(peek())) && c <= maxdigit) {
152 		get();
153 		int digit = c - (c <= '9' ? '0' : 'a'-10);
154 		val = val*base + digit;
155 	}
156 	return true;
157 }
158 
159 
160 /** Reads a double from the buffer. All characters that are part of
161  *  the read double constant are skipped. If this function returns false,
162  *  the buffer pointer points to the same position as before the function call.
163  *  @param[out] val contains the read double value on success
164  *  @return number details: 0=no number, 'i'=integer, 'f'=floating point number */
parseDouble(double & val)165 char InputReader::parseDouble (double &val) {
166 	int fac=1;
167 	int int_part=0;
168 	bool is_float = false;
169 	skipSpace();
170 	char sign = peek();
171 	if (parseInt(int_part)) { // match [+-]?[0-9]+\.?
172 		if (peek() == '.') {
173 			get();
174 			is_float = true;
175 		}
176 		if (int_part < 0 || sign == '-') {
177 			fac = -1;
178 			int_part = -int_part;
179 		}
180 	}
181 	else {  // match [+-]?\.
182 		char sign;   // explicitly given sign
183 		if ((sign = peek()) == '+' || sign == '-') { // match [+-]?\.[0-9]
184 			if (peek(1) != '.' || !isdigit(peek(2)))
185 				return 0;
186 			if (sign == '-')
187 				fac = -1;
188 			skip(2);  // skip sign and dot
189 		}
190 		else if (peek() == '.' && isdigit(peek(1)))
191 			get();
192 		else
193 			return 0;
194 		is_float = true;
195 	}
196 	// parse fractional part
197 	double frac_part=0.0;
198 	for (double u=10; isdigit(peek()); u*=10)
199 		frac_part += (get()-'0')/u;
200 	val = (int_part + frac_part) * fac;
201 	// parse exponent
202 	char c;
203 	if (tolower(peek()) == 'e' && (isdigit(c=peek(1)) || ((c == '+' || c == '-') && isdigit(peek(2))))) {
204 		get(); // skip 'e'
205 		int exp;
206 		parseInt(exp);
207 		val *= pow(10.0, exp);
208 		is_float = true;
209 	}
210 	return is_float ? 'f' : 'i';
211 }
212 
213 
214 /** Reads an integer value from the buffer. If no valid integer constant
215  *  could be found at the current position 0 is returned. */
getInt()216 int InputReader::getInt () {
217 	skipSpace();
218 	int val;
219 	return parseInt(val) ? val : 0;
220 }
221 
222 
223 /** Reads an double value from the buffer. If no valid double constant
224  *  could be found at the current position 0 is returned. */
getDouble()225 double InputReader::getDouble () {
226 	skipSpace();
227 	double val;
228 	return parseDouble(val) ? val : 0.0;
229 }
230 
231 
232 /** Reads a string that consists of alphabetic letters only. Reading stops as
233  *  soon as a non-alphabetic character is found or EOF is reached. */
getWord()234 string InputReader::getWord () {
235 	string ret;
236 	skipSpace();
237 	while (isalpha(peek()))
238 		ret += get();
239 	return ret;
240 }
241 
242 
243 /** Reads a single punctuation character.
244  *  @return the read character or 0 if there's no punctuation character at the current position */
getPunct()245 char InputReader::getPunct () {
246 	skipSpace();
247 	if (ispunct(peek()))
248 		return get();
249 	return 0;
250 }
251 
252 
253 /** Reads a string delimited by a given quotation character.
254  *  Before reading the string, all leading whitespace is skipped. Then, the function checks
255  *  for the given quotation character. If it is found, all characters until the second
256  *  appearance of the quotation char are appended to the result. Otherwise, an empty string
257  *  is returned. If the quotation character is 0, the behavior of this function is identical to
258  *  a call of getString().
259  *  @param[in] quotechar the quotation character bounding the string to be read
260  *  @return the string read */
getQuotedString(char quotechar)261 string InputReader::getQuotedString (char quotechar) {
262 	if (quotechar == 0)
263 		return getString();
264 
265 	string ret;
266 	skipSpace();
267 	if (peek() == quotechar) {
268 		get();
269 		while (!eof() && peek() != quotechar)
270 			ret += get();
271 		get();
272 	}
273 	return ret;
274 }
275 
276 
277 /** Reads a string delimited by whitespace and/or invisible characters.
278  *  Before reading the string, all leading whitespace is skipped. Then, the function adds
279  *  all printable characters to the result until a whitespace, an unprintable character, or
280  *  EOF is found.
281  *  @return the string read */
getString()282 string InputReader::getString () {
283 	string ret;
284 	skipSpace();
285 	while (!eof() && !isspace(peek()) && isprint(peek()))
286 		ret += get();
287 	return ret;
288 }
289 
290 
291 /** Reads a given number of characters and returns the resulting string.
292  *  @param n number of character to read
293  *  @return the string read */
getString(size_t n)294 string InputReader::getString (size_t n) {
295 	string ret;
296 	while (n-- > 0)
297 		ret += get();
298 	return ret;
299 }
300 
301 
getLine()302 string InputReader::getLine () {
303 	string ret;
304 	skipSpace();
305 	while (!eof() && peek() > 0 && peek() != '\n')
306 		ret += get();
307 	// trim trailing whitespace
308 	ret.erase(std::find_if(ret.rbegin(), ret.rend(), not1(ptr_fun<int, int>(isspace))).base(), ret.end());
309 	return ret;
310 }
311 
312 
313 /** Parses a sequence of key-value pairs of the form KEY=VALUE or KEY="VALUE"
314  *  @param[out] attr the scanned atributes
315  *  @param[in] quotechar quote character used to enclose the attribute values
316  *  @return number of attributes scanned */
parseAttributes(map<string,string> & attr,char quotechar)317 int InputReader::parseAttributes (map<string,string> &attr, char quotechar) {
318 	bool ready=false;
319 	while (!eof() && !ready) {
320 		string key;
321 		skipSpace();
322 		while (isalnum(peek()))
323 			key += get();
324 		skipSpace();
325 		if (peek() == '=') {
326 			get();
327 			skipSpace();
328 			string val = getQuotedString(quotechar);
329 			attr[key] = val;
330 		}
331 		else
332 			ready = true;
333 	}
334 	return attr.size();
335 }
336 
337 //////////////////////////////////////////
338 
339 
peek(size_t n) const340 int StreamInputReader::peek (size_t n) const {
341 	if (n == 0)
342 		return peek();
343 	vector<char> chars(n);
344 	_is.read(&chars[0], n);
345 	int ret = peek();
346 	for (int i=n-1; i >= 0; i--)
347 		_is.putback(chars[i]);
348 	return ret;
349 }
350