1 /* Copyright (C) 2001-2006 Artifex Software, Inc.
2    All Rights Reserved.
3 
4    This software is provided AS-IS with no warranty, either express or
5    implied.
6 
7    This software is distributed under license and may not be copied, modified
8    or distributed except as expressly authorized under the terms of that
9    license.  Refer to licensing information at http://www.artifex.com/
10    or contact Artifex Software, Inc.,  7 Mt. Lassen Drive - Suite A-134,
11    San Rafael, CA  94903, U.S.A., +1(415)492-9861, for further information.
12 */
13 
14 /* $Id: iscannum.c 10571 2009-12-30 17:30:30Z alexcher $ */
15 /* Number scanner for Ghostscript interpreter */
16 #include "math_.h"
17 #include "ghost.h"
18 #include "ierrors.h"
19 #include "scommon.h"
20 #include "iscan.h"
21 #include "iscannum.h"		/* defines interface */
22 #include "scanchar.h"
23 #include "store.h"
24 
25 /*
26  * Warning: this file has a "spaghetti" control structure.  But since this
27  * code accounts for over 10% of the execution time of some PostScript
28  * files, this is one of the few places we feel this is justified.
29  */
30 
31 /*
32  * Scan a number.  If the number consumes the entire string, return 0;
33  * if not, set *psp to the first character beyond the number and return 1.
34  */
35 int
scan_number(const byte * str,const byte * end,int sign,ref * pref,const byte ** psp,int scanner_options)36 scan_number(const byte * str, const byte * end, int sign,
37 	    ref * pref, const byte ** psp, int scanner_options)
38 {
39     const byte *sp = str;
40 #define GET_NEXT(cvar, sp, end_action)\
41   if (sp >= end) { end_action; } else cvar = *sp++
42 
43     /*
44      * Powers of 10 up to 6 can be represented accurately as
45      * a single-precision float.
46      */
47 #define NUM_POWERS_10 6
48     static const float powers_10[NUM_POWERS_10 + 1] = {
49 	1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6
50     };
51     static const double neg_powers_10[NUM_POWERS_10 + 1] = {
52 	1e0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6
53     };
54 
55     int ival;
56     double dval;
57     int exp10;
58     int code = 0;
59     int c, d;
60     uint max_scan; /* max signed or unsigned int */
61     const byte *const decoder = scan_char_decoder;
62 #define IS_DIGIT(d, c)\
63   ((d = decoder[c]) < 10)
64 #define WOULD_OVERFLOW(val, d, maxv)\
65   (val >= maxv / 10 && (val > maxv / 10 || d > (int)(maxv % 10)))
66 
67     GET_NEXT(c, sp, return_error(e_syntaxerror));
68     if (!IS_DIGIT(d, c)) {
69 	if (c != '.')
70 	    return_error(e_syntaxerror);
71 	/* Might be a number starting with '.'. */
72 	GET_NEXT(c, sp, return_error(e_syntaxerror));
73 	if (!IS_DIGIT(d, c))
74 	    return_error(e_syntaxerror);
75 	ival = 0;
76 	goto i2r;
77     }
78     /* Accumulate an integer in ival. */
79     /* Do up to 4 digits without a loop, */
80     /* since we know this can't overflow and since */
81     /* most numbers have 4 (integer) digits or fewer. */
82     ival = d;
83     if (end - sp >= 3) {	/* just check once */
84 	if (!IS_DIGIT(d, (c = *sp))) {
85 	    sp++;
86 	    goto ind;
87 	}
88 	ival = ival * 10 + d;
89 	if (!IS_DIGIT(d, (c = sp[1]))) {
90 	    sp += 2;
91 	    goto ind;
92 	}
93 	ival = ival * 10 + d;
94 	sp += 3;
95 	if (!IS_DIGIT(d, (c = sp[-1])))
96 	    goto ind;
97 	ival = ival * 10 + d;
98     }
99     max_scan = scanner_options & SCAN_PDF_UNSIGNED && sign >= 0 ? ~0 : max_int;
100     for (;; ival = ival * 10 + d) {
101 	GET_NEXT(c, sp, goto iret);
102 	if (!IS_DIGIT(d, c))
103 	    break;
104         if (WOULD_OVERFLOW(((unsigned)ival), d, max_scan)) {
105 	    if (ival == max_int / 10 && d == (max_int % 10) + 1 && sign < 0) {
106 		GET_NEXT(c, sp, c = EOFC);
107 		dval = -(double)min_int;
108 		if (c == 'e' || c == 'E') {
109 		    exp10 = 0;
110 		    goto fs;
111 		} else if (c == '.') {
112                     GET_NEXT(c, sp, c = EOFC);
113 		    exp10 = 0;
114 		    goto fd;
115                 } else if (!IS_DIGIT(d, c)) {
116 		    ival = min_int;
117 		    break;
118 		}
119 	    } else
120 		dval = ival;
121 	    goto l2d;
122         }
123     }
124   ind:				/* We saw a non-digit while accumulating an integer in ival. */
125     switch (c) {
126 	case '.':
127 	    GET_NEXT(c, sp, c = EOFC);
128 	    goto i2r;
129 	default:
130 	    *psp = sp;
131 	    code = 1;
132             break;
133         case EOFC:
134 	    break;
135 	case 'e':
136 	case 'E':
137 	    if (sign < 0)
138 		ival = -ival;
139 	    dval = ival;
140 	    exp10 = 0;
141 	    goto fe;
142 	case '#':
143 	    {
144 		const int radix = ival;
145 		uint uval = 0, imax;
146 
147 		if (sign || radix < min_radix || radix > max_radix)
148 		    return_error(e_syntaxerror);
149 		/* Avoid multiplies for power-of-2 radix. */
150 		if (!(radix & (radix - 1))) {
151 		    int shift;
152 
153 		    switch (radix) {
154 			case 2:
155 			    shift = 1, imax = max_uint >> 1;
156 			    break;
157 			case 4:
158 			    shift = 2, imax = max_uint >> 2;
159 			    break;
160 			case 8:
161 			    shift = 3, imax = max_uint >> 3;
162 			    break;
163 			case 16:
164 			    shift = 4, imax = max_uint >> 4;
165 			    break;
166 			case 32:
167 			    shift = 5, imax = max_uint >> 5;
168 			    break;
169 			default:	/* can't happen */
170 			    return_error(e_rangecheck);
171 		    }
172 		    for (;; uval = (uval << shift) + d) {
173 			GET_NEXT(c, sp, break);
174 			d = decoder[c];
175 			if (d >= radix) {
176 			    *psp = sp;
177 			    code = 1;
178 			    break;
179 			}
180 			if (uval > imax)
181 			    return_error(e_limitcheck);
182 		    }
183 		} else {
184 		    int irem = max_uint % radix;
185 
186 		    imax = max_uint / radix;
187 		    for (;; uval = uval * radix + d) {
188 			GET_NEXT(c, sp, break);
189 			d = decoder[c];
190 			if (d >= radix) {
191 			    *psp = sp;
192 			    code = 1;
193 			    break;
194 			}
195 			if (uval >= imax &&
196 			    (uval > imax || d > irem)
197 			    )
198 			    return_error(e_limitcheck);
199 		    }
200 		}
201 		make_int(pref, uval);
202 		return code;
203 	    }
204     }
205 iret:
206     make_int(pref, (sign < 0 ? -ival : ival));
207     return code;
208 
209     /* Accumulate a double in dval. */
210 l2d:
211     exp10 = 0;
212     for (;;) {
213 	dval = dval * 10 + d;
214 	GET_NEXT(c, sp, c = EOFC);
215 	if (!IS_DIGIT(d, c))
216 	    break;
217     }
218     switch (c) {
219 	case '.':
220 	    GET_NEXT(c, sp, c = EOFC);
221 	    exp10 = 0;
222 	    goto fd;
223 	default:
224 	    *psp = sp;
225 	    code = 1;
226 	    /* falls through */
227 	case EOFC:
228 	    if (sign < 0)
229 		dval = -dval;
230 	    goto rret;
231 	case 'e':
232 	case 'E':
233 	    exp10 = 0;
234 	    goto fs;
235 	case '#':
236 	    return_error(e_syntaxerror);
237     }
238 
239     /* We saw a '.' while accumulating an integer in ival. */
240 i2r:
241     exp10 = 0;
242     while (IS_DIGIT(d, c) || c == '-') {
243 	/*
244 	 * PostScript gives an error on numbers with a '-' following a '.'
245 	 * Adobe Acrobat Reader (PDF) apparently doesn't treat this as an
246 	 * error. Experiments show that the numbers following the '-' are
247 	 * ignored, so we swallow the fractional part. SCAN_PDF_INV_NUM
248 	 *  enables this compatibility kloodge.
249 	 */
250 	if (c == '-') {
251 	    if ((SCAN_PDF_INV_NUM & scanner_options) == 0)
252 		break;
253 	    do {
254 		GET_NEXT(c, sp, c = EOFC);
255 	    } while (IS_DIGIT(d, c));
256 	    break;
257 	}
258 	if (WOULD_OVERFLOW(ival, d, max_int)) {
259 	    dval = ival;
260 	    goto fd;
261 	}
262 	ival = ival * 10 + d;
263 	exp10--;
264 	GET_NEXT(c, sp, c = EOFC);
265     }
266     if (sign < 0)
267 	ival = -ival;
268     /* Take a shortcut for the common case */
269     if (!(c == 'e' || c == 'E' || exp10 < -NUM_POWERS_10)) {	/* Check for trailing garbage */
270 	if (c != EOFC)
271 	    *psp = sp, code = 1;
272 	make_real(pref, ival * neg_powers_10[-exp10]);
273 	return code;
274     }
275     dval = ival;
276     goto fe;
277 
278     /* Now we are accumulating a double in dval. */
279 fd:
280     while (IS_DIGIT(d, c)) {
281 	dval = dval * 10 + d;
282 	exp10--;
283 	GET_NEXT(c, sp, c = EOFC);
284     }
285 fs:
286     if (sign < 0)
287 	dval = -dval;
288 fe:
289     /* Now dval contains the value, negated if necessary. */
290     switch (c) {
291 	case 'e':
292 	case 'E':
293 	    {			/* Check for a following exponent. */
294 		int esign = 0;
295 		int iexp;
296 
297 		GET_NEXT(c, sp, return_error(e_syntaxerror));
298 		switch (c) {
299 		    case '-':
300 			esign = 1;
301 		    case '+':
302 			GET_NEXT(c, sp, return_error(e_syntaxerror));
303 		}
304 		/* Scan the exponent.  We limit it arbitrarily to 999. */
305 		if (!IS_DIGIT(d, c))
306 		    return_error(e_syntaxerror);
307 		iexp = d;
308 		for (;; iexp = iexp * 10 + d) {
309 		    GET_NEXT(c, sp, break);
310 		    if (!IS_DIGIT(d, c)) {
311 			*psp = sp;
312 			code = 1;
313 			break;
314 		    }
315 		    if (iexp > 99)
316 			return_error(e_limitcheck);
317 		}
318 		if (esign)
319 		    exp10 -= iexp;
320 		else
321 		    exp10 += iexp;
322 		break;
323 	    }
324 	default:
325 	    *psp = sp;
326 	    code = 1;
327 	case EOFC:
328 	    ;
329     }
330     /* Compute dval * 10^exp10. */
331     if (exp10 > 0) {
332 	while (exp10 > NUM_POWERS_10)
333 	    dval *= powers_10[NUM_POWERS_10],
334 		exp10 -= NUM_POWERS_10;
335 	if (exp10 > 0)
336 	    dval *= powers_10[exp10];
337     } else if (exp10 < 0) {
338 	while (exp10 < -NUM_POWERS_10)
339 	    dval /= powers_10[NUM_POWERS_10],
340 		exp10 += NUM_POWERS_10;
341 	if (exp10 < 0)
342 	    dval /= powers_10[-exp10];
343     }
344     /*
345      * Check for an out-of-range result.  Currently we don't check for
346      * absurdly large numbers of digits in the accumulation loops,
347      * but we should.
348      */
349     if (dval >= 0) {
350 	if (dval > MAX_FLOAT)
351 	    return_error(e_limitcheck);
352     } else {
353 	if (dval < -MAX_FLOAT)
354 	    return_error(e_limitcheck);
355     }
356 rret:
357     make_real(pref, dval);
358     return code;
359 }
360