1 /* $XConsortium: token.c,v 1.2 91/10/10 11:19:55 rws Exp $ */
2 /* Copyright International Business Machines,Corp. 1991
3 * All Rights Reserved
4 *
5 * License to use, copy, modify, and distribute this software
6 * and its documentation for any purpose and without fee is
7 * hereby granted, provided that the above copyright notice
8 * appear in all copies and that both that copyright notice and
9 * this permission notice appear in supporting documentation,
10 * and that the name of IBM not be used in advertising or
11 * publicity pertaining to distribution of the software without
12 * specific, written prior permission.
13 *
14 * IBM PROVIDES THIS SOFTWARE "AS IS", WITHOUT ANY WARRANTIES
15 * OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT
16 * LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT OF
18 * THIRD PARTY RIGHTS. THE ENTIRE RISK AS TO THE QUALITY AND
19 * PERFORMANCE OF THE SOFTWARE, INCLUDING ANY DUTY TO SUPPORT
20 * OR MAINTAIN, BELONGS TO THE LICENSEE. SHOULD ANY PORTION OF
21 * THE SOFTWARE PROVE DEFECTIVE, THE LICENSEE (NOT IBM) ASSUMES
22 * THE ENTIRE COST OF ALL SERVICING, REPAIR AND CORRECTION. IN
23 * NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR
24 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
25 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
26 * CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
27 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
28 * SOFTWARE.
29 */
30 /* Authors: Sig Nin & Carol Thompson IBM Almaden Research Laboratory */
31 #include "types.h"
32 #include "t1stdio.h"
33 #include "util.h"
34 #include "digit.h"
35 #include "token.h"
36 #include "tokst.h"
37 #include "hdigit.h"
38 /*
39 * -------------------------------------------------------------------
40 * Globals
41 * -------------------------------------------------------------------
42 */
43
44 /* These variables are set by the caller */
45 char *tokenStartP; /* Pointer to token buffer in VM */
46 char *tokenMaxP; /* Pointer to last byte in buffer + 1 */
47
48 /* These variables are set by TOKEN */
49 int tokenLength; /* Characters in token */
50 boolean tokenTooLong; /* Token too long for buffer */
51 int tokenType; /* Type of token identified */
52 psvalue tokenValue; /* Token value */
53
54 /*
55 * -------------------------------------------------------------------
56 * Private variables
57 * -------------------------------------------------------------------
58 */
59
/* Stream currently being scanned; loaded on entry to scan_token() */
static FILE *inputFileP;

/* Write cursor: address where the next saved token character will go */
static char *tokenCharP;
65
66 /*
67 * -------------------------------------------------------------------
68 * Private routines for manipulating numbers
69 * -------------------------------------------------------------------
70 */
71
/*
 * Exp10(e) -- 10 raised to the integer power e, as a DOUBLE.
 * Exponents in [-64, 63] come from the Exp10T table (zero is special-cased
 * to exactly 1.0); anything outside that range is computed by P10().
 * The argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define Exp10(e) \
  (((e) >= -64 && (e) <= 63) \
     ? (((e) != 0) ? Exp10T[(e) + 64] : (DOUBLE)(1.0)) \
     : P10(e))
80
81 static DOUBLE Exp10T[128] = {
82 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57,
83 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49,
84 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
85 1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33,
86 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25,
87 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
88 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,
89 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
90 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
91 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
92 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23,
93 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
94 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
95 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47,
96 1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55,
97 1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63
98 };
99
P10(int32_t exponent)100 static DOUBLE P10(int32_t exponent)
101 {
102 DOUBLE value, power;
103
104 if (exponent < 0) {
105 power = 0.1;
106 value = (exponent & 1 ? power : 1.0);
107 exponent = -((exponent + 1) >> 1); /* portable C for -(exponent/2) */
108 }
109 else {
110 power = 10.0;
111 value = (exponent & 1 ? power : 1.0);
112 exponent = exponent >> 1;
113 }
114
115 while(exponent > 0) {
116 power *= power;
117 if (exponent & 1)
118 value *= power;
119 exponent >>= 1;
120 }
121
122 return(value);
123 }
124
125 /*
126 * -------------------------------------------------------------------
127 * Private routines and macros for manipulating the input
128 * -------------------------------------------------------------------
129 */
130
/* next_ch -- fetch the next character (or EOF) from the current input */
#define next_ch()    (getc(inputFileP))

/* back_ch -- push one character back onto the current input.
 *
 * Pushing back EOF fails, which is harmless: the following getc will
 * report EOF again.  The macro yields whatever ungetc yields, but callers
 * should not depend on that value.
 */
#define back_ch(ch)  (ungetc(ch, inputFileP))

/* back_ch_not_white -- push `ch` back unless it is white space.
 *
 * White space is simply dropped.  When the white space is a carriage
 * return, one more character is read INTO `ch`: a linefeed is dropped
 * along with the carriage return, anything else is pushed back.
 * `ch` must therefore be a modifiable variable.
 */
#define back_ch_not_white(ch) \
  (!(isWHITE_SPACE(ch)) \
     ? back_ch(ch) \
     : ((ch) != '\r' \
          ? EOF \
          : ((((ch) = next_ch()) != '\n') ? back_ch(ch) : EOF)))
165
166 /*
167 * -------------------------------------------------------------------
168 * Private routines and macros for manipulating the token buffer
169 * -------------------------------------------------------------------
170 */
171
/* save_unsafe_ch -- append a character to the token with NO bounds check.
 * Only for places where the buffer is known to have room.
 */
#define save_unsafe_ch(ch)  (*tokenCharP++ = (ch))

/* save_ch -- append a character to the token if it fits; otherwise leave
 * the buffer alone and raise the tokenTooLong flag.
 */
#define save_ch(ch) \
  ((tokenCharP >= tokenMaxP) \
     ? (tokenTooLong = TRUE) \
     : save_unsafe_ch(ch))
184
/*
 * -------------------------------------------------------------------
 * Action Routines
 *
 * Every action routine
 *   -- receives the current input character as `int ch`
 *   -- returns the next character to dispatch on while the token is
 *      still being assembled, or DONE once a token is complete
 *   -- when returning DONE, leaves the following character in the input
 *
 * DONE is 256 so it cannot collide with a byte value returned by getc.
 * -------------------------------------------------------------------
 */

#define DONE 256
197
/* Discard the current character and hand back the next one from the input */
static int next_char(int ch)
{
  int following;

  (void) ch;
  following = next_ch();
  return following;
}
203
/* Store `ch` in the token (if there is room), then return the next input
 * character.
 */
static int add_char(int ch)
{
  int following;

  save_ch(ch);
  following = next_ch();
  return following;
}
210
211
212 /* -------------------------------------------------------------------
213 * Skip white space and comments
214 */
215
/* Read past white space: the incoming character is dropped and input is
 * consumed until a non-white-space character (or EOF) is found, which is
 * returned.
 */
static int skip_space(int ch)
{
  for (;;) {
    ch = next_ch();
    if (!(isWHITE_SPACE(ch)))
      return ch;
  }
}
224
/* Read past a comment: input is consumed for as long as isCOMMENT()
 * accepts the character; the first character it rejects is returned.
 */
static int skip_comment(int ch)
{
  for (;;) {
    ch = next_ch();
    if (!(isCOMMENT(ch)))
      return ch;
  }
}
233
234 /* -------------------------------------------------------------------
235 * Collect value elements for a number
236 */
237
/* Mantissa of a decimal integer or real:
 *   m_sign  -- '+' or '-'
 *   m_value -- signed digits accumulated so far
 *   m_scale -- power of ten still to be applied to m_value (positive for
 *              integer digits that did not fit, negative for fraction
 *              digits that were folded in)
 */
static int     m_sign;
static int32_t m_value, m_scale;

/* Exponent of a real:
 *   e_sign  -- '+' or '-'
 *   e_value -- signed exponent digits accumulated so far
 *   e_scale -- count of exponent digits that did not fit in e_value
 */
static int     e_sign;
static int32_t e_value, e_scale;

/* Radix number:
 *   r_base  -- the radix (2..36)
 *   r_value -- accumulated value
 *   r_scale -- count of digits that did not fit in r_value
 */
static int32_t r_base, r_value, r_scale;
252
add_sign(int ch)253 static int add_sign(int ch)
254 {
255 m_sign = ch;
256 save_unsafe_ch(ch);
257 return(next_ch());
258 }
259
add_1st_digits(int ch)260 static int add_1st_digits(int ch)
261 {
262 m_sign = '+';
263 return(add_digits(ch));
264 }
265
add_digits(int ch)266 static int add_digits(int ch)
267 {
268 int32_t value, p_value, scale;
269 int digit;
270
271 /* On entry, expect m_sign to be set to '+' or '-';
272 * ch is a decimal digit.
273 * Expect at most one character saved at this point,
274 * a sign. This routine will save up to 10 more
275 * characters without checking the buffer boundary.
276 */
277
278 value = ch - '0';
279 save_unsafe_ch(ch);
280 ch = next_ch();
281
282 while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
283 value = (value << 3) + (value << 1) + (ch - '0');
284 save_unsafe_ch(ch);
285 ch = next_ch();
286 }
287
288 /* Quick exit for small integers --
289 * |x| <= 10*((MAX_INTEGER/10)-1)+9
290 * |x| <= 2,147,483,639 for 32 bit integers
291 */
292 if (isNUMBER_ENDER(ch)) {
293 back_ch_not_white(ch);
294 tokenValue.integer = (m_sign == '-' ? -value : value);
295 tokenType = TOKEN_INTEGER;
296 return(DONE);
297 }
298
299 /* Handle additional digits. Beyond the boundary case,
300 * 10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
301 * just count the digits: the number is too large to
302 * represent as an integer and will be returned as a real.
303 * The mantissa of a real holds fewer bits than an integer.
304 */
305 p_value = value;
306 value = (m_sign == '-' ? -value : value);
307 scale = 0;
308
309 if (isDECIMAL_DIGIT(ch)) {
310
311 /* Handle the boundary case */
312 if (p_value == (MAX_INTEGER/10)) {
313 digit = ch - '0';
314
315 /* Must handle positive and negative values separately */
316 /* for 2's complement arithmetic */
317 if (value > 0) {
318 if (digit <= MAX_INTEGER%10)
319 value = (value << 3) + (value << 1) + digit;
320 else
321 ++scale; /* Too big, just count it */
322 }
323 else {
324 /* Use positive % operands for portability */
325 if (digit <= -(MIN_INTEGER+10)%10)
326 value = (value << 3) + (value << 1) - digit;
327 else
328 ++scale; /* Too big, just count it */
329 }
330 }
331 else
332 ++scale; /* Not boundary case, just count digit */
333
334 save_unsafe_ch(ch);
335 ch = next_ch();
336
337 /* Continue scanning digits, but can't store them */
338 while(isDECIMAL_DIGIT(ch)) {
339 ++scale;
340 save_ch(ch);
341 ch = next_ch();
342 }
343 }
344
345 /* Continue from here scanning radix integer or real */
346 m_value = value;
347 m_scale = scale;
348
349 /* Initialize for possible real */
350 e_sign = '+';
351 e_value = 0;
352 e_scale = 0;
353
354 return(ch);
355 }
356
add_1st_decpt(int ch)357 static int add_1st_decpt(int ch)
358 {
359 m_sign = '+';
360 return(add_decpt(ch));
361 }
362
add_decpt(int ch)363 static int add_decpt(int ch)
364 {
365 /* On entry, expect m_sign to be set to '+' or '-' */
366 m_value = 0;
367 m_scale = 0;
368 save_unsafe_ch(ch);
369 return(next_ch());
370 }
371
add_fraction(int ch)372 static int add_fraction(int ch)
373 {
374 int32_t value, scale;
375 int digit;
376
377 /* On entry, expect m_value and m_scale to be initialized,
378 * and m_sign to be set to '+' or '-'. Expect m_value and m_sign
379 * to be consistent (this is not checked).
380 */
381 value = m_value;
382 scale = m_scale;
383
384 /* Scan leading zeroes */
385 if (value == 0) {
386 while(ch == '0') {
387 --scale;
388 save_ch(ch);
389 ch = next_ch();
390 }
391
392 /* Scan first significant digit */
393 if (isDECIMAL_DIGIT(ch)) {
394 --scale;
395 value = ch - '0';
396 value = (m_sign == '-' ? -value : value);
397 save_ch(ch);
398 ch = next_ch();
399 }
400 else
401 /* no significant digits -- number is zero */
402 scale = 0;
403 }
404 /* value != 0 || value == 0 && !isDECIMAL_DIGIT(ch) */
405
406 /* Scan additional significant digits */
407 if (isDECIMAL_DIGIT(ch)) {
408 if (value > 0) {
409 while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
410 --scale;
411 value = (value << 3) + (value << 1) + (ch - '0');
412 save_ch(ch);
413 ch = next_ch();
414 }
415 /* Check boundary case */
416 if (isDECIMAL_DIGIT(ch) && value == (MAX_INTEGER/10)) {
417 digit = ch - '0';
418 if (digit <= MAX_INTEGER%10) {
419 --scale;
420 value = (value << 3) + (value << 1) + digit;
421 save_ch(ch);
422 ch = next_ch();
423 }
424 }
425 }
426 else {
427 /* value < 0 */
428 while(isDECIMAL_DIGIT(ch) && value > -(-(MIN_INTEGER+10)/10+1)) {
429 /* Use positive / operands for portability */
430 --scale;
431 value = (value << 3) + (value << 1) - (ch - '0');
432 save_ch(ch);
433 ch = next_ch();
434 }
435 /* Check boundary case */
436 if (isDECIMAL_DIGIT(ch)
437 && value == -(-(MIN_INTEGER+10)/10+1)) {
438 digit = ch - '0';
439 if (digit <= -(MIN_INTEGER+10)%10) {
440 /* Use positive % operands for portability */
441 --scale;
442 value = (value << 3) + (value << 1) - digit;
443 save_ch(ch);
444 ch = next_ch();
445 }
446 }
447 }
448
449 /* Additional digits can be discarded */
450 while(isDECIMAL_DIGIT(ch)) {
451 save_ch(ch);
452 ch = next_ch();
453 }
454 }
455
456 /* Store results */
457 m_value = value;
458 m_scale = scale;
459
460 /* Initialize for possible real */
461 e_sign = '+';
462 e_value = 0;
463 e_scale = 0;
464
465 return(ch);
466 }
467
add_e_sign(int ch)468 static int add_e_sign(int ch)
469 {
470 e_sign = ch;
471 save_ch(ch);
472 return(next_ch());
473 }
474
add_exponent(int ch)475 static int add_exponent(int ch)
476 {
477 int32_t value, p_value;
478 int32_t scale = 0;
479 int digit;
480
481 /* On entry, expect e_sign to be set to '+' or '-' */
482
483 value = ch - '0';
484 save_ch(ch);
485 ch = next_ch();
486
487 while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
488 value = (value << 3) + (value << 1) + (ch - '0');
489 save_ch(ch);
490 ch = next_ch();
491 }
492
493 p_value = value;
494 value = (e_sign == '-' ? -value : value);
495
496 /* Handle additional digits. Beyond the boundary case,
497 * 10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
498 * just count the digits: the number is too large to
499 * represent as an integer.
500 */
501 if (isDECIMAL_DIGIT(ch)) {
502
503 /* Examine boundary case */
504 if (p_value == (MAX_INTEGER/10)) {
505 digit = ch - '0';
506
507 /* Must handle positive and negative values separately */
508 /* for 2's complement arithmetic */
509 if (value > 0) {
510 if (digit <= MAX_INTEGER%10)
511 value = (value << 3) + (value << 1) + digit;
512 else
513 ++scale; /* Too big, just count it */
514 }
515 else {
516 /* Use positive % operands for portability */
517 if (digit <= -(MIN_INTEGER+10)%10)
518 value = (value << 3) + (value << 1) - digit;
519 else
520 ++scale; /* Too big, just count it */
521 }
522 }
523 else
524 ++scale; /* Not boundary case, just count digit */
525
526 save_ch(ch);
527 ch = next_ch();
528
529 /* Continue scanning digits, but can't store any more */
530 while(isDECIMAL_DIGIT(ch)) {
531 ++scale;
532 save_ch(ch);
533 ch = next_ch();
534 }
535 }
536
537 /* Store results */
538 e_value = value;
539 e_scale = scale;
540
541 return(ch);
542 }
543
add_radix(int ch)544 static int add_radix(int ch)
545 {
546 if (2 <= m_value && m_value <= 36 && m_scale == 0) {
547 r_base = m_value;
548 save_ch(ch);
549 return(next_ch());
550 }
551 else {
552 /* Radix invalid, complete a name token */
553 return(AAH_NAME(ch));
554 }
555 }
556
add_r_digits(int ch)557 static int add_r_digits(int ch)
558 {
559 uint32_t value;
560 int32_t radix, scale;
561 int digit;
562
563 /* NOTE: The syntax of a radix number allows only for
564 * values of zero or more. The value will be stored as
565 * a 32 bit integer, which PostScript then interprets
566 * as signed. This means, for example, that the numbers:
567 *
568 * 8#37777777777
569 * 10#4294967295
570 * 16#FFFFFFFF
571 * 36#1Z141Z3
572 *
573 * are all interpreted as -1. This routine implements this
574 * idea explicitly: it accumulates the number's value
575 * as unsigned, then casts it to signed when done.
576 */
577
578 /* Expect r_base to be initialized */
579 radix = r_base;
580 value = 0;
581 scale = 0;
582
583 /* Scan leading zeroes */
584 while(ch == '0') {
585 save_ch(ch);
586 ch = next_ch();
587 }
588
589 /* Handle first non-zero digit */
590 if ((digit=digit_value[ch]) < radix) {
591 value = digit;
592 save_ch(ch);
593 ch = next_ch();
594
595 /* Add digits until boundary case reached */
596 while((digit=digit_value[ch]) < radix
597 && value < (MAX_INT32 / radix)) {
598 value = value * radix + digit;
599 save_ch(ch);
600 ch = next_ch();
601 };
602
603 /* Scan remaining digits */
604 if ((digit=digit_value[ch]) < radix) {
605
606 /* Examine boundary case ---
607 * radix*(MAX_INT32/radix) <= number <= MAX_INT32
608 */
609 if (value == (MAX_INT32/radix) && digit <= MAX_INT32%radix)
610 value = value * radix + digit;
611 else
612 ++scale;
613
614 /* Continue scanning digits, but can't store them */
615 save_ch(ch);
616 ch = next_ch();
617 while(digit_value[ch] < radix) {
618 ++scale;
619 save_ch(ch);
620 ch = next_ch();
621 }
622 }
623 }
624
625 /* Store result */
626 r_value = (int32_t) value; /* result is signed */
627 r_scale = scale;
628
629 return(ch);
630 }
631
632 /* -------------------------------------------------------------------
633 * Complete a number; set token type and done flag.
634 * Put current input character back, if it is not white space.
635 */
636
637 /* Done: Radix Number */
RADIX_NUMBER(int ch)638 static int RADIX_NUMBER(int ch)
639 {
640 back_ch_not_white(ch);
641 if (r_scale == 0) {
642 tokenValue.integer = r_value;
643 tokenType = TOKEN_INTEGER;
644 }
645 else {
646 tokenType = TOKEN_NAME;
647 }
648 return(DONE);
649 }
650
651 /* Done: Integer */
INTEGER(int ch)652 static int INTEGER(int ch)
653 {
654 back_ch_not_white(ch);
655 if (m_scale == 0) {
656 tokenValue.integer = m_value;
657 tokenType = TOKEN_INTEGER;
658 }
659 else {
660 tokenValue.real = (DOUBLE)(m_value) * Exp10(m_scale);
661 tokenType = TOKEN_REAL;
662 }
663 return(DONE);
664 }
665
666 /* Done: Real */
REAL(int ch)667 static int REAL(int ch)
668 {
669 DOUBLE temp;
670
671 back_ch_not_white(ch);
672
673 /* NOTE: ignore e_scale, since e_value alone will cause
674 * exponent overflow if e_scale > 0.
675 */
676
677 /* HAZARD: exponent overflow of intermediate result
678 * (e.g., in 370 floating point); this should not be a problem
679 * with IEEE floating point. Reduce exponent overflow hazard by
680 * combining m_scale and e_value first, if they have different signs,
681 * or multiplying m_value and one of the other factors, if both
682 * m_scale and e_value are negative.
683 */
684 if ((m_scale >= 0 && e_value <= 0)
685 || (m_scale <= 0 && e_value >= 0)) {
686 tokenValue.real = (DOUBLE)(m_value) * Exp10(m_scale + e_value);
687 }
688 else {
689 temp = (DOUBLE)(m_value) * Exp10(m_scale);
690 tokenValue.real = temp * Exp10(e_value);
691 }
692
693 tokenType = TOKEN_REAL;
694 return(DONE);
695 }
696
697
698 /* -------------------------------------------------------------------
699 * Assemble a hex string; set token type and done flag.
700 */
701
702 /* Done: Hex String */
HEX_STRING(int ch)703 static int HEX_STRING(int ch)
704 {
705 int value;
706
707 while(TRUE) {
708
709 /* Process odd digit */
710 ch = next_ch();
711 if (!isHEX_DIGIT(ch)) {
712
713 /* Skip white space */
714 while(isWHITE_SPACE(ch))
715 ch = next_ch();
716
717 /* Check for terminator */
718 if (!isHEX_DIGIT(ch)) {
719 break;
720 }
721 }
722 value = digit_value[ch] << 4;
723
724 /* Process even digit */
725 ch = next_ch();
726 if (!isHEX_DIGIT(ch)) {
727
728 /* Skip white space */
729 while(isWHITE_SPACE(ch))
730 ch = next_ch();
731
732 /* Check for terminator */
733 if (!isHEX_DIGIT(ch)) {
734 save_ch(value);
735 break;
736 }
737 }
738 save_ch(value + digit_value[ch]);
739 }
740
741 /* Classify result, based on why loop ended */
742 if (ch == '>')
743 tokenType = TOKEN_HEX_STRING;
744 else {
745 /* save the invalid character for error reporting */
746 save_ch(ch);
747 tokenType = TOKEN_INVALID;
748 }
749
750 return(DONE);
751 }
752
753 /* -------------------------------------------------------------------
754 * Assemble a string; set token type and done flag
755 */
756
757 /* Save a backslash-coded character in a string --
758 *
759 * Store the proper character for special cases
760 * "\b", "\f", "\n", "\r", and "\t".
761 *
762 * Decode and store octal-coded character, up to
763 * three octal digits, "\o", "\oo", and "\ooo".
764 *
765 * The sequence "\<newline>" is a line continuation,
766 * so consume both without storing anything.
767 *
768 * The sequence "\<EOF>" is an error; exit without
769 * storing anything and let the caller handle it.
770 *
771 * For other characters, including the sequences
772 * "\\", "\(", and "\)", simply store the second
773 * character.
774 */
save_digraph(int ch)775 static void save_digraph(int ch)
776 {
777 int value;
778
779 switch (ch) {
780
781 case 'b': /* backspace */
782 ch = '\b';
783 break;
784
785 case 'f': /* formfeed */
786 ch = '\f';
787 break;
788
789 case 'n': /* newline */
790 ch = '\n';
791 break;
792
793 case 'r': /* carriage return */
794 ch = '\r';
795 break;
796
797 case 't': /* horizontal tab */
798 ch = '\t';
799 break;
800
801 case '\n': /* line continuation -- consume it */
802 return;
803
804 case '\r': /* carriage return -- consume it */
805 ch = next_ch(); /* look at next character, is it \n? */
806 if (ch == '\n') return;
807 back_ch(ch); /* if not a line feed, then return it */
808 return;
809
810 case EOF: /* end of file -- forget it */
811 return;
812
813 default:
814 /* scan up to three octal digits to get value */
815 if (isOCTAL_DIGIT(ch)) {
816 value = digit_value[ch];
817 ch = next_ch();
818 if (isOCTAL_DIGIT(ch)) {
819 value = (value << 3) + digit_value[ch];
820 ch = next_ch();
821 if (isOCTAL_DIGIT(ch))
822 value = (value << 3) + digit_value[ch];
823 else
824 back_ch(ch);
825 }
826 else
827 back_ch(ch);
828 ch = value;
829 }
830 }
831
832 /* Found a character to save */
833 save_ch(ch);
834 }
835
836 /* Done: String */
STRING(int ch)837 static int STRING(int ch)
838 {
839 int nest_level = 1;
840
841 tokenType = TOKEN_STRING;
842
843 do {
844
845 ch = next_ch();
846 while(!isSTRING_SPECIAL(ch)) {
847 save_ch(ch);
848 ch = next_ch();
849 };
850
851 switch (ch) {
852
853 case '(':
854 ++nest_level;
855 save_ch(ch);
856 break;
857
858 case ')':
859 if (--nest_level > 0)
860 save_ch(ch);
861 break;
862
863 case '\\':
864 save_digraph(next_ch());
865 break;
866
867 case '\r':
868 /* All carriage returns (\r) are turned into linefeeds (\n)*/
869 ch = next_ch(); /* get the next one, is it \n? */
870 if (ch != '\n') { /* if not, then put it back. */
871 back_ch(ch);
872 }
873 save_ch('\n'); /* in either case, save a linefeed */
874 break;
875
876
877 case EOF:
878 tokenType = TOKEN_INVALID; /* Unterminated string */
879 nest_level = 0;
880 break;
881 }
882
883 } while(nest_level > 0);
884
885 return(DONE);
886 }
887
888
889 /* -------------------------------------------------------------------
890 * Assemble a name; set token type and done flag.
891 * Put current input character back, if it is not white space.
892 */
893
894 /* Done: Name
895 * (Safe version used to complete name tokens that
896 * start out looking like something else).
897 */
898
AAH_NAME(int ch)899 static int AAH_NAME(int ch)
900 {
901 do {
902 save_ch(ch);
903 ch = next_ch();
904 } while(isNAME(ch));
905
906 back_ch_not_white(ch);
907 tokenType = TOKEN_NAME;
908 return(DONE);
909 }
910
911 /* Done: Name */
NAME(int ch)912 static int NAME(int ch)
913 {
914 save_unsafe_ch(ch);
915 ch = next_ch();
916 if (isNAME(ch)) {
917 save_unsafe_ch(ch);
918 ch = next_ch();
919 if (isNAME(ch)) {
920 save_unsafe_ch(ch);
921 ch = next_ch();
922 if (isNAME(ch)) {
923 save_unsafe_ch(ch);
924 ch = next_ch();
925 if (isNAME(ch)) {
926 save_unsafe_ch(ch);
927 ch = next_ch();
928 if (isNAME(ch)) {
929 save_unsafe_ch(ch);
930 ch = next_ch();
931 if (isNAME(ch)) {
932 save_unsafe_ch(ch);
933 ch = next_ch();
934 while(isNAME(ch)) {
935 save_ch(ch);
936 ch = next_ch();
937 }
938 }
939 }
940 }
941 }
942 }
943 }
944
945 back_ch_not_white(ch);
946 tokenType = TOKEN_NAME;
947 return(DONE);
948 }
949
950 /* Done: Literal Name */
LITERAL_NAME(int ch)951 static int LITERAL_NAME(int ch)
952 {
953 if (isNAME(ch)) {
954 save_unsafe_ch(ch);
955 ch = next_ch();
956 if (isNAME(ch)) {
957 save_unsafe_ch(ch);
958 ch = next_ch();
959 if (isNAME(ch)) {
960 save_unsafe_ch(ch);
961 ch = next_ch();
962 if (isNAME(ch)) {
963 save_unsafe_ch(ch);
964 ch = next_ch();
965 if (isNAME(ch)) {
966 save_unsafe_ch(ch);
967 ch = next_ch();
968 if (isNAME(ch)) {
969 save_unsafe_ch(ch);
970 ch = next_ch();
971 while(isNAME(ch)) {
972 save_ch(ch);
973 ch = next_ch();
974 }
975 }
976 }
977 }
978 }
979 }
980 }
981
982 back_ch_not_white(ch);
983 tokenType = TOKEN_LITERAL_NAME;
984 return(DONE);
985 }
986
987 /* Done: immediate Name */
IMMED_NAME(int ch)988 static int IMMED_NAME(int ch)
989 {
990 ch = next_ch();
991 if (isNAME(ch)) {
992 save_unsafe_ch(ch);
993 ch = next_ch();
994 if (isNAME(ch)) {
995 save_unsafe_ch(ch);
996 ch = next_ch();
997 if (isNAME(ch)) {
998 save_unsafe_ch(ch);
999 ch = next_ch();
1000 if (isNAME(ch)) {
1001 save_unsafe_ch(ch);
1002 ch = next_ch();
1003 if (isNAME(ch)) {
1004 save_unsafe_ch(ch);
1005 ch = next_ch();
1006 if (isNAME(ch)) {
1007 save_unsafe_ch(ch);
1008 ch = next_ch();
1009 while(isNAME(ch)) {
1010 save_ch(ch);
1011 ch = next_ch();
1012 }
1013 }
1014 }
1015 }
1016 }
1017 }
1018 }
1019
1020 back_ch_not_white(ch);
1021 tokenType = TOKEN_IMMED_NAME;
1022 return(DONE);
1023 }
1024
1025 /* Done: Name found while looking for something else */
OOPS_NAME(int ch)1026 static int OOPS_NAME(int ch)
1027 {
1028 back_ch_not_white(ch);
1029 tokenType = TOKEN_NAME;
1030 return(DONE);
1031 }
1032
1033
1034 /* -------------------------------------------------------------------
1035 * Complete a miscellaneous token; set token type and done flag.
1036 */
1037
1038 /* Done: Unmatched Right Angle-Bracket */
RIGHT_ANGLE(int ch)1039 static int RIGHT_ANGLE(int ch)
1040 {
1041 tokenType = TOKEN_RIGHT_ANGLE;
1042 return(DONE);
1043 }
1044
1045 /* Done: Unmatched Right Parenthesis */
RIGHT_PAREN(int ch)1046 static int RIGHT_PAREN(int ch)
1047 {
1048 tokenType = TOKEN_RIGHT_PAREN;
1049 return(DONE);
1050 }
1051
1052 /* Done: Left Brace */
LEFT_BRACE(int ch)1053 static int LEFT_BRACE(int ch)
1054 {
1055 tokenType = TOKEN_LEFT_BRACE;
1056 return(DONE);
1057 }
1058
1059 /* Done: Right Brace */
RIGHT_BRACE(int ch)1060 static int RIGHT_BRACE(int ch)
1061 {
1062 tokenType = TOKEN_RIGHT_BRACE;
1063 return(DONE);
1064 }
1065
1066 /* Done: Left Bracket */
LEFT_BRACKET(int ch)1067 static int LEFT_BRACKET(int ch)
1068 {
1069 save_unsafe_ch(ch);
1070 tokenType = TOKEN_LEFT_BRACKET;
1071 return(DONE);
1072 }
1073
1074 /* Done: Right Bracket */
RIGHT_BRACKET(int ch)1075 static int RIGHT_BRACKET(int ch)
1076 {
1077 save_unsafe_ch(ch);
1078 tokenType = TOKEN_RIGHT_BRACKET;
1079 return(DONE);
1080 }
1081
1082 /* Done: Break */
BREAK_SIGNAL(int ch)1083 static int BREAK_SIGNAL(int ch)
1084 {
1085 tokenType = TOKEN_BREAK;
1086 return(DONE);
1087 }
1088
1089 /* Done: No Token Found */
NO_TOKEN(int ch)1090 static int NO_TOKEN(int ch)
1091 {
1092 tokenType = TOKEN_EOF;
1093 return(DONE);
1094 }
1095
1096
1097 /*
1098 * -------------------------------------------------------------------
1099 * scan_token -- scan one token from the input. It uses a simple
1100 * finite state machine to recognize token classes.
1101 *
1102 * The input is from a file.
1103 *
1104 * On entry --
1105 *
1106 * inputP -> input PostScript object, a file.
1107 * tokenStartP -> buffer in VM for accumulating the token.
1108 * tokenMaxP -> last character in the token buffer
1109 *
1110 * On exit --
1111 *
1112 * tokenLength = number of characters in the token
1113 * tokenTooLong = TRUE if the token did not fit in the buffer
1114 * tokenType = code for the type of token parsed.
1115 * tokenValue = converted value of a numeric token.
1116 *
1117 *
1118 * -------------------------------------------------------------------
1119 */
scan_token(psobj * inputP)1120 void scan_token(psobj *inputP)
1121 {
1122 int ch;
1123 unsigned char *stateP = s0;
1124 unsigned char entry;
1125 int (*actionP)();
1126
1127 /* Define input source */
1128 inputFileP = inputP->data.fileP;
1129 if (inputFileP == NULL) {
1130 tokenType = TOKEN_EOF;
1131 return;
1132 }
1133
1134 /* Ensure enough space for most cases
1135 * (so we don't have to keep checking)
1136 * The length needs to cover the maximum number
1137 * of save_unsafe_ch() calls that might be executed.
1138 * That number is 11 (a sign and 10 decimal digits, e.g.,
1139 * when scanning -2147483648), but use MAX_NAME_LEN
1140 * in case someone changes that without checking.
1141 */
1142 if (vm_free_bytes() < (MAX_NAME_LEN)) {
1143 if (!(vm_init())) {
1144 tokenLength = 0;
1145 tokenTooLong = TRUE;
1146 tokenType = TOKEN_NONE;
1147 tokenValue.integer = 0;
1148 return;
1149 }
1150 }
1151
1152 tokenStartP = vm_next_byte();
1153
1154 /* Reset token */
1155 tokenCharP = tokenStartP;
1156 tokenTooLong = FALSE;
1157
1158 /* Scan one token */
1159 ch = next_ch();
1160 do {
1161 entry = stateP[ch];
1162 stateP = classActionTable[entry].nextStateP;
1163 actionP = classActionTable[entry].actionRoutineP;
1164 ch = (*actionP)(ch);
1165 } while(ch != DONE);
1166
1167
1168 /* Return results */
1169 tokenLength = tokenCharP - tokenStartP;
1170 }
1171