1 /* $XConsortium: token.c,v 1.3 94/02/04 17:07:17 gildea Exp $ */
2 /* Copyright International Business Machines,Corp. 1991
3 * All Rights Reserved
4 *
5 * License to use, copy, modify, and distribute this software
6 * and its documentation for any purpose and without fee is
7 * hereby granted, provided that the above copyright notice
8 * appear in all copies and that both that copyright notice and
9 * this permission notice appear in supporting documentation,
10 * and that the name of IBM not be used in advertising or
11 * publicity pertaining to distribution of the software without
12 * specific, written prior permission.
13 *
14 * IBM PROVIDES THIS SOFTWARE "AS IS", WITHOUT ANY WARRANTIES
15 * OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT
16 * LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT OF
18 * THIRD PARTY RIGHTS. THE ENTIRE RISK AS TO THE QUALITY AND
19 * PERFORMANCE OF THE SOFTWARE, INCLUDING ANY DUTY TO SUPPORT
20 * OR MAINTAIN, BELONGS TO THE LICENSEE. SHOULD ANY PORTION OF
21 * THE SOFTWARE PROVE DEFECTIVE, THE LICENSEE (NOT IBM) ASSUMES
22 * THE ENTIRE COST OF ALL SERVICING, REPAIR AND CORRECTION. IN
23 * NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR
24 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
25 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
26 * CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
27 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
28 * SOFTWARE.
29 */
30 /* Authors: Sig Nin & Carol Thompson IBM Almaden Research Laboratory */
31 #include "t1stdio.h"
32 #include "util.h"
33 #include "digit.h"
34 #include "token.h"
35 #include "tokst.h"
36 #include "hdigit.h"
37
38 /*
39 * -------------------------------------------------------------------
40 * Globals
41 * -------------------------------------------------------------------
42 */
43
/* Token buffer bounds.
 * Historically described as "set by the caller"; note that
 * scan_token() in this file assigns both of them at the start of
 * every scan (from vm_next_byte() and vm_free_bytes()).
 */
char *tokenStartP; /* Pointer to token buffer in VM */
char *tokenMaxP; /* Pointer to last byte in buffer + 1 */

/* These variables are set by TOKEN (i.e. by scan_token() and the
 * action routines it dispatches to).
 */
int tokenLength; /* Characters in token */
boolean tokenTooLong; /* Token too long for buffer */
int tokenType; /* Type of token identified */
psvalue tokenValue; /* Token value */
53
54 /*
55 * -------------------------------------------------------------------
56 * Private variables
57 * -------------------------------------------------------------------
58 */
59
/* Current input file; loaded by scan_token() from its argument and
 * read by the next_ch()/back_ch() macros.
 */
static FILE *inputFileP; /* Current input file */


/* Token */
/* Write cursor into the token buffer; advanced by the save_*ch()
 * macros and reset to tokenStartP by scan_token().
 */
static char *tokenCharP; /* Pointer to next character in token */
65
66 /*
67 * -------------------------------------------------------------------
68 * Private routines for manipulating numbers
69 * -------------------------------------------------------------------
70 */
71
/* Exp10(e) -- 10 raised to the integral power e, as a double.
 *
 *   e == 0         yields exactly 1.0
 *   -64 <= e <= 63 is looked up in Exp10T
 *   anything else  is computed by P10()
 *
 * The argument is evaluated more than once; pass an expression
 * without side effects.
 */
#define Exp10(e) \
  (((e) < -64 || (e) > 63) \
    ? P10(e) \
    : ((e) != 0 \
        ? Exp10T[(e)+64] \
        : (double)(1.0) \
      ) \
  )
80
/* Powers of ten from 1e-64 through 1e63.
 * Entry i holds 10**(i-64), which is how the Exp10() macro
 * indexes the table.
 */
static double Exp10T[128] = {
  1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57,
  1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49,
  1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
  1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33,
  1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25,
  1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
  1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,
  1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
  1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
  1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
  1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23,
  1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
  1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
  1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47,
  1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55,
  1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63
};
99
/*
 * P10 -- compute 10 raised to an integral power by binary
 * exponentiation (repeated squaring).
 *
 *   exponent -- the power of ten wanted; may be negative, zero or
 *               positive.
 *
 * Returns 10**exponent as a double.  Negative powers are built from
 * successive squares of 0.1, so they carry the rounding error of
 * that process.  The Exp10() macro calls this routine for exponents
 * that fall outside its lookup table.
 *
 * The magnitude of the exponent is kept in an unsigned long so that
 * the parity test and the halving shifts are fully defined by the
 * language; masking or right-shifting a negative signed value is
 * implementation-defined.
 */
static double P10(exponent)
  long exponent;
{
  double value, power;
  unsigned long magnitude;

  if (exponent < 0) {
    power = 0.1;
    /* Unsigned negation yields |exponent| without signed overflow,
     * even for the most negative long.
     */
    magnitude = 0UL - (unsigned long) exponent;
  }
  else {
    power = 10.0;
    magnitude = (unsigned long) exponent;
  }

  /* The lowest bit selects the base itself; each remaining bit
   * selects the next successive square of the base.
   */
  value = ((magnitude & 1UL) ? power : 1.0);
  magnitude >>= 1;

  while (magnitude > 0) {
    power *= power;
    if (magnitude & 1UL)
      value *= power;
    magnitude >>= 1;
  }

  return(value);
}
125
126 /*
127 * -------------------------------------------------------------------
128 * Private routines and macros for manipulating the input
129 * -------------------------------------------------------------------
130 */
131
/* Get next character from the input --
 *
 * Expands to getc(inputFileP); the value is whatever getc yields
 * for the current input file.
 */
#define next_ch() (getc(inputFileP))
136
/* Push a character back into the input --
 *
 * Expands to ungetc(ch, inputFileP).
 *
 * Ungetc of EOF will fail, but that's ok: the next getc will
 * return EOF.
 *
 * NOTE: These macros are presently written to return the character
 * pushed, or EOF if none was pushed. However, they are not
 * required to return anything in particular, and callers should
 * not rely on the returned value.
 */
#define back_ch(ch) (ungetc(ch, inputFileP))
148
/* Return a character to the input unless it is white space.
 *
 *   - not white space:  ch is pushed back.
 *   - carriage return:  the following character is read into ch;
 *                       a linefeed is swallowed together with the
 *                       \r, anything else is pushed back.
 *   - other white space: ch is simply dropped.
 *
 * ch must be an lvalue (it may be overwritten) and is evaluated
 * more than once.  The value of the expression is back_ch()'s
 * result or EOF; callers should not rely on it.
 */
#define back_ch_not_white(ch) \
  (!(isWHITE_SPACE(ch)) \
    ? back_ch(ch) \
    : (((ch) != '\r') \
        ? EOF \
        : ((((ch) = next_ch()) != '\n') \
            ? back_ch(ch) \
            : EOF \
          ) \
      ) \
  )
166
167 /*
168 * -------------------------------------------------------------------
169 * Private routines and macros for manipulating the token buffer
170 * -------------------------------------------------------------------
171 */
172
/* Add a character to the token
 * ---- use ONLY when you KNOW that this character will
 * be stored within the token buffer.
 *
 * Stores ch at tokenCharP and advances tokenCharP by one; there is
 * no comparison against tokenMaxP.
 */
#define save_unsafe_ch(ch) (*tokenCharP++ = ch)
178
/* Append a character to the token when there is room for it.
 * When the write cursor has reached tokenMaxP the character is
 * dropped and tokenTooLong is raised instead.
 */
#define save_ch(ch) \
  ((tokenCharP >= tokenMaxP) \
    ? (tokenTooLong = TRUE) \
    : save_unsafe_ch(ch) \
  )
185
/* Store a character at the write cursor without advancing it, but
 * only when the cursor is still inside the buffer.  The expression
 * is 1 when a non-zero character was stored and 0 otherwise.
 */
#define save_ch_no_inc(ch) \
  ((tokenCharP < tokenMaxP) ? ((*tokenCharP = ch) != 0) : 0)
188
189 /*
190 * -------------------------------------------------------------------
191 * Action Routines
192 *
193 * These routines all
194 * -- take int ch as a parameter
195 * -- return int ch if no token was recognized, DONE otherwise
196 * -- leave the next character in the input, if returning DONE
197 * -------------------------------------------------------------------
198 */
199
/* Value an action routine returns once a token is complete; the
 * dispatch loop in scan_token() stops when it sees it.
 * NOTE(review): 256 presumably lies outside the range of values the
 * input routine can return -- confirm against t1stdio.h.
 */
#define DONE (256)
201
/* Action: drop the current character and fetch the next one.
 *   ch -- current character; not used.
 *   Returns the next character read from the input.
 */
static int next_char(ch)
  int ch;
{
  int following = next_ch();

  return(following);
}
208
/* Action: append the current character to the token (bounds
 * checked) and fetch the next one.
 *   ch -- character to store.
 *   Returns the next character read from the input.
 */
static int add_char(ch)
  int ch;
{
  save_ch(ch);
  ch = next_ch();
  return(ch);
}
216
217
218 /* -------------------------------------------------------------------
219 * Skip white space and comments
220 */
221
/* Action: skip white space.
 *   ch -- current character; discarded without being examined.
 *   Reads at least one character and returns the first one read
 *   that is not white space.
 */
static int skip_space(ch)
  int ch;
{
  for (;;) {
    ch = next_ch();
    if (!(isWHITE_SPACE(ch)))
      return(ch);
  }
}
231
/* Action: skip the body of a comment.
 *   ch -- current character; discarded without being examined.
 *   Reads at least one character and returns the first one read
 *   for which isCOMMENT() is false.
 */
static int skip_comment(ch)
  int ch;
{
  for (;;) {
    ch = next_ch();
    if (!(isCOMMENT(ch)))
      return(ch);
  }
}
241
242 /* -------------------------------------------------------------------
243 * Collect value elements for a number
244 */
245
/* decimal integer or real number mantissa */
static int m_sign;    /* '+' or '-' */
static long m_value;  /* signed mantissa digits that fit in an integer */
static long m_scale;  /* power of ten to apply to m_value */

/* real number exponent */
static int e_sign;    /* '+' or '-' */
static long e_value;  /* signed exponent digits that fit in an integer */
static long e_scale;  /* count of exponent digits that did not fit */

/* radix number */
static long r_base;   /* radix taken from the mantissa, 2..36 */
static long r_value;  /* accumulated value, reinterpreted as signed */
static long r_scale;  /* count of radix digits that did not fit */
260
add_sign(ch)261 static int add_sign(ch)
262 int ch;
263 {
264 m_sign = ch;
265 save_unsafe_ch(ch);
266 return(next_ch());
267 }
268
add_1st_digits(ch)269 static int add_1st_digits(ch)
270 int ch;
271 {
272 m_sign = '+';
273 return(add_digits(ch));
274 }
275
add_digits(ch)276 static int add_digits(ch)
277 int ch;
278 {
279 long value, p_value, scale;
280 int digit;
281
282 /* On entry, expect m_sign to be set to '+' or '-';
283 * ch is a decimal digit.
284 * Expect at most one character saved at this point,
285 * a sign. This routine will save up to 10 more
286 * characters without checking the buffer boundary.
287 */
288
289 value = ch - '0';
290 save_unsafe_ch(ch);
291 ch = next_ch();
292
293 while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
294 value = (value << 3) + (value << 1) + (ch - '0');
295 save_unsafe_ch(ch);
296 ch = next_ch();
297 }
298
299 /* Quick exit for small integers --
300 * |x| <= 10*((MAX_INTEGER/10)-1)+9
301 * |x| <= 2,147,483,639 for 32 bit integers
302 */
303 if (isNUMBER_ENDER(ch)) {
304 back_ch_not_white(ch);
305 tokenValue.integer = (m_sign == '-' ? -value : value);
306 tokenType = TOKEN_INTEGER;
307 return(DONE);
308 }
309
310 /* Handle additional digits. Beyond the boundary case,
311 * 10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
312 * just count the digits: the number is too large to
313 * represent as an integer and will be returned as a real.
314 * The mantissa of a real holds fewer bits than an integer.
315 */
316 p_value = value;
317 value = (m_sign == '-' ? -value : value);
318 scale = 0;
319
320 if (isDECIMAL_DIGIT(ch)) {
321
322 /* Handle the boundary case */
323 if (p_value == (MAX_INTEGER/10)) {
324 digit = ch - '0';
325
326 /* Must handle positive and negative values separately */
327 /* for 2's complement arithmetic */
328 if (value > 0) {
329 if (digit <= MAX_INTEGER%10)
330 value = (value << 3) + (value << 1) + digit;
331 else
332 ++scale; /* Too big, just count it */
333 }
334 else {
335 /* Use positive % operands for portability */
336 if (digit <= -(MIN_INTEGER+10)%10)
337 value = (value << 3) + (value << 1) - digit;
338 else
339 ++scale; /* Too big, just count it */
340 }
341 }
342 else
343 ++scale; /* Not boundary case, just count digit */
344
345 save_unsafe_ch(ch);
346 ch = next_ch();
347
348 /* Continue scanning digits, but can't store them */
349 while(isDECIMAL_DIGIT(ch)) {
350 ++scale;
351 save_ch(ch);
352 ch = next_ch();
353 }
354 }
355
356 /* Continue from here scanning radix integer or real */
357 m_value = value;
358 m_scale = scale;
359
360 /* Initialize for possible real */
361 e_sign = '+';
362 e_value = 0;
363 e_scale = 0;
364
365 return(ch);
366 }
367
add_1st_decpt(ch)368 static int add_1st_decpt(ch)
369 int ch;
370 {
371 m_sign = '+';
372 return(add_decpt(ch));
373 }
374
add_decpt(ch)375 static int add_decpt(ch)
376 int ch;
377 {
378 /* On entry, expect m_sign to be set to '+' or '-' */
379 m_value = 0;
380 m_scale = 0;
381 save_unsafe_ch(ch);
382 return(next_ch());
383 }
384
add_fraction(ch)385 static int add_fraction(ch)
386 int ch;
387 {
388 long value, scale;
389 int digit;
390
391 /* On entry, expect m_value and m_scale to be initialized,
392 * and m_sign to be set to '+' or '-'. Expect m_value and m_sign
393 * to be consistent (this is not checked).
394 */
395 value = m_value;
396 scale = m_scale;
397
398 /* Scan leading zeroes */
399 if (value == 0) {
400 while(ch == '0') {
401 --scale;
402 save_ch(ch);
403 ch = next_ch();
404 }
405
406 /* Scan first significant digit */
407 if (isDECIMAL_DIGIT(ch)) {
408 --scale;
409 value = ch - '0';
410 value = (m_sign == '-' ? -value : value);
411 save_ch(ch);
412 ch = next_ch();
413 }
414 else
415 /* no significant digits -- number is zero */
416 scale = 0;
417 }
418 /* value != 0 || value == 0 && !isDECIMAL_DIGIT(ch) */
419
420 /* Scan additional significant digits */
421 if (isDECIMAL_DIGIT(ch)) {
422 if (value > 0) {
423 while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
424 --scale;
425 value = (value << 3) + (value << 1) + (ch - '0');
426 save_ch(ch);
427 ch = next_ch();
428 }
429 /* Check boundary case */
430 if (isDECIMAL_DIGIT(ch) && value == (MAX_INTEGER/10)) {
431 digit = ch - '0';
432 if (digit <= MAX_INTEGER%10) {
433 --scale;
434 value = (value << 3) + (value << 1) + digit;
435 save_ch(ch);
436 ch = next_ch();
437 }
438 }
439 }
440 else {
441 /* value < 0 */
442 while(isDECIMAL_DIGIT(ch) && value > -(-(MIN_INTEGER+10)/10+1)) {
443 /* Use positive / operands for portability */
444 --scale;
445 value = (value << 3) + (value << 1) - (ch - '0');
446 save_ch(ch);
447 ch = next_ch();
448 }
449 /* Check boundary case */
450 if (isDECIMAL_DIGIT(ch)
451 && value == -(-(MIN_INTEGER+10)/10+1)) {
452 digit = ch - '0';
453 if (digit <= -(MIN_INTEGER+10)%10) {
454 /* Use positive % operands for portability */
455 --scale;
456 value = (value << 3) + (value << 1) - digit;
457 save_ch(ch);
458 ch = next_ch();
459 }
460 }
461 }
462
463 /* Additional digits can be discarded */
464 while(isDECIMAL_DIGIT(ch)) {
465 save_ch(ch);
466 ch = next_ch();
467 }
468 }
469
470 /* Store results */
471 m_value = value;
472 m_scale = scale;
473
474 /* Initialize for possible real */
475 e_sign = '+';
476 e_value = 0;
477 e_scale = 0;
478
479 return(ch);
480 }
481
add_e_sign(ch)482 static int add_e_sign(ch)
483 int ch;
484 {
485 e_sign = ch;
486 save_ch(ch);
487 return(next_ch());
488 }
489
add_exponent(ch)490 static int add_exponent(ch)
491 int ch;
492 {
493 long value, p_value;
494 long scale = 0;
495 int digit;
496
497 /* On entry, expect e_sign to be set to '+' or '-' */
498
499 value = ch - '0';
500 save_ch(ch);
501 ch = next_ch();
502
503 while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
504 value = (value << 3) + (value << 1) + (ch - '0');
505 save_ch(ch);
506 ch = next_ch();
507 }
508
509 p_value = value;
510 value = (e_sign == '-' ? -value : value);
511
512 /* Handle additional digits. Beyond the boundary case,
513 * 10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
514 * just count the digits: the number is too large to
515 * represent as an integer.
516 */
517 if (isDECIMAL_DIGIT(ch)) {
518
519 /* Examine boundary case */
520 if (p_value == (MAX_INTEGER/10)) {
521 digit = ch - '0';
522
523 /* Must handle positive and negative values separately */
524 /* for 2's complement arithmetic */
525 if (value > 0) {
526 if (digit <= MAX_INTEGER%10)
527 value = (value << 3) + (value << 1) + digit;
528 else
529 ++scale; /* Too big, just count it */
530 }
531 else {
532 /* Use positive % operands for portability */
533 if (digit <= -(MIN_INTEGER+10)%10)
534 value = (value << 3) + (value << 1) - digit;
535 else
536 ++scale; /* Too big, just count it */
537 }
538 }
539 else
540 ++scale; /* Not boundary case, just count digit */
541
542 save_ch(ch);
543 ch = next_ch();
544
545 /* Continue scanning digits, but can't store any more */
546 while(isDECIMAL_DIGIT(ch)) {
547 ++scale;
548 save_ch(ch);
549 ch = next_ch();
550 }
551 }
552
553 /* Store results */
554 e_value = value;
555 e_scale = scale;
556
557 return(ch);
558 }
559
add_radix(ch)560 static int add_radix(ch)
561 int ch;
562 {
563 if (2 <= m_value && m_value <= 36 && m_scale == 0) {
564 r_base = m_value;
565 save_ch(ch);
566 return(next_ch());
567 }
568 else {
569 /* Radix invalid, complete a name token */
570 return(AAH_NAME(ch));
571 }
572 }
573
add_r_digits(ch)574 static int add_r_digits(ch)
575 int ch;
576 {
577 unsigned long value;
578 long radix, scale;
579 int digit;
580
581 /* NOTE: The syntax of a radix number allows only for
582 * values of zero or more. The value will be stored as
583 * a 32 bit integer, which PostScript then interprets
584 * as signed. This means, for example, that the numbers:
585 *
586 * 8#37777777777
587 * 10#4294967295
588 * 16#FFFFFFFF
589 * 36#1Z141Z3
590 *
591 * are all interpreted as -1. This routine implements this
592 * idea explicitly: it accumulates the number's value
593 * as unsigned, then casts it to signed when done.
594 */
595
596 /* Expect r_base to be initialized */
597 radix = r_base;
598 value = 0;
599 scale = 0;
600
601 /* Scan leading zeroes */
602 while(ch == '0') {
603 save_ch(ch);
604 ch = next_ch();
605 }
606
607 /* Handle first non-zero digit */
608 if ((digit=digit_value[ch]) < radix) {
609 value = digit;
610 save_ch(ch);
611 ch = next_ch();
612
613 /* Add digits until boundary case reached */
614 while((digit=digit_value[ch]) < radix
615 && value < (MAX_ULONG / radix)) {
616 value = value * radix + digit;
617 save_ch(ch);
618 ch = next_ch();
619 };
620
621 /* Scan remaining digits */
622 if ((digit=digit_value[ch]) < radix) {
623
624 /* Examine boundary case ---
625 * radix*(MAX_ULONG/radix) <= number <= MAX_ULONG
626 */
627 if (value == (MAX_ULONG/radix) && digit <= MAX_ULONG%radix)
628 value = value * radix + digit;
629 else
630 ++scale;
631
632 /* Continue scanning digits, but can't store them */
633 save_ch(ch);
634 ch = next_ch();
635 while(digit_value[ch] < radix) {
636 ++scale;
637 save_ch(ch);
638 ch = next_ch();
639 }
640 }
641 }
642
643 /* Store result */
644 r_value = (long) value; /* result is signed */
645 r_scale = scale;
646
647 return(ch);
648 }
649
650 /* -------------------------------------------------------------------
651 * Complete a number; set token type and done flag.
652 * Put current input character back, if it is not white space.
653 */
654
655 /* Done: Radix Number */
RADIX_NUMBER(ch)656 static int RADIX_NUMBER(ch)
657 int ch;
658 {
659 back_ch_not_white(ch);
660 if (r_scale == 0) {
661 tokenValue.integer = r_value;
662 tokenType = TOKEN_INTEGER;
663 }
664 else {
665 tokenType = TOKEN_NAME;
666 }
667 return(DONE);
668 }
669
670 /* Done: Integer */
INTEGER(ch)671 static int INTEGER(ch)
672 int ch;
673 {
674 back_ch_not_white(ch);
675 if (m_scale == 0) {
676 tokenValue.integer = m_value;
677 tokenType = TOKEN_INTEGER;
678 }
679 else {
680 tokenValue.real = (double)(m_value) * Exp10(m_scale);
681 tokenType = TOKEN_REAL;
682 }
683 return(DONE);
684 }
685
686 /* Done: Real */
REAL(ch)687 static int REAL(ch)
688 int ch;
689 {
690 double temp;
691
692 back_ch_not_white(ch);
693
694 /* NOTE: ignore e_scale, since e_value alone will cause
695 * exponent overflow if e_scale > 0.
696 */
697
698 /* HAZARD: exponent overflow of intermediate result
699 * (e.g., in 370 floating point); this should not be a problem
700 * with IEEE floating point. Reduce exponent overflow hazard by
701 * combining m_scale and e_value first, if they have different signs,
702 * or multiplying m_value and one of the other factors, if both
703 * m_scale and e_value are negative.
704 */
705 if ((m_scale >= 0 && e_value <= 0)
706 || (m_scale <= 0 && e_value >= 0)) {
707 tokenValue.real = (double)(m_value) * Exp10(m_scale + e_value);
708 }
709 else {
710 temp = (double)(m_value) * Exp10(m_scale);
711 tokenValue.real = temp * Exp10(e_value);
712 }
713
714 tokenType = TOKEN_REAL;
715 return(DONE);
716 }
717
718
719 /* -------------------------------------------------------------------
720 * Assemble a hex string; set token type and done flag.
721 */
722
723 /* Done: Hex String */
HEX_STRING(ch)724 static int HEX_STRING(ch)
725 int ch;
726 {
727 int value;
728
729 while(TRUE) {
730
731 /* Process odd digit */
732 ch = next_ch();
733 if (!isHEX_DIGIT(ch)) {
734
735 /* Skip white space */
736 while(isWHITE_SPACE(ch))
737 ch = next_ch();
738
739 /* Check for terminator */
740 if (!isHEX_DIGIT(ch)) {
741 break;
742 }
743 }
744 value = digit_value[ch] << 4;
745
746 /* Process even digit */
747 ch = next_ch();
748 if (!isHEX_DIGIT(ch)) {
749
750 /* Skip white space */
751 while(isWHITE_SPACE(ch))
752 ch = next_ch();
753
754 /* Check for terminator */
755 if (!isHEX_DIGIT(ch)) {
756 save_ch(value);
757 break;
758 }
759 }
760 save_ch(value + digit_value[ch]);
761 }
762
763 /* Classify result, based on why loop ended */
764 if (ch == '>')
765 tokenType = TOKEN_HEX_STRING;
766 else {
767 /* save the invalid character for error reporting */
768 save_ch(ch);
769 tokenType = TOKEN_INVALID;
770 }
771
772 return(DONE);
773 }
774
775 /* -------------------------------------------------------------------
776 * Assemble a string; set token type and done flag
777 */
778
779 /* Save a backslash-coded character in a string --
780 *
781 * Store the proper character for special cases
782 * "\b", "\f", "\n", "\r", and "\t".
783 *
784 * Decode and store octal-coded character, up to
785 * three octal digits, "\o", "\oo", and "\ooo".
786 *
787 * The sequence "\<newline>" is a line continuation,
788 * so consume both without storing anything.
789 *
790 * The sequence "\<EOF>" is an error; exit without
791 * storing anything and let the caller handle it.
792 *
793 * For other characters, including the sequences
794 * "\\", "\(", and "\)", simply store the second
795 * character.
796 */
save_digraph(ch)797 static void save_digraph(ch)
798 int ch;
799 {
800 int value;
801
802 switch (ch) {
803
804 case 'b': /* backspace */
805 ch = '\b';
806 break;
807
808 case 'f': /* formfeed */
809 ch = '\f';
810 break;
811
812 case 'n': /* newline */
813 ch = '\n';
814 break;
815
816 case 'r': /* carriage return */
817 ch = '\r';
818 break;
819
820 case 't': /* horizontal tab */
821 ch = '\t';
822 break;
823
824 case '\n': /* line continuation -- consume it */
825 return;
826
827 case '\r': /* carriage return -- consume it */
828 ch = next_ch(); /* look at next character, is it \n? */
829 if (ch == '\n') return;
830 back_ch(ch); /* if not a line feed, then return it */
831 return;
832
833 case EOF: /* end of file -- forget it */
834 return;
835
836 default:
837 /* scan up to three octal digits to get value */
838 if (isOCTAL_DIGIT(ch)) {
839 value = digit_value[ch];
840 ch = next_ch();
841 if (isOCTAL_DIGIT(ch)) {
842 value = (value << 3) + digit_value[ch];
843 ch = next_ch();
844 if (isOCTAL_DIGIT(ch))
845 value = (value << 3) + digit_value[ch];
846 else
847 back_ch(ch);
848 }
849 else
850 back_ch(ch);
851 ch = value;
852 }
853 }
854
855 /* Found a character to save */
856 save_ch(ch);
857 }
858
859 /* Done: String */
STRING(ch)860 static int STRING(ch)
861 int ch;
862 {
863 int nest_level = 1;
864
865 tokenType = TOKEN_STRING;
866
867 do {
868
869 ch = next_ch();
870 while(!isSTRING_SPECIAL(ch)) {
871 save_ch(ch);
872 ch = next_ch();
873 };
874
875 switch (ch) {
876
877 case '(':
878 ++nest_level;
879 save_ch(ch);
880 break;
881
882 case ')':
883 if (--nest_level > 0)
884 save_ch(ch);
885 break;
886
887 case '\\':
888 save_digraph(next_ch());
889 break;
890
891 case '\r':
892 /* All carriage returns (\r) are turned into linefeeds (\n)*/
893 ch = next_ch(); /* get the next one, is it \n? */
894 if (ch != '\n') { /* if not, then put it back. */
895 back_ch(ch);
896 }
897 save_ch('\n'); /* in either case, save a linefeed */
898 break;
899
900
901 case EOF:
902 tokenType = TOKEN_INVALID; /* Unterminated string */
903 nest_level = 0;
904 break;
905 }
906
907 } while(nest_level > 0);
908
909 /* If there's room, add a 0-byte termination without increasing string
910 length. This fixes certain dependencies on 0-terminated strings */
911 save_ch_no_inc(0);
912
913 return(DONE);
914 }
915
916
917 /* -------------------------------------------------------------------
918 * Assemble a name; set token type and done flag.
919 * Put current input character back, if it is not white space.
920 */
921
922 /* Done: Name
923 * (Safe version used to complete name tokens that
924 * start out looking like something else).
925 */
926
AAH_NAME(ch)927 static int AAH_NAME(ch)
928 int ch;
929 {
930 do {
931 save_ch(ch);
932 ch = next_ch();
933 } while(isNAME(ch));
934
935 back_ch_not_white(ch);
936 tokenType = TOKEN_NAME;
937 return(DONE);
938 }
939
940 /* Done: Name */
NAME(ch)941 static int NAME(ch)
942 int ch;
943 {
944 save_unsafe_ch(ch);
945 ch = next_ch();
946 if (isNAME(ch)) {
947 save_unsafe_ch(ch);
948 ch = next_ch();
949 if (isNAME(ch)) {
950 save_unsafe_ch(ch);
951 ch = next_ch();
952 if (isNAME(ch)) {
953 save_unsafe_ch(ch);
954 ch = next_ch();
955 if (isNAME(ch)) {
956 save_unsafe_ch(ch);
957 ch = next_ch();
958 if (isNAME(ch)) {
959 save_unsafe_ch(ch);
960 ch = next_ch();
961 if (isNAME(ch)) {
962 save_unsafe_ch(ch);
963 ch = next_ch();
964 while(isNAME(ch)) {
965 save_ch(ch);
966 ch = next_ch();
967 }
968 }
969 }
970 }
971 }
972 }
973 }
974
975 back_ch_not_white(ch);
976 tokenType = TOKEN_NAME;
977 return(DONE);
978 }
979
980 /* Done: Literal Name */
LITERAL_NAME(ch)981 static int LITERAL_NAME(ch)
982 int ch;
983 {
984 if (isNAME(ch)) {
985 save_unsafe_ch(ch);
986 ch = next_ch();
987 if (isNAME(ch)) {
988 save_unsafe_ch(ch);
989 ch = next_ch();
990 if (isNAME(ch)) {
991 save_unsafe_ch(ch);
992 ch = next_ch();
993 if (isNAME(ch)) {
994 save_unsafe_ch(ch);
995 ch = next_ch();
996 if (isNAME(ch)) {
997 save_unsafe_ch(ch);
998 ch = next_ch();
999 if (isNAME(ch)) {
1000 save_unsafe_ch(ch);
1001 ch = next_ch();
1002 while(isNAME(ch)) {
1003 save_ch(ch);
1004 ch = next_ch();
1005 }
1006 }
1007 }
1008 }
1009 }
1010 }
1011 }
1012
1013 back_ch_not_white(ch);
1014 tokenType = TOKEN_LITERAL_NAME;
1015 return(DONE);
1016 }
1017
1018 /* Done: immediate Name */
IMMED_NAME(ch)1019 static int IMMED_NAME(ch)
1020 int ch;
1021 {
1022 ch = next_ch();
1023 if (isNAME(ch)) {
1024 save_unsafe_ch(ch);
1025 ch = next_ch();
1026 if (isNAME(ch)) {
1027 save_unsafe_ch(ch);
1028 ch = next_ch();
1029 if (isNAME(ch)) {
1030 save_unsafe_ch(ch);
1031 ch = next_ch();
1032 if (isNAME(ch)) {
1033 save_unsafe_ch(ch);
1034 ch = next_ch();
1035 if (isNAME(ch)) {
1036 save_unsafe_ch(ch);
1037 ch = next_ch();
1038 if (isNAME(ch)) {
1039 save_unsafe_ch(ch);
1040 ch = next_ch();
1041 while(isNAME(ch)) {
1042 save_ch(ch);
1043 ch = next_ch();
1044 }
1045 }
1046 }
1047 }
1048 }
1049 }
1050 }
1051
1052 back_ch_not_white(ch);
1053 tokenType = TOKEN_IMMED_NAME;
1054 return(DONE);
1055 }
1056
1057 /* Done: Name found while looking for something else */
OOPS_NAME(ch)1058 static int OOPS_NAME(ch)
1059 int ch;
1060 {
1061 back_ch_not_white(ch);
1062 tokenType = TOKEN_NAME;
1063 return(DONE);
1064 }
1065
1066
1067 /* -------------------------------------------------------------------
1068 * Complete a miscellaneous token; set token type and done flag.
1069 */
1070
1071 /* Done: Unmatched Right Angle-Bracket */
RIGHT_ANGLE(ch)1072 static int RIGHT_ANGLE(ch)
1073 int ch;
1074 {
1075 tokenType = TOKEN_RIGHT_ANGLE;
1076 return(DONE);
1077 }
1078
1079 /* Done: Unmatched Right Parenthesis */
RIGHT_PAREN(ch)1080 static int RIGHT_PAREN(ch)
1081 int ch;
1082 {
1083 tokenType = TOKEN_RIGHT_PAREN;
1084 return(DONE);
1085 }
1086
1087 /* Done: Left Brace */
LEFT_BRACE(ch)1088 static int LEFT_BRACE(ch)
1089 int ch;
1090 {
1091 tokenType = TOKEN_LEFT_BRACE;
1092 return(DONE);
1093 }
1094
1095 /* Done: Right Brace */
RIGHT_BRACE(ch)1096 static int RIGHT_BRACE(ch)
1097 int ch;
1098 {
1099 tokenType = TOKEN_RIGHT_BRACE;
1100 return(DONE);
1101 }
1102
1103 /* Done: Left Bracket */
LEFT_BRACKET(ch)1104 static int LEFT_BRACKET(ch)
1105 int ch;
1106 {
1107 save_unsafe_ch(ch);
1108 tokenType = TOKEN_LEFT_BRACKET;
1109 return(DONE);
1110 }
1111
1112 /* Done: Right Bracket */
RIGHT_BRACKET(ch)1113 static int RIGHT_BRACKET(ch)
1114 int ch;
1115 {
1116 save_unsafe_ch(ch);
1117 tokenType = TOKEN_RIGHT_BRACKET;
1118 return(DONE);
1119 }
1120
1121 /* Done: Break */
BREAK_SIGNAL(ch)1122 static int BREAK_SIGNAL(ch)
1123 int ch;
1124 {
1125 tokenType = TOKEN_BREAK;
1126 return(DONE);
1127 }
1128
1129 /* Done: No Token Found */
NO_TOKEN(ch)1130 static int NO_TOKEN(ch)
1131 int ch;
1132 {
1133 tokenType = TOKEN_EOF;
1134 return(DONE);
1135 }
1136
1137
1138 /*
1139 * -------------------------------------------------------------------
1140 * scan_token -- scan one token from the input. It uses a simple
1141 * finite state machine to recognize token classes.
1142 *
1143 * The input is from a file.
1144 *
1145 * On entry --
1146 *
1147 * inputP -> input PostScript object, a file.
1148 * tokenStartP -> buffer in VM for accumulating the token.
1149 * tokenMaxP -> last character in the token buffer
1150 *
1151 * On exit --
1152 *
1153 * tokenLength = number of characters in the token
1154 * tokenTooLong = TRUE if the token did not fit in the buffer
1155 * tokenType = code for the type of token parsed.
1156 * tokenValue = converted value of a numeric token.
1157 *
1158 *
1159 * -------------------------------------------------------------------
1160 */
scan_token(inputP)1161 void scan_token(inputP)
1162 psobj *inputP;
1163 {
1164 int ch;
1165 unsigned char *stateP = s0;
1166 unsigned char entry;
1167 int (*actionP)();
1168
1169 /* Define input source */
1170 inputFileP = inputP->data.fileP;
1171 if (inputFileP == NULL) {
1172 tokenType = TOKEN_EOF;
1173 return;
1174 }
1175
1176 /* Ensure enough space for most cases
1177 * (so we don't have to keep checking)
1178 * The length needs to cover the maximum number
1179 * of save_unsafe_ch() calls that might be executed.
1180 * That number is 11 (a sign and 10 decimal digits, e.g.,
1181 * when scanning -2147483648), but use MAX_NAME_LEN
1182 * in case someone changes that without checking.
1183 */
1184 tokenStartP = vm_next_byte();
1185 tokenMaxP = tokenStartP + MIN(vm_free_bytes(), MAX_STRING_LEN);
1186
1187 if ((tokenMaxP-tokenStartP) < (MAX_NAME_LEN)) {
1188 tokenLength = 0;
1189 tokenTooLong = TRUE;
1190 tokenType = TOKEN_NONE;
1191 tokenValue.integer = 0;
1192 return;
1193 }
1194
1195 /* Reset token */
1196 tokenCharP = tokenStartP;
1197 tokenTooLong = FALSE;
1198
1199 /* Scan one token */
1200 ch = next_ch();
1201 do {
1202 entry = stateP[ch];
1203 stateP = classActionTable[entry].nextStateP;
1204 actionP = classActionTable[entry].actionRoutineP;
1205 ch = (*actionP)(ch);
1206 } while(ch != DONE);
1207
1208
1209 /* Return results */
1210 tokenLength = tokenCharP - tokenStartP;
1211 }
1212