1 /* $XConsortium: token.c,v 1.2 91/10/10 11:19:55 rws Exp $ */
2 /* Copyright International Business Machines,Corp. 1991
3  * All Rights Reserved
4  *
5  * License to use, copy, modify, and distribute this software
6  * and its documentation for any purpose and without fee is
7  * hereby granted, provided that the above copyright notice
8  * appear in all copies and that both that copyright notice and
9  * this permission notice appear in supporting documentation,
10  * and that the name of IBM not be used in advertising or
11  * publicity pertaining to distribution of the software without
12  * specific, written prior permission.
13  *
14  * IBM PROVIDES THIS SOFTWARE "AS IS", WITHOUT ANY WARRANTIES
15  * OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, BUT NOT
16  * LIMITED TO ANY IMPLIED WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT OF
18  * THIRD PARTY RIGHTS.  THE ENTIRE RISK AS TO THE QUALITY AND
19  * PERFORMANCE OF THE SOFTWARE, INCLUDING ANY DUTY TO SUPPORT
20  * OR MAINTAIN, BELONGS TO THE LICENSEE.  SHOULD ANY PORTION OF
21  * THE SOFTWARE PROVE DEFECTIVE, THE LICENSEE (NOT IBM) ASSUMES
22  * THE ENTIRE COST OF ALL SERVICING, REPAIR AND CORRECTION.  IN
23  * NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL, INDIRECT OR
24  * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
25  * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
26  * CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
27  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
28  * SOFTWARE.
29  */
30 /* Authors: Sig Nin & Carol Thompson IBM Almaden Research Laboratory */
31 #include "types.h"
32 #include "t1stdio.h"
33 #include "util.h"
34 #include "digit.h"
35 #include "token.h"
36 #include "tokst.h"
37 #include "hdigit.h"
38 /*
39  * -------------------------------------------------------------------
40  * Globals
41  * -------------------------------------------------------------------
42  */
43 
/* These variables are set by the caller */
/* NOTE: scan_token() in this file also assigns tokenStartP itself,
 * from vm_next_byte(), before it starts scanning.
 */
char           *tokenStartP;   /* Pointer to token buffer in VM */
char           *tokenMaxP;     /* Pointer to last byte in buffer + 1;
                                * save_ch() stores a character only
                                * while the write pointer is below
                                * this address */

/* These variables are set by TOKEN */
int             tokenLength;   /* Characters in token */
boolean         tokenTooLong;  /* Token too long for buffer */
int             tokenType;     /* Type of token identified */
psvalue         tokenValue;    /* Token value */
53 
54 /*
55  * -------------------------------------------------------------------
56  * Private variables
57  * -------------------------------------------------------------------
58  */
59 
static FILE    *inputFileP;    /* Current input file; assigned at the
                                * start of scan_token() and used by
                                * the next_ch()/back_ch() macros */


/* Token */
static char    *tokenCharP;    /* Pointer to next character in token,
                                * i.e. where the next saved character
                                * will be written */
65 
66 /*
67  * -------------------------------------------------------------------
68  * Private routines for manipulating numbers
69  * -------------------------------------------------------------------
70  */
71 
/* Exp10(e) -- ten raised to the integer power e, as a DOUBLE.
 *
 *   e outside -64..63  -> computed by P10()
 *   e == 0             -> the constant 1.0
 *   otherwise          -> looked up in Exp10T, which is indexed by e+64
 *
 * NOTE: the argument is evaluated more than once.
 */
#define Exp10(e) \
(((e) < -64 || 63 < (e))\
 ? P10(e)\
 : ((e) == 0\
    ? (DOUBLE)(1.0)\
    : Exp10T[(e)+64]\
   )\
)
80 
81 static DOUBLE Exp10T[128] = {
82   1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 1e-59, 1e-58, 1e-57,
83   1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49,
84   1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41,
85   1e-40, 1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33,
86   1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25,
87   1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 1e-19, 1e-18, 1e-17,
88   1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,
89   1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1,
90   1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
91   1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15,
92   1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23,
93   1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
94   1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39,
95   1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47,
96   1e48, 1e49, 1e50, 1e51, 1e52, 1e53, 1e54, 1e55,
97   1e56, 1e57, 1e58, 1e59, 1e60, 1e61, 1e62, 1e63
98 };
99 
P10(int32_t exponent)100 static DOUBLE P10(int32_t exponent)
101 {
102   DOUBLE value, power;
103 
104   if (exponent < 0) {
105     power = 0.1;
106     value = (exponent & 1 ? power : 1.0);
107     exponent = -((exponent + 1) >> 1); /* portable C for -(exponent/2) */
108   }
109   else {
110     power = 10.0;
111     value = (exponent & 1 ? power : 1.0);
112     exponent = exponent >> 1;
113   }
114 
115   while(exponent > 0) {
116     power *= power;
117     if (exponent & 1)
118       value *= power;
119     exponent >>= 1;
120   }
121 
122   return(value);
123 }
124 
125 /*
126  * -------------------------------------------------------------------
127  * Private routines and macros for manipulating the input
128  * -------------------------------------------------------------------
129  */
130 
/* Get next character from the input --
 *
 * Expands to getc() on the current input file (inputFileP), so it
 * yields the next character as an int, or EOF when getc() reports
 * end of file or an error.
 */
#define next_ch()    (getc(inputFileP))
135 
/* Return one character to the input stream (inputFileP) --
 *
 * Pushing back EOF fails, which is harmless: the following read
 * simply reports EOF again.
 *
 * NOTE:  At present the macro yields whatever ungetc() yields (the
 * character pushed, or EOF if nothing was pushed).  That value is
 * not part of the contract and callers should not depend on it.
 */
#define back_ch(ch)   (ungetc((ch), inputFileP))
147 
/* Return a token-ending character to the input unless it is white
 * space, in which case it is consumed.
 *
 *   not white space  -> pushed back with back_ch()
 *   white, not '\r'  -> dropped
 *   '\r'             -> the following character is read; a '\n' is
 *                       dropped too, anything else is pushed back
 *
 * ch must be a modifiable lvalue (the '\r' case overwrites it) and
 * is evaluated more than once.
 */
#define back_ch_not_white(ch) \
(\
!isWHITE_SPACE(ch)\
 ? back_ch(ch)\
 : (((ch) != '\r')\
    ? EOF\
    : ((((ch) = next_ch()) != '\n')\
       ? back_ch(ch)\
       : EOF\
      )\
   )\
)
165 
166 /*
167  * -------------------------------------------------------------------
168  * Private routines and macros for manipulating the token buffer
169  * -------------------------------------------------------------------
170  */
171 
/* Append a character to the token with NO bounds check
 * ---- only for call sites where the character is KNOWN to fit
 *      inside the token buffer.
 * Stores at tokenCharP and advances it by one.
 */
#define save_unsafe_ch(ch) (*tokenCharP++ = (ch))
177 
/* Append a character to the token if there is still room.
 * When the write pointer has reached tokenMaxP nothing is stored
 * and tokenTooLong is set instead.
 */
#define save_ch(ch) \
((tokenCharP >= tokenMaxP)\
 ? (tokenTooLong = TRUE)\
 : save_unsafe_ch(ch)\
)
184 
185 /*
186  * -------------------------------------------------------------------
187  * Action Routines
188  *
189  *  These routines all
190  *    -- take int ch as a parameter
191  *    -- return int ch if no token was recognized, DONE otherwise
192  *    -- leave the next character in the input, if returning DONE
193  * -------------------------------------------------------------------
194  */
195 
/* Value an action routine returns in place of a character once a
 * complete token has been recognized; scan_token() loops until it
 * sees this value.  Presumably 256 was chosen because it cannot be a
 * character read by getc() on 8-bit-char systems, nor EOF.
 */
#define DONE  (256)
197 
/* Action: drop the current character and read the next one.
 *   ch -- current character (not used)
 *   Returns the next input character, which may be EOF.
 */
static int next_char(int ch)
{
  (void) ch;
  return next_ch();
}
203 
/* Action: append the current character to the token (bounds
 * checked) and read the next one.
 *   ch -- character to store
 *   Returns the next input character, which may be EOF.
 */
static int add_char(int ch)
{
  int following;

  save_ch(ch);
  following = next_ch();
  return following;
}
210 
211 
212 /* -------------------------------------------------------------------
213  * Skip white space and comments
214  */
215 
/* Action: skip a run of white space.
 *   ch -- current character (its value is not examined)
 *   Reads at least one character and keeps reading while
 *   isWHITE_SPACE() holds; returns the first character for which
 *   it does not.
 */
static int skip_space(int ch)
{
  for (ch = next_ch(); isWHITE_SPACE(ch); ch = next_ch()) {
    /* nothing to do: white space is discarded */
  }
  return ch;
}
224 
/* Action: skip the body of a comment.
 *   ch -- current character (its value is not examined)
 *   Reads at least one character and keeps reading while
 *   isCOMMENT() holds; returns the first character for which it
 *   does not.
 */
static int skip_comment(int ch)
{
  for (ch = next_ch(); isCOMMENT(ch); ch = next_ch()) {
    /* nothing to do: comment text is discarded */
  }
  return ch;
}
233 
234 /* -------------------------------------------------------------------
235  * Collect value elements for a number
236  */
237 
/* decimal integer or real number mantissa */
static int m_sign;        /* '+' or '-' (the sign character itself) */
static int32_t m_value;   /* signed mantissa digits accumulated so far */
static int32_t m_scale;   /* power-of-ten adjustment: counts integer
                           * digits that did not fit (positive) or
                           * fraction digits folded in (negative) */

/* real number exponent */
static int e_sign;        /* '+' or '-' (the sign character itself) */
static int32_t e_value;   /* signed exponent value */
static int32_t e_scale;   /* count of exponent digits that did not fit;
                           * REAL() ignores it */

/* radix number */
static int32_t r_base;    /* radix, 2..36 (checked by add_radix) */
static int32_t r_value;   /* value, accumulated unsigned then stored
                           * as signed by add_r_digits */
static int32_t r_scale;   /* count of digits that did not fit */
252 
add_sign(int ch)253 static int add_sign(int ch)
254 {
255   m_sign = ch;
256   save_unsafe_ch(ch);
257   return(next_ch());
258 }
259 
add_1st_digits(int ch)260 static int add_1st_digits(int ch)
261 {
262   m_sign = '+';
263   return(add_digits(ch));
264 }
265 
add_digits(int ch)266 static int add_digits(int ch)
267 {
268   int32_t value, p_value, scale;
269   int digit;
270 
271   /* On entry, expect m_sign to be set to '+' or '-';
272    *  ch is a decimal digit.
273    * Expect at most one character saved at this point,
274    *  a sign.  This routine will save up to 10 more
275    *  characters without checking the buffer boundary.
276    */
277 
278   value = ch - '0';
279   save_unsafe_ch(ch);
280   ch = next_ch();
281 
282   while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
283     value = (value << 3) + (value << 1) + (ch - '0');
284     save_unsafe_ch(ch);
285     ch = next_ch();
286   }
287 
288   /* Quick exit for small integers --
289    *    |x| <= 10*((MAX_INTEGER/10)-1)+9
290    *    |x| <= 2,147,483,639 for 32 bit integers
291    */
292   if (isNUMBER_ENDER(ch)) {
293     back_ch_not_white(ch);
294     tokenValue.integer = (m_sign == '-' ? -value : value);
295     tokenType = TOKEN_INTEGER;
296     return(DONE);
297   }
298 
299   /* Handle additional digits.  Beyond the boundary case,
300    *   10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
301    * just count the digits: the number is too large to
302    * represent as an integer and will be returned as a real.
303    * The mantissa of a real holds fewer bits than an integer.
304    */
305   p_value = value;
306   value = (m_sign == '-' ? -value : value);
307   scale = 0;
308 
309   if (isDECIMAL_DIGIT(ch)) {
310 
311     /* Handle the boundary case */
312     if (p_value == (MAX_INTEGER/10)) {
313       digit = ch - '0';
314 
315       /* Must handle positive and negative values separately  */
316       /* for 2's complement arithmetic */
317       if (value > 0) {
318         if (digit <= MAX_INTEGER%10)
319           value = (value << 3) + (value << 1) + digit;
320         else
321           ++scale;  /* Too big, just count it */
322       }
323       else {
324         /* Use positive % operands for portability */
325         if (digit <= -(MIN_INTEGER+10)%10)
326           value = (value << 3) + (value << 1) - digit;
327         else
328           ++scale;  /* Too big, just count it */
329       }
330     }
331     else
332       ++scale;  /* Not boundary case, just count digit */
333 
334     save_unsafe_ch(ch);
335     ch = next_ch();
336 
337     /* Continue scanning digits, but can't store them */
338     while(isDECIMAL_DIGIT(ch)) {
339       ++scale;
340       save_ch(ch);
341       ch = next_ch();
342     }
343   }
344 
345   /* Continue from here scanning radix integer or real */
346   m_value = value;
347   m_scale = scale;
348 
349   /* Initialize for possible real */
350   e_sign = '+';
351   e_value = 0;
352   e_scale = 0;
353 
354   return(ch);
355 }
356 
add_1st_decpt(int ch)357 static int add_1st_decpt(int ch)
358 {
359   m_sign = '+';
360   return(add_decpt(ch));
361 }
362 
add_decpt(int ch)363 static int add_decpt(int ch)
364 {
365   /* On entry, expect m_sign to be set to '+' or '-' */
366   m_value = 0;
367   m_scale = 0;
368   save_unsafe_ch(ch);
369   return(next_ch());
370 }
371 
add_fraction(int ch)372 static int add_fraction(int ch)
373 {
374   int32_t value, scale;
375   int digit;
376 
377   /* On entry, expect m_value and m_scale to be initialized,
378    * and m_sign to be set to '+' or '-'.  Expect m_value and m_sign
379    * to be consistent (this is not checked).
380    */
381   value = m_value;
382   scale = m_scale;
383 
384   /* Scan leading zeroes */
385   if (value == 0) {
386     while(ch == '0') {
387       --scale;
388       save_ch(ch);
389       ch = next_ch();
390     }
391 
392     /* Scan first significant digit */
393     if (isDECIMAL_DIGIT(ch)) {
394       --scale;
395       value = ch - '0';
396       value = (m_sign == '-' ? -value : value);
397       save_ch(ch);
398       ch = next_ch();
399     }
400     else
401       /* no significant digits -- number is zero */
402       scale = 0;
403   }
404   /* value != 0 || value == 0 && !isDECIMAL_DIGIT(ch) */
405 
406   /* Scan additional significant digits */
407   if (isDECIMAL_DIGIT(ch)) {
408     if (value > 0) {
409       while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
410         --scale;
411         value = (value << 3) + (value << 1) + (ch - '0');
412         save_ch(ch);
413         ch = next_ch();
414       }
415       /* Check boundary case */
416       if (isDECIMAL_DIGIT(ch) && value == (MAX_INTEGER/10)) {
417         digit = ch - '0';
418         if (digit <= MAX_INTEGER%10) {
419           --scale;
420           value = (value << 3) + (value << 1) + digit;
421           save_ch(ch);
422           ch = next_ch();
423         }
424       }
425     }
426     else {
427       /* value < 0 */
428       while(isDECIMAL_DIGIT(ch) && value > -(-(MIN_INTEGER+10)/10+1)) {
429         /* Use positive / operands for portability */
430         --scale;
431         value = (value << 3) + (value << 1) - (ch - '0');
432         save_ch(ch);
433         ch = next_ch();
434       }
435       /* Check boundary case */
436       if (isDECIMAL_DIGIT(ch)
437           && value == -(-(MIN_INTEGER+10)/10+1)) {
438         digit = ch - '0';
439         if (digit <= -(MIN_INTEGER+10)%10) {
440         /* Use positive % operands for portability */
441           --scale;
442           value = (value << 3) + (value << 1) - digit;
443           save_ch(ch);
444           ch = next_ch();
445         }
446       }
447     }
448 
449     /* Additional digits can be discarded */
450     while(isDECIMAL_DIGIT(ch)) {
451       save_ch(ch);
452       ch = next_ch();
453     }
454   }
455 
456   /* Store results */
457   m_value = value;
458   m_scale = scale;
459 
460   /* Initialize for possible real */
461   e_sign = '+';
462   e_value = 0;
463   e_scale = 0;
464 
465   return(ch);
466 }
467 
add_e_sign(int ch)468 static int add_e_sign(int ch)
469 {
470   e_sign = ch;
471   save_ch(ch);
472   return(next_ch());
473 }
474 
add_exponent(int ch)475 static int add_exponent(int ch)
476 {
477   int32_t value, p_value;
478   int32_t scale = 0;
479   int digit;
480 
481   /* On entry, expect e_sign to be set to '+' or '-' */
482 
483   value = ch - '0';
484   save_ch(ch);
485   ch = next_ch();
486 
487   while(isDECIMAL_DIGIT(ch) && value < (MAX_INTEGER/10)) {
488     value = (value << 3) + (value << 1) + (ch - '0');
489     save_ch(ch);
490     ch = next_ch();
491   }
492 
493   p_value = value;
494   value = (e_sign == '-' ? -value : value);
495 
496   /* Handle additional digits.  Beyond the boundary case,
497    *   10*(MAX_INTEGER/10) <= |number| <= MAX_INTEGER
498    * just count the digits: the number is too large to
499    * represent as an integer.
500    */
501   if (isDECIMAL_DIGIT(ch)) {
502 
503     /* Examine boundary case */
504     if (p_value == (MAX_INTEGER/10)) {
505       digit = ch - '0';
506 
507       /* Must handle positive and negative values separately */
508       /*  for 2's complement arithmetic */
509       if (value > 0) {
510         if (digit <= MAX_INTEGER%10)
511           value = (value << 3) + (value << 1) + digit;
512         else
513           ++scale; /* Too big, just count it */
514       }
515       else {
516         /* Use positive % operands for portability */
517         if (digit <= -(MIN_INTEGER+10)%10)
518           value = (value << 3) + (value << 1) - digit;
519         else
520           ++scale; /* Too big, just count it */
521       }
522     }
523     else
524       ++scale;  /* Not boundary case, just count digit */
525 
526     save_ch(ch);
527     ch = next_ch();
528 
529     /* Continue scanning digits, but can't store any more */
530     while(isDECIMAL_DIGIT(ch)) {
531       ++scale;
532       save_ch(ch);
533       ch = next_ch();
534     }
535   }
536 
537   /* Store results */
538   e_value = value;
539   e_scale = scale;
540 
541   return(ch);
542 }
543 
add_radix(int ch)544 static int add_radix(int ch)
545 {
546   if (2 <= m_value && m_value <= 36 && m_scale == 0) {
547     r_base = m_value;
548     save_ch(ch);
549     return(next_ch());
550   }
551   else {
552     /* Radix invalid, complete a name token */
553     return(AAH_NAME(ch));
554   }
555 }
556 
add_r_digits(int ch)557 static int add_r_digits(int ch)
558 {
559   uint32_t value;
560   int32_t radix, scale;
561   int digit;
562 
563   /* NOTE:  The syntax of a radix number allows only for
564    * values of zero or more.  The value will be stored as
565    * a 32 bit integer, which PostScript then interprets
566    * as signed.  This means, for example, that the numbers:
567    *
568    *     8#37777777777
569    *    10#4294967295
570    *    16#FFFFFFFF
571    *    36#1Z141Z3
572    *
573    * are all interpreted as -1.  This routine implements this
574    * idea explicitly:  it accumulates the number's value
575    * as unsigned, then casts it to signed when done.
576    */
577 
578   /* Expect r_base to be initialized */
579   radix = r_base;
580   value = 0;
581   scale = 0;
582 
583   /* Scan leading zeroes */
584   while(ch == '0') {
585     save_ch(ch);
586     ch = next_ch();
587   }
588 
589   /* Handle first non-zero digit */
590   if ((digit=digit_value[ch]) < radix) {
591     value = digit;
592     save_ch(ch);
593     ch = next_ch();
594 
595     /* Add digits until boundary case reached */
596     while((digit=digit_value[ch]) < radix
597             && value < (MAX_INT32 / radix)) {
598       value = value * radix + digit;
599       save_ch(ch);
600       ch = next_ch();
601     };
602 
603     /* Scan remaining digits */
604     if ((digit=digit_value[ch]) < radix) {
605 
606       /* Examine boundary case ---
607        *   radix*(MAX_INT32/radix) <= number <= MAX_INT32
608        */
609       if (value == (MAX_INT32/radix) && digit <= MAX_INT32%radix)
610         value = value * radix + digit;
611       else
612         ++scale;
613 
614       /* Continue scanning digits, but can't store them */
615       save_ch(ch);
616       ch = next_ch();
617       while(digit_value[ch] < radix) {
618         ++scale;
619         save_ch(ch);
620         ch = next_ch();
621       }
622     }
623   }
624 
625   /* Store result */
626   r_value = (int32_t) value; /* result is signed */
627   r_scale = scale;
628 
629   return(ch);
630 }
631 
632 /* -------------------------------------------------------------------
633  * Complete a number; set token type and done flag.
634  * Put current input character back, if it is not white space.
635  */
636 
637 /* Done: Radix Number */
RADIX_NUMBER(int ch)638 static int RADIX_NUMBER(int ch)
639 {
640   back_ch_not_white(ch);
641   if (r_scale == 0) {
642     tokenValue.integer = r_value;
643     tokenType = TOKEN_INTEGER;
644   }
645   else {
646     tokenType = TOKEN_NAME;
647   }
648   return(DONE);
649 }
650 
651 /* Done: Integer */
INTEGER(int ch)652 static int INTEGER(int ch)
653 {
654   back_ch_not_white(ch);
655   if (m_scale == 0) {
656     tokenValue.integer = m_value;
657     tokenType = TOKEN_INTEGER;
658   }
659   else {
660     tokenValue.real = (DOUBLE)(m_value) * Exp10(m_scale);
661     tokenType = TOKEN_REAL;
662   }
663   return(DONE);
664 }
665 
666 /* Done: Real */
REAL(int ch)667 static int REAL(int ch)
668 {
669   DOUBLE temp;
670 
671   back_ch_not_white(ch);
672 
673   /* NOTE: ignore e_scale, since e_value alone will cause
674    *   exponent overflow if e_scale > 0.
675    */
676 
677   /* HAZARD: exponent overflow of intermediate result
678    * (e.g., in 370 floating point); this should not be a problem
679    * with IEEE floating point.  Reduce exponent overflow hazard by
680    * combining m_scale and e_value first, if they have different signs,
681    * or multiplying m_value and one of the other factors, if both
682    * m_scale and e_value are negative.
683    */
684   if ((m_scale >= 0 && e_value <= 0)
685       || (m_scale <= 0 && e_value >= 0)) {
686     tokenValue.real = (DOUBLE)(m_value) * Exp10(m_scale + e_value);
687   }
688   else {
689     temp = (DOUBLE)(m_value) * Exp10(m_scale);
690     tokenValue.real = temp * Exp10(e_value);
691   }
692 
693   tokenType = TOKEN_REAL;
694   return(DONE);
695 }
696 
697 
698 /* -------------------------------------------------------------------
699  * Assemble a hex string; set token type and done flag.
700  */
701 
702 /* Done: Hex String */
HEX_STRING(int ch)703 static int HEX_STRING(int ch)
704 {
705   int value;
706 
707   while(TRUE) {
708 
709     /* Process odd digit */
710     ch = next_ch();
711     if (!isHEX_DIGIT(ch)) {
712 
713       /* Skip white space */
714       while(isWHITE_SPACE(ch))
715         ch = next_ch();
716 
717       /* Check for terminator */
718       if (!isHEX_DIGIT(ch)) {
719         break;
720       }
721     }
722     value = digit_value[ch] << 4;
723 
724     /* Process even digit */
725     ch = next_ch();
726     if (!isHEX_DIGIT(ch)) {
727 
728       /* Skip white space */
729       while(isWHITE_SPACE(ch))
730         ch = next_ch();
731 
732       /* Check for terminator */
733       if (!isHEX_DIGIT(ch)) {
734         save_ch(value);
735         break;
736       }
737     }
738     save_ch(value + digit_value[ch]);
739   }
740 
741   /* Classify result, based on why loop ended */
742   if (ch == '>')
743     tokenType = TOKEN_HEX_STRING;
744   else {
745     /* save the invalid character for error reporting */
746     save_ch(ch);
747     tokenType = TOKEN_INVALID;
748   }
749 
750   return(DONE);
751 }
752 
753 /* -------------------------------------------------------------------
754  * Assemble a string; set token type and done flag
755  */
756 
757 /* Save a backslash-coded character in a string --
758  *
759  *   Store the proper character for special cases
760  *   "\b", "\f", "\n", "\r", and "\t".
761  *
762  *   Decode and store octal-coded character, up to
763  *   three octal digits, "\o", "\oo", and "\ooo".
764  *
765  *   The sequence "\<newline>" is a line continuation,
766  *   so consume both without storing anything.
767  *
768  *   The sequence "\<EOF>" is an error; exit without
769  *   storing anything and let the caller handle it.
770  *
771  *   For other characters, including the sequences
772  *   "\\", "\(", and "\)", simply store the second
773  *   character.
774  */
save_digraph(int ch)775 static void save_digraph(int ch)
776 {
777   int value;
778 
779   switch (ch) {
780 
781     case 'b':   /* backspace */
782       ch = '\b';
783       break;
784 
785     case 'f':   /* formfeed */
786       ch = '\f';
787       break;
788 
789     case 'n':   /* newline */
790       ch = '\n';
791       break;
792 
793     case 'r':   /* carriage return */
794       ch = '\r';
795       break;
796 
797     case 't':   /* horizontal tab */
798       ch = '\t';
799       break;
800 
801     case '\n':  /* line continuation -- consume it */
802       return;
803 
804     case '\r':  /* carriage return   -- consume it */
805       ch = next_ch();   /* look at next character, is it \n?  */
806       if (ch == '\n')  return;
807       back_ch(ch);      /* if not a line feed, then return it */
808       return;
809 
810     case EOF:   /* end of file -- forget it */
811       return;
812 
813   default:
814     /* scan up to three octal digits to get value */
815     if (isOCTAL_DIGIT(ch)) {
816       value = digit_value[ch];
817       ch = next_ch();
818       if (isOCTAL_DIGIT(ch)) {
819         value = (value << 3) + digit_value[ch];
820         ch = next_ch();
821         if (isOCTAL_DIGIT(ch))
822           value = (value << 3) + digit_value[ch];
823         else
824           back_ch(ch);
825       }
826       else
827         back_ch(ch);
828       ch = value;
829     }
830   }
831 
832   /* Found a character to save */
833   save_ch(ch);
834 }
835 
836 /* Done: String */
STRING(int ch)837 static int STRING(int ch)
838 {
839   int nest_level = 1;
840 
841   tokenType = TOKEN_STRING;
842 
843   do {
844 
845     ch = next_ch();
846     while(!isSTRING_SPECIAL(ch)) {
847       save_ch(ch);
848       ch = next_ch();
849     };
850 
851     switch (ch) {
852 
853      case '(':
854        ++nest_level;
855        save_ch(ch);
856        break;
857 
858      case ')':
859        if (--nest_level > 0)
860          save_ch(ch);
861        break;
862 
863      case '\\':
864           save_digraph(next_ch());
865         break;
866 
867      case '\r':
868         /* All carriage returns (\r) are turned into linefeeds (\n)*/
869           ch = next_ch();       /* get the next one, is it \n? */
870           if (ch != '\n') {     /* if not, then put it back.   */
871             back_ch(ch);
872           }
873           save_ch('\n');        /* in either case, save a linefeed */
874         break;
875 
876 
877      case EOF:
878        tokenType = TOKEN_INVALID;  /* Unterminated string */
879        nest_level = 0;
880        break;
881     }
882 
883   } while(nest_level > 0);
884 
885   return(DONE);
886 }
887 
888 
889 /* -------------------------------------------------------------------
890  * Assemble a name; set token type and done flag.
891  * Put current input character back, if it is not white space.
892  */
893 
894 /* Done: Name
895  *  (Safe version used to complete name tokens that
896  *   start out looking like something else).
897  */
898 
AAH_NAME(int ch)899 static int AAH_NAME(int ch)
900 {
901   do {
902     save_ch(ch);
903     ch = next_ch();
904   } while(isNAME(ch));
905 
906   back_ch_not_white(ch);
907   tokenType = TOKEN_NAME;
908   return(DONE);
909 }
910 
911 /* Done: Name */
NAME(int ch)912 static int NAME(int ch)
913 {
914   save_unsafe_ch(ch);
915   ch = next_ch();
916   if (isNAME(ch)) {
917     save_unsafe_ch(ch);
918     ch = next_ch();
919     if (isNAME(ch)) {
920       save_unsafe_ch(ch);
921       ch = next_ch();
922       if (isNAME(ch)) {
923         save_unsafe_ch(ch);
924         ch = next_ch();
925         if (isNAME(ch)) {
926           save_unsafe_ch(ch);
927           ch = next_ch();
928           if (isNAME(ch)) {
929             save_unsafe_ch(ch);
930             ch = next_ch();
931             if (isNAME(ch)) {
932               save_unsafe_ch(ch);
933               ch = next_ch();
934               while(isNAME(ch)) {
935                 save_ch(ch);
936                 ch = next_ch();
937               }
938             }
939           }
940         }
941       }
942     }
943   }
944 
945   back_ch_not_white(ch);
946   tokenType = TOKEN_NAME;
947   return(DONE);
948 }
949 
950 /* Done: Literal Name */
LITERAL_NAME(int ch)951 static int LITERAL_NAME(int ch)
952 {
953   if (isNAME(ch)) {
954     save_unsafe_ch(ch);
955     ch = next_ch();
956     if (isNAME(ch)) {
957       save_unsafe_ch(ch);
958       ch = next_ch();
959       if (isNAME(ch)) {
960         save_unsafe_ch(ch);
961         ch = next_ch();
962         if (isNAME(ch)) {
963           save_unsafe_ch(ch);
964           ch = next_ch();
965           if (isNAME(ch)) {
966             save_unsafe_ch(ch);
967             ch = next_ch();
968             if (isNAME(ch)) {
969               save_unsafe_ch(ch);
970               ch = next_ch();
971               while(isNAME(ch)) {
972                 save_ch(ch);
973                 ch = next_ch();
974               }
975             }
976           }
977         }
978       }
979     }
980   }
981 
982   back_ch_not_white(ch);
983   tokenType = TOKEN_LITERAL_NAME;
984   return(DONE);
985 }
986 
987 /* Done: immediate Name */
IMMED_NAME(int ch)988 static int IMMED_NAME(int ch)
989 {
990   ch = next_ch();
991   if (isNAME(ch)) {
992     save_unsafe_ch(ch);
993     ch = next_ch();
994     if (isNAME(ch)) {
995       save_unsafe_ch(ch);
996       ch = next_ch();
997       if (isNAME(ch)) {
998         save_unsafe_ch(ch);
999         ch = next_ch();
1000         if (isNAME(ch)) {
1001           save_unsafe_ch(ch);
1002           ch = next_ch();
1003           if (isNAME(ch)) {
1004             save_unsafe_ch(ch);
1005             ch = next_ch();
1006             if (isNAME(ch)) {
1007               save_unsafe_ch(ch);
1008               ch = next_ch();
1009               while(isNAME(ch)) {
1010                 save_ch(ch);
1011                 ch = next_ch();
1012               }
1013             }
1014           }
1015         }
1016       }
1017     }
1018   }
1019 
1020   back_ch_not_white(ch);
1021   tokenType = TOKEN_IMMED_NAME;
1022   return(DONE);
1023 }
1024 
1025 /* Done: Name found while looking for something else */
OOPS_NAME(int ch)1026 static int OOPS_NAME(int ch)
1027 {
1028   back_ch_not_white(ch);
1029   tokenType = TOKEN_NAME;
1030   return(DONE);
1031 }
1032 
1033 
1034 /* -------------------------------------------------------------------
1035  * Complete a miscellaneous token; set token type and done flag.
1036  */
1037 
1038 /* Done: Unmatched Right Angle-Bracket */
RIGHT_ANGLE(int ch)1039 static int RIGHT_ANGLE(int ch)
1040 {
1041   tokenType = TOKEN_RIGHT_ANGLE;
1042   return(DONE);
1043 }
1044 
1045 /* Done: Unmatched Right Parenthesis */
RIGHT_PAREN(int ch)1046 static int RIGHT_PAREN(int ch)
1047 {
1048   tokenType = TOKEN_RIGHT_PAREN;
1049   return(DONE);
1050 }
1051 
1052 /* Done: Left Brace */
LEFT_BRACE(int ch)1053 static int LEFT_BRACE(int ch)
1054 {
1055   tokenType = TOKEN_LEFT_BRACE;
1056   return(DONE);
1057 }
1058 
1059 /* Done: Right Brace */
RIGHT_BRACE(int ch)1060 static int RIGHT_BRACE(int ch)
1061 {
1062   tokenType = TOKEN_RIGHT_BRACE;
1063   return(DONE);
1064 }
1065 
1066 /* Done: Left Bracket */
LEFT_BRACKET(int ch)1067 static int LEFT_BRACKET(int ch)
1068 {
1069   save_unsafe_ch(ch);
1070   tokenType = TOKEN_LEFT_BRACKET;
1071   return(DONE);
1072 }
1073 
1074 /* Done: Right Bracket */
RIGHT_BRACKET(int ch)1075 static int RIGHT_BRACKET(int ch)
1076 {
1077   save_unsafe_ch(ch);
1078   tokenType = TOKEN_RIGHT_BRACKET;
1079   return(DONE);
1080 }
1081 
1082 /* Done: Break */
BREAK_SIGNAL(int ch)1083 static int BREAK_SIGNAL(int ch)
1084 {
1085   tokenType = TOKEN_BREAK;
1086   return(DONE);
1087 }
1088 
1089 /* Done: No Token Found */
NO_TOKEN(int ch)1090 static int NO_TOKEN(int ch)
1091 {
1092   tokenType = TOKEN_EOF;
1093   return(DONE);
1094 }
1095 
1096 
1097 /*
1098  * -------------------------------------------------------------------
1099  *  scan_token -- scan one token from the input.  It uses a simple
1100  *    finite state machine to recognize token classes.
1101  *
1102  *  The input is from a file.
1103  *
1104  *  On entry --
1105  *
1106  *    inputP -> input PostScript object, a file.
1107  *    tokenStartP -> buffer in VM for accumulating the token.
1108  *    tokenMaxP -> last character in the token buffer
1109  *
1110  *  On exit --
1111  *
1112  *    tokenLength = number of characters in the token
1113  *    tokenTooLong = TRUE if the token did not fit in the buffer
1114  *    tokenType = code for the type of token parsed.
1115  *    tokenValue = converted value of a numeric token.
1116  *
1117  *
1118  * -------------------------------------------------------------------
1119  */
scan_token(psobj * inputP)1120 void scan_token(psobj *inputP)
1121 {
1122   int ch;
1123   unsigned char *stateP = s0;
1124   unsigned char entry;
1125   int (*actionP)();
1126 
1127   /* Define input source */
1128   inputFileP = inputP->data.fileP;
1129   if (inputFileP == NULL)  {
1130     tokenType = TOKEN_EOF;
1131     return;
1132   }
1133 
1134   /* Ensure enough space for most cases
1135    * (so we don't have to keep checking)
1136    * The length needs to cover the maximum number
1137    * of save_unsafe_ch() calls that might be executed.
1138    * That number is 11 (a sign and 10 decimal digits, e.g.,
1139    * when scanning -2147483648), but use MAX_NAME_LEN
1140    * in case someone changes that without checking.
1141    */
1142   if (vm_free_bytes() < (MAX_NAME_LEN)) {
1143      if (!(vm_init())) {
1144         tokenLength = 0;
1145         tokenTooLong = TRUE;
1146         tokenType = TOKEN_NONE;
1147         tokenValue.integer = 0;
1148         return;
1149      }
1150   }
1151 
1152   tokenStartP = vm_next_byte();
1153 
1154   /* Reset token */
1155   tokenCharP = tokenStartP;
1156   tokenTooLong = FALSE;
1157 
1158   /* Scan one token */
1159   ch = next_ch();
1160   do {
1161     entry = stateP[ch];
1162     stateP = classActionTable[entry].nextStateP;
1163     actionP = classActionTable[entry].actionRoutineP;
1164     ch = (*actionP)(ch);
1165   } while(ch != DONE);
1166 
1167 
1168   /* Return results */
1169   tokenLength = tokenCharP - tokenStartP;
1170 }
1171