1 /*
2  * Motif
3  *
4  * Copyright (c) 1987-2012, The Open Group. All rights reserved.
5  *
6  * These libraries and programs are free software; you can
7  * redistribute them and/or modify them under the terms of the GNU
8  * Lesser General Public License as published by the Free Software
9  * Foundation; either version 2 of the License, or (at your option)
10  * any later version.
11  *
12  * These libraries and programs are distributed in the hope that
13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15  * PURPOSE. See the GNU Lesser General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with these librararies and programs; if not, write
20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21  * Floor, Boston, MA 02110-1301 USA
22 */
#ifdef REV_INFO
#ifndef lint
/*
 * Revision identification string.  It is only compiled in when REV_INFO
 * is defined and lint is not.  The declaration previously lacked its
 * terminating semicolon, which made the file uncompilable with REV_INFO
 * defined.
 */
static char rcsid[] = "$TOG: UilLexAna.c /main/14 1997/03/12 15:10:52 dbl $";
#endif
#endif
28 
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32 
33 
34 /*
35 **++
36 **  FACILITY:
37 **
38 **      User Interface Language Compiler (UIL)
39 **
40 **  ABSTRACT:
41 **
42 **      This module holds the routines that build tokens for the UIL
43 **	compiler.
44 **
45 **--
46 **/
47 
48 
49 /*
50 **
51 **  INCLUDE FILES
52 **
53 **/
54 
55 #include <Xm/Xm.h>
56 /* I think this one should be public too, it's not the case right now,
57    and I don't want to include XmP.h here - dd */
58 extern char *_XmStringGetCurrentCharset ();
59 #include <Xm/XmosP.h>	/* Need this for MB_CUR_MAX */
60 
61 #include <Mrm/MrmosI.h> /* Need this for _MrmOSSetLocale. */
62 
63 #include "UilDefI.h"
64 #include <ctype.h>
65 
66 #ifndef X_NOT_STDC_ENV
67 #include <stdlib.h>
68 #else
69 double atof();
70 #endif
71 
72 #include <errno.h>	/* needed to support conversion functions */
73 #ifdef X_NOT_STDC_ENV
74 extern int errno;
75 #endif
76 
77 /*
78 **
79 **  TABLE OF CONTENTS
80 **
81 */
82 
83 /*
84 ** FORWARD DECLARATIONS
85 */
86 
/*
**  One lexer text buffer.
**
**  az_next_buffer   link to another lex buffer
**  c_text           text storage; only a single element is declared here.
**                   NOTE(review): the allocator presumably reserves extra
**                   room after the struct -- confirm in get_lex_buffer.
*/
typedef struct _lex_buffer_type lex_buffer_type;

struct _lex_buffer_type
{
    lex_buffer_type	*az_next_buffer;
    unsigned char	c_text[ 1 ];
};
92 
93 
/*
** Prototypes for helpers private to this file; their definitions lie
** outside this part of the source.  dump_token is declared only when
** debug_version is non-zero.
*/
94 static lex_buffer_type *get_lex_buffer  _ARGUMENTS(( lex_buffer_type *az_current_lex_buffer ));
95 #if debug_version
96 static void dump_token  _ARGUMENTS(( lex_buffer_type *az_current_lex_buffer , int l_lex_pos ));
97 #endif
98 
99 
100 /*
101 **
102 **  EXTERNAL DEFINITIONS
103 **
104 */
105 
106 
107 /*    These values are set by the Lex initialize routine, or when the	    */
108 /*    charset option is specified on the module declaration.		    */
109 externaldef(uil_comp_glbl) int			Uil_lex_l_user_default_charset;
110 externaldef(uil_comp_glbl) sym_value_entry_type *Uil_lex_az_charset_entry;
111 externaldef(uil_comp_glbl) int 			Uil_lex_l_localized;
112 
113 
114 /*  This is the most recent character set recognized by the grammar.  It    */
115 /*  allows the use of the CHARACTER_SET function to prefix general string   */
116 /*  literals.								    */
117 externaldef(uil_comp_glbl) int			 Uil_lex_l_charset_specified;
118 externaldef(uil_comp_glbl) int			 Uil_lex_l_literal_charset;
119 externaldef(uil_comp_glbl) sym_value_entry_type *Uil_lex_az_literal_charset;
120 /* %COMPLETE -- NOTE(review): presumably a running count of characters read, used for percent-complete reporting; confirm against its users */
121 externaldef(uil_comp_glbl) int Uil_characters_read;
122 
123 /*    For portability, declare all yy* variables as extern; they are only referenced here, not defined.    */
124 
125 extern yystype			yylval;
126 extern yystype			prev_yylval;
127 
128 /*    This stack entry is used as the result of epsilon productions.   */
129 
130 extern yystype			gz_yynullval;
131 
132 
133 /*
134  * Retain comments as scanned
135  */
/*
 * NOTE(review): the two sizes below are presumably the initial allocation
 * and the growth step for comment_text; the code that uses them is not in
 * this part of the file -- confirm there.
 */
136 #define INITIAL_COMMENT_SIZE 8000
137 #define INCR_COMMENT_SIZE 2000
138 
139 
140 externaldef(uil_comp_glbl) char *comment_text;
141 externaldef(uil_comp_glbl) int comment_size;
142 
143 static int last_token_seen = 0;	/* file-local, starts at 0 */
144 
145 
146 
147 /*
148 **
149 **  MACRO DEFINITIONS
150 **
151 **/
152 
153 /*
154 **  Each of the 256 possible input characters has an associated class.
155 **  These defines give names to the classes.  token_table is declared with
156 **  max_class+1 columns, so max_class must equal the largest class value. */
157 
158 #define	    class_blank	    0		/* white space */
159 #define     class_punc	    1		/* punctuation characters */
160 #define     class_name	    2		/* alphabetics - {eEntvbrf} + {$ _} */
161 #define     class_e	    3		/* e (starts the exponent of a real) */
162 #define     class_escape    4		/* n t v b r f */
163 #define     class_ff	    5		/* form feed */
164 #define     class_digit	    6		/* 0..9 */
165 #define     class_dot	    7		/* . */
166 #define	    class_sign	    8		/* - + */
167 #define     class_quote	    9		/* ' */
168 #define     class_bslash    10		/* \ */
169 #define     class_slash	    11		/* / */
170 #define     class_star	    12		/* * */
171 #define     class_exclam    13		/* ! */
172 #define     class_eol	    14		/* end of line - ascii nul */
173 #define     class_rest	    15		/* remaining printable characters */
174 #define     class_illegal   16		/* remaining non printable characters */
175 #define     class_pound	    17		/* # */
176 #define     class_dquote    18		/* " */
177 #define	    class_langle    19		/* < */
178 #define	    class_rangle    20		/* > */
179 #define     class_highbit   21		/* high order bit set */
180 #define     max_class	    21		/* largest class value */
181 
182 /*
183 **   states within the token table; token_table is declared with
184 **   max_state+1 rows, so max_state must equal the largest state value. */
185 
186 #define	    state_initial   0		/* initial state of automaton */
187 #define	    state_name	    1		/* looking for a name */
188 #define	    state_integer   2		/* in a run of digits (integer so far) */
189 #define	    state_real_1    3		/* real: seen a leading '.', digit expected */
190 #define	    state_real_2    4		/* real: in the digits after the '.' */
191 #define	    state_exp_1	    5		/* real: seen the exponent 'e' */
192 #define	    state_exp_2	    6		/* real: seen the sign after the 'e' */
193 #define	    state_exp_3	    7		/* real: in the exponent digits */
194 #define	    state_str_1     8		/* inside a ' quoted string */
195 #define	    state_str_2     9		/* string: just seen a \ */
196 #define	    state_str_3     10		/* string: in the digits of \digit...\ */
197 #define	    state_comment_1 11		/* seen a slash, a star would open a comment */
198 #define	    state_comment_2 12		/* inside a slash-star comment */
199 #define	    state_comment_3 13		/* comment: seen a star, a slash would close it */
200 #define	    state_comment_4 14		/* inside a ! comment, ends at end of line */
201 #define	    state_eat_rest  15		/* eat up rest characters */
202 #define	    state_gstr_1    16		/* looking for a general string */
203 #define	    state_gstr_2    17		/* looking for a general string */
204 #define	    state_gstr_3    18		/* general string: entered on " with the default char set */
205 #define	    state_gstr_4    19		/* looking for a general string */
206 #define	    state_gstr_5    20		/* looking for a general string */
207 #define	    state_langle    21		/* looking for << */
208 #define	    state_rangle    22		/* looking for >> */
209 #define	    max_state	    22		/* largest state value */
210 
211 /*
212 **   actions in token table
213 **       all actions requiring a move of the current character have
214 **       the NEGATIVE flag bit set (despite the name, the resulting
215 **       values are positive) */
216 
217 #define	    NEGATIVE 0x40		/* flag bit or'ed into every move_* action */
218 
219 #define     min_action	    1
220 #define	    move_advance    (NEGATIVE | 1)	/* save character - goto next state */
221 #define	    advance	    1		/* goto next state */
222 #define     move_final      (NEGATIVE | 2)	/* save character - found token */
223 #define     final           2		/* found token */
224 #define     move_error      (NEGATIVE | 3)	/* save character - found error */
225 #define     error           3		/* found error */
226 #define	    move_special    (NEGATIVE | 4)		/* save character - special action */
227 #define	    special	    4		/* special action */
228 #define	    reset	    5		/* reset the analyzer */
229 #define     final_comment   (NEGATIVE | 6)       /* save whole comment  RAP */
230 
231 /*
232 **  errors encoded in the token table (held in the next_state field of a
233 **  cell whose action is error or move_error) */
234 
235 #define	    min_error	    0		/* smallest error code */
236 #define	    bad_prefix      0		/* junk starts a token */
237 #define	    error_max       0		/* largest error code */
238 
239 /*
240 **  final states in the token table (held in the next_state field of a
241 **  cell whose action is final, move_final or final_comment) */
242 
243 #define     token_min		1	/* smallest token code */
244 #define	    token_punc		1	/* punctuation character */
245 #define     token_eol		2	/* end of line */
246 #define     token_real		3	/* real number */
247 #define     token_integer	4	/* integer */
248 #define     token_name		5	/* identifier */
249 #define     token_ustring	6	/* unterminated string */
250 #define     token_string	7	/* string */
251 #define     token_ff		8	/* form feed */
252 #define     token_ugstr		9	/* unterminated general string */
253 #define     token_gstr		10	/* general string */
254 #define     token_punc2		11	/* 2 character punctuation */
255 #define     token_comment       12      /* comment block RAP */
256 #define	    token_lstr		13	/* localized general string */
257 #define     token_max		13	/* largest token code */
258 
259 
260 /*
261 **  special actions that take place (held in the backup field of a cell
262 **  whose action is special or move_special) */
263 
264 #define	    control_char    1		/* unprintable character in construct */
265 #define	    start_bslash    2		/* start of \ construct in string */
266 #define	    ignore_bslash   3		/* \ not followed by correct sequence */
267 #define	    found_digit	    4		/* digit in \ sequence */
268 #define	    end_digits	    5		/* end of \digit...\ sequence */
269 #define	    insert_char	    6		/* end of \x sequence */
270 #define	    missing_bslash  7		/* \ at end of \digit...\ missing */
271 #define	    string_wrap	    8		/* string wraps to next line */
272 #define	    comment_wrap    9		/* comment wraps to next line */
273 #define	    charset_gstr    10		/* found char set for general string */
274 #define	    nocharset_gstr  11		/* default char set for general string */
275 #define	    highbit_char    12		/* NOTE(review): presumably a character with the high order bit set; the old comment duplicated nocharset_gstr's - confirm */
276 
277 /*
278 **  define backup field values of a token table cell
279 **  (not_used and backup_0 share the value 0) */
280 
281 #define	    not_used	    0		/* value not defined for this action */
282 #define	    backup_0	    0		/* no backup required */
283 #define	    backup_1	    1		/* 1 character backup needed */
284 #define	    backup_2	    2		/* 2 character backup needed */
285 #define	    backup_3	    3		/* 3 character backup needed */
286 
287 /*
288 **  form of each cell in the token table
289 */
290 
291 
/*
**  One entry of the token table.
**
**  action       action to be taken
**  next_state   next state, or the terminal found, or the error found,
**               depending on the action
**  backup       number of characters to back up, or the action code
**               for special cases
**  unused       not referenced by the table initializers
*/
typedef struct
{
    char	action, next_state, backup, unused;
} cell;
302 
303 /*
304 **
305 **  OWN Storage for TOKEN BUILDER
306 **
307 */
308 
309 #define l_max_lex_buffer_pos  127	/* NOTE(review): presumably the highest usable index into a lex buffer's c_text - confirm in get_lex_buffer */
310 
311 static	lex_buffer_type  *az_first_lex_buffer;	/* NOTE(review): presumably the head of the az_next_buffer chain - users are not visible here */
312 
313 /*
314 **  the actual token table
315 */
316 
317 static cell XmConst token_table[ max_state+1][ max_class+1] =
318   {
319     { /* state_initial */
320 	/* class_blank   */ { reset,		state_initial,	    not_used },
321 	/* class_punc    */ { move_final,	token_punc,	    backup_0 },
322 	/* class_name    */ { move_advance,	state_name,	    not_used },
323 	/* class_e       */ { move_advance,	state_name,	    not_used },
324 	/* class_escape  */ { move_advance,	state_name,	    not_used },
325 	/* class_ff      */ { final,		token_ff,	    backup_0 },
326 	/* class_digit   */ { move_advance,	state_integer,	    not_used },
327 	/* class_dot     */ { move_advance,	state_real_1,	    not_used },
328 	/* class_sign    */ { move_final,	token_punc,	    backup_0 },
329 	/* class_quote   */ { advance,		state_str_1,	    not_used },
330 	/* class_bslash  */ { move_advance,	state_eat_rest,	    not_used },
331 	/* class_slash   */ { move_advance,	state_comment_1,    not_used },
332 	/* class_star    */ { move_final,  	token_punc,    	    backup_0 },
333 	/* class_exclam  */ { move_advance,	state_comment_4,    not_used },
334 	/* class_eol     */ { final,		token_eol,	    backup_0 },
335 	/* class_rest    */ { move_advance,	state_eat_rest,	    not_used },
336 	/* class_illegal */ { special,		state_initial,	control_char },
337 	/* class_pound	 */ { move_final,	token_punc,	    backup_0 },
338 	/* class_dquote  */ { special,		state_gstr_3, nocharset_gstr },
339 	/* class_langle  */ { move_advance,	state_langle,       not_used },
340 	/* class_rangle  */ { move_advance,	state_rangle,       not_used },
341 	/* class_highbit */ { move_advance,	state_eat_rest,	    not_used },
342     },
343     { /* state_name */
344 	/* class_blank   */ { final,		token_name,	    backup_0 },
345 	/* class_punc    */ { move_final,	token_name,	    backup_1 },
346 	/* class_name    */ { move_advance,	state_name,	    not_used },
347 	/* class_e       */ { move_advance,	state_name,	    not_used },
348 	/* class_escape  */ { move_advance,	state_name,	    not_used },
349 	/* class_ff      */ { move_final,	token_name,	    backup_1 },
350 	/* class_digit   */ { move_advance,	state_name,	    not_used },
351 	/* class_dot     */ { move_final,	token_name,	    backup_1 },
352 	/* class_sign    */ { move_final,	token_name,	    backup_1 },
353 	/* class_quote   */ { move_final,	token_name,	    backup_1 },
354 	/* class_bslash  */ { move_final,	token_name,	    backup_1 },
355 	/* class_slash   */ { move_final,	token_name,	    backup_1 },
356 	/* class_star    */ { move_final,	token_name,	    backup_1 },
357 	/* class_exclam  */ { move_final,	token_name,	    backup_1 },
358 	/* class_eol     */ { move_final,	token_name,	    backup_1 },
359 	/* class_rest    */ { move_final,	token_name,	    backup_1 },
360 	/* class_illegal */ { move_final,	token_name,	    backup_1 },
361 	/* class_pound   */ { move_final,	token_name,	    backup_1 },
362 	/* class_dquote  */ { move_final,	token_name,	    backup_1 },
363 	/* class_langle  */ { move_final,  	token_name,         backup_1 },
364 	/* class_rangle  */ { move_final,  	token_name,         backup_1 },
365 	/* class_highbit */ { move_final,	token_name,	    backup_1 },
366     },
367     { /* state_integer */
368 	/* class_blank   */ { final,		token_integer,	    backup_0 },
369 	/* class_punc    */ { move_final,	token_integer,	    backup_1 },
370 	/* class_name    */ { move_final,	token_integer,	    backup_1 },
371 	/* class_e       */ { move_final,	token_integer,	    backup_1 },
372 	/* class_escape  */ { move_final,	token_integer,	    backup_1 },
373 	/* class_ff      */ { move_final,	token_integer,	    backup_1 },
374 	/* class_digit   */ { move_advance,	state_integer,	    not_used },
375 	/* class_dot     */ { move_advance,	state_real_2,	    not_used },
376 	/* class_sign    */ { move_final,	token_integer,	    backup_1 },
377 	/* class_quote   */ { move_final,	token_integer,	    backup_1 },
378 	/* class_bslash  */ { move_final,	token_integer,	    backup_1 },
379 	/* class_slash   */ { move_final,	token_integer,	    backup_1 },
380 	/* class_star    */ { move_final,	token_integer,	    backup_1 },
381 	/* class_exclam  */ { move_final,	token_integer,	    backup_1 },
382 	/* class_eol     */ { move_final,	token_integer,	    backup_1 },
383 	/* class_rest    */ { move_final,	token_integer,	    backup_1 },
384 	/* class_illegal */ { move_final,	token_integer,	    backup_1 },
385 	/* class_pound   */ { move_final,	token_integer,	    backup_1 },
386 	/* class_dquote  */ { move_final,	token_integer,	    backup_1 },
387 	/* class_langle  */ { move_final,	token_integer,	    backup_1 },
388 	/* class_rangle  */ { move_final,	token_integer,	    backup_1 },
389 	/* class_highbit */ { move_final,	token_integer,	    backup_1 },
390     },
391     { /* state_real_1 */
392 	/* class_blank   */ { move_advance,	state_eat_rest,	    not_used },
393 	/* class_punc    */ { move_error,	bad_prefix,	    backup_1 },
394 	/* class_name    */ { move_error,	bad_prefix,	    backup_1 },
395 	/* class_e       */ { move_error,	bad_prefix,	    backup_1 },
396 	/* class_escape  */ { move_error,	bad_prefix,	    backup_1 },
397 	/* class_ff      */ { special,		state_real_1,	control_char },
398 	/* class_digit   */ { move_advance,	state_real_2,	    not_used },
399 	/* class_dot     */ { move_error,	bad_prefix,	    backup_1 },
400 	/* class_sign    */ { move_error,	bad_prefix,	    backup_1 },
401 	/* class_quote   */ { move_error,	bad_prefix,	    backup_1 },
402 	/* class_bslash  */ { move_error,	bad_prefix,	    backup_1 },
403 	/* class_slash   */ { move_error,	bad_prefix,	    backup_1 },
404 	/* class_star    */ { move_error,	bad_prefix,	    backup_1 },
405 	/* class_exclam  */ { move_error,	bad_prefix,	    backup_1 },
406 	/* class_eol     */ { move_error,	bad_prefix,	    backup_1 },
407 	/* class_rest    */ { move_advance,	state_eat_rest,	    not_used },
408 	/* class_illegal */ { special,		state_real_1,	control_char },
409 	/* class_pound	 */ { move_error,	bad_prefix,	    backup_1 },
410 	/* class_dquote  */ { move_error,	bad_prefix,	    backup_1 },
411 	/* class_langle  */ { move_error,	bad_prefix,	    backup_1 },
412 	/* class_rangle  */ { move_error,	bad_prefix,	    backup_1 },
413 	/* class_highbit */ { move_advance,	state_eat_rest,	    not_used },
414     },
415     { /* state_real_2 */
416 	/* class_blank   */ { final,		token_real,	    backup_0 },
417 	/* class_punc    */ { move_final,	token_real,	    backup_1 },
418 	/* class_name    */ { move_final,	token_real,	    backup_1 },
419 	/* class_e       */ { move_advance,	state_exp_1,	    not_used },
420 	/* class_escape  */ { move_final,	token_real,	    backup_1 },
421 	/* class_ff      */ { move_final,	token_real,	    backup_1 },
422 	/* class_digit   */ { move_advance,	state_real_2,	    not_used },
423 	/* class_dot     */ { move_final,	token_real,	    backup_1 },
424 	/* class_sign    */ { move_final,	token_real,	    backup_1 },
425 	/* class_quote   */ { move_final,	token_real,	    backup_1 },
426 	/* class_bslash  */ { move_final,	token_real,	    backup_1 },
427 	/* class_slash   */ { move_final,	token_real,	    backup_1 },
428 	/* class_star    */ { move_final,	token_real,	    backup_1 },
429 	/* class_exclam  */ { move_final,	token_real,	    backup_1 },
430 	/* class_eol     */ { move_final,	token_real,	    backup_1 },
431 	/* class_rest    */ { move_final,	token_real,	    backup_1 },
432 	/* class_illegal */ { move_final,	token_real,	    backup_1 },
433 	/* class_pound   */ { move_final,	token_real,	    backup_1 },
434 	/* class_dquote  */ { move_final,	token_real,	    backup_1 },
435 	/* class_langle  */ { move_final,	token_real,	    backup_1 },
436 	/* class_rangle  */ { move_final,	token_real,	    backup_1 },
437 	/* class_highbit */ { move_final,	token_real,	    backup_1 },
438     },
439     { /* state_exp_1 */
440 	/* class_blank   */ { move_final,	token_real,	    backup_2 },
441 	/* class_punc    */ { move_final,	token_real,	    backup_2 },
442 	/* class_name    */ { move_final,	token_real,	    backup_2 },
443 	/* class_e       */ { move_final,	token_real,	    backup_2 },
444 	/* class_escape  */ { move_final,	token_real,	    backup_2 },
445 	/* class_ff      */ { move_final,	token_real,	    backup_2 },
446 	/* class_digit   */ { move_advance,	state_exp_3,	    not_used },
447 	/* class_dot     */ { move_final,	token_real,	    backup_2 },
448 	/* class_sign    */ { move_advance,	state_exp_2,	    not_used },
449 	/* class_quote   */ { move_final,	token_real,	    backup_2 },
450 	/* class_bslash  */ { move_final,	token_real,	    backup_2 },
451 	/* class_slash   */ { move_final,	token_real,	    backup_2 },
452 	/* class_star    */ { move_final,	token_real,	    backup_2 },
453 	/* class_exclam  */ { move_final,	token_real,	    backup_2 },
454 	/* class_eol     */ { move_final,	token_real,	    backup_2 },
455 	/* class_rest    */ { move_final,	token_real,	    backup_2 },
456 	/* class_illegal */ { move_final,	token_real,	    backup_2 },
457 	/* class_pound   */ { move_final,	token_real,	    backup_2 },
458 	/* class_dquote  */ { move_final,	token_real,	    backup_2 },
459 	/* class_langle  */ { move_final,	token_real,	    backup_2 },
460 	/* class_rangle  */ { move_final,	token_real,	    backup_2 },
461 	/* class_highbit */ { move_final,	token_real,	    backup_2 },
462     },
463     { /* state_exp_2 */
464 	/* class_blank   */ { move_final,	token_real,	    backup_3 },
465 	/* class_punc    */ { move_final,	token_real,	    backup_3 },
466 	/* class_name    */ { move_final,	token_real,	    backup_3 },
467 	/* class_e       */ { move_final,	token_real,	    backup_3 },
468 	/* class_escape  */ { move_final,	token_real,	    backup_3 },
469 	/* class_ff      */ { move_final,	token_real,	    backup_3 },
470 	/* class_digit   */ { move_advance,	state_exp_3,	    not_used },
471 	/* class_dot     */ { move_final,	token_real,	    backup_3 },
472 	/* class_sign    */ { move_final,	token_real,	    backup_3 },
473 	/* class_quote   */ { move_final,	token_real,	    backup_3 },
474 	/* class_bslash  */ { move_final,	token_real,	    backup_3 },
475 	/* class_slash   */ { move_final,	token_real,	    backup_3 },
476 	/* class_star    */ { move_final,	token_real,	    backup_3 },
477 	/* class_exclam  */ { move_final,	token_real,	    backup_3 },
478 	/* class_eol     */ { move_final,	token_real,	    backup_3 },
479 	/* class_rest    */ { move_final,	token_real,	    backup_3 },
480 	/* class_illegal */ { move_final,	token_real,	    backup_3 },
481 	/* class_pound   */ { move_final,	token_real,	    backup_3 },
482 	/* class_dquote  */ { move_final,	token_real,	    backup_3 },
483 	/* class_langle  */ { move_final,	token_real,	    backup_3 },
484 	/* class_rangle  */ { move_final,	token_real,	    backup_3 },
485 	/* class_highbit */ { move_final,	token_real,	    backup_3 },
486     },
487     { /* state_exp_3 */
488 	/* class_blank   */ { final,		token_real,	    backup_0 },
489 	/* class_punc    */ { move_final,	token_real,	    backup_1 },
490 	/* class_name    */ { move_final,	token_real,	    backup_1 },
491 	/* class_e       */ { move_final,	token_real,	    backup_1 },
492 	/* class_escape  */ { move_final,	token_real,	    backup_1 },
493 	/* class_ff      */ { move_final,	token_real,	    backup_1 },
494 	/* class_digit   */ { move_advance,	state_exp_3,	    not_used },
495 	/* class_dot     */ { move_final,	token_real,	    backup_1 },
496 	/* class_sign    */ { move_final,	token_real,	    backup_1 },
497 	/* class_quote   */ { move_final,	token_real,	    backup_1 },
498 	/* class_bslash  */ { move_final,	token_real,	    backup_1 },
499 	/* class_slash   */ { move_final,	token_real,	    backup_1 },
500 	/* class_star    */ { move_final,	token_real,	    backup_1 },
501 	/* class_exclam  */ { move_final,	token_real,	    backup_1 },
502 	/* class_eol     */ { move_final,	token_real,	    backup_1 },
503 	/* class_rest    */ { move_final,	token_real,	    backup_1 },
504 	/* class_illegal */ { move_final,	token_real,	    backup_1 },
505 	/* class_pound   */ { move_final,	token_real,	    backup_1 },
506 	/* class_dquote  */ { move_final,	token_real,	    backup_1 },
507 	/* class_langle  */ { move_final,	token_real,	    backup_1 },
508 	/* class_rangle  */ { move_final,	token_real,	    backup_1 },
509 	/* class_highbit */ { move_final,	token_real,	    backup_1 },
510     },
511     { /* state_str_1 */
512 	/* class_blank   */ { move_advance,	state_str_1,	    not_used },
513 	/* class_punc    */ { move_advance,	state_str_1,	    not_used },
514 	/* class_name    */ { move_advance,	state_str_1,	    not_used },
515 	/* class_e       */ { move_advance,	state_str_1,	    not_used },
516 	/* class_escape  */ { move_advance,	state_str_1,	    not_used },
517 	/* class_ff      */ { special,		state_str_1,	control_char },
518 	/* class_digit   */ { move_advance,	state_str_1,	    not_used },
519 	/* class_dot     */ { move_advance,	state_str_1,	    not_used },
520 	/* class_sign    */ { move_advance,	state_str_1,	    not_used },
521 	/* class_quote   */ { final,		token_string,	    backup_0 },
522 	/* class_bslash  */ { special,		state_str_2,	start_bslash },
523 	/* class_slash   */ { move_advance,	state_str_1,	    not_used },
524 	/* class_star    */ { move_advance,	state_str_1,	    not_used },
525 	/* class_exclam  */ { move_advance,	state_str_1,	    not_used },
526 	/* class_eol     */ { move_final,	token_ustring,	    backup_1 },
527 	/* class_rest    */ { move_advance,	state_str_1,	    not_used },
528 	/* class_illegal */ { special,		state_str_1,	control_char },
529 	/* class_pound   */ { move_advance,	state_str_1,	    not_used },
530 	/* class_dquote  */ { move_advance,	state_str_1,	    not_used },
531 	/* class_langle  */ { move_advance,	state_str_1,	    not_used },
532 	/* class_rangle  */ { move_advance,	state_str_1,	    not_used },
533 	/* class_highbit */ { move_advance,	state_str_1,	    not_used },
534     },
535     { /* state_str_2 */
536 	/* class_blank   */ { move_special,	state_str_1,   ignore_bslash },
537 	/* class_punc    */ { move_special,	state_str_1,   ignore_bslash },
538 	/* class_name    */ { move_special,	state_str_1,   ignore_bslash },
539 	/* class_e       */ { move_special,	state_str_1,   ignore_bslash },
540 	/* class_escape  */ { special,		state_str_1,     insert_char },
541 	/* class_ff      */ { special,		state_str_1,    control_char },
542 	/* class_digit   */ { special,		state_str_3,	 found_digit },
543 	/* class_dot     */ { move_special,	state_str_1,   ignore_bslash },
544 	/* class_sign    */ { move_special,	state_str_1,   ignore_bslash },
545 	/* class_quote   */ { special,		state_str_1,     insert_char },
546 	/* class_bslash  */ { special,		state_str_1,     insert_char },
547 	/* class_slash   */ { move_special,	state_str_1,   ignore_bslash },
548 	/* class_star    */ { move_special,	state_str_1,   ignore_bslash },
549 	/* class_exclam  */ { move_special,	state_str_1,   ignore_bslash },
550 	/* class_eol     */ { special,		state_str_1,     string_wrap },
551 	/* class_rest    */ { move_special,	state_str_1,   ignore_bslash },
552 	/* class_illegal */ { special,		state_str_1,    control_char },
553 	/* class_pound   */ { move_special,	state_str_1,   ignore_bslash },
554 	/* class_dquote  */ { special,		state_str_1,     insert_char },
555 	/* class_langle  */ { move_special,	state_str_1,   ignore_bslash },
556 	/* class_rangle  */ { move_special,	state_str_1,   ignore_bslash },
557 	/* class_highbit */ { move_special,	state_str_1,   ignore_bslash },
558     },
559     { /* state_str_3 */
560 	/* class_blank   */ { move_special,	state_str_1,  missing_bslash },
561 	/* class_punc    */ { move_special,	state_str_1,  missing_bslash },
562 	/* class_name    */ { move_special,	state_str_1,  missing_bslash },
563 	/* class_e       */ { move_special,	state_str_1,  missing_bslash },
564 	/* class_escape  */ { move_special,	state_str_1,  missing_bslash },
565 	/* class_ff      */ { special,		state_str_1,    control_char },
566 	/* class_digit   */ { special,		state_str_3,	 found_digit },
567 	/* class_dot     */ { move_special,	state_str_1,  missing_bslash },
568 	/* class_sign    */ { move_special,	state_str_1,  missing_bslash },
569 	/* class_quote   */ { move_special,	state_str_1,  missing_bslash },
570 	/* class_bslash  */ { special,		state_str_1,	  end_digits },
571 	/* class_slash   */ { move_special,	state_str_1,  missing_bslash },
572 	/* class_star    */ { move_special,	state_str_1,  missing_bslash },
573 	/* class_exclam  */ { move_special,	state_str_1,  missing_bslash },
574 	/* class_eol     */ { move_final,       token_ustring,	    backup_1 },
575 	/* class_rest    */ { move_special,	state_str_1,  missing_bslash },
576 	/* class_illegal */ { special,		state_str_3,    control_char },
577 	/* class_pound   */ { move_special,	state_str_1,  missing_bslash },
578 	/* class_dquote  */ { move_special,	state_str_1,  missing_bslash },
579 	/* class_langle  */ { move_special,	state_str_1,  missing_bslash },
580 	/* class_rangle  */ { move_special,	state_str_1,  missing_bslash },
581 	/* class_highbit */ { move_special,	state_str_1,  missing_bslash },
582     },
583     { /* state_comment_1 */
584 	/* class_blank   */ { move_final,	token_punc,	    backup_0 },
585 	/* class_punc    */ { move_final,	token_punc,	    backup_1 },
586 	/* class_name    */ { move_final,	token_punc,	    backup_1 },
587 	/* class_e       */ { move_final,	token_punc,	    backup_1 },
588 	/* class_escape  */ { move_final,	token_punc,	    backup_1 },
589 	/* class_ff      */ { special,		state_comment_1,control_char },
590 	/* class_digit   */ { move_final,	token_punc,	    backup_1 },
591 	/* class_dot     */ { move_final,	token_punc,	    backup_1 },
592 	/* class_sign    */ { move_final,	token_punc,	    backup_1 },
593 	/* class_quote   */ { move_final,	token_punc,	    backup_1 },
594 	/* class_bslash  */ { move_final,	token_punc,	    backup_1 },
595 	/* class_slash   */ { move_final,	token_punc,	    backup_1 },
596 	/* class_star    */ { move_advance,	state_comment_2,    not_used },
597 	/* class_exclam  */ { move_final,	token_punc,	    backup_1 },
598 	/* class_eol     */ { move_final,	token_punc,	    backup_1 },
599 	/* class_rest    */ { move_final,  	token_punc,    	    backup_1 },
600 	/* class_illegal */ { special,		state_comment_1,control_char },
601 	/* class_pound	 */ { move_final,	token_punc,	    backup_1 },
602 	/* class_dquote  */ { move_final,	token_punc,	    backup_1 },
603 	/* class_langle  */ { move_final,	token_punc,	    backup_1 },
604 	/* class_rangle  */ { move_final,	token_punc,	    backup_1 },
605 	/* class_highbit */ { move_final,  	token_punc,    	    backup_1 },
606     },
607     { /* state_comment_2 */
608 	/* class_blank   */ { move_advance,	state_comment_2,    not_used },
609 	/* class_punc    */ { move_advance,	state_comment_2,    not_used },
610 	/* class_name    */ { move_advance,	state_comment_2,    not_used },
611 	/* class_e       */ { move_advance,	state_comment_2,    not_used },
612 	/* class_escape  */ { move_advance,	state_comment_2,    not_used },
613 	/* class_ff      */ { move_special,	state_comment_2,control_char },
614 	/* class_digit   */ { move_advance,	state_comment_2,    not_used },
615 	/* class_dot     */ { move_advance,	state_comment_2,    not_used },
616 	/* class_sign    */ { move_advance,	state_comment_2,    not_used },
617 	/* class_quote   */ { move_advance,	state_comment_2,    not_used },
618 	/* class_bslash  */ { move_advance,	state_comment_2,    not_used },
619 	/* class_slash   */ { move_advance,	state_comment_2,    not_used },
620 	/* class_star    */ { move_advance,	state_comment_3,    not_used },
621 	/* class_exclam  */ { move_advance,	state_comment_2,    not_used },
622 	/* class_eol     */ { move_special,	state_comment_2,comment_wrap },
623 	/* class_rest    */ { move_advance,	state_comment_2,    not_used },
624 	/* class_illegal */ { move_special,	state_comment_2,control_char },
625 	/* class_pound   */ { move_advance,	state_comment_2,    not_used },
626 	/* class_dquote  */ { move_advance,	state_comment_2,    not_used },
627 	/* class_langle  */ { move_advance,	state_comment_2,    not_used },
628 	/* class_rangle  */ { move_advance,	state_comment_2,    not_used },
629 	/* class_highbit */ { move_advance,	state_comment_2,    not_used },
630     },
631     { /* state_comment_3 */
632 	/* class_blank   */ { move_advance,	state_comment_2,    not_used },
633 	/* class_punc    */ { move_advance,	state_comment_2,    not_used },
634 	/* class_name    */ { move_advance,	state_comment_2,    not_used },
635 	/* class_e       */ { move_advance,	state_comment_2,    not_used },
636 	/* class_escape  */ { move_advance,	state_comment_2,    not_used },
637 	/* class_ff      */ { move_special,	state_comment_2,control_char },
638 	/* class_digit   */ { move_advance,	state_comment_2,    not_used },
639 	/* class_dot     */ { move_advance,	state_comment_2,    not_used },
640 	/* class_sign    */ { move_advance,	state_comment_2,    not_used },
641 	/* class_quote   */ { move_advance,	state_comment_2,    not_used },
642 	/* class_bslash  */ { move_advance,	state_comment_2,    not_used },
643 	/* class_slash   */ { final_comment,	token_comment,      not_used },
644 	/* class_star    */ { move_advance,	state_comment_3,    not_used },
645 	/* class_exclam  */ { move_advance,	state_comment_2,    not_used },
646 	/* class_eol     */ { move_special,	state_comment_2,comment_wrap },
647 	/* class_rest    */ { move_advance,	state_comment_2,    not_used },
648 	/* class_illegal */ { move_special,	state_comment_2,control_char },
649 	/* class_pound   */ { move_advance,	state_comment_2,    not_used },
650 	/* class_dquote  */ { move_advance,	state_comment_2,    not_used },
651 	/* class_langle  */ { move_advance,	state_comment_2,    not_used },
652 	/* class_rangle  */ { move_advance,	state_comment_2,    not_used },
653 	/* class_highbit */ { move_advance,	state_comment_2,    not_used },
654     },
655     { /* state_comment_4 */
656 	/* class_blank   */ { move_advance,	state_comment_4,    not_used },
657 	/* class_punc    */ { move_advance,	state_comment_4,    not_used },
658 	/* class_name    */ { move_advance,	state_comment_4,    not_used },
659 	/* class_e       */ { move_advance,	state_comment_4,    not_used },
660 	/* class_escape  */ { move_advance,	state_comment_4,    not_used },
661 	/* class_ff      */ { move_special,	state_comment_4,control_char },
662 	/* class_digit   */ { move_advance,	state_comment_4,    not_used },
663 	/* class_dot     */ { move_advance,	state_comment_4,    not_used },
664 	/* class_sign    */ { move_advance,	state_comment_4,    not_used },
665 	/* class_quote   */ { move_advance,	state_comment_4,    not_used },
666 	/* class_bslash  */ { move_advance,	state_comment_4,    not_used },
667 	/* class_slash   */ { move_advance,	state_comment_4,    not_used },
668 	/* class_star    */ { move_advance,	state_comment_4,    not_used },
669 	/* class_exclam  */ { move_advance,	state_comment_4,    not_used },
670 	/* class_eol     */ { final_comment,	token_comment,	    backup_1 },
671 	/* class_rest    */ { move_advance,	state_comment_4,    not_used },
672 	/* class_illegal */ { move_special,	state_comment_4,control_char },
673 	/* class_pound   */ { move_advance,	state_comment_4,    not_used },
674 	/* class_dquote  */ { move_advance,	state_comment_4,    not_used },
675 	/* class_langle  */ { move_advance,	state_comment_4,    not_used },
676 	/* class_rangle  */ { move_advance,	state_comment_4,    not_used },
677 	/* class_highbit */ { move_advance,	state_comment_4,    not_used },
678     },
679     { /* state_eat_rest */
680 	/* class_blank   */ { move_advance,	state_eat_rest,     not_used },
681 	/* class_punc    */ { move_error,	bad_prefix,	    backup_1 },
682 	/* class_name    */ { move_error,	bad_prefix,	    backup_1 },
683 	/* class_e       */ { move_error,	bad_prefix,	    backup_1 },
684 	/* class_escape  */ { move_error,	bad_prefix,	    backup_1 },
685 	/* class_ff      */ { special,		state_eat_rest, control_char },
686 	/* class_digit   */ { move_error,	bad_prefix,	    backup_1 },
687 	/* class_dot     */ { move_error,	bad_prefix,	    backup_1 },
688 	/* class_sign    */ { move_error,	bad_prefix,	    backup_1 },
689 	/* class_quote   */ { move_error,	bad_prefix,	    backup_1 },
690 	/* class_bslash  */ { move_advance,	state_eat_rest,     not_used },
691 	/* class_slash   */ { move_error,	bad_prefix,	    backup_1 },
692 	/* class_star    */ { move_advance,	state_eat_rest,     not_used },
693 	/* class_exclam  */ { move_error,	bad_prefix,	    backup_1 },
694 	/* class_eol     */ { move_error,	bad_prefix,	    backup_1 },
695 	/* class_rest    */ { move_advance,	state_eat_rest,     not_used },
696 	/* class_illegal */ { special,		state_eat_rest, control_char },
697 	/* class_pound   */ { move_error,	bad_prefix,	    backup_1 },
698 	/* class_dquote  */ { move_error,	bad_prefix,	    backup_1 },
699 	/* class_langle  */ { move_error,	bad_prefix,	    backup_1 },
700 	/* class_rangle  */ { move_error,	bad_prefix,	    backup_1 },
701 	/* class_highbit */ { move_advance,	state_eat_rest,     not_used },
702     },
703     { /* state_gstr_1 */
704 	/* class_blank   */ { move_error,	bad_prefix,         backup_1 },
705 	/* class_punc    */ { move_error,	bad_prefix,	    backup_1 },
706 	/* class_name    */ { move_advance,	state_gstr_2,	    not_used },
707 	/* class_e       */ { move_advance,	state_gstr_2,	    not_used },
708 	/* class_escape  */ { move_advance,	state_gstr_2,	    not_used },
709 	/* class_ff      */ { special,		state_gstr_1,   control_char },
710 	/* class_digit   */ { move_error,	bad_prefix,	    backup_1 },
711 	/* class_dot     */ { move_error,	bad_prefix,	    backup_1 },
712 	/* class_sign    */ { move_error,	bad_prefix,	    backup_1 },
713 	/* class_quote   */ { move_error,	bad_prefix,	    backup_1 },
714 	/* class_bslash  */ { move_advance,	state_eat_rest,     not_used },
715 	/* class_slash   */ { move_error,	bad_prefix,	    backup_1 },
716 	/* class_star    */ { move_advance,	state_eat_rest,     not_used },
717 	/* class_exclam  */ { move_error,	bad_prefix,	    backup_1 },
718 	/* class_eol     */ { move_error,	bad_prefix,	    backup_1 },
719 	/* class_rest    */ { move_advance,	state_eat_rest,     not_used },
720 	/* class_illegal */ { special,		state_eat_rest, control_char },
721 	/* class_pound   */ { move_error,	bad_prefix,	    backup_1 },
722 	/* class_dquote  */ { move_error,	bad_prefix,	    backup_1 },
723 	/* class_langle  */ { move_error,	bad_prefix,	    backup_1 },
724 	/* class_rangle  */ { move_error,	bad_prefix,	    backup_1 },
725 	/* class_highbit */ { move_advance,	state_eat_rest,     not_used },
726     },
727     { /* state_gstr_2 */
728 	/* class_blank   */ { move_error,	bad_prefix,         backup_1 },
729 	/* class_punc    */ { move_error,	bad_prefix,	    backup_1 },
730 	/* class_name    */ { move_advance,	state_gstr_2,	    not_used },
731 	/* class_e       */ { move_advance,	state_gstr_2,	    not_used },
732 	/* class_escape  */ { move_advance,	state_gstr_2,	    not_used },
733 	/* class_ff      */ { special,		state_gstr_2,   control_char },
734 	/* class_digit   */ { move_advance,	state_gstr_2,	    not_used },
735 	/* class_dot     */ { move_error,	bad_prefix,	    backup_1 },
736 	/* class_sign    */ { move_error,	bad_prefix,	    backup_1 },
737 	/* class_quote   */ { move_error,	bad_prefix,	    backup_1 },
738 	/* class_bslash  */ { move_advance,	state_eat_rest,     not_used },
739 	/* class_slash   */ { move_error,	bad_prefix,	    backup_1 },
740 	/* class_star    */ { move_advance,	state_eat_rest,     not_used },
741 	/* class_exclam  */ { move_error,	bad_prefix,	    backup_1 },
742 	/* class_eol     */ { move_error,	bad_prefix,	    backup_1 },
743 	/* class_rest    */ { move_advance,	state_eat_rest,     not_used },
744 	/* class_illegal */ { special,		state_eat_rest, control_char },
745 	/* class_pound   */ { move_error,	bad_prefix,	    backup_1 },
746 	/* class_dquote  */ { special,		state_gstr_3,   charset_gstr },
747 	/* class_langle  */ { move_error,	bad_prefix,	    backup_1 },
748 	/* class_rangle  */ { move_error,	bad_prefix,	    backup_1 },
749 	/* class_highbit */ { move_advance,	state_eat_rest,     not_used },
750     },
751     { /* state_gstr_3 */
752 	/* class_blank   */ { move_advance,	state_gstr_3,	    not_used },
753 	/* class_punc    */ { move_advance,	state_gstr_3,	    not_used },
754 	/* class_name    */ { move_advance,	state_gstr_3,	    not_used },
755 	/* class_e       */ { move_advance,	state_gstr_3,	    not_used },
756 	/* class_escape  */ { move_advance,	state_gstr_3,	    not_used },
757 	/* class_ff      */ { special,		state_gstr_3,	control_char },
758 	/* class_digit   */ { move_advance,	state_gstr_3,	    not_used },
759 	/* class_dot     */ { move_advance,	state_gstr_3,	    not_used },
760 	/* class_sign    */ { move_advance,	state_gstr_3,	    not_used },
761 	/* class_quote   */ { move_advance,	state_gstr_3,	    not_used },
762 	/* class_bslash  */ { special,		state_gstr_4,	start_bslash },
763 	/* class_slash   */ { move_advance,	state_gstr_3,	    not_used },
764 	/* class_star    */ { move_advance,	state_gstr_3,	    not_used },
765 	/* class_exclam  */ { move_advance,	state_gstr_3,	    not_used },
766 	/* class_eol     */ { move_final,	token_ugstr,	    backup_1 },
767 	/* class_rest    */ { move_advance,	state_gstr_3,	    not_used },
768 	/* class_illegal */ { special,		state_gstr_3,	control_char },
769 	/* class_pound   */ { move_advance,	state_gstr_3,	    not_used },
770 	/* class_dquote  */ { final,		token_gstr,	    backup_0 },
771 	/* class_langle  */ { move_advance,	state_gstr_3,	    not_used },
772 	/* class_rangle  */ { move_advance,	state_gstr_3,	    not_used },
773 	/* class_highbit */ { move_special,	state_gstr_3,	highbit_char },
774     },
775     { /* state_gstr_4 */
776 	/* class_blank   */ { move_special,	state_gstr_3,  ignore_bslash },
777 	/* class_punc    */ { move_special,	state_gstr_3,  ignore_bslash },
778 	/* class_name    */ { move_special,	state_gstr_3,  ignore_bslash },
779 	/* class_e       */ { move_special,	state_gstr_3,  ignore_bslash },
780 	/* class_escape  */ { special,		state_gstr_3,    insert_char },
781 	/* class_ff      */ { special,		state_gstr_3,   control_char },
782 	/* class_digit   */ { special,		state_gstr_5,	 found_digit },
783 	/* class_dot     */ { move_special,	state_gstr_3,  ignore_bslash },
784 	/* class_sign    */ { move_special,	state_gstr_3,  ignore_bslash },
785 	/* class_quote   */ { special,		state_gstr_3,    insert_char },
786 	/* class_bslash  */ { special,		state_gstr_3,    insert_char },
787 	/* class_slash   */ { move_special,	state_gstr_3,  ignore_bslash },
788 	/* class_star    */ { move_special,	state_gstr_3,  ignore_bslash },
789 	/* class_exclam  */ { move_special,	state_gstr_3,  ignore_bslash },
790 	/* class_eol     */ { move_final,	token_ugstr,	    backup_1 },
791 	/* class_rest    */ { move_special,	state_gstr_3,  ignore_bslash },
792 	/* class_illegal */ { special,		state_gstr_3,   control_char },
793 	/* class_pound   */ { move_special,	state_gstr_3,  ignore_bslash },
794 	/* class_dquote  */ { special,		state_gstr_3,    insert_char },
795 	/* class_langle  */ { move_special,	state_gstr_3,  ignore_bslash },
796 	/* class_rangle  */ { move_special,	state_gstr_3,  ignore_bslash },
797 	/* class_highbit */ { move_special,	state_gstr_3,  ignore_bslash },
798     },
799     { /* state_gstr_5 */
800 	/* class_blank   */ { move_special,	state_gstr_3, missing_bslash },
801 	/* class_punc    */ { move_special,	state_gstr_3, missing_bslash },
802 	/* class_name    */ { move_special,	state_gstr_3, missing_bslash },
803 	/* class_e       */ { move_special,	state_gstr_3, missing_bslash },
804 	/* class_escape  */ { move_special,	state_gstr_3, missing_bslash },
805 	/* class_ff      */ { special,		state_gstr_3,   control_char },
806 	/* class_digit   */ { special,		state_gstr_5,	 found_digit },
807 	/* class_dot     */ { move_special,	state_gstr_3, missing_bslash },
808 	/* class_sign    */ { move_special,	state_gstr_3, missing_bslash },
809 	/* class_quote   */ { move_special,	state_gstr_3, missing_bslash },
810 	/* class_bslash  */ { special,		state_gstr_3,	  end_digits },
811 	/* class_slash   */ { move_special,	state_gstr_3, missing_bslash },
812 	/* class_star    */ { move_special,	state_gstr_3, missing_bslash },
813 	/* class_exclam  */ { move_special,	state_gstr_3, missing_bslash },
814 	/* class_eol     */ { move_final,	token_ugstr,	    backup_1 },
815 	/* class_rest    */ { move_special,	state_gstr_3, missing_bslash },
816 	/* class_illegal */ { special,		state_str_3,    control_char },
817 	/* class_pound   */ { move_special,	state_gstr_3, missing_bslash },
818 	/* class_dquote  */ { move_special,	state_gstr_3, missing_bslash },
819 	/* class_langle  */ { move_special,	state_gstr_3, missing_bslash },
820 	/* class_rangle  */ { move_special,	state_gstr_3, missing_bslash },
821 	/* class_highbit */ { move_special,	state_gstr_3, missing_bslash },
822     },
823     { /* state_langle */
824 	/* class_blank   */ { move_error,  	bad_prefix,         backup_1 },
825 	/* class_punc    */ { move_error,  	bad_prefix,         backup_1 },
826 	/* class_name    */ { move_error,  	bad_prefix,         backup_1 },
827 	/* class_e       */ { move_error,  	bad_prefix,         backup_1 },
828 	/* class_escape  */ { move_error,  	bad_prefix,         backup_1 },
829 	/* class_ff      */ { special,		state_langle,   control_char },
830 	/* class_digit   */ { move_error,       bad_prefix, 	    backup_1 },
831 	/* class_dot     */ { move_error,  	bad_prefix,         backup_1 },
832 	/* class_sign    */ { move_error,  	bad_prefix,         backup_1 },
833 	/* class_quote   */ { move_error,  	bad_prefix,         backup_1 },
834 	/* class_bslash  */ { move_error,       bad_prefix,  	    backup_1 },
835 	/* class_slash   */ { move_error,  	bad_prefix,         backup_1 },
836 	/* class_star    */ { move_error,  	bad_prefix,         backup_1 },
837 	/* class_exclam  */ { move_error,  	bad_prefix,         backup_1 },
838 	/* class_eol     */ { move_final,	token_ugstr,	    backup_1 },
839 	/* class_rest    */ { move_advance,  	state_eat_rest,     not_used },
840 	/* class_illegal */ { special,		state_langle,   control_char },
841 	/* class_pound   */ { move_error,  	bad_prefix,         backup_1 },
842 	/* class_dquote  */ { move_error,  	bad_prefix,         backup_1 },
843 	/* class_langle  */ { move_final,  	token_punc2,        backup_0 },
844 	/* class_rangle  */ { move_error,  	bad_prefix,         backup_1 },
845 	/* class_highbit */ { move_error,  	bad_prefix,         backup_1 },
846     },
847     { /* state_rangle */
848 	/* class_blank   */ { move_error,  	bad_prefix,         backup_1 },
849 	/* class_punc    */ { move_error,  	bad_prefix,         backup_1 },
850 	/* class_name    */ { move_error,  	bad_prefix,         backup_1 },
851 	/* class_e       */ { move_error,  	bad_prefix,         backup_1 },
852 	/* class_escape  */ { move_error,  	bad_prefix,         backup_1 },
853 	/* class_ff      */ { special,		state_langle,   control_char },
854 	/* class_digit   */ { move_error,       bad_prefix, 	    backup_1 },
855 	/* class_dot     */ { move_error,  	bad_prefix,         backup_1 },
856 	/* class_sign    */ { move_error,  	bad_prefix,         backup_1 },
857 	/* class_quote   */ { move_error,  	bad_prefix,         backup_1 },
858 	/* class_bslash  */ { move_error,       bad_prefix,  	    backup_1 },
859 	/* class_slash   */ { move_error,  	bad_prefix,         backup_1 },
860 	/* class_star    */ { move_error,  	bad_prefix,         backup_1 },
861 	/* class_exclam  */ { move_error,  	bad_prefix,         backup_1 },
862 	/* class_eol     */ { move_final,	token_ugstr,	    backup_1 },
863 	/* class_rest    */ { move_advance,  	state_eat_rest,     not_used },
864 	/* class_illegal */ { special,		state_langle,   control_char },
865 	/* class_pound   */ { move_error,  	bad_prefix,         backup_1 },
866 	/* class_dquote  */ { move_error,  	bad_prefix,         backup_1 },
867 	/* class_langle  */ { move_error,  	bad_prefix,         backup_1 },
868 	/* class_rangle  */ { move_final,  	token_punc2,        backup_0 },
869 	/* class_highbit */ { move_error,  	bad_prefix,         backup_1 },
870     },
871   };
872 
873 
874 static char class_table[ 256 ] =
875   {
876 /* 00 */    class_eol,      class_illegal,  class_illegal,  class_illegal,
877 /* 04 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
878 /* 08 */    class_illegal,  class_blank,    class_illegal,  class_illegal,
879 /* 0C */    class_ff,	    class_illegal,  class_illegal,  class_illegal,
880 /* 10 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
881 /* 14 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
882 /* 18 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
883 /* 1C */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
884 /* 20 */    class_blank,    class_exclam,   class_dquote,   class_pound,
885 /* 24 */    class_name,	    class_rest,     class_punc,	    class_quote,
886 /* 28 */    class_punc,	    class_punc,     class_star,     class_sign,
887 /* 2C */    class_punc,     class_sign,     class_dot,      class_slash,
888 /* 30 */    class_digit,    class_digit,    class_digit,    class_digit,
889 /* 34 */    class_digit,    class_digit,    class_digit,    class_digit,
890 /* 38 */    class_digit,    class_digit,    class_punc,     class_punc,
891 /* 3C */    class_langle,   class_punc,     class_rangle,   class_rest,
892 /* 40 */    class_rest,     class_name,     class_name,     class_name,
893 /* 44 */    class_name,     class_e,	    class_name,     class_name,
894 /* 48 */    class_name,     class_name,     class_name,     class_name,
895 /* 4C */    class_name,     class_name,     class_name,	    class_name,
896 /* 50 */    class_name,     class_name,     class_name,     class_name,
897 /* 54 */    class_name,     class_name,     class_name,     class_name,
898 /* 58 */    class_name,     class_name,     class_name,     class_rest,
899 /* 5C */    class_bslash,   class_rest,     class_punc,     class_name,
900 /* 60 */    class_rest,     class_name,     class_escape,   class_name,
901 /* 64 */    class_name,     class_e,        class_escape,   class_name,
902 /* 68 */    class_name,     class_name,     class_name,     class_name,
903 /* 6C */    class_name,     class_name,     class_escape,   class_name,
904 /* 70 */    class_name,     class_name,     class_escape,   class_name,
905 /* 74 */    class_escape,   class_name,     class_escape,   class_name,
906 /* 78 */    class_name,     class_name,     class_name,     class_punc,
907 /* 7C */    class_punc,	    class_punc,	    class_punc,     class_illegal,
908 /* 80 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
909 /* 84 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
910 /* 88 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
911 /* 8C */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
912 /* 90 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
913 /* 94 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
914 /* 98 */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
915 /* 9C */    class_illegal,  class_illegal,  class_illegal,  class_illegal,
916 /* A0 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
917 /* A4 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
918 /* A8 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
919 /* AC */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
920 /* B0 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
921 /* B4 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
922 /* B8 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
923 /* BC */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
924 /* C0 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
925 /* C4 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
926 /* C8 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
927 /* CC */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
928 /* D0 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
929 /* D4 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
930 /* D8 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
931 /* DC */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
932 /* E0 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
933 /* E4 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
934 /* E8 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
935 /* EC */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
936 /* F0 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
937 /* F4 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
938 /* F8 */    class_highbit,  class_highbit,  class_highbit,  class_highbit,
939 /* FC */    class_highbit,  class_highbit,  class_highbit,  class_highbit
940 
941   };
942 
943 
944 /*    Tables to correlate token numbers and ASCII values for
945       punctuation characters.  Used by yylex and lex_issue_error.     */
946 
/* Number of entries in punc_char and punc_token. */
#define tok_punc_token_num	17

/* Single punctuation characters; entry i goes with punc_token[i]. */
static unsigned char	punc_char[tok_punc_token_num] =
		    {	'{',  '}',  '=',  ';',
			'(',  ')',  ':',  '+',
			'-',  ',',  '&',  '~',
			'*',  '/',  '^',  '|',
			'#' };
951 static int	punc_token[tok_punc_token_num] =
952 		    {	LEFT_BRACE,
953 			RIGHT_BRACE,
954 			EQUAL_SIGN,
955 			SEMICOLON,
956 			LEFT_PAREN,
957 			RIGHT_PAREN,
958 			COLON,
959 			PLUS,
960 			MINUS,
961 			COMMA,
962 			AND,
963 			NOT,
964 			MULTIPLY,
965 			DIVIDE,
966 			XOR,
967 			OR,
968 			POUND };
969 
970 /*    Tables to correlate token numbers and ASCII value pairs for
971       punctuation characters.  Used by yylex and lex_issue_error.     */
972 
/* Characters of the two-character punctuation tokens; entry i goes with
 * punc2_token[i]. */
static unsigned char	punc2_char[2] =
		    {	'<',
			'>' };
974 static int	punc2_token[2] =
975 		    {	LEFT_SHIFT,
976 			RIGHT_SHIFT };
977 
978 
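/*
**++
**  FUNCTIONAL DESCRIPTION:
**
**      This function returns the next token to be built in the UIL source
**	program being read by the compiler.  Characters are taken one at a
**	time from the current source buffer, classified through class_table,
**	and used to drive the token_table state machine until a token or an
**	error is found.
**
**  FORMAL PARAMETERS:
**
**      none
**
**  IMPLICIT INPUTS:
**
**      src_az_current_source_buffer	text and position of the current line
**      src_az_current_source_record	source record of the current line
**      az_first_lex_buffer		first buffer used to collect token text
**      Uil_characters_read, Uil_file_size
**					used to compute the percent complete
**      Uil_cmd_z_command.status_cb	status callback, if any
**      Uil_lex_l_charset_specified, Uil_lex_l_localized and the related
**	Uil_lex_* charset variables	consulted when a string does not name
**					its own charset
**
**  IMPLICIT OUTPUTS:
**
**      Uil_percent_complete		recomputed on entry
**      Uil_characters_read		incremented for each character fetched
**      src_az_current_source_buffer->w_current_position
**					advanced as characters are fetched
**
**  FUNCTION VALUE:
**
**      number of the token generated (UILEOF if the source ends inside
**	a comment)
**
**  SIDE EFFECTS:
**
**      diag_report_status is called on entry when a status callback is
**	registered.  Diagnostics are issued for malformed input.
**
**--
**/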
yylex()1009 int	yylex()
1010 {
1011     unsigned char c_char;	    /* current character */
1012     int		l_class;	    /* current character's class */
1013     int		l_state;	    /* current token_table state */
1014     int		l_lex_pos;	    /* next available position in c_lex_buffer*/
1015     cell	z_cell;		    /* local copy of current token_table state*/
1016     int		l_bslash_value = 0;	    /* current value of \digit...\ construct */
1017     int		l_start_src_pos;    /* starting source position of a token */
1018     int		l_charset;	    /* character set for strings */
1019     int		l_write_direction;	    /* writing direction */
1020     int		l_parse_direction;	    /* parsing direction */
1021     int		l_charset_sixteen_bit = FALSE; /* true if charset is 16-bit */
1022     sym_value_entry_type
1023 		*az_charset_entry;  /* value entry for the current charset */
1024     int		l_16bit_char_count = 0; /* for strings, count 16-bit chars */
1025     int		l_16bit_chars_only; /* True if the gstr we are */
1026 				    /* processing only has 16-bit chars */
1027     src_source_record_type
1028 		*az_start_src_record;/* starting source record of a token */
1029     lex_buffer_type
1030 		*az_current_lex_buffer; /* current lexical buffer */
1031 
1032 	az_charset_entry = (sym_value_entry_type *) 0;
1033 
1034     /*
1035     **  Call the Status callback routine to report our progress.
1036     */
1037     /* %COMPLETE  (between 0-50) */
1038     Uil_percent_complete =
1039       CEIL((int)( .5 * ((float)Uil_characters_read/(float)Uil_file_size))*100, 50);
1040     if (Uil_cmd_z_command.status_cb != (Uil_continue_type(*)())NULL)
1041 	diag_report_status();
1042 
1043 
1044 initialize_token_builder:
1045 
1046     /* initialize the lexical analyzer by
1047      *	    saving starting source position of the token
1048      *	    resetting the lexical buffer
1049      *	    putting the analyser in its initial state */
1050 
1051     az_start_src_record = src_az_current_source_record;
1052     l_start_src_pos = src_az_current_source_buffer->w_current_position;
1053 
1054     az_current_lex_buffer = az_first_lex_buffer;
1055     l_lex_pos = 0;
1056     l_charset = lex_k_default_charset;
1057     l_16bit_chars_only = FALSE;
1058     l_state = state_initial;
1059 
1060     /* start looking for the token */
1061 
1062 continue_in_next_state:
1063 
1064     for (;;)
1065     {
1066 						    /* get next input char */
1067 						    /* advance source too */
1068 	c_char = src_az_current_source_buffer->c_text
1069 		    [ src_az_current_source_buffer->w_current_position++ ];
1070 
1071 	/* %COMPLETE */
1072 	Uil_characters_read++;
1073 
1074 	l_class = class_table[ c_char ];	    /* determine its class */
1075 	z_cell = token_table[ l_state][l_class ];   /* load state cell */
1076 
1077 	/* pick up the next state, or terminal, or error */
1078 
1079 	l_state = z_cell.next_state;
1080 
	/* actions with the NEGATIVE bit set require the current
	 * character be saved in the current lexical buffer */
1083 
1084 	if (z_cell.action & NEGATIVE)
1085 	{
1086 	    if (l_lex_pos > l_max_lex_buffer_pos )
1087 	    {
1088 		az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
1089 	        l_lex_pos = 0;
1090 	    }
1091 	    az_current_lex_buffer->c_text[ l_lex_pos ] = c_char;
1092 	    l_lex_pos++;
1093 	}
1094 
1095 	/* next step is based on action */
1096 
1097 	switch (z_cell.action)
1098 	{
1099 	case move_advance:
1100 	case advance:
1101 	    if (l_16bit_chars_only) goto found_16bit_char;
1102 	    continue;
1103 
1104 	case reset:
1105 
1106 	    goto initialize_token_builder;
1107 
1108 	case move_final:
1109 	case final:
1110         case final_comment:  /* RAP retain comments */
1111 
1112 	    goto found_token;
1113 
1114 	case move_error:
1115 	case error:
1116 
1117 	    goto found_error;
1118 
1119 	case move_special:
1120 	case special:
1121 
1122 	    goto special_processing;
1123 
1124 	default:
1125 
1126 	    _assert( FALSE, "unknown token_table action" );
1127 
1128 	}
1129     }
1130 
1131     /* process special actions */
1132 
1133 special_processing:
1134 
1135     switch (z_cell.backup)	/* backup holds special processing code */
1136     {
1137     case control_char:		/* encountered a control char in a string or
				 * comment - issue a diagnostic and continue */
1139 	issue_control_char_diagnostic( c_char );
1140 	break;
1141 
1142     case start_bslash:		/* start of a \ construct in a string */
1143 
1144 	l_bslash_value = 0;	    /* initialize collection cell */
1145 	break;
1146 
1147     case found_digit:		/* next digit in a \digit...\ sequence */
1148 
1149 	if (l_bslash_value < 256 )  /* if still in range add in next digit */
1150 	    l_bslash_value = l_bslash_value * 10 + ( c_char - 48 );
1151 	break;
1152 
1153     case end_digits:		/* end of \digit...\ sequence */
1154 
1155 	if (l_bslash_value >= 256 )  /* issue a diagnostic */
1156 	{   diag_issue_diagnostic
1157 		( d_out_range,
1158 		  src_az_current_source_record,
1159 		  src_az_current_source_buffer->w_current_position - 1,
1160 		  "\\digit...\\ sequence",
1161 		  "0-255" );
1162 	    l_bslash_value = lex_k_unprint_sub;
1163 	}
1164 
1165 	if (l_bslash_value == 0 )  /* issue a diagnostic */
1166 	{   diag_issue_diagnostic
1167 		( d_null,
1168 		  src_az_current_source_record,
1169 		  src_az_current_source_buffer->w_current_position - 1 );
1170 	}
1171 
1172 	if (l_lex_pos > l_max_lex_buffer_pos )
1173 	{
1174 	    az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
1175 	    l_lex_pos = 0;
1176 	}
1177 	az_current_lex_buffer->c_text[ l_lex_pos++ ] = l_bslash_value;
1178 	break;
1179 
1180     case insert_char:		/* place special character in lex buffer */
1181     {
1182 
1183 	static unsigned char c_bslash_char[10]  = {
1184 		'\\', '\'', 'n', 't', 'v', 'b', 'r', 'f', '"'};
1185 	static unsigned char ab_bslash_value[9] =
1186 		 { 0x5C, 0x27, 0x0A, 0x09, 0x0B, 0x08, 0x0D, 0x0C, 0x22 };
1187 
1188 	if (l_lex_pos > l_max_lex_buffer_pos )
1189 	{
1190 	    az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
1191 	    l_lex_pos = 0;
1192 	}
1193 	az_current_lex_buffer->c_text[ l_lex_pos++ ] =
1194 	    ab_bslash_value
1195 		[ _index( c_char, c_bslash_char, sizeof( c_bslash_char )-1 )];
1196 	break;
1197     }
1198 
1199     case missing_bslash:	/* \digit...\ sequence not terminated */
1200 
1201 	diag_issue_diagnostic
1202 		( d_unterm_seq,
1203 		  src_az_current_source_record,
1204 		  src_az_current_source_buffer->w_current_position - 1,
1205 		  "\\digit...\\ sequence",
1206 		  "with \\" );
1207 	break;
1208 
1209     case ignore_bslash:		/* \ not followed by valid character */
1210 
1211 	diag_issue_diagnostic
1212 		( d_backslash_ignored,
1213 		  src_az_current_source_record,
1214 		  src_az_current_source_buffer->w_current_position - 1,
1215 		  c_char );
1216 	break;
1217 
1218     case string_wrap:
1219 
1220 	if (src_get_source_line( ) == src_k_end_source)
1221 	{
1222 	    diag_issue_diagnostic
1223 		    ( d_unterm_seq,
1224 		      src_az_current_source_record,
1225 		      src_az_current_source_buffer->w_current_position - 1,
1226 		      "character string",
1227 		      "before end of source" );
1228 
1229 	    src_az_current_source_buffer->w_current_position--;
1230 	}
1231 
1232 	break;
1233 
1234     case comment_wrap:
1235 
1236 	if (src_get_source_line( ) == src_k_end_source)
1237 	{
1238 	    diag_issue_diagnostic
1239 		    ( d_unterm_seq,
1240 		      src_az_current_source_record,
1241 		      src_az_current_source_buffer->w_current_position - 1,
1242 		      "comment",
1243 		      "before end of source" );
1244 
1245 	    src_az_current_source_buffer->w_current_position--;
1246 	    return UILEOF;
1247 	}
1248 	az_current_lex_buffer->
1249 	    c_text[strlen((char *)az_current_lex_buffer->c_text)] = '\n';
1250 	break;
1251 
1252     case highbit_char:		/* check if must accept extra chars */
1253 found_16bit_char:
1254     {
1255 	unsigned char    next_char;
1256 
1257 
1258 	/*
1259 	**  If the current character set allows 16-bit characters, then
1260 	**  process them specially.
1261 	*/
1262 	if ( l_charset_sixteen_bit )
1263 	{
1264 	    /* need to:
1265 	    **	1) get next input char and advance the source
1266 	    **	2) check that the next is not a control character
1267 	    **	3) place the next character in the lex buffer
1268 	    */
1269 
1270 	    next_char = src_az_current_source_buffer->c_text
1271 		    [ src_az_current_source_buffer->w_current_position++ ];
1272 
1273 	    switch (class_table[ next_char ])
1274 	    {
1275 
1276 	    case class_eol:
1277 		src_az_current_source_buffer->w_current_position--;
1278 	    case class_illegal:
1279 		issue_control_char_diagnostic( next_char );
1280 		break;
1281 	    default:
1282 		break;
1283 	    }
1284 
1285 	    if (l_lex_pos > l_max_lex_buffer_pos )
1286 	    {
1287 		az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
1288 		l_lex_pos = 0;
1289 	    }
1290 
1291 	    az_current_lex_buffer->c_text[ l_lex_pos++ ] = next_char;
1292 	    l_16bit_char_count ++;
1293 	}
1294 
1295 	break;
1296     }
1297 
1298     case charset_gstr:		/* set l_charset with the char set */
1299     {
1300 	_assert( FALSE, "Should never get to charset_gstr" );
1301 	break;
1302     }
1303 
1304     case nocharset_gstr:
1305 	if (Uil_lex_l_charset_specified) {
1306 	    /* use the specified charset*/
1307 	    l_charset = Uil_lex_l_literal_charset;
1308 	    az_charset_entry = Uil_lex_az_literal_charset;
1309 	    }
1310 	else if (Uil_lex_l_localized) goto found_localized_string;
1311 	else {
1312 	    /* No charset specified, use the default */
1313 	    l_charset = Uil_lex_l_user_default_charset;
1314 	    az_charset_entry = Uil_lex_az_charset_entry;
1315 	    }
1316 
1317 
1318 
1319 	/* Get the charset information */
1320 	sem_charset_info
1321 	    (l_charset,
1322 	     az_charset_entry,
1323 	     &l_write_direction,
1324 	     &l_parse_direction,
1325 	     &l_charset_sixteen_bit);
1326 
1327 	/* reset 16 bit character count to 0 */
1328 
1329 	l_16bit_char_count = 0;
1330 
1331 	/*
1332 	**  if this is a user-defined, 16-bit charset then treat all
1333 	**  as 16-bit.
1334 	*/
1335 	if ((l_charset_sixteen_bit) && (l_charset == lex_k_userdefined_charset))
1336 	    l_16bit_chars_only = TRUE;
1337 
1338 	break;
1339 
1340     default:
1341 	_assert( FALSE, "unknown token_table special action" );
1342 
1343     }
1344 
1345     /* Next state of the token builder is should already be in l_state.
1346      * Continue at this point */
1347 
1348     goto continue_in_next_state;
1349 
1350 
1351 found_localized_string:
1352     {
1353       /* Local variables */
1354       int 		mb_len, i;
1355       unsigned char	mb_byte;
1356 
1357       /* Should be looking at the first byte of the string. */
1358       /* Localize... */
1359       _MrmOSSetLocale("");
1360 
1361       /* Parse the string. */
1362       while (TRUE)
1363 	{
1364 	  mb_len = mblen((char *)&src_az_current_source_buffer->c_text
1365 			 [src_az_current_source_buffer->w_current_position],
1366 			 MB_CUR_MAX);
1367 
1368 	  mb_byte = src_az_current_source_buffer->c_text
1369 	    [src_az_current_source_buffer->w_current_position];
1370 
1371 	  if (mb_len == 1)
1372 	    switch (class_table[mb_byte])
1373 	      {
1374 	      case class_eol:
1375 		z_cell.backup = backup_0;
1376 		l_state = token_ugstr;
1377 
1378 		if (l_lex_pos > l_max_lex_buffer_pos )
1379 		  {
1380 		    az_current_lex_buffer =
1381 		      get_lex_buffer( az_current_lex_buffer );
1382 		    l_lex_pos = 0;
1383 		  }
1384 		az_current_lex_buffer->c_text[ l_lex_pos++ ] = mb_byte;
1385 		_MrmOSSetLocale("C");
1386 		goto found_token;
1387 
1388 	      case class_dquote:
1389 		z_cell.backup = backup_0;
1390 		l_state = token_lstr;
1391 		src_az_current_source_buffer->w_current_position++;
1392 		_MrmOSSetLocale("C");
1393 		goto found_token;
1394 
1395 	      default:
1396 		break;
1397 	      }
1398 
1399 	  if (l_lex_pos > l_max_lex_buffer_pos )
1400 	    {
1401 	      az_current_lex_buffer = get_lex_buffer( az_current_lex_buffer );
1402 	      l_lex_pos = 0;
1403 	    }
1404 
1405 	  for (i = 0; i < mb_len; i++)
1406 	    {
1407 	      if (l_lex_pos > l_max_lex_buffer_pos )
1408 		{
1409 		  az_current_lex_buffer = get_lex_buffer(az_current_lex_buffer);
1410 		  l_lex_pos = 0;
1411 		}
1412 	      az_current_lex_buffer->c_text[l_lex_pos++] =
1413 		src_az_current_source_buffer->c_text
1414 		  [src_az_current_source_buffer->w_current_position++];
1415 	    }
1416 	}
1417     }
1418 
1419 found_token:
1420 
1421     /* do any backup of the source buffer position and lex buffer */
1422 
1423     src_az_current_source_buffer->w_current_position -= z_cell.backup;
1424     l_lex_pos -= z_cell.backup;
1425 
1426     /* put a null at the end of the current lex buffer */
1427 
1428     az_current_lex_buffer->c_text[ l_lex_pos ] = 0;
1429 
1430     /* case on the token found */
1431 
1432     switch (l_state)		/* l_state holds the token found */
1433     {
1434     case token_name:
1435     {
1436 
1437 	key_keytable_entry_type	*az_keyword;
1438 
1439 	/* check the case sensitivity flag and change case if necessary */
1440 
1441 	if (! uil_v_case_sensitive)
1442 	{
1443 	    char	* ptr;
1444 	    for ( ptr = (char *)(az_current_lex_buffer->c_text);
1445 		  (* ptr) != 0;
1446 		  (* ptr) = _upper (* ptr), ptr++)
1447 	    {}
1448 	}
1449 
1450 	/* check if the name is a keyword */
1451 
1452 	az_keyword =
1453 	    key_find_keyword( l_lex_pos, (char *)az_current_lex_buffer->c_text );
1454 	if( az_keyword != NULL)
1455 	    {
1456 	      /* check that the length of the name is in range */
1457 
1458 	      if (l_lex_pos > key_k_keyword_max_length)
1459 		{
1460 		  l_lex_pos = key_k_keyword_max_length;
1461 		  az_current_lex_buffer->c_text[ l_lex_pos ] = 0;
1462 		  diag_issue_diagnostic
1463 		    ( d_name_too_long,
1464 		     az_start_src_record,
1465 		     l_start_src_pos,
1466 		     az_current_lex_buffer->c_text );
1467 		}
1468 
1469 	      yylval.value.az_keyword_entry = az_keyword;
1470 	      yylval.b_type = az_keyword->b_token;
1471 	      break;
1472 	    }
1473 
1474 	/* process the name as an identifier */
1475 
1476         /* check that the length of the identifier is in range */
1477         /* Added for fix to CR 5566 */
1478 
1479 	if (l_lex_pos > lex_k_identifier_max_length)
1480 	    {
1481 	    l_lex_pos = lex_k_identifier_max_length;
1482 	    az_current_lex_buffer->c_text[ l_lex_pos ] = 0;
1483 	    diag_issue_diagnostic
1484 	        ( d_name_too_long,
1485 		  az_start_src_record,
1486 		  l_start_src_pos,
1487 		  lex_k_identifier_max_length,
1488 		  az_current_lex_buffer->c_text );
1489 	    }
1490 
1491 	yylval.value.az_symbol_entry =
1492 	    (sym_entry_type *) sym_insert_name( l_lex_pos, (char *)az_current_lex_buffer->c_text );
1493 
1494 	yylval.b_type = NAME;
1495 	break;
1496     }
1497 
1498     case token_punc:
1499     {
1500 	int		l_token;
1501 
1502 	/* found a punctuation mark - look up its token number in a table */
1503 
1504 	l_token = punc_token
1505 		    [ _index( az_current_lex_buffer->c_text[ 0 ],
1506 			      punc_char, sizeof( punc_char )) ];
1507 	yylval.b_type = l_token;
1508 
1509         break;
1510     }
1511 
1512     case token_punc2:
1513     {
1514 	int		l_token;
1515 
1516 	/* found a punctuation mark - look up its token number in a table */
1517 
1518 	l_token = punc2_token
1519 		    [ _index( az_current_lex_buffer->c_text[ 0 ],
1520 			      punc2_char, sizeof( punc2_char )) ];
1521 	yylval.b_type = l_token;
1522 
1523         break;
1524     }
1525 
1526     case token_eol:
1527 
1528 	/* if there is no more source
1529 	 *   then return an end of file
1530 	 *   otherwise go look for the next token */
1531 
1532 	if (src_get_source_line( ) == src_k_end_source)
1533 	    return UILEOF;
1534 
1535 	goto initialize_token_builder;
1536 
1537     case token_integer:
1538     {
1539 	long			l_integer;
1540 
1541 	yylval.b_type = UNS_INT_LITERAL;
1542 
1543 	/* convert the text to binary
1544 	 *    the sign of the number is applied as part of semantic
1545 	 *    analysis; thus we only handle integers in range 0..2**31-1
1546 	 */
1547 
1548         errno = 0;
1549 	l_integer = cvt_ascii_to_long(az_current_lex_buffer->c_text);
1550 
1551 	if (errno != 0)
1552 	    diag_issue_diagnostic
1553 		( d_out_range,
1554 		  az_start_src_record,
1555 		  l_start_src_pos,
1556 		  "integer",
1557 		  " " );
1558 
1559 	yylval.value.az_symbol_entry =
1560 		(sym_entry_type *)sem_create_value_entry
1561 		    ( (char *)&l_integer, sizeof( long ), sym_k_integer_value );
1562 	break;
1563     }
1564 
1565     case token_ustring:
1566 
1567 	diag_issue_diagnostic
1568 		( d_unterm_seq,
1569 		  src_az_current_source_record,
1570 		  src_az_current_source_buffer->w_current_position - 1,
1571 		  "character string",
1572 		  "before end of line" );
1573 
1574   case token_comment:       /* RAP preserve comments */
1575     {
1576       int size;
1577 
1578       if (last_token_seen != token_comment)
1579 	comment_text[0]=0;
1580 
1581       size = (int)strlen((char *)az_current_lex_buffer->c_text)+1;
1582       if ((size  + (int)strlen (comment_text)) >= comment_size)
1583 	{
1584 	  comment_text = XtRealloc(comment_text, INCR_COMMENT_SIZE + strlen(comment_text));
1585 	  comment_size = INCR_COMMENT_SIZE + strlen (comment_text);
1586 	}
1587 
1588       strcat (comment_text, (char *)az_current_lex_buffer->c_text);
1589       strcat (comment_text, "\n");
1590       last_token_seen = token_comment;
1591 	    goto initialize_token_builder;
1592     }
1593 
1594 
1595     case token_string:
1596 found_primitive_string:
1597     {
1598 	int			l_length;
1599 	lex_buffer_type		*az_lex_buffer;
1600 	sym_value_entry_type	*az_value;
1601 	unsigned char		*c_char;
1602 
1603 	l_length = l_lex_pos;
1604 
1605 	for (az_lex_buffer = az_first_lex_buffer;
1606 	     az_lex_buffer != az_current_lex_buffer;
1607 	     az_lex_buffer = az_lex_buffer->az_next_buffer)
1608 	    l_length = l_length + l_max_lex_buffer_pos + 1;
1609 
1610 	az_value = create_str_entry( l_length, l_charset, az_charset_entry );
1611 
1612 	c_char = (unsigned char *)az_value->value.c_value;
1613 
1614 	for (az_lex_buffer = az_first_lex_buffer;
1615 	     az_lex_buffer != az_current_lex_buffer;
1616 	     az_lex_buffer = az_lex_buffer->az_next_buffer)
1617 	{
1618 	    _move( c_char, az_lex_buffer->c_text, l_max_lex_buffer_pos + 1);
1619 	    c_char = c_char + l_max_lex_buffer_pos + 1;
1620 	}
1621 
1622 	_move( c_char, az_lex_buffer->c_text, l_lex_pos );
1623 
1624     	yylval.value.az_symbol_entry = (sym_entry_type *) az_value;
1625     	yylval.b_type = CHAR_8_LITERAL;
1626 
1627 	break;
1628     }
1629 
1630     case token_real:
1631     {
1632 	double	d_real;
1633 
1634 	yylval.b_type = UNS_FLOAT_LITERAL;
1635 
1636         errno = 0;
1637 	d_real = atof((char *)az_current_lex_buffer->c_text);
1638 
1639 	if (errno != 0)
1640 	    diag_issue_diagnostic
1641 		( d_out_range,
1642 		  az_start_src_record,
1643 		  l_start_src_pos,
1644 		  "real",
1645 		  " " );
1646 
1647 	yylval.value.az_symbol_entry =
1648 		(sym_entry_type *)sem_create_value_entry
1649 		    ( (char *)&d_real, sizeof( double ), sym_k_float_value );
1650 	break;
1651     }
1652 
1653     case token_ff:
1654 
1655 	if (l_start_src_pos != 0)
1656 	    issue_control_char_diagnostic( c_char );
1657 
1658         src_az_current_source_record->b_flags |= src_m_form_feed;
1659 
1660 	goto initialize_token_builder;
1661 
1662     case token_ugstr:
1663 
1664 	diag_issue_diagnostic
1665 		( d_unterm_seq,
1666 		  src_az_current_source_record,
1667 		  src_az_current_source_buffer->w_current_position - 1,
1668 		  "character string",
1669 		  "before end of line" );
1670 
1671     case token_gstr:
1672 
1673 	/*
1674 	** Some general strings require special processing.  Those
1675 	** that do not can go thru the normal string code.
1676 	*/
1677         if ( l_parse_direction == XmSTRING_DIRECTION_R_TO_L )
1678 	{
1679 	    int		    i,j;
1680 	    unsigned char   tmp1;
1681 	    unsigned char   tmp2;
1682 
1683 	    /* assuming the string is confined to a single lex buffer.	    */
1684 	    /* just flip the characters around.  16-bit characters need to  */
1685 	    /* be done in groups of two bytes				    */
1686 	    if (l_charset_sixteen_bit != TRUE)
1687 		/*
1688 		**  Just reverse the bytes from the first to last
1689 		*/
1690 		for (i=0, j=l_lex_pos-1;  i < (l_lex_pos>>1);  i++,j--)
1691 		{
1692 		    tmp1 = az_current_lex_buffer->c_text[ i ];
1693 		    az_current_lex_buffer->c_text[ i ] =
1694 			az_current_lex_buffer->c_text[ j ];
1695 		    az_current_lex_buffer->c_text[ j ] = tmp1;
1696 		}
1697 
1698 	    /*
1699 	    **  Don't reverse the string if have less than 2 characters (4 bytes)
1700 	    */
1701 	    else if ((l_lex_pos>>1) > 1)
1702 		/*
1703 		**  This reversing doesn't work for mixed 8/16-bit character
1704 		**  sets, but only built-in character sets allow mixing and
1705 		**  they are not right-to-left.  We do the same copying as in
1706 		**  the 8-bit case above, but we move two bytes at a time and
1707 		**  reverse the order as we copy so they end up correct.
1708 		*/
1709 		for (i=0, j=l_lex_pos-1;  i < (l_lex_pos>>1);  i+=2,j-=2)
1710 		{
1711 		    tmp1 = az_current_lex_buffer->c_text[ i ];
1712 		    tmp2 = az_current_lex_buffer->c_text[ i + 1 ];
1713 		    az_current_lex_buffer->c_text[ i ] =
1714 			az_current_lex_buffer->c_text[ j - 1 ];
1715 		    az_current_lex_buffer->c_text[ i + 1 ] =
1716 			az_current_lex_buffer->c_text[ j ];
1717 		    az_current_lex_buffer->c_text[ j ] = tmp2;
1718 		    az_current_lex_buffer->c_text[ j - 1 ] = tmp1;
1719 		}
1720 	}
1721 
1722 
1723 	/*
1724 	**  If the string isn't 16-bit or it is userdefined and thus
1725 	**  cannot be mixed 8/16-bit then we can just make a primitive
1726 	**  string.
1727 	*/
1728 	if ((l_charset_sixteen_bit != TRUE) ||
1729 	    (l_charset == lex_k_userdefined_charset))
1730 	    goto found_primitive_string;
1731 	else
1732 	{
1733 
1734 	    sym_value_entry_type    *cstr_entry;
1735 	    sym_value_entry_type    *str_entry;
1736 	    int			    a_off, off;
1737 
1738 	    /*
1739 	    **	if string consists solely of 8-bit ascii characters,
1740 	    **  l_16bit_char_count will be zero.
1741 	    **	if string consists solely of 16 bit characters,
1742 	    **  l_16bit_char_count*2 will equal l_lex_pos.
1743 	    **  In either of these cases, the result is still a
1744 	    **	primitive string.
1745 	    */
1746 
1747 	    /*
1748 	    **  For KANJI and HANZI treat 8-bit characters as ISO_LATIN1.
1749 	    */
1750 	    if (l_16bit_char_count == 0)
1751 	    {
1752 		l_charset = uil_sym_isolatin1_charset;
1753 		goto found_primitive_string;
1754 	    }
1755 
1756 	    /*
1757 	    **  If the string only contains 16-bit characters,
1758 	    **  it still can be stored as a primitive string.
1759 	    */
1760 	    if ((l_16bit_char_count<<1) == l_lex_pos)
1761 		goto found_primitive_string;
1762 
1763 
1764 	    /*
1765 	    **	lex buffer is a mix of 8 and 16 bit characters.
1766 	    **	need to build a compound string.
1767 	    */
1768 
1769 	    cstr_entry = sem_create_cstr();
1770 
1771 	    for ( a_off = 0,
1772 		  off = 0;
1773 		  off < l_lex_pos;
1774 		)
1775 
1776 	    {
1777 
1778 		for (off = a_off;  off < l_lex_pos;  off++)
1779 		    if (az_current_lex_buffer->c_text[ off ] > 0x97)
1780 			break;
1781 
1782 /*
1783  * Create the 8 bit string with iso_latin1
1784  */
1785 
1786 		if (off > a_off)
1787 		{
1788 		    str_entry = create_str_entry
1789 			( off - a_off,
1790 			 uil_sym_isolatin1_charset,
1791 			 az_charset_entry );
1792 
1793 		    _move( str_entry->value.c_value,
1794 			   &az_current_lex_buffer->c_text[ a_off ],
1795 			   off-a_off );
1796 
1797 		    sem_append_str_to_cstr( cstr_entry, str_entry, TRUE );
1798 		}
1799 
1800 		for (a_off = off;  a_off < l_lex_pos;  a_off += 2)
1801 		    if (az_current_lex_buffer->c_text[ a_off ] <= 0x97)
1802 			break;
1803 
1804 /*
1805  * Create the 16 bit string with its charset
1806  */
1807 
1808 		if (a_off > off)
1809 		{
1810 		    str_entry =
1811 			create_str_entry( a_off - off, l_charset, az_charset_entry );
1812 
1813 		    _move( str_entry->value.c_value,
1814 			   &az_current_lex_buffer->c_text[ off ],
1815 			   a_off-off );
1816 
1817 		    sem_append_str_to_cstr( cstr_entry, str_entry, TRUE );
1818 		}
1819 
1820 	    }
1821 
1822 	    yylval.value.az_symbol_entry = (sym_entry_type *)cstr_entry;
1823 	    yylval.b_type = COMP_STRING;
1824 
1825 	}
1826 	break;
1827 
1828   case token_lstr:
1829     {
1830       int			l_length = 0;
1831       lex_buffer_type		*az_lex_buffer;
1832       sym_value_entry_type	*str_entry;
1833       unsigned char		*c_char;
1834 
1835       l_length = l_lex_pos;
1836 
1837       for (az_lex_buffer = az_first_lex_buffer;
1838 	   az_lex_buffer != az_current_lex_buffer;
1839 	   az_lex_buffer = az_lex_buffer->az_next_buffer)
1840 	l_length = l_length + l_max_lex_buffer_pos + 1;
1841 
1842       str_entry = create_str_entry(l_length, lex_k_fontlist_default_tag,
1843 				  az_charset_entry );
1844 
1845       c_char = (unsigned char *)str_entry->value.c_value;
1846 
1847       for (az_lex_buffer = az_first_lex_buffer;
1848 	   az_lex_buffer != az_current_lex_buffer;
1849 	   az_lex_buffer = az_lex_buffer->az_next_buffer)
1850 	{
1851 	  _move( c_char, az_lex_buffer->c_text, l_max_lex_buffer_pos + 1);
1852 	  c_char = c_char + l_max_lex_buffer_pos + 1;
1853 	}
1854 
1855       _move( c_char, az_lex_buffer->c_text, l_lex_pos );
1856 
1857       yylval.value.az_symbol_entry = (sym_entry_type *)str_entry;
1858       yylval.b_type = LOC_STRING;
1859 
1860       break;
1861     }
1862 
1863     default:
1864 	_assert( FALSE, "unknown token table final state" );
1865   }
1866 
1867 /* RAP we want to keep track of whether we are appending sequential comments */
1868     last_token_seen = l_state;
1869 
1870     /* set position information in token value */
1871 
1872     yylval.az_source_record = az_start_src_record;
1873     yylval.b_source_pos = l_start_src_pos;
1874     yylval.b_source_end = src_az_current_source_buffer->w_current_position;  /*  was "l_start_src_pos + l_lex_pos;" */
1875     yylval.b_tag = sar_k_token_frame;
1876 
1877     /* dump the token if requested */
1878 
1879 #if debug_version
1880     if (uil_v_dump_tokens)
1881 	dump_token( az_current_lex_buffer, l_lex_pos );
1882 #endif
1883 
1884     /*
1885     ** save this token
1886     */
1887 
1888     prev_yylval = yylval;
1889 
1890     /* return the token generated */
1891 
1892     return yylval.b_type;
1893 
1894 found_error:
1895 
1896     /* do any backup of the source buffer position and lex buffer */
1897 
1898     src_az_current_source_buffer->w_current_position -= z_cell.backup;
1899     l_lex_pos -= z_cell.backup;
1900 
1901     /* put a null at the end of the current lex buffer */
1902 
1903     az_current_lex_buffer->c_text[ l_lex_pos ] = 0;
1904 
1905     /* case on the type of error */
1906 
1907     switch (l_state)		/* contains the type of error */
1908     {
1909     case bad_prefix:
1910 
1911 	/* printable characters that are not part of a token were found */
1912 
1913 	diag_issue_diagnostic
1914 		( d_unknown_seq,
1915 		  az_start_src_record,
1916 		  l_start_src_pos,
1917 		  az_current_lex_buffer->c_text );
1918 
1919 	break;
1920 
1921     default:
1922 	_assert( FALSE, "unknown token table error state" );
1923 	break;
1924       }
1925 
1926     goto initialize_token_builder;
1927 
1928   }
1929 
1930 
1931 /*
1932 **++
1933 **  FUNCTIONAL DESCRIPTION:
1934 **
1935 **      This function initializes the lexical analyzer.
1936 **
1937 **  FORMAL PARAMETERS:
1938 **
1939 **      none
1940 **
1941 **  IMPLICIT INPUTS:
1942 **
1943 **      none
1944 **
1945 **  IMPLICIT OUTPUTS:
1946 **
1947 **      az_first_lex_buffer
1948 **
1949 **  FUNCTION VALUE:
1950 **
1951 **      void
1952 **
1953 **  SIDE EFFECTS:
1954 **
1955 **      lexical buffer is allocated
1956 **
1957 **--
1958 **/
1959 
1960 #define UNSCHAR_MINUS_ONE (unsigned char) 255;
1961 
lex_initialize_analyzer()1962 void  lex_initialize_analyzer( )
1963 
1964 {
1965 String language;
1966 
1967 /* RAP preserve comments */
1968 comment_text = (char *) _get_memory(INITIAL_COMMENT_SIZE);
1969 comment_size = INITIAL_COMMENT_SIZE;
1970 
1971 comment_text[0] = '\0';
1972 
1973 /* BEGIN OSF Fix CR 4749 */
1974 /* The lex algorithm has the potential to write
1975  * into index l_max_lex_buffer_pos + 1, so allocate l_max_lex_buffer_pos
1976  * plus 2 positions in buffer.
1977  */
1978 az_first_lex_buffer =
1979     (lex_buffer_type *) _get_memory (l_max_lex_buffer_pos + 2 +
1980 				     sizeof(lex_buffer_type *));
1981 /* END OSF Fix CR 4749 */
1982 az_first_lex_buffer->az_next_buffer = NULL;
1983 
1984 /*   Initialize the stack frame entry for epsilon productions.   */
1985 
1986 gz_yynullval.b_tag = sar_k_null_frame;
1987 
1988 /*   Initialize the default character set  */
1989 
1990 language = (char *) _XmStringGetCurrentCharset();
1991 if ( language == NULL )
1992     Uil_lex_l_user_default_charset = lex_k_default_charset;
1993 else
1994     {
1995     Uil_lex_l_user_default_charset = sem_charset_lang_name (language);
1996     if (Uil_lex_l_user_default_charset == sym_k_error_charset)
1997 	{
1998 	diag_issue_diagnostic
1999 	    ( d_bad_lang_value,
2000 	     diag_k_no_source,
2001 	     diag_k_no_column);
2002 	Uil_lex_l_user_default_charset = lex_k_default_charset;
2003 	}
2004     }
2005 Uil_lex_az_charset_entry = NULL;
2006 
2007 /* Determine if localized strings are possible */
2008 if (Uil_cmd_z_command.v_use_setlocale == FALSE)
2009   Uil_lex_l_localized = FALSE;
2010 else
2011   {
2012     Uil_lex_l_localized = TRUE;
2013     _MrmOSSetLocale("C");
2014   }
2015 
2016 /*   Initialize the current character set */
2017 Uil_lex_l_charset_specified = FALSE;
2018 
2019 /*    Initialize the source position and record */
2020 
2021 prev_yylval.b_source_end = UNSCHAR_MINUS_ONE;
2022 prev_yylval.az_source_record = src_az_current_source_record;
2023 
2024 }
2025 
2026 
2027 /*
2028 **++
2029 **  FUNCTIONAL DESCRIPTION:
2030 **
2031 **      This function performs the cleanup processing of the lexical analyzer.
2032 **
2033 **  FORMAL PARAMETERS:
2034 **
2035 **      none
2036 **
2037 **  IMPLICIT INPUTS:
2038 **
2039 **      az_first_lex_buffer
2040 **
2041 **  IMPLICIT OUTPUTS:
2042 **
2043 **      az_first_lex_buffer
2044 **
2045 **  FUNCTION VALUE:
2046 **
2047 **      void
2048 **
2049 **  SIDE EFFECTS:
2050 **
2051 **      lexical buffer is freed
2052 **
2053 **--
2054 **/
2055 
Uil_lex_cleanup_analyzer()2056 void  Uil_lex_cleanup_analyzer( )
2057 
2058 {
2059     /*	pointer to next buffer to free	*/
2060     lex_buffer_type  *az_buffer_to_free;
2061 
2062     /* Loop through the list of buffers freeing them all */
2063     while (az_first_lex_buffer != NULL) {
2064 	az_buffer_to_free = az_first_lex_buffer;
2065 	az_first_lex_buffer = az_first_lex_buffer->az_next_buffer;
2066 	_free_memory((char*)az_buffer_to_free);
2067 	}
2068 }
2069 
2070 
2071 /*
2072 **++
2073 **  FUNCTIONAL DESCRIPTION:
2074 **
2075 **      This function issues a syntax error.  It is called from the
2076 **	error handling mechanism in the parser.
2077 **
2078 **  FORMAL PARAMETERS:
2079 **
2080 **      restart_token		the token number for the punctuation
2081 **				character where parsing will resume after
2082 **				this error is issued.
2083 **
2084 **  IMPLICIT INPUTS:
2085 **
2086 **      current lex buffer
2087 **	punc_token and punc_char tables
2088 **
2089 **  IMPLICIT OUTPUTS:
2090 **
2091 **      none
2092 **
2093 **  FUNCTION VALUE:
2094 **
2095 **      void
2096 **
2097 **  SIDE EFFECTS:
2098 **
2099 **      issue a diagnostic
2100 **
2101 **--
2102 **/
2103 
lex_issue_error(restart_token)2104 void  lex_issue_error( restart_token )
2105 
2106 int		restart_token;
2107 
2108 {
2109 
2110     int		    i, token_num;
2111     unsigned char   c_char = '.';
2112     char	    * tok_name;
2113 
2114 /*    Find the token number for the restart character in the table.
2115       It should be there.  Get the corresponding character for this
2116       token.    */
2117 
2118     for ( i = 0 ; i<tok_punc_token_num ; i++ )
2119 	{
2120 	if (restart_token == punc_token [i])
2121 	    {
2122 	    c_char = punc_char [i];
2123 	    break;
2124 	    }
2125 	}
2126 
2127 /*    Get the text of the token name which caused the error.    */
2128 
2129     token_num = yylval.b_type;
2130     if ( (token_num < 0) || (token_num > tok_num_tokens) )
2131 	tok_name = "UNKNOWN_TOKEN";
2132     else
2133 	tok_name = tok_token_name_table[token_num];
2134 
2135 /*    Issue the error.    */
2136 
2137     diag_issue_diagnostic
2138 	(d_syntax,
2139 	 yylval.az_source_record,
2140 	 yylval.b_source_pos,
2141 	 tok_name,
2142 	 c_char);
2143 
2144 }
2145 
2146 
2147 
2148 /*
2149 **
2150 **  LOCAL FUNCTIONS
2151 **
2152 */
2153 
2154 
2155 /*
2156 **++
2157 **  FUNCTIONAL DESCRIPTION:
2158 **
2159 **      Issue an error for an illegal control character.
2160 **
2161 **  FORMAL PARAMETERS:
2162 **
2163 **      c_char
2164 **
2165 **  IMPLICIT INPUTS:
2166 **
2167 **      current source position
2168 **
2169 **  IMPLICIT OUTPUTS:
2170 **
2171 **      none
2172 **
2173 **  FUNCTION VALUE:
2174 **
2175 **      void
2176 **
2177 **  SIDE EFFECTS:
2178 **
2179 **      issue a diagnostic
2180 **
2181 **--
2182 **/
2183 
issue_control_char_diagnostic(unsigned char c_char)2184 void issue_control_char_diagnostic
2185 
2186 	(unsigned char c_char )
2187 
2188 {
2189 
2190     diag_issue_diagnostic
2191 	( d_control_char,
2192 	  src_az_current_source_record,
2193 	  src_az_current_source_buffer->w_current_position - 1,
2194 	  c_char );
2195 
2196     src_az_current_source_record->b_flags |= src_m_unprintable_chars;
2197 
2198     return;
2199 
2200 }
2201 
2202 
2203 /*
2204 **++
2205 **  FUNCTIONAL DESCRIPTION:
2206 **
2207 **      This function obtains another lexical buffer.
2208 **
2209 **  FORMAL PARAMETERS:
2210 **
2211 **      az_current_lex_buffer
2212 **
2213 **  IMPLICIT INPUTS:
2214 **
2215 **      none
2216 **
2217 **  IMPLICIT OUTPUTS:
2218 **
2219 **      none
2220 **
2221 **  FUNCTION VALUE:
2222 **
2223 **      address of a new lexical buffer
2224 **
2225 **  SIDE EFFECTS:
2226 **
2227 **      another lexical buffer may be allocated
2228 **
2229 **--
2230 **/
2231 
get_lex_buffer(az_current_lex_buffer)2232 static lex_buffer_type *get_lex_buffer( az_current_lex_buffer )
2233 
2234 lex_buffer_type *az_current_lex_buffer;
2235 
2236 {
2237     lex_buffer_type *az_lex_buffer;
2238 
2239     /* check to see if another buffer is available - if not allocate one */
2240 
2241     az_lex_buffer = az_current_lex_buffer->az_next_buffer;
2242 
2243     if (az_lex_buffer == NULL)
2244     {
2245 /* BEGIN OSF Fix CR 4749 */
2246       /* The lex algorithm has the potential to write
2247        * into index l_max_lex_buffer_pos + 1, so allocate l_max_lex_buffer_pos
2248        * plus 2 positions in buffer.
2249        */
2250 	az_lex_buffer =
2251 	    (lex_buffer_type *)_get_memory( l_max_lex_buffer_pos + 2 +
2252 					   sizeof(lex_buffer_type *));
2253 /* END OSF Fix CR 4749 */
2254 	az_current_lex_buffer->az_next_buffer = az_lex_buffer;
2255 	az_lex_buffer->az_next_buffer = NULL;
2256     }
2257 
2258     return az_lex_buffer;
2259 
2260 }
2261 
2262 
2263 /*
2264 **++
2265 **  FUNCTIONAL DESCRIPTION:
2266 **
2267 **      This procedure will change all the unprintable characters in
2268 **	a buffer to lex_k_unprint_sub.
2269 **
2270 **  FORMAL PARAMETERS:
2271 **
2272 **      buffer		buffer to be checked
2273 **	length		length of the buffer
2274 **	flags		lex_m_filter_xxx flags to indicate if additional
2275 **			characters should be filtered.
2276 **
2277 **  IMPLICIT INPUTS:
2278 **
2279 **      class_table	gives the unprintable characters
2280 **
2281 **  IMPLICIT OUTPUTS:
2282 **
2283 **      none
2284 **
2285 **  FUNCTION VALUE:
2286 **
2287 **      void
2288 **
2289 **  SIDE EFFECTS:
2290 **
2291 **      none
2292 **
2293 **--
2294 **/
2295 
lex_filter_unprintable_chars(unsigned char * buffer,int length,unsigned long flags)2296 void lex_filter_unprintable_chars
2297 (unsigned char	*buffer,
2298  int		length,
2299  unsigned long	flags )
2300 {
2301     int		    i;
2302 
2303     for (i=0;  i<length;  i++)
2304     {
2305 	if ((class_table[ buffer[ i ] ] == class_illegal) ||
2306 	    (buffer[ i ] == 12) ||  /* form feed */
2307 	    (buffer[ i ] == 0) ||    /* null */
2308 	    ( (flags & lex_m_filter_tab)
2309 		&& buffer[ i ] == 9 )/* horizontal tab */
2310 	   )
2311 	    buffer[ i ] = lex_k_unprint_sub;
2312     }
2313 
2314 }
2315 
2316 
2317 #if debug_version
2318 /*
2319 **++
2320 **  FUNCTIONAL DESCRIPTION:
2321 **
2322 **      This procedure will dump a token.
2323 **
2324 **  FORMAL PARAMETERS:
2325 **
2326 **
2327 **  IMPLICIT INPUTS:
2328 **
2329 **
2330 **  IMPLICIT OUTPUTS:
2331 **
2332 **
2333 **  SIDE EFFECTS:
2334 **
2335 **
2336 **--
2337 **/
2338 
dump_token(az_current_lex_buffer,l_lex_pos)2339 static void	dump_token( az_current_lex_buffer,
2340 		    l_lex_pos)
2341 
2342 lex_buffer_type	*az_current_lex_buffer;
2343 int		l_lex_pos;
2344 {
2345     unsigned char    c_buffer[l_max_lex_buffer_pos +2];
2346     lex_buffer_type *az_lex_buffer;
2347     int		    i;
2348     int		    last;
2349     int		    last_buffer;
2350     unsigned char   c_char;
2351 
2352 
2353     _debug_output
2354 	 ("token: %d start: %d, %d  end: %d, %d \n",
2355 	   yylval.b_type,
2356 	   yylval.az_source_record->w_line_number,
2357 	   yylval.b_source_pos,
2358 	   src_az_current_source_record->w_line_number,
2359 	   src_az_current_source_buffer->w_current_position );
2360 
2361     for (az_lex_buffer = az_first_lex_buffer;
2362 	 ;
2363 	 az_lex_buffer = az_lex_buffer->az_next_buffer)
2364     {
2365 	last_buffer = ( az_lex_buffer == az_current_lex_buffer );
2366 	if (last_buffer)
2367 	    last = l_lex_pos;
2368 	else
2369 	    last = l_max_lex_buffer_pos+1;
2370 
2371 	_move( c_buffer, az_lex_buffer->c_text, last );
2372 
2373 	lex_filter_unprintable_chars (c_buffer, last, 0);
2374 
2375 	c_buffer[ last ] = 0;
2376 	_debug_output("%s \n", c_buffer);
2377 
2378         if (last_buffer)
2379 	    return;
2380     }
2381 }
2382 #endif
2383 
2384 /*
2385 **++
2386 **  FUNCTIONAL DESCRIPTION:
2387 **
2388 **      This function converts a null terminated string to a
2389 **	longword integer in the range 0..2**31-1.  If the ascii value is
2390 **	outside that range, the external variable errno is set to ERANGE
2391 **	and the value returned is 2**31-1
2392 **
2393 **  FORMAL PARAMETERS:
2394 **
**      c_text		null-terminated string holding integer in ascii
2396 **
2397 **  IMPLICIT INPUTS:
2398 **
2399 **      none
2400 **
2401 **  IMPLICIT OUTPUTS:
2402 **
2403 **      errno		set if overflow occurs
2404 **
2405 **  FUNCTION VALUE:
2406 **
2407 **      long 		integer value of c_text
2408 **
2409 **  SIDE EFFECTS:
2410 **
2411 **      none
2412 **
2413 **--
2414 **/
2415 
/* Largest value the conversion may return (2**31 - 1) ... */
#define k_max_int 2147483647
/* ... and that limit divided by 10, used to detect overflow up front. */
#define k_max_div_10 214748364

/*
 * Convert a null-terminated string of ASCII decimal digits to a long in
 * the range 0..k_max_int.
 *
 *   c_text	digits to convert; the caller is expected to pass digits
 *		only, no sign or white space
 *
 * Returns the converted value.  If the value does not fit, errno is set
 * to ERANGE and k_max_int is returned; errno is left untouched otherwise.
 *
 * The overflow test is made before every digit is appended, so digits
 * that follow a value already sitting at the limit are detected rather
 * than silently dropped.
 */
long		cvt_ascii_to_long(unsigned char XmConst *c_text)
{
    unsigned long	l_value = 0;
    int			pos;

    for (pos = 0;  c_text[ pos ] != 0;  pos++)
    {
	unsigned long	l_digit = (unsigned long)(c_text[ pos ] - '0');

	/*
	 * Appending l_digit would exceed k_max_int when the value so far
	 * is above k_max_int/10, or is exactly k_max_int/10 and the digit
	 * is larger than the last digit of k_max_int.
	 */
	if ((l_value > k_max_div_10) ||
	    ((l_value == k_max_div_10) && (l_digit > (k_max_int % 10))))
	{
	    errno = ERANGE;
	    return k_max_int;
	}

	l_value = (l_value * 10) + l_digit;
    }

    return (long) l_value;
}
2452 
2453 
2454 /*
2455 **++
2456 **  FUNCTIONAL DESCRIPTION:
2457 **
2458 **      This function creates a symbol entry for a primitive string.
2459 **
2460 **  FORMAL PARAMETERS:
2461 **
2462 **      l_size	    number of bytes to allocate
2463 **	l_charset   charset of the string (token value)
2464 **	az_charset_entry   charset of the string (symbol table value entry)
2465 **
2466 **  IMPLICIT INPUTS:
2467 **
2468 **      yylval
2469 **
2470 **  IMPLICIT OUTPUTS:
2471 **
2472 **      none
2473 **
2474 **  FUNCTION VALUE:
2475 **
2476 **      symbol node created
2477 **
2478 **  SIDE EFFECTS:
2479 **
2480 **      none
2481 **
2482 **--
2483 **/
2484 
create_str_entry(l_size,l_charset,az_charset_entry)2485 sym_value_entry_type *create_str_entry (l_size, l_charset, az_charset_entry)
2486 
2487     int				l_size;
2488     int				l_charset;
2489     sym_value_entry_type	*az_charset_entry;
2490 
2491 
2492 {
2493 
2494     sym_value_entry_type	*node;
2495     int				charset;	/* from sym_k_..._charset */
2496     unsigned char	     	direction;	/* writing direction */
2497 
2498     /*
2499      *	Determine character set and writing direction
2500      */
2501     if (l_charset != lex_k_userdefined_charset)
2502 	{
2503 	charset = sem_map_subclass_to_charset( l_charset );
2504 	direction = charset_writing_direction_table[charset];
2505 	}
2506     else
2507 	{
2508 	charset = sym_k_userdefined_charset;
2509 	direction = az_charset_entry->b_direction;
2510 	}
2511 
2512 
2513     /* size of entry
2514      *	sym_k_value_entry for common part of a value entry
2515      *	l_size for the string
2516      *	1 for the null on string
2517      */
2518 
2519     node = (sym_value_entry_type *)
2520 	    sem_allocate_node
2521 		( sym_k_value_entry,
2522 		  sym_k_value_entry_size );
2523 
2524     node->value.c_value = XtCalloc(1, l_size + 1);
2525 
2526     node->header.az_src_rec = yylval.az_source_record;
2527     node->header.b_src_pos = yylval.b_source_pos;
2528     node->header.b_end_pos = yylval.b_source_end;
2529 
2530     node->b_type = sym_k_char_8_value;
2531     node->w_length = l_size;
2532     node->b_charset = charset;
2533     node->b_direction = direction;
2534     node->az_charset_value = az_charset_entry;
2535     node->obj_header.b_flags = sym_m_private | sym_m_builtin;
2536 
2537     return node;
2538 
2539     }
2540 
2541