1 /*
2 * Copyright (c) 1985 Sun Microsystems, Inc.
3 * Copyright (c) 1980, 1993
4 * The Regents of the University of California. All rights reserved.
5 * All rights reserved.
6 *
7 * %sccs.include.redist.c%
8 */
9
#if !defined(lint)
/* SCCS version identification string; compiled out when lint is defined. */
static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 06/06/93";
#endif /* !lint */
13
14 /*
15 * Here we have the token scanner for indent. It scans off one token and puts
16 * it in the global variable "token". It returns a code, indicating the type
17 * of token scanned.
18 */
19
20 #include <stdio.h>
21 #include <ctype.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include "indent_globs.h"
25 #include "indent_codes.h"
26
/*
 * Character classes stored in the chartype table.  A table value of 0
 * means the character belongs to neither class.
 */
enum {
    alphanum = 1,               /* may appear in an identifier or number */
    opchar = 3                  /* may appear in an operator */
};
29
/*
 * One reserved word known to the scanner.
 */
struct templ {
    char *rwd;                  /* keyword spelling; a null pointer marks the
                                 * end of the table */
    int rwcode;                 /* keyword class that lexi() switches on */
};

/*
 * Table of reserved words, terminated by an entry whose rwd is null.  The
 * array is deliberately larger than the built-in list so that addkey() can
 * append entries at run time.
 *
 * Keyword classes as interpreted by lexi():
 *   1  switch
 *   2  case, default
 *   3  struct, union, enum
 *   4  declaration keywords
 *   5  if, while, for
 *   6  else, do
 *   7  sizeof
 *   anything else is returned as an ordinary identifier
 */
struct templ specials[100] =
{
    {"switch", 1},
    {"case", 2},
    {"break", 0},
    {"struct", 3},
    {"union", 3},
    {"enum", 3},
    {"default", 2},
    {"int", 4},
    {"char", 4},
    {"float", 4},
    {"double", 4},
    {"long", 4},
    {"short", 4},
    {"typedef", 4},             /* was misspelled "typdef", so typedef was
                                 * never matched */
    {"unsigned", 4},
    {"register", 4},
    {"static", 4},
    {"global", 4},
    {"extern", 4},
    {"void", 4},
    {"goto", 0},
    {"return", 0},
    {"if", 5},
    {"while", 5},
    {"for", 5},
    {"else", 6},
    {"do", 6},
    {"sizeof", 7},
    {0, 0}
};
67
/*
 * Scanner class of each 7-bit character code, indexed by the code itself:
 * 1 for characters that can be part of an identifier or number, 3 for
 * characters that can be part of an operator, 0 for everything else.
 * Each row below covers sixteen consecutive codes.
 */
char chartype[128] =
{
    /* 0x00 - 0x0f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 - 0x1f: control characters */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 - 0x2f: space and punctuation; the dollar sign is alphanumeric */
    0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3,
    /* 0x30 - 0x3f: digits, colon, semicolon, then four operator characters */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3,
    /* 0x40 - 0x4f: at-sign, upper-case A through O */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 - 0x5f: upper-case P through Z, brackets, caret, underscore */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1,
    /* 0x60 - 0x6f: backquote, lower-case a through o */
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 - 0x7f: lower-case p through z, braces, bar, tilde, DEL */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0
};
89
90
91
92
93 int
lexi()94 lexi()
95 {
96 int unary_delim; /* this is set to 1 if the current token
97 *
98 * forces a following operator to be unary */
99 static int last_code; /* the last token type returned */
100 static int l_struct; /* set to 1 if the last token was 'struct' */
101 int code; /* internal code to be returned */
102 char qchar; /* the delimiter character for a string */
103
104 e_token = s_token; /* point to start of place to save token */
105 unary_delim = false;
106 ps.col_1 = ps.last_nl; /* tell world that this token started in
107 * column 1 iff the last thing scanned was nl */
108 ps.last_nl = false;
109
110 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
111 ps.col_1 = false; /* leading blanks imply token is not in column
112 * 1 */
113 if (++buf_ptr >= buf_end)
114 fill_buffer();
115 }
116
117 /* Scan an alphanumeric token */
118 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
119 /*
120 * we have a character or number
121 */
122 register char *j; /* used for searching thru list of
123 *
124 * reserved words */
125 register struct templ *p;
126
127 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) {
128 int seendot = 0,
129 seenexp = 0;
130 if (*buf_ptr == '0' &&
131 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) {
132 *e_token++ = *buf_ptr++;
133 *e_token++ = *buf_ptr++;
134 while (isxdigit(*buf_ptr)) {
135 CHECK_SIZE_TOKEN;
136 *e_token++ = *buf_ptr++;
137 }
138 }
139 else
140 while (1) {
141 if (*buf_ptr == '.')
142 if (seendot)
143 break;
144 else
145 seendot++;
146 CHECK_SIZE_TOKEN;
147 *e_token++ = *buf_ptr++;
148 if (!isdigit(*buf_ptr) && *buf_ptr != '.')
149 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp)
150 break;
151 else {
152 seenexp++;
153 seendot++;
154 CHECK_SIZE_TOKEN;
155 *e_token++ = *buf_ptr++;
156 if (*buf_ptr == '+' || *buf_ptr == '-')
157 *e_token++ = *buf_ptr++;
158 }
159 }
160 if (*buf_ptr == 'L' || *buf_ptr == 'l')
161 *e_token++ = *buf_ptr++;
162 }
163 else
164 while (chartype[*buf_ptr] == alphanum) { /* copy it over */
165 CHECK_SIZE_TOKEN;
166 *e_token++ = *buf_ptr++;
167 if (buf_ptr >= buf_end)
168 fill_buffer();
169 }
170 *e_token++ = '\0';
171 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */
172 if (++buf_ptr >= buf_end)
173 fill_buffer();
174 }
175 ps.its_a_keyword = false;
176 ps.sizeof_keyword = false;
177 if (l_struct) { /* if last token was 'struct', then this token
178 * should be treated as a declaration */
179 l_struct = false;
180 last_code = ident;
181 ps.last_u_d = true;
182 return (decl);
183 }
184 ps.last_u_d = false; /* Operator after indentifier is binary */
185 last_code = ident; /* Remember that this is the code we will
186 * return */
187
188 /*
189 * This loop will check if the token is a keyword.
190 */
191 for (p = specials; (j = p->rwd) != 0; p++) {
192 register char *p = s_token; /* point at scanned token */
193 if (*j++ != *p++ || *j++ != *p++)
194 continue; /* This test depends on the fact that
195 * identifiers are always at least 1 character
196 * long (ie. the first two bytes of the
197 * identifier are always meaningful) */
198 if (p[-1] == 0)
199 break; /* If its a one-character identifier */
200 while (*p++ == *j)
201 if (*j++ == 0)
202 goto found_keyword; /* I wish that C had a multi-level
203 * break... */
204 }
205 if (p->rwd) { /* we have a keyword */
206 found_keyword:
207 ps.its_a_keyword = true;
208 ps.last_u_d = true;
209 switch (p->rwcode) {
210 case 1: /* it is a switch */
211 return (swstmt);
212 case 2: /* a case or default */
213 return (casestmt);
214
215 case 3: /* a "struct" */
216 if (ps.p_l_follow)
217 break; /* inside parens: cast */
218 l_struct = true;
219
220 /*
221 * Next time around, we will want to know that we have had a
222 * 'struct'
223 */
224 case 4: /* one of the declaration keywords */
225 if (ps.p_l_follow) {
226 ps.cast_mask |= 1 << ps.p_l_follow;
227 break; /* inside parens: cast */
228 }
229 last_code = decl;
230 return (decl);
231
232 case 5: /* if, while, for */
233 return (sp_paren);
234
235 case 6: /* do, else */
236 return (sp_nparen);
237
238 case 7:
239 ps.sizeof_keyword = true;
240 default: /* all others are treated like any other
241 * identifier */
242 return (ident);
243 } /* end of switch */
244 } /* end of if (found_it) */
245 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) {
246 register char *tp = buf_ptr;
247 while (tp < buf_end)
248 if (*tp++ == ')' && (*tp == ';' || *tp == ','))
249 goto not_proc;
250 strncpy(ps.procname, token, sizeof ps.procname - 1);
251 ps.in_parameter_declaration = 1;
252 rparen_count = 1;
253 not_proc:;
254 }
255 /*
256 * The following hack attempts to guess whether or not the current
257 * token is in fact a declaration keyword -- one that has been
258 * typedefd
259 */
260 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_')
261 && !ps.p_l_follow
262 && !ps.block_init
263 && (ps.last_token == rparen || ps.last_token == semicolon ||
264 ps.last_token == decl ||
265 ps.last_token == lbrace || ps.last_token == rbrace)) {
266 ps.its_a_keyword = true;
267 ps.last_u_d = true;
268 last_code = decl;
269 return decl;
270 }
271 if (last_code == decl) /* if this is a declared variable, then
272 * following sign is unary */
273 ps.last_u_d = true; /* will make "int a -1" work */
274 last_code = ident;
275 return (ident); /* the ident is not in the list */
276 } /* end of procesing for alpanum character */
277
278 /* Scan a non-alphanumeric token */
279
280 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is
281 * moved here */
282 *e_token = '\0';
283 if (++buf_ptr >= buf_end)
284 fill_buffer();
285
286 switch (*token) {
287 case '\n':
288 unary_delim = ps.last_u_d;
289 ps.last_nl = true; /* remember that we just had a newline */
290 code = (had_eof ? 0 : newline);
291
292 /*
293 * if data has been exausted, the newline is a dummy, and we should
294 * return code to stop
295 */
296 break;
297
298 case '\'': /* start of quoted character */
299 case '"': /* start of string */
300 qchar = *token;
301 if (troff) {
302 e_token[-1] = '`';
303 if (qchar == '"')
304 *e_token++ = '`';
305 e_token = chfont(&bodyf, &stringf, e_token);
306 }
307 do { /* copy the string */
308 while (1) { /* move one character or [/<char>]<char> */
309 if (*buf_ptr == '\n') {
310 printf("%d: Unterminated literal\n", line_no);
311 goto stop_lit;
312 }
313 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop,
314 * since CHECK_SIZE guarantees that there
315 * are at least 5 entries left */
316 *e_token = *buf_ptr++;
317 if (buf_ptr >= buf_end)
318 fill_buffer();
319 if (*e_token == BACKSLASH) { /* if escape, copy extra char */
320 if (*buf_ptr == '\n') /* check for escaped newline */
321 ++line_no;
322 if (troff) {
323 *++e_token = BACKSLASH;
324 if (*buf_ptr == BACKSLASH)
325 *++e_token = BACKSLASH;
326 }
327 *++e_token = *buf_ptr++;
328 ++e_token; /* we must increment this again because we
329 * copied two chars */
330 if (buf_ptr >= buf_end)
331 fill_buffer();
332 }
333 else
334 break; /* we copied one character */
335 } /* end of while (1) */
336 } while (*e_token++ != qchar);
337 if (troff) {
338 e_token = chfont(&stringf, &bodyf, e_token - 1);
339 if (qchar == '"')
340 *e_token++ = '\'';
341 }
342 stop_lit:
343 code = ident;
344 break;
345
346 case ('('):
347 case ('['):
348 unary_delim = true;
349 code = lparen;
350 break;
351
352 case (')'):
353 case (']'):
354 code = rparen;
355 break;
356
357 case '#':
358 unary_delim = ps.last_u_d;
359 code = preesc;
360 break;
361
362 case '?':
363 unary_delim = true;
364 code = question;
365 break;
366
367 case (':'):
368 code = colon;
369 unary_delim = true;
370 break;
371
372 case (';'):
373 unary_delim = true;
374 code = semicolon;
375 break;
376
377 case ('{'):
378 unary_delim = true;
379
380 /*
381 * if (ps.in_or_st) ps.block_init = 1;
382 */
383 /* ? code = ps.block_init ? lparen : lbrace; */
384 code = lbrace;
385 break;
386
387 case ('}'):
388 unary_delim = true;
389 /* ? code = ps.block_init ? rparen : rbrace; */
390 code = rbrace;
391 break;
392
393 case 014: /* a form feed */
394 unary_delim = ps.last_u_d;
395 ps.last_nl = true; /* remember this so we can set 'ps.col_1'
396 * right */
397 code = form_feed;
398 break;
399
400 case (','):
401 unary_delim = true;
402 code = comma;
403 break;
404
405 case '.':
406 unary_delim = false;
407 code = period;
408 break;
409
410 case '-':
411 case '+': /* check for -, +, --, ++ */
412 code = (ps.last_u_d ? unary_op : binary_op);
413 unary_delim = true;
414
415 if (*buf_ptr == token[0]) {
416 /* check for doubled character */
417 *e_token++ = *buf_ptr++;
418 /* buffer overflow will be checked at end of loop */
419 if (last_code == ident || last_code == rparen) {
420 code = (ps.last_u_d ? unary_op : postop);
421 /* check for following ++ or -- */
422 unary_delim = false;
423 }
424 }
425 else if (*buf_ptr == '=')
426 /* check for operator += */
427 *e_token++ = *buf_ptr++;
428 else if (*buf_ptr == '>') {
429 /* check for operator -> */
430 *e_token++ = *buf_ptr++;
431 if (!pointer_as_binop) {
432 unary_delim = false;
433 code = unary_op;
434 ps.want_blank = false;
435 }
436 }
437 break; /* buffer overflow will be checked at end of
438 * switch */
439
440 case '=':
441 if (ps.in_or_st)
442 ps.block_init = 1;
443 #ifdef undef
444 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */
445 e_token[-1] = *buf_ptr++;
446 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr)
447 *e_token++ = *buf_ptr++;
448 *e_token++ = '='; /* Flip =+ to += */
449 *e_token = 0;
450 }
451 #else
452 if (*buf_ptr == '=') {/* == */
453 *e_token++ = '='; /* Flip =+ to += */
454 buf_ptr++;
455 *e_token = 0;
456 }
457 #endif
458 code = binary_op;
459 unary_delim = true;
460 break;
461 /* can drop thru!!! */
462
463 case '>':
464 case '<':
465 case '!': /* ops like <, <<, <=, !=, etc */
466 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
467 *e_token++ = *buf_ptr;
468 if (++buf_ptr >= buf_end)
469 fill_buffer();
470 }
471 if (*buf_ptr == '=')
472 *e_token++ = *buf_ptr++;
473 code = (ps.last_u_d ? unary_op : binary_op);
474 unary_delim = true;
475 break;
476
477 default:
478 if (token[0] == '/' && *buf_ptr == '*') {
479 /* it is start of comment */
480 *e_token++ = '*';
481
482 if (++buf_ptr >= buf_end)
483 fill_buffer();
484
485 code = comment;
486 unary_delim = ps.last_u_d;
487 break;
488 }
489 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') {
490 /*
491 * handle ||, &&, etc, and also things as in int *****i
492 */
493 *e_token++ = *buf_ptr;
494 if (++buf_ptr >= buf_end)
495 fill_buffer();
496 }
497 code = (ps.last_u_d ? unary_op : binary_op);
498 unary_delim = true;
499
500
501 } /* end of switch */
502 if (code != newline) {
503 l_struct = false;
504 last_code = code;
505 }
506 if (buf_ptr >= buf_end) /* check for input buffer empty */
507 fill_buffer();
508 ps.last_u_d = unary_delim;
509 *e_token = '\0'; /* null terminate the token */
510 return (code);
511 }
512
513 /*
514 * Add the given keyword to the keyword table, using val as the keyword type
515 */
addkey(key,val)516 addkey(key, val)
517 char *key;
518 {
519 register struct templ *p = specials;
520 while (p->rwd)
521 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
522 return;
523 else
524 p++;
525 if (p >= specials + sizeof specials / sizeof specials[0])
526 return; /* For now, table overflows are silently
527 * ignored */
528 p->rwd = key;
529 p->rwcode = val;
530 p[1].rwd = 0;
531 p[1].rwcode = 0;
532 return;
533 }
534