1 /* 2 * Copyright (c) 1980 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 */ 6 7 #ifndef lint 8 static char sccsid[] = "@(#)lexi.c 5.4 (Berkeley) 09/10/85"; 9 #endif not lint 10 11 /*- 12 * 13 * Copyright (C) 1976 14 * by the 15 * Board of Trustees 16 * of the 17 * University of Illinois 18 * 19 * All rights reserved 20 * 21 * 22 * NAME: 23 * lexi 24 * 25 * FUNCTION: 26 * This is the token scanner for indent 27 * 28 * ALGORITHM: 29 * 1) Strip off intervening blanks and/or tabs. 30 * 2) If it is an alphanumeric token, move it to the token buffer "token". 31 * Check if it is a special reserved word that indent will want to 32 * know about. 33 * 3) Non-alphanumeric tokens are handled with a big switch statement. A 34 * flag is kept to remember if the last token was a "unary delimiter", 35 * which forces a following operator to be unary as opposed to binary. 36 * 37 * PARAMETERS: 38 * None 39 * 40 * RETURNS: 41 * An integer code indicating the type of token scanned. 42 * 43 * GLOBALS: 44 * buf_ptr = 45 * had_eof 46 * ps.last_u_d = Set to true iff this token is a "unary delimiter" 47 * 48 * CALLS: 49 * fill_buffer 50 * printf (lib) 51 * 52 * CALLED BY: 53 * main 54 * 55 * NOTES: 56 * Start of comment is passed back so that the comment can be scanned by 57 * pr_comment. 58 * 59 * Strings and character literals are returned just like identifiers. 60 * 61 * HISTORY: 62 * initial coding November 1976 D A Willcox of CAC 63 * 1/7/77 D A Willcox of CAC Fix to provide proper handling 64 * of "int a -1;" 65 * 66 */ 67 68 /* 69 * Here we have the token scanner for indent. It scans off one token and 70 * puts it in the global variable "token". It returns a code, indicating 71 * the type of token scanned. 72 */ 73 74 #include "indent_globs.h"; 75 #include "indent_codes.h"; 76 #include "ctype.h" 77 78 #define alphanum 1 79 #define opchar 3 80 81 struct templ { 82 char *rwd; 83 int rwcode; 84 }; 85 86 struct templ specials[100] = 87 { 88 "switch", 1, 89 "case", 2, 90 "break", 0, 91 "struct", 3, 92 "union", 3, 93 "enum", 3, 94 "default", 2, 95 "int", 4, 96 "char", 4, 97 "float", 4, 98 "double", 4, 99 "long", 4, 100 "short", 4, 101 "typdef", 4, 102 "unsigned", 4, 103 "register", 4, 104 "static", 4, 105 "global", 4, 106 "extern", 4, 107 "void", 4, 108 "goto", 0, 109 "return", 0, 110 "if", 5, 111 "while", 5, 112 "for", 5, 113 "else", 6, 114 "do", 6, 115 "sizeof", 7, 116 0, 0 117 }; 118 119 char chartype[128] = 120 { /* this is used to facilitate the decision 121 * of what type (alphanumeric, operator) 122 * each character is */ 123 0, 0, 0, 0, 0, 0, 0, 0, 124 0, 0, 0, 0, 0, 0, 0, 0, 125 0, 0, 0, 0, 0, 0, 0, 0, 126 0, 0, 0, 0, 0, 0, 0, 0, 127 0, 3, 0, 0, 0, 3, 3, 0, 128 0, 0, 3, 3, 0, 3, 3, 3, 129 1, 1, 1, 1, 1, 1, 1, 1, 130 1, 1, 0, 0, 3, 3, 3, 3, 131 0, 1, 1, 1, 1, 1, 1, 1, 132 1, 1, 1, 1, 1, 1, 1, 1, 133 1, 1, 1, 1, 1, 1, 1, 1, 134 1, 1, 1, 0, 0, 0, 3, 1, 135 0, 1, 1, 1, 1, 1, 1, 1, 136 1, 1, 1, 1, 1, 1, 1, 1, 137 1, 1, 1, 1, 1, 1, 1, 1, 138 1, 1, 1, 0, 3, 0, 3, 0 139 }; 140 141 142 143 144 int 145 lexi() 146 { 147 register char *tok; /* local pointer to next char in token */ 148 int unary_delim; /* this is set to 1 if the current token 149 * 150 * forces a following operator to be unary */ 151 static int last_code; /* the last token type returned */ 152 static int l_struct; /* set to 1 if the last token was 'struct' */ 153 int code; /* internal code to be returned */ 154 char qchar; /* the delimiter character for a string */ 155 156 tok = token; /* point to start of place to save token */ 157 unary_delim = false; 158 ps.col_1 = ps.last_nl; /* tell world that this token started in 159 * column 1 iff the last thing scanned was 160 * nl */ 161 ps.last_nl = false; 162 163 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 164 ps.col_1 = false; /* leading blanks imply token is not in 165 * column 1 */ 166 if (++buf_ptr >= buf_end) 167 fill_buffer(); 168 } 169 170 /* Scan an alphanumeric token. Note that we must also handle 171 * stuff like "1.0e+03" and "7e-6". */ 172 if (chartype[*buf_ptr & 0177] == alphanum) { /* we have a character 173 * or number */ 174 register char *j; /* used for searching thru list of 175 * reserved words */ 176 register struct templ *p; 177 register int c; 178 179 do { /* copy it over */ 180 *tok++ = *buf_ptr++; 181 if (buf_ptr >= buf_end) 182 fill_buffer(); 183 } while (chartype[c = *buf_ptr & 0177] == alphanum || 184 isdigit(token[0]) && (c == '+' || c == '-') && 185 (tok[-1] == 'e' || tok[-1] == 'E')); 186 *tok++ = '\0'; 187 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 188 if (++buf_ptr >= buf_end) 189 fill_buffer(); 190 } 191 ps.its_a_keyword = false; 192 ps.sizeof_keyword = false; 193 if (l_struct) { /* if last token was 'struct', then this 194 * token should be treated as a 195 * declaration */ 196 l_struct = false; 197 last_code = ident; 198 ps.last_u_d = true; 199 return (decl); 200 } 201 ps.last_u_d = false; /* Operator after indentifier is binary */ 202 last_code = ident; /* Remember that this is the code we will 203 * return */ 204 205 /* 206 * This loop will check if the token is a keyword. 207 */ 208 for (p = specials; (j = p->rwd) != 0; p++) { 209 tok = token; /* point at scanned token */ 210 if (*j++ != *tok++ || *j++ != *tok++) 211 continue; /* This test depends on the fact that 212 * identifiers are always at least 1 213 * character long (ie. the first two bytes 214 * of the identifier are always 215 * meaningful) */ 216 if (tok[-1] == 0) 217 break; /* If its a one-character identifier */ 218 while (*tok++ == *j) 219 if (*j++ == 0) 220 goto found_keyword; /* I wish that C had a multi-level 221 * break... */ 222 } 223 if (p->rwd) { /* we have a keyword */ 224 found_keyword: 225 ps.its_a_keyword = true; 226 ps.last_u_d = true; 227 switch (p->rwcode) { 228 case 1: /* it is a switch */ 229 return (swstmt); 230 case 2: /* a case or default */ 231 return (casestmt); 232 233 case 3: /* a "struct" */ 234 if (ps.p_l_follow) 235 break; /* inside parens: cast */ 236 l_struct = true; 237 238 /* 239 * Next time around, we will want to know that we have 240 * had a 'struct' 241 */ 242 case 4: /* one of the declaration keywords */ 243 if (ps.p_l_follow) { 244 ps.cast_mask |= 1 << ps.p_l_follow; 245 break; /* inside parens: cast */ 246 } 247 last_code = decl; 248 return (decl); 249 250 case 5: /* if, while, for */ 251 return (sp_paren); 252 253 case 6: /* do, else */ 254 return (sp_nparen); 255 256 case 7: 257 ps.sizeof_keyword = true; 258 default: /* all others are treated like any other 259 * identifier */ 260 return (ident); 261 } /* end of switch */ 262 } /* end of if (found_it) */ 263 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0 264 && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) { 265 strncpy(ps.procname, token, sizeof ps.procname - 1); 266 ps.in_parameter_declaration = 1; 267 } 268 269 /* 270 * The following hack attempts to guess whether or not the current 271 * token is in fact a declaration keyword -- one that has been 272 * typedefd 273 */ 274 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr)) 275 && !ps.p_l_follow 276 && (ps.last_token == rparen || ps.last_token == semicolon || 277 ps.last_token == decl || 278 ps.last_token == lbrace || ps.last_token == rbrace)) { 279 ps.its_a_keyword = true; 280 ps.last_u_d = true; 281 last_code = decl; 282 return decl; 283 } 284 if (last_code == decl) /* if this is a declared variable, then 285 * following sign is unary */ 286 ps.last_u_d = true; /* will make "int a -1" work */ 287 last_code = ident; 288 return (ident); /* the ident is not in the list */ 289 } /* end of procesing for alpanum character */ 290 /* Scan a non-alphanumeric token */ 291 292 *tok++ = *buf_ptr; /* if it is only a one-character token, it 293 * is moved here */ 294 *tok = '\0'; 295 if (++buf_ptr >= buf_end) 296 fill_buffer(); 297 298 switch (*token) { 299 case '\n': 300 unary_delim = ps.last_u_d; 301 ps.last_nl = true; /* remember that we just had a newline */ 302 code = (had_eof ? 0 : newline); 303 304 /* 305 * if data has been exausted, the newline is a dummy, and we 306 * should return code to stop 307 */ 308 break; 309 310 case '\'': /* start of quoted character */ 311 case '"': /* start of string */ 312 qchar = *token; 313 if (troff) { 314 tok[-1] = '`'; 315 if (qchar == '"') 316 *tok++ = '`'; 317 *tok++ = BACKSLASH; 318 *tok++ = 'f'; 319 *tok++ = 'L'; 320 } 321 do { /* copy the string */ 322 while (1) { /* move one character or [/<char>]<char> */ 323 if (*buf_ptr == '\n') { 324 printf("%d: Unterminated literal\n", line_no); 325 goto stop_lit; 326 } 327 *tok = *buf_ptr++; 328 if (buf_ptr >= buf_end) 329 fill_buffer(); 330 if (had_eof || ((tok - token) > (bufsize - 2))) { 331 printf("Unterminated literal\n"); 332 ++tok; 333 goto stop_lit; 334 /* get outof literal copying loop */ 335 } 336 if (*tok == BACKSLASH) { /* if escape, copy extra 337 * char */ 338 if (*buf_ptr == '\n') /* check for escaped 339 * newline */ 340 ++line_no; 341 if (troff) { 342 *++tok = BACKSLASH; 343 if (*buf_ptr == BACKSLASH) 344 *++tok = BACKSLASH; 345 } 346 *++tok = *buf_ptr++; 347 ++tok; /* we must increment this again because we 348 * copied two chars */ 349 if (buf_ptr >= buf_end) 350 fill_buffer(); 351 } 352 else 353 break; /* we copied one character */ 354 } /* end of while (1) */ 355 } while (*tok++ != qchar); 356 if (troff) { 357 tok[-1] = BACKSLASH; 358 *tok++ = 'f'; 359 *tok++ = 'R'; 360 *tok++ = '\''; 361 if (qchar == '"') 362 *tok++ = '\''; 363 } 364 stop_lit: 365 code = ident; 366 break; 367 368 case ('('): 369 case ('['): 370 unary_delim = true; 371 code = lparen; 372 break; 373 374 case (')'): 375 case (']'): 376 code = rparen; 377 break; 378 379 case '#': 380 unary_delim = ps.last_u_d; 381 code = preesc; 382 break; 383 384 case '?': 385 unary_delim = true; 386 code = question; 387 break; 388 389 case (':'): 390 code = colon; 391 unary_delim = true; 392 break; 393 394 case (';'): 395 unary_delim = true; 396 code = semicolon; 397 break; 398 399 case ('{'): 400 unary_delim = true; 401 402 /* 403 * if (ps.in_or_st) ps.block_init = 1; 404 */ 405 code = ps.block_init ? lparen : lbrace; 406 break; 407 408 case ('}'): 409 unary_delim = true; 410 code = ps.block_init ? rparen : rbrace; 411 break; 412 413 case 014: /* a form feed */ 414 unary_delim = ps.last_u_d; 415 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 416 * right */ 417 code = form_feed; 418 break; 419 420 case (','): 421 unary_delim = true; 422 code = comma; 423 break; 424 425 case '.': 426 unary_delim = false; 427 code = period; 428 break; 429 430 case '-': 431 case '+': /* check for -, +, --, ++ */ 432 code = (ps.last_u_d ? unary_op : binary_op); 433 unary_delim = true; 434 435 if (*buf_ptr == token[0]) { 436 /* check for doubled character */ 437 *tok++ = *buf_ptr++; 438 /* buffer overflow will be checked at end of loop */ 439 if (last_code == ident || last_code == rparen) { 440 code = (ps.last_u_d ? unary_op : postop); 441 /* check for following ++ or -- */ 442 unary_delim = false; 443 } 444 } 445 else if (*buf_ptr == '=') 446 /* check for operator += */ 447 *tok++ = *buf_ptr++; 448 else if (token[0] == '-' && *buf_ptr == '>') { 449 /* check for operator -> */ 450 *tok++ = *buf_ptr++; 451 if (!pointer_as_binop) { 452 code = unary_op; 453 unary_delim = false; 454 ps.want_blank = false; 455 } 456 } 457 /* buffer overflow will be checked at end of switch */ 458 459 break; 460 461 case '=': 462 if (ps.in_or_st) 463 ps.block_init = 1; 464 if (chartype[*buf_ptr] == opchar) { /* we have two char 465 * assignment */ 466 tok[-1] = *buf_ptr++; 467 if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr) 468 *tok++ = *buf_ptr++; 469 *tok++ = '='; /* Flip =+ to += */ 470 *tok = 0; 471 } 472 code = binary_op; 473 unary_delim = true; 474 break; 475 /* can drop thru!!! */ 476 477 case '>': 478 case '<': 479 case '!': /* ops like <, <<, <=, !=, etc */ 480 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 481 *tok++ = *buf_ptr; 482 if (++buf_ptr >= buf_end) 483 fill_buffer(); 484 } 485 if (*buf_ptr == '=') 486 *tok++ = *buf_ptr++; 487 code = (ps.last_u_d ? unary_op : binary_op); 488 unary_delim = true; 489 break; 490 491 default: 492 if (token[0] == '/' && *buf_ptr == '*') { 493 /* it is start of comment */ 494 *tok++ = '*'; 495 496 if (++buf_ptr >= buf_end) 497 fill_buffer(); 498 499 code = comment; 500 unary_delim = ps.last_u_d; 501 break; 502 } 503 while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') { 504 /* handle ||, &&, etc, and also things as in int *****i */ 505 *tok++ = *buf_ptr; 506 if (++buf_ptr >= buf_end) 507 fill_buffer(); 508 } 509 code = (ps.last_u_d ? unary_op : binary_op); 510 unary_delim = true; 511 512 513 } /* end of switch */ 514 if (code != newline) { 515 l_struct = false; 516 last_code = code; 517 } 518 if (buf_ptr >= buf_end) /* check for input buffer empty */ 519 fill_buffer(); 520 ps.last_u_d = unary_delim; 521 *tok = '\0'; /* null terminate the token */ 522 return (code); 523 }; 524 525 /* Add the given keyword to the keyword table, using val as the keyword type 526 */ 527 addkey (key, val) 528 char *key; 529 { 530 register struct templ *p = specials; 531 while (p->rwd) 532 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 533 return; 534 else 535 p++; 536 if (p >= specials + sizeof specials / sizeof specials[0]) 537 return; /* For now, table overflows are silently 538 ignored */ 539 p->rwd = key; 540 p->rwcode = val; 541 p[1].rwd = 0; 542 p[1].rwcode = 0; 543 return; 544 } 545