1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980 The Regents of the University of California. 4 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 5 * All rights reserved. 6 * 7 * %sccs.include.redist.c% 8 */ 9 10 #ifndef lint 11 static char sccsid[] = "@(#)lexi.c 5.15 (Berkeley) 06/01/90"; 12 #endif /* not lint */ 13 14 /* 15 * Here we have the token scanner for indent. It scans off one token and puts 16 * it in the global variable "token". It returns a code, indicating the type 17 * of token scanned. 18 */ 19 20 #include "indent_globs.h" 21 #include "indent_codes.h" 22 #include <ctype.h> 23 24 #define alphanum 1 25 #define opchar 3 26 27 struct templ { 28 char *rwd; 29 int rwcode; 30 }; 31 32 struct templ specials[100] = 33 { 34 "switch", 1, 35 "case", 2, 36 "break", 0, 37 "struct", 3, 38 "union", 3, 39 "enum", 3, 40 "default", 2, 41 "int", 4, 42 "char", 4, 43 "float", 4, 44 "double", 4, 45 "long", 4, 46 "short", 4, 47 "typdef", 4, 48 "unsigned", 4, 49 "register", 4, 50 "static", 4, 51 "global", 4, 52 "extern", 4, 53 "void", 4, 54 "goto", 0, 55 "return", 0, 56 "if", 5, 57 "while", 5, 58 "for", 5, 59 "else", 6, 60 "do", 6, 61 "sizeof", 7, 62 0, 0 63 }; 64 65 char chartype[128] = 66 { /* this is used to facilitate the decision of 67 * what type (alphanumeric, operator) each 68 * character is */ 69 0, 0, 0, 0, 0, 0, 0, 0, 70 0, 0, 0, 0, 0, 0, 0, 0, 71 0, 0, 0, 0, 0, 0, 0, 0, 72 0, 0, 0, 0, 0, 0, 0, 0, 73 0, 3, 0, 0, 1, 3, 3, 0, 74 0, 0, 3, 3, 0, 3, 0, 3, 75 1, 1, 1, 1, 1, 1, 1, 1, 76 1, 1, 0, 0, 3, 3, 3, 3, 77 0, 1, 1, 1, 1, 1, 1, 1, 78 1, 1, 1, 1, 1, 1, 1, 1, 79 1, 1, 1, 1, 1, 1, 1, 1, 80 1, 1, 1, 0, 0, 0, 3, 1, 81 0, 1, 1, 1, 1, 1, 1, 1, 82 1, 1, 1, 1, 1, 1, 1, 1, 83 1, 1, 1, 1, 1, 1, 1, 1, 84 1, 1, 1, 0, 3, 0, 3, 0 85 }; 86 87 88 89 90 int 91 lexi() 92 { 93 int unary_delim; /* this is set to 1 if the current token 94 * 95 * forces a following operator to be unary */ 96 static int last_code; /* the last token type returned */ 97 static int l_struct; /* set to 1 if the last token was 'struct' */ 98 int code; /* internal code to be returned */ 99 char qchar; /* the delimiter character for a string */ 100 101 e_token = s_token; /* point to start of place to save token */ 102 unary_delim = false; 103 ps.col_1 = ps.last_nl; /* tell world that this token started in 104 * column 1 iff the last thing scanned was nl */ 105 ps.last_nl = false; 106 107 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 108 ps.col_1 = false; /* leading blanks imply token is not in column 109 * 1 */ 110 if (++buf_ptr >= buf_end) 111 fill_buffer(); 112 } 113 114 /* Scan an alphanumeric token */ 115 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 116 /* 117 * we have a character or number 118 */ 119 register char *j; /* used for searching thru list of 120 * 121 * reserved words */ 122 register struct templ *p; 123 124 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 125 int seendot = 0, 126 seenexp = 0; 127 if (*buf_ptr == '0' && 128 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 129 *e_token++ = *buf_ptr++; 130 *e_token++ = *buf_ptr++; 131 while (isxdigit(*buf_ptr)) { 132 CHECK_SIZE_TOKEN; 133 *e_token++ = *buf_ptr++; 134 } 135 } 136 else 137 while (1) { 138 if (*buf_ptr == '.') 139 if (seendot) 140 break; 141 else 142 seendot++; 143 CHECK_SIZE_TOKEN; 144 *e_token++ = *buf_ptr++; 145 if (!isdigit(*buf_ptr) && *buf_ptr != '.') 146 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 147 break; 148 else { 149 seenexp++; 150 seendot++; 151 CHECK_SIZE_TOKEN; 152 *e_token++ = *buf_ptr++; 153 if (*buf_ptr == '+' || *buf_ptr == '-') 154 *e_token++ = *buf_ptr++; 155 } 156 } 157 if (*buf_ptr == 'L' || *buf_ptr == 'l') 158 *e_token++ = *buf_ptr++; 159 } 160 else 161 while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 162 CHECK_SIZE_TOKEN; 163 *e_token++ = *buf_ptr++; 164 if (buf_ptr >= buf_end) 165 fill_buffer(); 166 } 167 *e_token++ = '\0'; 168 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 169 if (++buf_ptr >= buf_end) 170 fill_buffer(); 171 } 172 ps.its_a_keyword = false; 173 ps.sizeof_keyword = false; 174 if (l_struct) { /* if last token was 'struct', then this token 175 * should be treated as a declaration */ 176 l_struct = false; 177 last_code = ident; 178 ps.last_u_d = true; 179 return (decl); 180 } 181 ps.last_u_d = false; /* Operator after indentifier is binary */ 182 last_code = ident; /* Remember that this is the code we will 183 * return */ 184 185 /* 186 * This loop will check if the token is a keyword. 187 */ 188 for (p = specials; (j = p->rwd) != 0; p++) { 189 register char *p = s_token; /* point at scanned token */ 190 if (*j++ != *p++ || *j++ != *p++) 191 continue; /* This test depends on the fact that 192 * identifiers are always at least 1 character 193 * long (ie. the first two bytes of the 194 * identifier are always meaningful) */ 195 if (p[-1] == 0) 196 break; /* If its a one-character identifier */ 197 while (*p++ == *j) 198 if (*j++ == 0) 199 goto found_keyword; /* I wish that C had a multi-level 200 * break... */ 201 } 202 if (p->rwd) { /* we have a keyword */ 203 found_keyword: 204 ps.its_a_keyword = true; 205 ps.last_u_d = true; 206 switch (p->rwcode) { 207 case 1: /* it is a switch */ 208 return (swstmt); 209 case 2: /* a case or default */ 210 return (casestmt); 211 212 case 3: /* a "struct" */ 213 if (ps.p_l_follow) 214 break; /* inside parens: cast */ 215 l_struct = true; 216 217 /* 218 * Next time around, we will want to know that we have had a 219 * 'struct' 220 */ 221 case 4: /* one of the declaration keywords */ 222 if (ps.p_l_follow) { 223 ps.cast_mask |= 1 << ps.p_l_follow; 224 break; /* inside parens: cast */ 225 } 226 last_code = decl; 227 return (decl); 228 229 case 5: /* if, while, for */ 230 return (sp_paren); 231 232 case 6: /* do, else */ 233 return (sp_nparen); 234 235 case 7: 236 ps.sizeof_keyword = true; 237 default: /* all others are treated like any other 238 * identifier */ 239 return (ident); 240 } /* end of switch */ 241 } /* end of if (found_it) */ 242 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 243 register char *tp = buf_ptr; 244 while (tp < buf_end) 245 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 246 goto not_proc; 247 strncpy(ps.procname, token, sizeof ps.procname - 1); 248 ps.in_parameter_declaration = 1; 249 rparen_count = 1; 250 not_proc:; 251 } 252 /* 253 * The following hack attempts to guess whether or not the current 254 * token is in fact a declaration keyword -- one that has been 255 * typedefd 256 */ 257 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 258 && !ps.p_l_follow 259 && !ps.block_init 260 && (ps.last_token == rparen || ps.last_token == semicolon || 261 ps.last_token == decl || 262 ps.last_token == lbrace || ps.last_token == rbrace)) { 263 ps.its_a_keyword = true; 264 ps.last_u_d = true; 265 last_code = decl; 266 return decl; 267 } 268 if (last_code == decl) /* if this is a declared variable, then 269 * following sign is unary */ 270 ps.last_u_d = true; /* will make "int a -1" work */ 271 last_code = ident; 272 return (ident); /* the ident is not in the list */ 273 } /* end of procesing for alpanum character */ 274 275 /* Scan a non-alphanumeric token */ 276 277 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 278 * moved here */ 279 *e_token = '\0'; 280 if (++buf_ptr >= buf_end) 281 fill_buffer(); 282 283 switch (*token) { 284 case '\n': 285 unary_delim = ps.last_u_d; 286 ps.last_nl = true; /* remember that we just had a newline */ 287 code = (had_eof ? 0 : newline); 288 289 /* 290 * if data has been exausted, the newline is a dummy, and we should 291 * return code to stop 292 */ 293 break; 294 295 case '\'': /* start of quoted character */ 296 case '"': /* start of string */ 297 qchar = *token; 298 if (troff) { 299 e_token[-1] = '`'; 300 if (qchar == '"') 301 *e_token++ = '`'; 302 e_token = chfont(&bodyf, &stringf, e_token); 303 } 304 do { /* copy the string */ 305 while (1) { /* move one character or [/<char>]<char> */ 306 if (*buf_ptr == '\n') { 307 printf("%d: Unterminated literal\n", line_no); 308 goto stop_lit; 309 } 310 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 311 * since CHECK_SIZE guarantees that there 312 * are at least 5 entries left */ 313 *e_token = *buf_ptr++; 314 if (buf_ptr >= buf_end) 315 fill_buffer(); 316 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 317 if (*buf_ptr == '\n') /* check for escaped newline */ 318 ++line_no; 319 if (troff) { 320 *++e_token = BACKSLASH; 321 if (*buf_ptr == BACKSLASH) 322 *++e_token = BACKSLASH; 323 } 324 *++e_token = *buf_ptr++; 325 ++e_token; /* we must increment this again because we 326 * copied two chars */ 327 if (buf_ptr >= buf_end) 328 fill_buffer(); 329 } 330 else 331 break; /* we copied one character */ 332 } /* end of while (1) */ 333 } while (*e_token++ != qchar); 334 if (troff) { 335 e_token = chfont(&stringf, &bodyf, e_token - 1); 336 if (qchar == '"') 337 *e_token++ = '\''; 338 } 339 stop_lit: 340 code = ident; 341 break; 342 343 case ('('): 344 case ('['): 345 unary_delim = true; 346 code = lparen; 347 break; 348 349 case (')'): 350 case (']'): 351 code = rparen; 352 break; 353 354 case '#': 355 unary_delim = ps.last_u_d; 356 code = preesc; 357 break; 358 359 case '?': 360 unary_delim = true; 361 code = question; 362 break; 363 364 case (':'): 365 code = colon; 366 unary_delim = true; 367 break; 368 369 case (';'): 370 unary_delim = true; 371 code = semicolon; 372 break; 373 374 case ('{'): 375 unary_delim = true; 376 377 /* 378 * if (ps.in_or_st) ps.block_init = 1; 379 */ 380 /* ? code = ps.block_init ? lparen : lbrace; */ 381 code = lbrace; 382 break; 383 384 case ('}'): 385 unary_delim = true; 386 /* ? code = ps.block_init ? rparen : rbrace; */ 387 code = rbrace; 388 break; 389 390 case 014: /* a form feed */ 391 unary_delim = ps.last_u_d; 392 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 393 * right */ 394 code = form_feed; 395 break; 396 397 case (','): 398 unary_delim = true; 399 code = comma; 400 break; 401 402 case '.': 403 unary_delim = false; 404 code = period; 405 break; 406 407 case '-': 408 case '+': /* check for -, +, --, ++ */ 409 code = (ps.last_u_d ? unary_op : binary_op); 410 unary_delim = true; 411 412 if (*buf_ptr == token[0]) { 413 /* check for doubled character */ 414 *e_token++ = *buf_ptr++; 415 /* buffer overflow will be checked at end of loop */ 416 if (last_code == ident || last_code == rparen) { 417 code = (ps.last_u_d ? unary_op : postop); 418 /* check for following ++ or -- */ 419 unary_delim = false; 420 } 421 } 422 else if (*buf_ptr == '=') 423 /* check for operator += */ 424 *e_token++ = *buf_ptr++; 425 else if (*buf_ptr == '>') { 426 /* check for operator -> */ 427 *e_token++ = *buf_ptr++; 428 if (!pointer_as_binop) { 429 unary_delim = false; 430 code = unary_op; 431 ps.want_blank = false; 432 } 433 } 434 break; /* buffer overflow will be checked at end of 435 * switch */ 436 437 case '=': 438 if (ps.in_or_st) 439 ps.block_init = 1; 440 #ifdef undef 441 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 442 e_token[-1] = *buf_ptr++; 443 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 444 *e_token++ = *buf_ptr++; 445 *e_token++ = '='; /* Flip =+ to += */ 446 *e_token = 0; 447 } 448 #else 449 if (*buf_ptr == '=') {/* == */ 450 *e_token++ = '='; /* Flip =+ to += */ 451 buf_ptr++; 452 *e_token = 0; 453 } 454 #endif 455 code = binary_op; 456 unary_delim = true; 457 break; 458 /* can drop thru!!! */ 459 460 case '>': 461 case '<': 462 case '!': /* ops like <, <<, <=, !=, etc */ 463 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 464 *e_token++ = *buf_ptr; 465 if (++buf_ptr >= buf_end) 466 fill_buffer(); 467 } 468 if (*buf_ptr == '=') 469 *e_token++ = *buf_ptr++; 470 code = (ps.last_u_d ? unary_op : binary_op); 471 unary_delim = true; 472 break; 473 474 default: 475 if (token[0] == '/' && *buf_ptr == '*') { 476 /* it is start of comment */ 477 *e_token++ = '*'; 478 479 if (++buf_ptr >= buf_end) 480 fill_buffer(); 481 482 code = comment; 483 unary_delim = ps.last_u_d; 484 break; 485 } 486 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 487 /* 488 * handle ||, &&, etc, and also things as in int *****i 489 */ 490 *e_token++ = *buf_ptr; 491 if (++buf_ptr >= buf_end) 492 fill_buffer(); 493 } 494 code = (ps.last_u_d ? unary_op : binary_op); 495 unary_delim = true; 496 497 498 } /* end of switch */ 499 if (code != newline) { 500 l_struct = false; 501 last_code = code; 502 } 503 if (buf_ptr >= buf_end) /* check for input buffer empty */ 504 fill_buffer(); 505 ps.last_u_d = unary_delim; 506 *e_token = '\0'; /* null terminate the token */ 507 return (code); 508 } 509 510 /* 511 * Add the given keyword to the keyword table, using val as the keyword type 512 */ 513 addkey(key, val) 514 char *key; 515 { 516 register struct templ *p = specials; 517 while (p->rwd) 518 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 519 return; 520 else 521 p++; 522 if (p >= specials + sizeof specials / sizeof specials[0]) 523 return; /* For now, table overflows are silently 524 * ignored */ 525 p->rwd = key; 526 p->rwcode = val; 527 p[1].rwd = 0; 528 p[1].rwcode = 0; 529 return; 530 } 531