1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980 The Regents of the University of California. 4 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 5 * All rights reserved. 6 * 7 * %sccs.include.redist.c% 8 */ 9 10 #ifndef lint 11 static char sccsid[] = "@(#)lexi.c 5.16 (Berkeley) 02/26/91"; 12 #endif /* not lint */ 13 14 /* 15 * Here we have the token scanner for indent. It scans off one token and puts 16 * it in the global variable "token". It returns a code, indicating the type 17 * of token scanned. 18 */ 19 20 #include <stdio.h> 21 #include <ctype.h> 22 #include <stdlib.h> 23 #include <string.h> 24 #include "indent_globs.h" 25 #include "indent_codes.h" 26 27 #define alphanum 1 28 #define opchar 3 29 30 struct templ { 31 char *rwd; 32 int rwcode; 33 }; 34 35 struct templ specials[100] = 36 { 37 "switch", 1, 38 "case", 2, 39 "break", 0, 40 "struct", 3, 41 "union", 3, 42 "enum", 3, 43 "default", 2, 44 "int", 4, 45 "char", 4, 46 "float", 4, 47 "double", 4, 48 "long", 4, 49 "short", 4, 50 "typdef", 4, 51 "unsigned", 4, 52 "register", 4, 53 "static", 4, 54 "global", 4, 55 "extern", 4, 56 "void", 4, 57 "goto", 0, 58 "return", 0, 59 "if", 5, 60 "while", 5, 61 "for", 5, 62 "else", 6, 63 "do", 6, 64 "sizeof", 7, 65 0, 0 66 }; 67 68 char chartype[128] = 69 { /* this is used to facilitate the decision of 70 * what type (alphanumeric, operator) each 71 * character is */ 72 0, 0, 0, 0, 0, 0, 0, 0, 73 0, 0, 0, 0, 0, 0, 0, 0, 74 0, 0, 0, 0, 0, 0, 0, 0, 75 0, 0, 0, 0, 0, 0, 0, 0, 76 0, 3, 0, 0, 1, 3, 3, 0, 77 0, 0, 3, 3, 0, 3, 0, 3, 78 1, 1, 1, 1, 1, 1, 1, 1, 79 1, 1, 0, 0, 3, 3, 3, 3, 80 0, 1, 1, 1, 1, 1, 1, 1, 81 1, 1, 1, 1, 1, 1, 1, 1, 82 1, 1, 1, 1, 1, 1, 1, 1, 83 1, 1, 1, 0, 0, 0, 3, 1, 84 0, 1, 1, 1, 1, 1, 1, 1, 85 1, 1, 1, 1, 1, 1, 1, 1, 86 1, 1, 1, 1, 1, 1, 1, 1, 87 1, 1, 1, 0, 3, 0, 3, 0 88 }; 89 90 91 92 93 int 94 lexi() 95 { 96 int unary_delim; /* this is set to 1 if the current token 97 * 98 * forces a following operator to be unary */ 99 static int last_code; /* the last token type returned */ 100 static int l_struct; /* set to 1 if the last token was 'struct' */ 101 int code; /* internal code to be returned */ 102 char qchar; /* the delimiter character for a string */ 103 104 e_token = s_token; /* point to start of place to save token */ 105 unary_delim = false; 106 ps.col_1 = ps.last_nl; /* tell world that this token started in 107 * column 1 iff the last thing scanned was nl */ 108 ps.last_nl = false; 109 110 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 111 ps.col_1 = false; /* leading blanks imply token is not in column 112 * 1 */ 113 if (++buf_ptr >= buf_end) 114 fill_buffer(); 115 } 116 117 /* Scan an alphanumeric token */ 118 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 119 /* 120 * we have a character or number 121 */ 122 register char *j; /* used for searching thru list of 123 * 124 * reserved words */ 125 register struct templ *p; 126 127 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 128 int seendot = 0, 129 seenexp = 0; 130 if (*buf_ptr == '0' && 131 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 132 *e_token++ = *buf_ptr++; 133 *e_token++ = *buf_ptr++; 134 while (isxdigit(*buf_ptr)) { 135 CHECK_SIZE_TOKEN; 136 *e_token++ = *buf_ptr++; 137 } 138 } 139 else 140 while (1) { 141 if (*buf_ptr == '.') 142 if (seendot) 143 break; 144 else 145 seendot++; 146 CHECK_SIZE_TOKEN; 147 *e_token++ = *buf_ptr++; 148 if (!isdigit(*buf_ptr) && *buf_ptr != '.') 149 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 150 break; 151 else { 152 seenexp++; 153 seendot++; 154 CHECK_SIZE_TOKEN; 155 *e_token++ = *buf_ptr++; 156 if (*buf_ptr == '+' || *buf_ptr == '-') 157 *e_token++ = *buf_ptr++; 158 } 159 } 160 if (*buf_ptr == 'L' || *buf_ptr == 'l') 161 *e_token++ = *buf_ptr++; 162 } 163 else 164 while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 165 CHECK_SIZE_TOKEN; 166 *e_token++ = *buf_ptr++; 167 if (buf_ptr >= buf_end) 168 fill_buffer(); 169 } 170 *e_token++ = '\0'; 171 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 172 if (++buf_ptr >= buf_end) 173 fill_buffer(); 174 } 175 ps.its_a_keyword = false; 176 ps.sizeof_keyword = false; 177 if (l_struct) { /* if last token was 'struct', then this token 178 * should be treated as a declaration */ 179 l_struct = false; 180 last_code = ident; 181 ps.last_u_d = true; 182 return (decl); 183 } 184 ps.last_u_d = false; /* Operator after indentifier is binary */ 185 last_code = ident; /* Remember that this is the code we will 186 * return */ 187 188 /* 189 * This loop will check if the token is a keyword. 190 */ 191 for (p = specials; (j = p->rwd) != 0; p++) { 192 register char *p = s_token; /* point at scanned token */ 193 if (*j++ != *p++ || *j++ != *p++) 194 continue; /* This test depends on the fact that 195 * identifiers are always at least 1 character 196 * long (ie. the first two bytes of the 197 * identifier are always meaningful) */ 198 if (p[-1] == 0) 199 break; /* If its a one-character identifier */ 200 while (*p++ == *j) 201 if (*j++ == 0) 202 goto found_keyword; /* I wish that C had a multi-level 203 * break... */ 204 } 205 if (p->rwd) { /* we have a keyword */ 206 found_keyword: 207 ps.its_a_keyword = true; 208 ps.last_u_d = true; 209 switch (p->rwcode) { 210 case 1: /* it is a switch */ 211 return (swstmt); 212 case 2: /* a case or default */ 213 return (casestmt); 214 215 case 3: /* a "struct" */ 216 if (ps.p_l_follow) 217 break; /* inside parens: cast */ 218 l_struct = true; 219 220 /* 221 * Next time around, we will want to know that we have had a 222 * 'struct' 223 */ 224 case 4: /* one of the declaration keywords */ 225 if (ps.p_l_follow) { 226 ps.cast_mask |= 1 << ps.p_l_follow; 227 break; /* inside parens: cast */ 228 } 229 last_code = decl; 230 return (decl); 231 232 case 5: /* if, while, for */ 233 return (sp_paren); 234 235 case 6: /* do, else */ 236 return (sp_nparen); 237 238 case 7: 239 ps.sizeof_keyword = true; 240 default: /* all others are treated like any other 241 * identifier */ 242 return (ident); 243 } /* end of switch */ 244 } /* end of if (found_it) */ 245 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 246 register char *tp = buf_ptr; 247 while (tp < buf_end) 248 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 249 goto not_proc; 250 strncpy(ps.procname, token, sizeof ps.procname - 1); 251 ps.in_parameter_declaration = 1; 252 rparen_count = 1; 253 not_proc:; 254 } 255 /* 256 * The following hack attempts to guess whether or not the current 257 * token is in fact a declaration keyword -- one that has been 258 * typedefd 259 */ 260 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 261 && !ps.p_l_follow 262 && !ps.block_init 263 && (ps.last_token == rparen || ps.last_token == semicolon || 264 ps.last_token == decl || 265 ps.last_token == lbrace || ps.last_token == rbrace)) { 266 ps.its_a_keyword = true; 267 ps.last_u_d = true; 268 last_code = decl; 269 return decl; 270 } 271 if (last_code == decl) /* if this is a declared variable, then 272 * following sign is unary */ 273 ps.last_u_d = true; /* will make "int a -1" work */ 274 last_code = ident; 275 return (ident); /* the ident is not in the list */ 276 } /* end of procesing for alpanum character */ 277 278 /* Scan a non-alphanumeric token */ 279 280 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 281 * moved here */ 282 *e_token = '\0'; 283 if (++buf_ptr >= buf_end) 284 fill_buffer(); 285 286 switch (*token) { 287 case '\n': 288 unary_delim = ps.last_u_d; 289 ps.last_nl = true; /* remember that we just had a newline */ 290 code = (had_eof ? 0 : newline); 291 292 /* 293 * if data has been exausted, the newline is a dummy, and we should 294 * return code to stop 295 */ 296 break; 297 298 case '\'': /* start of quoted character */ 299 case '"': /* start of string */ 300 qchar = *token; 301 if (troff) { 302 e_token[-1] = '`'; 303 if (qchar == '"') 304 *e_token++ = '`'; 305 e_token = chfont(&bodyf, &stringf, e_token); 306 } 307 do { /* copy the string */ 308 while (1) { /* move one character or [/<char>]<char> */ 309 if (*buf_ptr == '\n') { 310 printf("%d: Unterminated literal\n", line_no); 311 goto stop_lit; 312 } 313 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 314 * since CHECK_SIZE guarantees that there 315 * are at least 5 entries left */ 316 *e_token = *buf_ptr++; 317 if (buf_ptr >= buf_end) 318 fill_buffer(); 319 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 320 if (*buf_ptr == '\n') /* check for escaped newline */ 321 ++line_no; 322 if (troff) { 323 *++e_token = BACKSLASH; 324 if (*buf_ptr == BACKSLASH) 325 *++e_token = BACKSLASH; 326 } 327 *++e_token = *buf_ptr++; 328 ++e_token; /* we must increment this again because we 329 * copied two chars */ 330 if (buf_ptr >= buf_end) 331 fill_buffer(); 332 } 333 else 334 break; /* we copied one character */ 335 } /* end of while (1) */ 336 } while (*e_token++ != qchar); 337 if (troff) { 338 e_token = chfont(&stringf, &bodyf, e_token - 1); 339 if (qchar == '"') 340 *e_token++ = '\''; 341 } 342 stop_lit: 343 code = ident; 344 break; 345 346 case ('('): 347 case ('['): 348 unary_delim = true; 349 code = lparen; 350 break; 351 352 case (')'): 353 case (']'): 354 code = rparen; 355 break; 356 357 case '#': 358 unary_delim = ps.last_u_d; 359 code = preesc; 360 break; 361 362 case '?': 363 unary_delim = true; 364 code = question; 365 break; 366 367 case (':'): 368 code = colon; 369 unary_delim = true; 370 break; 371 372 case (';'): 373 unary_delim = true; 374 code = semicolon; 375 break; 376 377 case ('{'): 378 unary_delim = true; 379 380 /* 381 * if (ps.in_or_st) ps.block_init = 1; 382 */ 383 /* ? code = ps.block_init ? lparen : lbrace; */ 384 code = lbrace; 385 break; 386 387 case ('}'): 388 unary_delim = true; 389 /* ? code = ps.block_init ? rparen : rbrace; */ 390 code = rbrace; 391 break; 392 393 case 014: /* a form feed */ 394 unary_delim = ps.last_u_d; 395 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 396 * right */ 397 code = form_feed; 398 break; 399 400 case (','): 401 unary_delim = true; 402 code = comma; 403 break; 404 405 case '.': 406 unary_delim = false; 407 code = period; 408 break; 409 410 case '-': 411 case '+': /* check for -, +, --, ++ */ 412 code = (ps.last_u_d ? unary_op : binary_op); 413 unary_delim = true; 414 415 if (*buf_ptr == token[0]) { 416 /* check for doubled character */ 417 *e_token++ = *buf_ptr++; 418 /* buffer overflow will be checked at end of loop */ 419 if (last_code == ident || last_code == rparen) { 420 code = (ps.last_u_d ? unary_op : postop); 421 /* check for following ++ or -- */ 422 unary_delim = false; 423 } 424 } 425 else if (*buf_ptr == '=') 426 /* check for operator += */ 427 *e_token++ = *buf_ptr++; 428 else if (*buf_ptr == '>') { 429 /* check for operator -> */ 430 *e_token++ = *buf_ptr++; 431 if (!pointer_as_binop) { 432 unary_delim = false; 433 code = unary_op; 434 ps.want_blank = false; 435 } 436 } 437 break; /* buffer overflow will be checked at end of 438 * switch */ 439 440 case '=': 441 if (ps.in_or_st) 442 ps.block_init = 1; 443 #ifdef undef 444 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 445 e_token[-1] = *buf_ptr++; 446 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 447 *e_token++ = *buf_ptr++; 448 *e_token++ = '='; /* Flip =+ to += */ 449 *e_token = 0; 450 } 451 #else 452 if (*buf_ptr == '=') {/* == */ 453 *e_token++ = '='; /* Flip =+ to += */ 454 buf_ptr++; 455 *e_token = 0; 456 } 457 #endif 458 code = binary_op; 459 unary_delim = true; 460 break; 461 /* can drop thru!!! */ 462 463 case '>': 464 case '<': 465 case '!': /* ops like <, <<, <=, !=, etc */ 466 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 467 *e_token++ = *buf_ptr; 468 if (++buf_ptr >= buf_end) 469 fill_buffer(); 470 } 471 if (*buf_ptr == '=') 472 *e_token++ = *buf_ptr++; 473 code = (ps.last_u_d ? unary_op : binary_op); 474 unary_delim = true; 475 break; 476 477 default: 478 if (token[0] == '/' && *buf_ptr == '*') { 479 /* it is start of comment */ 480 *e_token++ = '*'; 481 482 if (++buf_ptr >= buf_end) 483 fill_buffer(); 484 485 code = comment; 486 unary_delim = ps.last_u_d; 487 break; 488 } 489 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 490 /* 491 * handle ||, &&, etc, and also things as in int *****i 492 */ 493 *e_token++ = *buf_ptr; 494 if (++buf_ptr >= buf_end) 495 fill_buffer(); 496 } 497 code = (ps.last_u_d ? unary_op : binary_op); 498 unary_delim = true; 499 500 501 } /* end of switch */ 502 if (code != newline) { 503 l_struct = false; 504 last_code = code; 505 } 506 if (buf_ptr >= buf_end) /* check for input buffer empty */ 507 fill_buffer(); 508 ps.last_u_d = unary_delim; 509 *e_token = '\0'; /* null terminate the token */ 510 return (code); 511 } 512 513 /* 514 * Add the given keyword to the keyword table, using val as the keyword type 515 */ 516 addkey(key, val) 517 char *key; 518 { 519 register struct templ *p = specials; 520 while (p->rwd) 521 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 522 return; 523 else 524 p++; 525 if (p >= specials + sizeof specials / sizeof specials[0]) 526 return; /* For now, table overflows are silently 527 * ignored */ 528 p->rwd = key; 529 p->rwcode = val; 530 p[1].rwd = 0; 531 p[1].rwcode = 0; 532 return; 533 } 534