1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980 The Regents of the University of California. 4 * Copyright (c) 1976 Board of Trustees of the University of Illinois. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms are permitted 8 * provided that the above copyright notice and this paragraph are 9 * duplicated in all such forms and that any documentation, 10 * advertising materials, and other materials related to such 11 * distribution and use acknowledge that the software was developed 12 * by the University of California, Berkeley, the University of Illinois, 13 * Urbana, and Sun Microsystems, Inc. The name of either University 14 * or Sun Microsystems may not be used to endorse or promote products 15 * derived from this software without specific prior written permission. 16 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 18 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 19 */ 20 21 #ifndef lint 22 static char sccsid[] = "@(#)lexi.c 5.11 (Berkeley) 09/15/88"; 23 #endif /* not lint */ 24 25 /* 26 * Here we have the token scanner for indent. It scans off one token and puts 27 * it in the global variable "token". It returns a code, indicating the type 28 * of token scanned. 29 */ 30 31 #include "indent_globs.h" 32 #include "indent_codes.h" 33 #include "ctype.h" 34 35 #define alphanum 1 36 #define opchar 3 37 38 struct templ { 39 char *rwd; 40 int rwcode; 41 }; 42 43 struct templ specials[100] = 44 { 45 "switch", 1, 46 "case", 2, 47 "break", 0, 48 "struct", 3, 49 "union", 3, 50 "enum", 3, 51 "default", 2, 52 "int", 4, 53 "char", 4, 54 "float", 4, 55 "double", 4, 56 "long", 4, 57 "short", 4, 58 "typdef", 4, 59 "unsigned", 4, 60 "register", 4, 61 "static", 4, 62 "global", 4, 63 "extern", 4, 64 "void", 4, 65 "goto", 0, 66 "return", 0, 67 "if", 5, 68 "while", 5, 69 "for", 5, 70 "else", 6, 71 "do", 6, 72 "sizeof", 7, 73 0, 0 74 }; 75 76 char chartype[128] = 77 { /* this is used to facilitate the decision of 78 * what type (alphanumeric, operator) each 79 * character is */ 80 0, 0, 0, 0, 0, 0, 0, 0, 81 0, 0, 0, 0, 0, 0, 0, 0, 82 0, 0, 0, 0, 0, 0, 0, 0, 83 0, 0, 0, 0, 0, 0, 0, 0, 84 0, 3, 0, 0, 1, 3, 3, 0, 85 0, 0, 3, 3, 0, 3, 0, 3, 86 1, 1, 1, 1, 1, 1, 1, 1, 87 1, 1, 0, 0, 3, 3, 3, 3, 88 0, 1, 1, 1, 1, 1, 1, 1, 89 1, 1, 1, 1, 1, 1, 1, 1, 90 1, 1, 1, 1, 1, 1, 1, 1, 91 1, 1, 1, 0, 0, 0, 3, 1, 92 0, 1, 1, 1, 1, 1, 1, 1, 93 1, 1, 1, 1, 1, 1, 1, 1, 94 1, 1, 1, 1, 1, 1, 1, 1, 95 1, 1, 1, 0, 3, 0, 3, 0 96 }; 97 98 99 100 101 int 102 lexi() 103 { 104 register char *tok; /* local pointer to next char in token */ 105 int unary_delim; /* this is set to 1 if the current token 106 * 107 * forces a following operator to be unary */ 108 static int last_code; /* the last token type returned */ 109 static int l_struct; /* set to 1 if the last token was 'struct' */ 110 int code; /* internal code to be returned */ 111 char qchar; /* the delimiter character for a string */ 112 113 tok = token; /* point to start of place to save token */ 114 unary_delim = false; 115 ps.col_1 = ps.last_nl; /* tell world that this token started in 116 * column 1 iff the last thing scanned was nl */ 117 ps.last_nl = false; 118 119 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 120 ps.col_1 = false; /* leading blanks imply token is not in column 121 * 1 */ 122 if (++buf_ptr >= buf_end) 123 fill_buffer(); 124 } 125 126 /* Scan an alphanumeric token */ 127 if (chartype[*buf_ptr] == alphanum || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 128 /* 129 * we have a character or number 130 */ 131 register char *j; /* used for searching thru list of 132 * 133 * reserved words */ 134 register struct templ *p; 135 136 if (isdigit(*buf_ptr) || buf_ptr[0] == '.' && isdigit(buf_ptr[1])) { 137 int seendot = 0, 138 seenexp = 0; 139 if (*buf_ptr == '0' && 140 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 141 *tok++ = *buf_ptr++; 142 *tok++ = *buf_ptr++; 143 while (isxdigit(*buf_ptr)) 144 *tok++ = *buf_ptr++; 145 } 146 else 147 while (1) { 148 if (*buf_ptr == '.') 149 if (seendot) 150 break; 151 else 152 seendot++; 153 *tok++ = *buf_ptr++; 154 if (!isdigit(*buf_ptr) && *buf_ptr != '.') 155 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 156 break; 157 else { 158 seenexp++; 159 seendot++; 160 *tok++ = *buf_ptr++; 161 if (*buf_ptr == '+' || *buf_ptr == '-') 162 *tok++ = *buf_ptr++; 163 } 164 } 165 if (*buf_ptr == 'L' || *buf_ptr == 'l') 166 *tok++ = *buf_ptr++; 167 } 168 else 169 while (chartype[*buf_ptr] == alphanum) { /* copy it over */ 170 *tok++ = *buf_ptr++; 171 if (buf_ptr >= buf_end) 172 fill_buffer(); 173 } 174 *tok++ = '\0'; 175 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 176 if (++buf_ptr >= buf_end) 177 fill_buffer(); 178 } 179 ps.its_a_keyword = false; 180 ps.sizeof_keyword = false; 181 if (l_struct) { /* if last token was 'struct', then this token 182 * should be treated as a declaration */ 183 l_struct = false; 184 last_code = ident; 185 ps.last_u_d = true; 186 return (decl); 187 } 188 ps.last_u_d = false; /* Operator after indentifier is binary */ 189 last_code = ident; /* Remember that this is the code we will 190 * return */ 191 192 /* 193 * This loop will check if the token is a keyword. 194 */ 195 for (p = specials; (j = p->rwd) != 0; p++) { 196 tok = token; /* point at scanned token */ 197 if (*j++ != *tok++ || *j++ != *tok++) 198 continue; /* This test depends on the fact that 199 * identifiers are always at least 1 character 200 * long (ie. the first two bytes of the 201 * identifier are always meaningful) */ 202 if (tok[-1] == 0) 203 break; /* If its a one-character identifier */ 204 while (*tok++ == *j) 205 if (*j++ == 0) 206 goto found_keyword; /* I wish that C had a multi-level 207 * break... */ 208 } 209 if (p->rwd) { /* we have a keyword */ 210 found_keyword: 211 ps.its_a_keyword = true; 212 ps.last_u_d = true; 213 switch (p->rwcode) { 214 case 1: /* it is a switch */ 215 return (swstmt); 216 case 2: /* a case or default */ 217 return (casestmt); 218 219 case 3: /* a "struct" */ 220 if (ps.p_l_follow) 221 break; /* inside parens: cast */ 222 l_struct = true; 223 224 /* 225 * Next time around, we will want to know that we have had a 226 * 'struct' 227 */ 228 case 4: /* one of the declaration keywords */ 229 if (ps.p_l_follow) { 230 ps.cast_mask |= 1 << ps.p_l_follow; 231 break; /* inside parens: cast */ 232 } 233 last_code = decl; 234 return (decl); 235 236 case 5: /* if, while, for */ 237 return (sp_paren); 238 239 case 6: /* do, else */ 240 return (sp_nparen); 241 242 case 7: 243 ps.sizeof_keyword = true; 244 default: /* all others are treated like any other 245 * identifier */ 246 return (ident); 247 } /* end of switch */ 248 } /* end of if (found_it) */ 249 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 250 register char *tp = buf_ptr; 251 while (tp < buf_end) 252 if (*tp++ == ')' && *tp == ';') 253 goto not_proc; 254 strncpy(ps.procname, token, sizeof ps.procname - 1); 255 ps.in_parameter_declaration = 1; 256 not_proc:; 257 } 258 /* 259 * The following hack attempts to guess whether or not the current 260 * token is in fact a declaration keyword -- one that has been 261 * typedefd 262 */ 263 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 264 && !ps.p_l_follow 265 && !ps.block_init 266 && (ps.last_token == rparen || ps.last_token == semicolon || 267 ps.last_token == decl || 268 ps.last_token == lbrace || ps.last_token == rbrace)) { 269 ps.its_a_keyword = true; 270 ps.last_u_d = true; 271 last_code = decl; 272 return decl; 273 } 274 if (last_code == decl) /* if this is a declared variable, then 275 * following sign is unary */ 276 ps.last_u_d = true; /* will make "int a -1" work */ 277 last_code = ident; 278 return (ident); /* the ident is not in the list */ 279 } /* end of procesing for alpanum character */ 280 /* l l l Scan a non-alphanumeric token */ 281 282 *tok++ = *buf_ptr; /* if it is only a one-character token, it is 283 * moved here */ 284 *tok = '\0'; 285 if (++buf_ptr >= buf_end) 286 fill_buffer(); 287 288 switch (*token) { 289 case '\n': 290 unary_delim = ps.last_u_d; 291 ps.last_nl = true; /* remember that we just had a newline */ 292 code = (had_eof ? 0 : newline); 293 294 /* 295 * if data has been exausted, the newline is a dummy, and we should 296 * return code to stop 297 */ 298 break; 299 300 case '\'': /* start of quoted character */ 301 case '"': /* start of string */ 302 qchar = *token; 303 if (troff) { 304 tok[-1] = '`'; 305 if (qchar == '"') 306 *tok++ = '`'; 307 tok = chfont(&bodyf, &stringf, tok); 308 } 309 do { /* copy the string */ 310 while (1) { /* move one character or [/<char>]<char> */ 311 if (*buf_ptr == '\n') { 312 printf("%d: Unterminated literal\n", line_no); 313 goto stop_lit; 314 } 315 *tok = *buf_ptr++; 316 if (buf_ptr >= buf_end) 317 fill_buffer(); 318 if (had_eof || ((tok - token) > (bufsize - 2))) { 319 printf("Unterminated literal\n"); 320 ++tok; 321 goto stop_lit; 322 /* get outof literal copying loop */ 323 } 324 if (*tok == BACKSLASH) { /* if escape, copy extra char */ 325 if (*buf_ptr == '\n') /* check for escaped newline */ 326 ++line_no; 327 if (troff) { 328 *++tok = BACKSLASH; 329 if (*buf_ptr == BACKSLASH) 330 *++tok = BACKSLASH; 331 } 332 *++tok = *buf_ptr++; 333 ++tok; /* we must increment this again because we 334 * copied two chars */ 335 if (buf_ptr >= buf_end) 336 fill_buffer(); 337 } 338 else 339 break; /* we copied one character */ 340 } /* end of while (1) */ 341 } while (*tok++ != qchar); 342 if (troff) { 343 tok = chfont(&stringf, &bodyf, tok - 1); 344 if (qchar == '"') 345 *tok++ = '\''; 346 } 347 stop_lit: 348 code = ident; 349 break; 350 351 case ('('): 352 case ('['): 353 unary_delim = true; 354 code = lparen; 355 break; 356 357 case (')'): 358 case (']'): 359 code = rparen; 360 break; 361 362 case '#': 363 unary_delim = ps.last_u_d; 364 code = preesc; 365 break; 366 367 case '?': 368 unary_delim = true; 369 code = question; 370 break; 371 372 case (':'): 373 code = colon; 374 unary_delim = true; 375 break; 376 377 case (';'): 378 unary_delim = true; 379 code = semicolon; 380 break; 381 382 case ('{'): 383 unary_delim = true; 384 385 /* 386 * if (ps.in_or_st) ps.block_init = 1; 387 */ 388 /* ? code = ps.block_init ? lparen : lbrace; */ 389 code = lbrace; 390 break; 391 392 case ('}'): 393 unary_delim = true; 394 /* ? code = ps.block_init ? rparen : rbrace; */ 395 code = rbrace; 396 break; 397 398 case 014: /* a form feed */ 399 unary_delim = ps.last_u_d; 400 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 401 * right */ 402 code = form_feed; 403 break; 404 405 case (','): 406 unary_delim = true; 407 code = comma; 408 break; 409 410 case '.': 411 unary_delim = false; 412 code = period; 413 break; 414 415 case '-': 416 case '+': /* check for -, +, --, ++ */ 417 code = (ps.last_u_d ? unary_op : binary_op); 418 unary_delim = true; 419 420 if (*buf_ptr == token[0]) { 421 /* check for doubled character */ 422 *tok++ = *buf_ptr++; 423 /* buffer overflow will be checked at end of loop */ 424 if (last_code == ident || last_code == rparen) { 425 code = (ps.last_u_d ? unary_op : postop); 426 /* check for following ++ or -- */ 427 unary_delim = false; 428 } 429 } 430 else if (*buf_ptr == '=') 431 /* check for operator += */ 432 *tok++ = *buf_ptr++; 433 else if (*buf_ptr == '>') { 434 /* check for operator -> */ 435 *tok++ = *buf_ptr++; 436 if (!pointer_as_binop) { 437 unary_delim = false; 438 code = unary_op; 439 ps.want_blank = false; 440 } 441 } 442 break; /* buffer overflow will be checked at end of 443 * switch */ 444 445 case '=': 446 if (ps.in_or_st) 447 ps.block_init = 1; 448 #ifdef undef 449 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 450 tok[-1] = *buf_ptr++; 451 if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr) 452 *tok++ = *buf_ptr++; 453 *tok++ = '='; /* Flip =+ to += */ 454 *tok = 0; 455 } 456 #else 457 if (*buf_ptr == '=') {/* == */ 458 *tok++ = '='; /* Flip =+ to += */ 459 buf_ptr++; 460 *tok = 0; 461 } 462 #endif 463 code = binary_op; 464 unary_delim = true; 465 break; 466 /* can drop thru!!! */ 467 468 case '>': 469 case '<': 470 case '!': /* ops like <, <<, <=, !=, etc */ 471 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 472 *tok++ = *buf_ptr; 473 if (++buf_ptr >= buf_end) 474 fill_buffer(); 475 } 476 if (*buf_ptr == '=') 477 *tok++ = *buf_ptr++; 478 code = (ps.last_u_d ? unary_op : binary_op); 479 unary_delim = true; 480 break; 481 482 default: 483 if (token[0] == '/' && *buf_ptr == '*') { 484 /* it is start of comment */ 485 *tok++ = '*'; 486 487 if (++buf_ptr >= buf_end) 488 fill_buffer(); 489 490 code = comment; 491 unary_delim = ps.last_u_d; 492 break; 493 } 494 while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') { 495 /* 496 * handle ||, &&, etc, and also things as in int *****i 497 */ 498 *tok++ = *buf_ptr; 499 if (++buf_ptr >= buf_end) 500 fill_buffer(); 501 } 502 code = (ps.last_u_d ? unary_op : binary_op); 503 unary_delim = true; 504 505 506 } /* end of switch */ 507 if (code != newline) { 508 l_struct = false; 509 last_code = code; 510 } 511 if (buf_ptr >= buf_end) /* check for input buffer empty */ 512 fill_buffer(); 513 ps.last_u_d = unary_delim; 514 *tok = '\0'; /* null terminate the token */ 515 return (code); 516 }; 517 518 /* 519 * Add the given keyword to the keyword table, using val as the keyword type 520 */ 521 addkey(key, val) 522 char *key; 523 { 524 register struct templ *p = specials; 525 while (p->rwd) 526 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 527 return; 528 else 529 p++; 530 if (p >= specials + sizeof specials / sizeof specials[0]) 531 return; /* For now, table overflows are silently 532 * ignored */ 533 p->rwd = key; 534 p->rwcode = val; 535 p[1].rwd = 0; 536 p[1].rwcode = 0; 537 return; 538 } 539