1 /* 2 * Copyright (c) 1985 Sun Microsystems, Inc. 3 * Copyright (c) 1980, 1993 4 * The Regents of the University of California. All rights reserved. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the University of 18 * California, Berkeley and its contributors. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 * 35 * @(#)lexi.c 8.1 (Berkeley) 6/6/93 36 * $FreeBSD: src/usr.bin/indent/lexi.c,v 1.3.6.3 2001/12/06 19:28:47 schweikh Exp $ 37 * $DragonFly: src/usr.bin/indent/lexi.c,v 1.3 2005/04/10 20:55:38 drhodus Exp $ 38 */ 39 40 #if 0 41 #endif 42 43 /* 44 * Here we have the token scanner for indent. It scans off one token and puts 45 * it in the global variable "token". It returns a code, indicating the type 46 * of token scanned. 47 */ 48 49 #include <stdio.h> 50 #include <ctype.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include "indent_globs.h" 54 #include "indent_codes.h" 55 56 #define alphanum 1 57 #define opchar 3 58 59 void fill_buffer(void); 60 61 struct templ { 62 char *rwd; 63 int rwcode; 64 }; 65 66 struct templ specials[1000] = 67 { 68 {"switch", 1}, 69 {"case", 2}, 70 {"break", 0}, 71 {"struct", 3}, 72 {"union", 3}, 73 {"enum", 3}, 74 {"default", 2}, 75 {"int", 4}, 76 {"char", 4}, 77 {"float", 4}, 78 {"double", 4}, 79 {"long", 4}, 80 {"short", 4}, 81 {"typdef", 4}, 82 {"unsigned", 4}, 83 {"register", 4}, 84 {"static", 4}, 85 {"global", 4}, 86 {"extern", 4}, 87 {"void", 4}, 88 {"goto", 0}, 89 {"return", 0}, 90 {"if", 5}, 91 {"while", 5}, 92 {"for", 5}, 93 {"else", 6}, 94 {"do", 6}, 95 {"sizeof", 7}, 96 {"const", 9}, 97 {"volatile", 9}, 98 {0, 0} 99 }; 100 101 char chartype[128] = 102 { /* this is used to facilitate the decision of 103 * what type (alphanumeric, operator) each 104 * character is */ 105 0, 0, 0, 0, 0, 0, 0, 0, 106 0, 0, 0, 0, 0, 0, 0, 0, 107 0, 0, 0, 0, 0, 0, 0, 0, 108 0, 0, 0, 0, 0, 0, 0, 0, 109 0, 3, 0, 0, 1, 3, 3, 0, 110 0, 0, 3, 3, 0, 3, 0, 3, 111 1, 1, 1, 1, 1, 1, 1, 1, 112 1, 1, 0, 0, 3, 3, 3, 3, 113 0, 1, 1, 1, 1, 1, 1, 1, 114 1, 1, 1, 1, 1, 1, 1, 1, 115 1, 1, 1, 1, 1, 1, 1, 1, 116 1, 1, 1, 0, 0, 0, 3, 1, 117 0, 1, 1, 1, 1, 1, 1, 1, 118 1, 1, 1, 1, 1, 1, 1, 1, 119 1, 1, 1, 1, 1, 1, 1, 1, 120 1, 1, 1, 0, 3, 0, 3, 0 121 }; 122 123 int 124 lexi(void) 125 { 126 int unary_delim; /* this is set to 1 if the current token 127 * forces a following operator to be unary */ 128 static int last_code; /* the last token type returned */ 129 static int l_struct; /* set to 1 if the last token was 'struct' */ 130 int code; /* internal code to be returned */ 131 char qchar; /* the delimiter character for a string */ 132 133 e_token = s_token; /* point to start of place to save token */ 134 unary_delim = false; 135 ps.col_1 = ps.last_nl; /* tell world that this token started in 136 * column 1 iff the last thing scanned was nl */ 137 ps.last_nl = false; 138 139 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 140 ps.col_1 = false; /* leading blanks imply token is not in column 141 * 1 */ 142 if (++buf_ptr >= buf_end) 143 fill_buffer(); 144 } 145 146 /* Scan an alphanumeric token */ 147 if (chartype[(int)*buf_ptr] == alphanum || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 148 /* 149 * we have a character or number 150 */ 151 char *j; /* used for searching thru list of 152 * 153 * reserved words */ 154 struct templ *p; 155 156 if (isdigit(*buf_ptr) || (buf_ptr[0] == '.' && isdigit(buf_ptr[1]))) { 157 int seendot = 0, 158 seenexp = 0, 159 seensfx = 0; 160 if (*buf_ptr == '0' && 161 (buf_ptr[1] == 'x' || buf_ptr[1] == 'X')) { 162 *e_token++ = *buf_ptr++; 163 *e_token++ = *buf_ptr++; 164 while (isxdigit(*buf_ptr)) { 165 CHECK_SIZE_TOKEN; 166 *e_token++ = *buf_ptr++; 167 } 168 } 169 else 170 while (1) { 171 if (*buf_ptr == '.') { 172 if (seendot) 173 break; 174 else 175 seendot++; 176 } 177 CHECK_SIZE_TOKEN; 178 *e_token++ = *buf_ptr++; 179 if (!isdigit(*buf_ptr) && *buf_ptr != '.') { 180 if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) 181 break; 182 else { 183 seenexp++; 184 seendot++; 185 CHECK_SIZE_TOKEN; 186 *e_token++ = *buf_ptr++; 187 if (*buf_ptr == '+' || *buf_ptr == '-') 188 *e_token++ = *buf_ptr++; 189 } 190 } 191 } 192 while (1) { 193 if (!(seensfx & 1) && 194 (*buf_ptr == 'U' || *buf_ptr == 'u')) { 195 CHECK_SIZE_TOKEN; 196 *e_token++ = *buf_ptr++; 197 seensfx |= 1; 198 continue; 199 } 200 if (!(seensfx & 2) && 201 (*buf_ptr == 'L' || *buf_ptr == 'l')) { 202 CHECK_SIZE_TOKEN; 203 if (buf_ptr[1] == buf_ptr[0]) 204 *e_token++ = *buf_ptr++; 205 *e_token++ = *buf_ptr++; 206 seensfx |= 2; 207 continue; 208 } 209 break; 210 } 211 } 212 else 213 while (chartype[(int)*buf_ptr] == alphanum || *buf_ptr == BACKSLASH) { 214 /* fill_buffer() terminates buffer with newline */ 215 if (*buf_ptr == BACKSLASH) { 216 if (*(buf_ptr + 1) == '\n') { 217 buf_ptr += 2; 218 if (buf_ptr >= buf_end) 219 fill_buffer(); 220 } else 221 break; 222 } 223 CHECK_SIZE_TOKEN; 224 /* copy it over */ 225 *e_token++ = *buf_ptr++; 226 if (buf_ptr >= buf_end) 227 fill_buffer(); 228 } 229 *e_token++ = '\0'; 230 while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ 231 if (++buf_ptr >= buf_end) 232 fill_buffer(); 233 } 234 ps.its_a_keyword = false; 235 ps.sizeof_keyword = false; 236 if (l_struct) { /* if last token was 'struct', then this token 237 * should be treated as a declaration */ 238 l_struct = false; 239 last_code = ident; 240 ps.last_u_d = true; 241 return (decl); 242 } 243 ps.last_u_d = false; /* Operator after indentifier is binary */ 244 last_code = ident; /* Remember that this is the code we will 245 * return */ 246 247 /* 248 * This loop will check if the token is a keyword. 249 */ 250 for (p = specials; (j = p->rwd) != 0; p++) { 251 char *p = s_token; /* point at scanned token */ 252 if (*j++ != *p++ || *j++ != *p++) 253 continue; /* This test depends on the fact that 254 * identifiers are always at least 1 character 255 * long (ie. the first two bytes of the 256 * identifier are always meaningful) */ 257 if (p[-1] == 0) 258 break; /* If its a one-character identifier */ 259 while (*p++ == *j) 260 if (*j++ == 0) 261 goto found_keyword; /* I wish that C had a multi-level 262 * break... */ 263 } 264 if (p->rwd) { /* we have a keyword */ 265 found_keyword: 266 ps.its_a_keyword = true; 267 ps.last_u_d = true; 268 switch (p->rwcode) { 269 case 1: /* it is a switch */ 270 return (swstmt); 271 case 2: /* a case or default */ 272 return (casestmt); 273 274 case 3: /* a "struct" */ 275 if (ps.p_l_follow) 276 break; /* inside parens: cast */ 277 /* 278 * Next time around, we may want to know that we have had a 279 * 'struct' 280 */ 281 l_struct = true; 282 283 /* 284 * Fall through to test for a cast, function prototype or 285 * sizeof(). 286 */ 287 case 4: /* one of the declaration keywords */ 288 if (ps.p_l_follow) { 289 ps.cast_mask |= 1 << ps.p_l_follow; 290 291 /* 292 * Forget that we saw `struct' if we're in a sizeof(). 293 */ 294 if (ps.sizeof_mask) 295 l_struct = false; 296 297 break; /* inside parens: cast, prototype or sizeof() */ 298 } 299 last_code = decl; 300 return (decl); 301 302 case 5: /* if, while, for */ 303 return (sp_paren); 304 305 case 6: /* do, else */ 306 return (sp_nparen); 307 308 case 7: 309 ps.sizeof_keyword = true; 310 default: /* all others are treated like any other 311 * identifier */ 312 return (ident); 313 } /* end of switch */ 314 } /* end of if (found_it) */ 315 if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0) { 316 char *tp = buf_ptr; 317 while (tp < buf_end) 318 if (*tp++ == ')' && (*tp == ';' || *tp == ',')) 319 goto not_proc; 320 strncpy(ps.procname, token, sizeof ps.procname - 1); 321 ps.in_parameter_declaration = 1; 322 rparen_count = 1; 323 not_proc:; 324 } 325 /* 326 * The following hack attempts to guess whether or not the current 327 * token is in fact a declaration keyword -- one that has been 328 * typedefd 329 */ 330 if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr) || *buf_ptr == '_') 331 && !ps.p_l_follow 332 && !ps.block_init 333 && (ps.last_token == rparen || ps.last_token == semicolon || 334 ps.last_token == decl || 335 ps.last_token == lbrace || ps.last_token == rbrace)) { 336 ps.its_a_keyword = true; 337 ps.last_u_d = true; 338 last_code = decl; 339 return decl; 340 } 341 if (last_code == decl) /* if this is a declared variable, then 342 * following sign is unary */ 343 ps.last_u_d = true; /* will make "int a -1" work */ 344 last_code = ident; 345 return (ident); /* the ident is not in the list */ 346 } /* end of procesing for alpanum character */ 347 348 /* Scan a non-alphanumeric token */ 349 350 *e_token++ = *buf_ptr; /* if it is only a one-character token, it is 351 * moved here */ 352 *e_token = '\0'; 353 if (++buf_ptr >= buf_end) 354 fill_buffer(); 355 356 switch (*token) { 357 case '\n': 358 unary_delim = ps.last_u_d; 359 ps.last_nl = true; /* remember that we just had a newline */ 360 code = (had_eof ? 0 : newline); 361 362 /* 363 * if data has been exausted, the newline is a dummy, and we should 364 * return code to stop 365 */ 366 break; 367 368 case '\'': /* start of quoted character */ 369 case '"': /* start of string */ 370 qchar = *token; 371 if (troff) { 372 e_token[-1] = '`'; 373 if (qchar == '"') 374 *e_token++ = '`'; 375 e_token = chfont(&bodyf, &stringf, e_token); 376 } 377 do { /* copy the string */ 378 while (1) { /* move one character or [/<char>]<char> */ 379 if (*buf_ptr == '\n') { 380 printf("%d: Unterminated literal\n", line_no); 381 goto stop_lit; 382 } 383 CHECK_SIZE_TOKEN; /* Only have to do this once in this loop, 384 * since CHECK_SIZE guarantees that there 385 * are at least 5 entries left */ 386 *e_token = *buf_ptr++; 387 if (buf_ptr >= buf_end) 388 fill_buffer(); 389 if (*e_token == BACKSLASH) { /* if escape, copy extra char */ 390 if (*buf_ptr == '\n') /* check for escaped newline */ 391 ++line_no; 392 if (troff) { 393 *++e_token = BACKSLASH; 394 if (*buf_ptr == BACKSLASH) 395 *++e_token = BACKSLASH; 396 } 397 *++e_token = *buf_ptr++; 398 ++e_token; /* we must increment this again because we 399 * copied two chars */ 400 if (buf_ptr >= buf_end) 401 fill_buffer(); 402 } 403 else 404 break; /* we copied one character */ 405 } /* end of while (1) */ 406 } while (*e_token++ != qchar); 407 if (troff) { 408 e_token = chfont(&stringf, &bodyf, e_token - 1); 409 if (qchar == '"') 410 *e_token++ = '\''; 411 } 412 stop_lit: 413 code = ident; 414 break; 415 416 case ('('): 417 case ('['): 418 unary_delim = true; 419 code = lparen; 420 break; 421 422 case (')'): 423 case (']'): 424 code = rparen; 425 break; 426 427 case '#': 428 unary_delim = ps.last_u_d; 429 code = preesc; 430 break; 431 432 case '?': 433 unary_delim = true; 434 code = question; 435 break; 436 437 case (':'): 438 code = colon; 439 unary_delim = true; 440 break; 441 442 case (';'): 443 unary_delim = true; 444 code = semicolon; 445 break; 446 447 case ('{'): 448 unary_delim = true; 449 450 /* 451 * if (ps.in_or_st) ps.block_init = 1; 452 */ 453 /* ? code = ps.block_init ? lparen : lbrace; */ 454 code = lbrace; 455 break; 456 457 case ('}'): 458 unary_delim = true; 459 /* ? code = ps.block_init ? rparen : rbrace; */ 460 code = rbrace; 461 break; 462 463 case 014: /* a form feed */ 464 unary_delim = ps.last_u_d; 465 ps.last_nl = true; /* remember this so we can set 'ps.col_1' 466 * right */ 467 code = form_feed; 468 break; 469 470 case (','): 471 unary_delim = true; 472 code = comma; 473 break; 474 475 case '.': 476 unary_delim = false; 477 code = period; 478 break; 479 480 case '-': 481 case '+': /* check for -, +, --, ++ */ 482 code = (ps.last_u_d ? unary_op : binary_op); 483 unary_delim = true; 484 485 if (*buf_ptr == token[0]) { 486 /* check for doubled character */ 487 *e_token++ = *buf_ptr++; 488 /* buffer overflow will be checked at end of loop */ 489 if (last_code == ident || last_code == rparen) { 490 code = (ps.last_u_d ? unary_op : postop); 491 /* check for following ++ or -- */ 492 unary_delim = false; 493 } 494 } 495 else if (*buf_ptr == '=') 496 /* check for operator += */ 497 *e_token++ = *buf_ptr++; 498 else if (*buf_ptr == '>') { 499 /* check for operator -> */ 500 *e_token++ = *buf_ptr++; 501 if (!pointer_as_binop) { 502 unary_delim = false; 503 code = unary_op; 504 ps.want_blank = false; 505 } 506 } 507 break; /* buffer overflow will be checked at end of 508 * switch */ 509 510 case '=': 511 if (ps.in_or_st) 512 ps.block_init = 1; 513 #ifdef undef 514 if (chartype[*buf_ptr] == opchar) { /* we have two char assignment */ 515 e_token[-1] = *buf_ptr++; 516 if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) 517 *e_token++ = *buf_ptr++; 518 *e_token++ = '='; /* Flip =+ to += */ 519 *e_token = 0; 520 } 521 #else 522 if (*buf_ptr == '=') {/* == */ 523 *e_token++ = '='; /* Flip =+ to += */ 524 buf_ptr++; 525 *e_token = 0; 526 } 527 #endif 528 code = binary_op; 529 unary_delim = true; 530 break; 531 /* can drop thru!!! */ 532 533 case '>': 534 case '<': 535 case '!': /* ops like <, <<, <=, !=, etc */ 536 if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { 537 *e_token++ = *buf_ptr; 538 if (++buf_ptr >= buf_end) 539 fill_buffer(); 540 } 541 if (*buf_ptr == '=') 542 *e_token++ = *buf_ptr++; 543 code = (ps.last_u_d ? unary_op : binary_op); 544 unary_delim = true; 545 break; 546 547 default: 548 if (token[0] == '/' && *buf_ptr == '*') { 549 /* it is start of comment */ 550 *e_token++ = '*'; 551 552 if (++buf_ptr >= buf_end) 553 fill_buffer(); 554 555 code = comment; 556 unary_delim = ps.last_u_d; 557 break; 558 } 559 while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { 560 /* 561 * handle ||, &&, etc, and also things as in int *****i 562 */ 563 *e_token++ = *buf_ptr; 564 if (++buf_ptr >= buf_end) 565 fill_buffer(); 566 } 567 code = (ps.last_u_d ? unary_op : binary_op); 568 unary_delim = true; 569 570 571 } /* end of switch */ 572 if (code != newline) { 573 l_struct = false; 574 last_code = code; 575 } 576 if (buf_ptr >= buf_end) /* check for input buffer empty */ 577 fill_buffer(); 578 ps.last_u_d = unary_delim; 579 *e_token = '\0'; /* null terminate the token */ 580 return (code); 581 } 582 583 /* 584 * Add the given keyword to the keyword table, using val as the keyword type 585 */ 586 void 587 addkey(char *key, int val) 588 { 589 struct templ *p = specials; 590 while (p->rwd) 591 if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0) 592 return; 593 else 594 p++; 595 if (p >= specials + sizeof specials / sizeof specials[0]) 596 return; /* For now, table overflows are silently 597 * ignored */ 598 p->rwd = key; 599 p->rwcode = val; 600 p[1].rwd = 0; 601 p[1].rwcode = 0; 602 } 603