1 /* 2 * Copyright (c) 1982 Regents of the University of California 3 */ 4 #ifndef lint 5 static char sccsid[] = "@(#)asscan2.c 4.6 12/15/82"; 6 #endif not lint 7 8 #include "asscanl.h" 9 static inttoktype oval = NL; 10 11 #ifdef BUFSIZ 12 #undef BUFSIZ 13 #endif 14 15 #define BUFSIZ 4096 16 17 #define NINBUFFERS 2 18 #define INBUFLG NINBUFFERS*BUFSIZ + 2 19 /* 20 * We have two input buffers; the first one is reserved 21 * for catching the tail of a line split across a buffer 22 * boundary; the other one are used for snarfing a buffer 23 * worth of .s source. 24 */ 25 static char inbuffer[INBUFLG]; 26 static char *InBufPtr = 0; 27 28 /* 29 * fill the inbuffer from the standard input. 30 * Assert: there are always n COMPLETE! lines in the buffer area. 31 * Assert: there is always a \n terminating the last line 32 * in the buffer area. 33 * Assert: after the \n, there is an EOFCHAR (hard end of file) 34 * or a NEEDCHAR (end of buffer) 35 * Assert: fgets always null pads the string it reads. 36 * Assert: no ungetc's are done at the end of a line or at the 37 * beginning of a line. 38 * 39 * We read a complete buffer of characters in one single read. 40 * We then back scan within this buffer to find the end of the 41 * last complete line, and force the assertions, and save a pointer 42 * to the incomplete line. 43 * The next call to fillinbuffer will move the unread characters 44 * to the end of the first buffer, and then read another two buffers, 45 * completing the cycle. 46 */ 47 48 static char p_swapped = '\0'; 49 static char *p_start = &inbuffer[NINBUFFERS * BUFSIZ]; 50 static char *p_stop = &inbuffer[NINBUFFERS * BUFSIZ]; 51 52 char *fillinbuffer() 53 { 54 register char *to; 55 register char *from; 56 char *inbufptr; 57 int nread; 58 static int hadeof; 59 int goal; 60 int got; 61 62 *p_start = p_swapped; 63 inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start); 64 65 for (to = inbufptr, from = p_start; from < p_stop;) 66 *to++ = *from++; 67 /* 68 * Now, go read two full buffers (hopefully) 69 */ 70 if (hadeof){ 71 hadeof = 0; 72 return (0); 73 } 74 goal = (NINBUFFERS - 1)*BUFSIZ; 75 nread = 0; 76 do { 77 got = read(stdin->_file, &inbuffer[1*BUFSIZ + nread], goal); 78 if (got == 0) 79 hadeof = 1; 80 if (got <= 0) 81 break; 82 nread += got; 83 goal -= got; 84 } while (goal); 85 86 if (nread == 0) 87 return(0); 88 p_stop = from = &inbuffer[1*BUFSIZ + nread]; 89 *from = '\0'; 90 91 while (*--from != '\n'){ 92 /* 93 * back over the partial line 94 */ 95 if (from == &inbuffer[1*BUFSIZ]) { 96 from = p_stop; 97 *p_stop++ = '\n'; 98 break; 99 } else { 100 continue; 101 } 102 } 103 104 from++; /* first char of partial line */ 105 p_start = from; 106 p_swapped = *p_start; 107 *p_start = NEEDCHAR; /* force assertion */ 108 return(inbufptr); 109 } 110 111 scan_dot_s(bufferbox) 112 struct tokbufdesc *bufferbox; 113 { 114 reg int ryylval; /* local copy of lexical value */ 115 extern int yylval; /* global copy of lexical value */ 116 reg int val; /* the value returned */ 117 int i; /* simple counter */ 118 reg char *rcp; 119 char *cp; /* can have address taken */ 120 reg int ch; /* treated as a character */ 121 int ch1; /* shadow value */ 122 reg char *inbufptr; 123 struct symtab *op; 124 125 reg ptrall bufptr; /* where to stuff tokens */ 126 ptrall lgbackpatch; /* where to stuff a string length */ 127 ptrall bufub; /* where not to stuff tokens */ 128 int maxstrlg; /* how long a string can be */ 129 long intval; /* value of int */ 130 int linescrossed; /* when doing strings and comments */ 131 struct Opcode opstruct; 132 133 (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 134 (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 135 136 inbufptr = InBufPtr; 137 if (inbufptr == 0){ 138 inbufptr = fillinbuffer(); 139 if (inbufptr == 0){ /*end of file*/ 140 endoffile: 141 inbufptr = 0; 142 ptoken(bufptr, PARSEEOF); 143 goto done; 144 } 145 } 146 147 if (newfflag){ 148 ptoken(bufptr, IFILE); 149 ptoken(bufptr, STRING); 150 val = strlen(newfname) + 1; 151 movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val); 152 bstrlg(bufptr, val); 153 154 ptoken(bufptr, ILINENO); 155 ptoken(bufptr, INT); 156 pint(bufptr, 1); 157 newfflag = 0; 158 } 159 160 while (bufptr < bufub){ 161 loop: 162 switch(ryylval = (type+2)[ch = getchar()]) { 163 case SCANEOF: 164 inbufptr = 0; 165 goto endoffile; 166 167 case NEEDSBUF: 168 inbufptr = fillinbuffer(); 169 if (inbufptr == 0) 170 goto endoffile; 171 goto loop; 172 173 case DIV: /*process C style comments*/ 174 if ( (ch = getchar()) == '*') { /*comment prelude*/ 175 int incomment; 176 linescrossed = 0; 177 incomment = 1; 178 ch = getchar(); /*skip over the * */ 179 while(incomment){ 180 switch(ch){ 181 case '*': 182 ch = getchar(); 183 incomment = (ch != '/'); 184 break; 185 case '\n': 186 scanlineno++; 187 linescrossed++; 188 ch = getchar(); 189 break; 190 case EOFCHAR: 191 goto endoffile; 192 case NEEDCHAR: 193 inbufptr = fillinbuffer(); 194 if (inbufptr == 0) 195 goto endoffile; 196 lineno++; 197 ch = getchar(); 198 break; 199 default: 200 ch = getchar(); 201 break; 202 } 203 } 204 val = ILINESKIP; 205 ryylval = linescrossed; 206 goto ret; 207 } else { /*just an ordinary DIV*/ 208 ungetc(ch); 209 val = ryylval = DIV; 210 goto ret; 211 } 212 case SH: 213 if (oval == NL){ 214 /* 215 * Attempt to recognize a C preprocessor 216 * style comment '^#[ \t]*[0-9]*[ \t]*".*" 217 */ 218 ch = getchar(); /*bump the #*/ 219 while (INCHARSET(ch, SPACE)) 220 ch = getchar();/*bump white */ 221 if (INCHARSET(ch, DIGIT)){ 222 intval = 0; 223 while(INCHARSET(ch, DIGIT)){ 224 intval = intval*10 + ch - '0'; 225 ch = getchar(); 226 } 227 while (INCHARSET(ch, SPACE)) 228 ch = getchar(); 229 if (ch == '"'){ 230 ptoken(bufptr, ILINENO); 231 ptoken(bufptr, INT); 232 pint(bufptr, intval - 1); 233 ptoken(bufptr, IFILE); 234 /* 235 * The '"' has already been 236 * munched 237 * 238 * eatstr will not eat 239 * the trailing \n, so 240 * it is given to the parser 241 * and counted. 242 */ 243 goto eatstr; 244 } 245 } 246 } 247 /* 248 * Well, its just an ordinary decadent comment 249 */ 250 while ((ch != '\n') && (ch != EOFCHAR)) 251 ch = getchar(); 252 if (ch == EOFCHAR) 253 goto endoffile; 254 val = ryylval = oval = NL; 255 scanlineno++; 256 goto ret; 257 258 case NL: 259 scanlineno++; 260 val = ryylval; 261 goto ret; 262 263 case SP: 264 oval = SP; /*invalidate ^# meta comments*/ 265 goto loop; 266 267 case REGOP: /* % , could be used as modulo, or register*/ 268 ch = getchar(); 269 if (INCHARSET(ch, DIGIT)){ 270 ryylval = ch-'0'; 271 if (ch=='1') { 272 if (INCHARSET( (ch = getchar()), REGDIGIT)) 273 ryylval = 10+ch-'0'; 274 else 275 ungetc(ch); 276 } 277 /* 278 * God only knows what the original author 279 * wanted this undocumented feature to 280 * do. 281 * %5++ is really r7 282 */ 283 while(INCHARSET( (ch = getchar()), SIGN)) { 284 if (ch=='+') 285 ryylval++; 286 else 287 ryylval--; 288 } 289 ungetc(ch); 290 val = REG; 291 } else { 292 ungetc(ch); 293 val = REGOP; 294 } 295 goto ret; 296 297 case ALPH: 298 ch1 = ch; 299 if (INCHARSET(ch, SZSPECBEGIN)){ 300 if( (ch = getchar()) == '`' || ch == '^'){ 301 ch1 |= 0100; /*convert to lower*/ 302 switch(ch1){ 303 case 'b': ryylval = 1; break; 304 case 'w': ryylval = 2; break; 305 case 'l': ryylval = 4; break; 306 default: ryylval = d124; break; 307 } 308 val = SIZESPEC; 309 goto ret; 310 } else { 311 ungetc(ch); 312 ch = ch1; /*restore first character*/ 313 } 314 } 315 rcp = yytext; 316 do { 317 if (rcp < &yytext[NCPS]) 318 *rcp++ = ch; 319 } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 320 *rcp = '\0'; 321 while (INCHARSET(ch, SPACE)) 322 ch = getchar(); 323 ungetc(ch); 324 325 switch((op = *lookup(1))->s_tag){ 326 case 0: 327 case LABELID: 328 /* 329 * Its a name... (Labels are subsets ofname) 330 */ 331 ryylval = (int)op; 332 val = NAME; 333 break; 334 case INST0: 335 case INSTn: 336 case IJXXX: 337 opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 338 opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 339 val = op->s_tag; 340 break; 341 default: 342 ryylval = ( (struct instab *)op)->i_popcode; 343 val = op->s_tag; 344 break; 345 } 346 goto ret; 347 348 case DIG: 349 /* 350 * Implement call by reference on a reg variable 351 */ 352 cp = inbufptr; 353 val = number(ch, &cp); 354 /* 355 * yylval or yybignum has been stuffed as a side 356 * effect to number(); get the global yylval 357 * into our fast local copy in case it was an INT. 358 */ 359 ryylval = yylval; 360 inbufptr = cp; 361 goto ret; 362 363 case LSH: 364 case RSH: 365 /* 366 * We allow the C style operators 367 * << and >>, as well as < and > 368 */ 369 if ( (ch1 = getchar()) != ch) 370 ungetc(ch1); 371 val = ryylval; 372 goto ret; 373 374 case MINUS: 375 if ( (ch = getchar()) =='(') 376 ryylval=val=MP; 377 else { 378 ungetc(ch); 379 val=MINUS; 380 } 381 goto ret; 382 383 case SQ: 384 if ((ryylval = getchar()) == '\n') 385 scanlineno++; /*not entirely correct*/ 386 val = INT; 387 goto ret; 388 389 case DQ: 390 eatstr: 391 linescrossed = 0; 392 maxstrlg = (char *)bufub - (char *)bufptr; 393 394 if (maxstrlg < MAXSTRLG) { 395 ungetc('"'); 396 *(bytetoktype *)bufptr = VOID ; 397 bufub = bufptr; 398 goto done; 399 } 400 if (maxstrlg > MAXSTRLG) 401 maxstrlg = MAXSTRLG; 402 403 ptoken(bufptr, STRING); 404 lgbackpatch = bufptr; /*this is where the size goes*/ 405 bufptr += sizeof(lgtype); 406 /* 407 * bufptr is now set to 408 * be stuffed with characters from 409 * the input 410 */ 411 412 while ( (maxstrlg > 0) 413 && !(INCHARSET( (ch = getchar()), STRESCAPE)) 414 ){ 415 stuff: 416 maxstrlg -= 1; 417 pchar(bufptr, ch); 418 } 419 if (maxstrlg <= 0){ /*enough characters to fill a string buffer*/ 420 ungetc('"'); /*will read it next*/ 421 } 422 else if (ch == '"') 423 /*VOID*/ ; /*done*/ 424 else if (ch == '\n'){ 425 yywarning("New line embedded in a string constant."); 426 scanlineno++; 427 linescrossed++; 428 ch = getchar(); 429 if (ch == EOFCHAR){ 430 do_eof: 431 pchar(bufptr, '\n'); 432 ungetc(EOFCHAR); 433 } else 434 if (ch == NEEDCHAR){ 435 if ( (inbufptr = fillinbuffer()) == 0) 436 goto do_eof; 437 ch = '\n'; 438 goto stuff; 439 } else { /* simple case */ 440 ungetc(ch); 441 ch = '\n'; 442 goto stuff; 443 } 444 } else { 445 ch = getchar(); /*skip the '\\'*/ 446 if ( INCHARSET(ch, BSESCAPE)){ 447 switch (ch){ 448 case 'b': ch = '\b'; goto stuff; 449 case 'f': ch = '\f'; goto stuff; 450 case 'n': ch = '\n'; goto stuff; 451 case 'r': ch = '\r'; goto stuff; 452 case 't': ch = '\t'; goto stuff; 453 } 454 } 455 if ( !(INCHARSET(ch,OCTDIGIT)) ) goto stuff; 456 i = 0; 457 intval = 0; 458 while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 459 i++;intval <<= 3;intval += ch - '0'; 460 ch = getchar(); 461 } 462 ungetc(ch); 463 ch = (char)intval; 464 goto stuff; 465 } 466 /* 467 * bufptr now points at the next free slot 468 */ 469 bstrfromto(lgbackpatch, bufptr); 470 if (linescrossed){ 471 val = ILINESKIP; 472 ryylval = linescrossed; 473 goto ret; 474 } else 475 goto builtval; 476 477 case BADCHAR: 478 linescrossed = lineno; 479 lineno = scanlineno; 480 yyerror("Illegal character mapped: %d, char read:(octal) %o", 481 ryylval, ch); 482 lineno = linescrossed; 483 val = BADCHAR; 484 goto ret; 485 486 default: 487 val = ryylval; 488 goto ret; 489 } /*end of the switch*/ 490 /* 491 * here with one token, so stuff it 492 */ 493 ret: 494 oval = val; 495 ptoken(bufptr, val); 496 switch(val){ 497 case ILINESKIP: 498 pint(bufptr, ryylval); 499 break; 500 case SIZESPEC: 501 pchar(bufptr, ryylval); 502 break; 503 case BFINT: plong(bufptr, ryylval); 504 break; 505 case INT: plong(bufptr, ryylval); 506 break; 507 case BIGNUM: pnumber(bufptr, yybignum); 508 break; 509 case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 510 break; 511 case REG: pchar(bufptr, ryylval); 512 break; 513 case INST0: 514 case INSTn: 515 popcode(bufptr, opstruct); 516 break; 517 case IJXXX: 518 popcode(bufptr, opstruct); 519 pptr(bufptr, (int)(struct symtab *)symalloc()); 520 break; 521 case ISTAB: 522 case ISTABSTR: 523 case ISTABNONE: 524 case ISTABDOT: 525 case IALIGN: 526 pptr(bufptr, (int)(struct symtab *)symalloc()); 527 break; 528 /* 529 * default: 530 */ 531 } 532 builtval: ; 533 } /*end of the while to stuff the buffer*/ 534 done: 535 bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 536 537 /* 538 * This is a real kludge: 539 * 540 * We put the last token in the buffer to be a MINUS 541 * symbol. This last token will never be picked up 542 * in the normal way, but can be looked at during 543 * a peekahead look that the short circuit expression 544 * evaluator uses to see if an expression is complicated. 545 * 546 * Consider the following situation: 547 * 548 * .word 45 + 47 549 * buffer 1 | buffer 0 550 * the peekahead would want to look across the buffer, 551 * but will look in the buffer end zone, see the minus, and 552 * fail. 553 */ 554 ptoken(bufptr, MINUS); 555 InBufPtr = inbufptr; /*copy this back*/ 556 } 557