1 /* 2 * Copyright (c) 1982 Regents of the University of California 3 */ 4 #ifndef lint 5 static char sccsid[] = "@(#)asscan2.c 4.4 04/16/82"; 6 #endif not lint 7 8 #include "asscanl.h" 9 static inttoktype oval = NL; 10 11 #define NINBUFFERS 2 12 #define INBUFLG NINBUFFERS*BUFSIZ + 2 13 /* 14 * We have two input buffers; the first one is reserved 15 * for catching the tail of a line split across a buffer 16 * boundary; the other one are used for snarfing a buffer 17 * worth of .s source. 18 */ 19 static char inbuffer[INBUFLG]; 20 static char *InBufPtr = 0; 21 22 /* 23 * fill the inbuffer from the standard input. 24 * Assert: there are always n COMPLETE! lines in the buffer area. 25 * Assert: there is always a \n terminating the last line 26 * in the buffer area. 27 * Assert: after the \n, there is an EOFCHAR (hard end of file) 28 * or a NEEDCHAR (end of buffer) 29 * Assert: fgets always null pads the string it reads. 30 * Assert: no ungetc's are done at the end of a line or at the 31 * beginning of a line. 32 * 33 * We read a complete buffer of characters in one single read. 34 * We then back scan within this buffer to find the end of the 35 * last complete line, and force the assertions, and save a pointer 36 * to the incomplete line. 37 * The next call to fillinbuffer will move the unread characters 38 * to the end of the first buffer, and then read another two buffers, 39 * completing the cycle. 40 */ 41 42 static char p_swapped = '\0'; 43 static char *p_start = &inbuffer[NINBUFFERS * BUFSIZ]; 44 static char *p_stop = &inbuffer[NINBUFFERS * BUFSIZ]; 45 46 char *fillinbuffer() 47 { 48 register char *to; 49 register char *from; 50 char *inbufptr; 51 int nread; 52 static int hadeof; 53 int goal; 54 int got; 55 56 *p_start = p_swapped; 57 inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start); 58 59 for (to = inbufptr, from = p_start; from < p_stop;) 60 *to++ = *from++; 61 /* 62 * Now, go read two full buffers (hopefully) 63 */ 64 if (hadeof){ 65 hadeof = 0; 66 return (0); 67 } 68 goal = (NINBUFFERS - 1)*BUFSIZ; 69 nread = 0; 70 do { 71 got = read(stdin->_file, &inbuffer[1*BUFSIZ + nread], goal); 72 if (got == 0) 73 hadeof = 1; 74 if (got <= 0) 75 break; 76 nread += got; 77 goal -= got; 78 } while (goal); 79 80 if (nread == 0) 81 return(0); 82 p_stop = from = &inbuffer[1*BUFSIZ + nread]; 83 *from = '\0'; 84 85 while (*--from != '\n'){ 86 /* 87 * back over the partial line 88 */ 89 if (from == &inbuffer[1*BUFSIZ]) { 90 from = p_stop; 91 *p_stop++ = '\n'; 92 break; 93 } else { 94 continue; 95 } 96 } 97 98 from++; /* first char of partial line */ 99 p_start = from; 100 p_swapped = *p_start; 101 *p_start = NEEDCHAR; /* force assertion */ 102 return(inbufptr); 103 } 104 105 scan_dot_s(bufferbox) 106 struct tokbufdesc *bufferbox; 107 { 108 reg int ryylval; /* local copy of lexical value */ 109 extern int yylval; /* global copy of lexical value */ 110 reg int val; /* the value returned */ 111 int i; /* simple counter */ 112 reg char *rcp; 113 char *cp; /* can have address taken */ 114 reg int ch; /* treated as a character */ 115 int ch1; /* shadow value */ 116 reg char *inbufptr; 117 struct symtab *op; 118 119 reg ptrall bufptr; /* where to stuff tokens */ 120 ptrall lgbackpatch; /* where to stuff a string length */ 121 ptrall bufub; /* where not to stuff tokens */ 122 int maxstrlg; /* how long a string can be */ 123 long intval; /* value of int */ 124 int linescrossed; /* when doing strings and comments */ 125 struct Opcode opstruct; 126 127 (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 128 (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 129 130 inbufptr = InBufPtr; 131 if (inbufptr == 0){ 132 inbufptr = fillinbuffer(); 133 if (inbufptr == 0){ /*end of file*/ 134 endoffile: 135 inbufptr = 0; 136 ptoken(bufptr, PARSEEOF); 137 goto done; 138 } 139 } 140 141 if (newfflag){ 142 ptoken(bufptr, IFILE); 143 ptoken(bufptr, STRING); 144 val = strlen(newfname) + 1; 145 movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val); 146 bstrlg(bufptr, val); 147 148 ptoken(bufptr, ILINENO); 149 ptoken(bufptr, INT); 150 pint(bufptr, 1); 151 newfflag = 0; 152 } 153 154 while (bufptr < bufub){ 155 loop: 156 switch(ryylval = (type+2)[ch = getchar()]) { 157 case SCANEOF: 158 inbufptr = 0; 159 goto endoffile; 160 161 case NEEDSBUF: 162 inbufptr = fillinbuffer(); 163 if (inbufptr == 0) 164 goto endoffile; 165 goto loop; 166 167 case DIV: /*process C style comments*/ 168 if ( (ch = getchar()) == '*') { /*comment prelude*/ 169 int incomment; 170 linescrossed = 0; 171 incomment = 1; 172 ch = getchar(); /*skip over the * */ 173 while(incomment){ 174 switch(ch){ 175 case '*': 176 ch = getchar(); 177 incomment = (ch != '/'); 178 break; 179 case '\n': 180 scanlineno++; 181 linescrossed++; 182 ch = getchar(); 183 break; 184 case EOFCHAR: 185 goto endoffile; 186 case NEEDCHAR: 187 inbufptr = fillinbuffer(); 188 if (inbufptr == 0) 189 goto endoffile; 190 lineno++; 191 ch = getchar(); 192 break; 193 default: 194 ch = getchar(); 195 break; 196 } 197 } 198 val = ILINESKIP; 199 ryylval = linescrossed; 200 goto ret; 201 } else { /*just an ordinary DIV*/ 202 ungetc(ch); 203 val = ryylval = DIV; 204 goto ret; 205 } 206 case SH: 207 if (oval == NL){ 208 /* 209 * Attempt to recognize a C preprocessor 210 * style comment '^#[ \t]*[0-9]*[ \t]*".*" 211 */ 212 ch = getchar(); /*bump the #*/ 213 while (INCHARSET(ch, SPACE)) 214 ch = getchar();/*bump white */ 215 if (INCHARSET(ch, DIGIT)){ 216 intval = 0; 217 while(INCHARSET(ch, DIGIT)){ 218 intval = intval*10 + ch - '0'; 219 ch = getchar(); 220 } 221 while (INCHARSET(ch, SPACE)) 222 ch = getchar(); 223 if (ch == '"'){ 224 ptoken(bufptr, ILINENO); 225 ptoken(bufptr, INT); 226 pint(bufptr, intval - 1); 227 ptoken(bufptr, IFILE); 228 /* 229 * The '"' has already been 230 * munched 231 * 232 * eatstr will not eat 233 * the trailing \n, so 234 * it is given to the parser 235 * and counted. 236 */ 237 goto eatstr; 238 } 239 } 240 } 241 /* 242 * Well, its just an ordinary decadent comment 243 */ 244 while ((ch != '\n') && (ch != EOFCHAR)) 245 ch = getchar(); 246 if (ch == EOFCHAR) 247 goto endoffile; 248 val = ryylval = oval = NL; 249 scanlineno++; 250 goto ret; 251 252 case NL: 253 scanlineno++; 254 val = ryylval; 255 goto ret; 256 257 case SP: 258 oval = SP; /*invalidate ^# meta comments*/ 259 goto loop; 260 261 case REGOP: /* % , could be used as modulo, or register*/ 262 ch = getchar(); 263 if (INCHARSET(ch, DIGIT)){ 264 ryylval = ch-'0'; 265 if (ch=='1') { 266 if (INCHARSET( (ch = getchar()), REGDIGIT)) 267 ryylval = 10+ch-'0'; 268 else 269 ungetc(ch); 270 } 271 /* 272 * God only knows what the original author 273 * wanted this undocumented feature to 274 * do. 275 * %5++ is really r7 276 */ 277 while(INCHARSET( (ch = getchar()), SIGN)) { 278 if (ch=='+') 279 ryylval++; 280 else 281 ryylval--; 282 } 283 ungetc(ch); 284 val = REG; 285 } else { 286 ungetc(ch); 287 val = REGOP; 288 } 289 goto ret; 290 291 case ALPH: 292 ch1 = ch; 293 if (INCHARSET(ch, SZSPECBEGIN)){ 294 if( (ch = getchar()) == '`' || ch == '^'){ 295 ch1 |= 0100; /*convert to lower*/ 296 switch(ch1){ 297 case 'b': ryylval = 1; break; 298 case 'w': ryylval = 2; break; 299 case 'l': ryylval = 4; break; 300 default: ryylval = d124; break; 301 } 302 val = SIZESPEC; 303 goto ret; 304 } else { 305 ungetc(ch); 306 ch = ch1; /*restore first character*/ 307 } 308 } 309 rcp = yytext; 310 do { 311 if (rcp < &yytext[NCPS]) 312 *rcp++ = ch; 313 } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 314 *rcp = '\0'; 315 while (INCHARSET(ch, SPACE)) 316 ch = getchar(); 317 ungetc(ch); 318 319 switch((op = *lookup(1))->s_tag){ 320 case 0: 321 case LABELID: 322 /* 323 * Its a name... (Labels are subsets ofname) 324 */ 325 ryylval = (int)op; 326 val = NAME; 327 break; 328 case INST0: 329 case INSTn: 330 case IJXXX: 331 opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 332 opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 333 val = op->s_tag; 334 break; 335 default: 336 ryylval = ( (struct instab *)op)->i_popcode; 337 val = op->s_tag; 338 break; 339 } 340 goto ret; 341 342 case DIG: 343 /* 344 * Implement call by reference on a reg variable 345 */ 346 cp = inbufptr; 347 val = number(ch, &cp); 348 /* 349 * yylval or yybignum has been stuffed as a side 350 * effect to number(); get the global yylval 351 * into our fast local copy in case it was an INT. 352 */ 353 ryylval = yylval; 354 inbufptr = cp; 355 goto ret; 356 357 case LSH: 358 case RSH: 359 /* 360 * We allow the C style operators 361 * << and >>, as well as < and > 362 */ 363 if ( (ch1 = getchar()) != ch) 364 ungetc(ch1); 365 val = ryylval; 366 goto ret; 367 368 case MINUS: 369 if ( (ch = getchar()) =='(') 370 ryylval=val=MP; 371 else { 372 ungetc(ch); 373 val=MINUS; 374 } 375 goto ret; 376 377 case SQ: 378 if ((ryylval = getchar()) == '\n') 379 scanlineno++; /*not entirely correct*/ 380 val = INT; 381 goto ret; 382 383 case DQ: 384 eatstr: 385 linescrossed = 0; 386 maxstrlg = (char *)bufub - (char *)bufptr; 387 388 if (maxstrlg < MAXSTRLG) { 389 ungetc('"'); 390 *(bytetoktype *)bufptr = VOID ; 391 bufub = bufptr; 392 goto done; 393 } 394 if (maxstrlg > MAXSTRLG) 395 maxstrlg = MAXSTRLG; 396 397 ptoken(bufptr, STRING); 398 lgbackpatch = bufptr; /*this is where the size goes*/ 399 bufptr += sizeof(lgtype); 400 /* 401 * bufptr is now set to 402 * be stuffed with characters from 403 * the input 404 */ 405 406 while ( (maxstrlg > 0) 407 && !(INCHARSET( (ch = getchar()), STRESCAPE)) 408 ){ 409 stuff: 410 maxstrlg -= 1; 411 pchar(bufptr, ch); 412 } 413 if (maxstrlg <= 0){ /*enough characters to fill a string buffer*/ 414 ungetc('"'); /*will read it next*/ 415 } 416 else if (ch == '"') 417 /*VOID*/ ; /*done*/ 418 else if (ch == '\n'){ 419 yywarning("New line embedded in a string constant."); 420 scanlineno++; 421 linescrossed++; 422 ch = getchar(); 423 if (ch == EOFCHAR){ 424 do_eof: 425 pchar(bufptr, '\n'); 426 ungetc(EOFCHAR); 427 } else 428 if (ch == NEEDCHAR){ 429 if ( (inbufptr = fillinbuffer()) == 0) 430 goto do_eof; 431 ch = '\n'; 432 goto stuff; 433 } else { /* simple case */ 434 ungetc(ch); 435 ch = '\n'; 436 goto stuff; 437 } 438 } else { 439 ch = getchar(); /*skip the '\\'*/ 440 if ( INCHARSET(ch, BSESCAPE)){ 441 switch (ch){ 442 case 'b': ch = '\b'; goto stuff; 443 case 'f': ch = '\f'; goto stuff; 444 case 'n': ch = '\n'; goto stuff; 445 case 'r': ch = '\r'; goto stuff; 446 case 't': ch = '\t'; goto stuff; 447 } 448 } 449 if ( !(INCHARSET(ch,OCTDIGIT)) ) goto stuff; 450 i = 0; 451 intval = 0; 452 while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 453 i++;intval <<= 3;intval += ch - '0'; 454 ch = getchar(); 455 } 456 ungetc(ch); 457 ch = (char)intval; 458 goto stuff; 459 } 460 /* 461 * bufptr now points at the next free slot 462 */ 463 bstrfromto(lgbackpatch, bufptr); 464 if (linescrossed){ 465 val = ILINESKIP; 466 ryylval = linescrossed; 467 goto ret; 468 } else 469 goto builtval; 470 471 case BADCHAR: 472 linescrossed = lineno; 473 lineno = scanlineno; 474 yyerror("Illegal character mapped: %d, char read:(octal) %o", 475 ryylval, ch); 476 lineno = linescrossed; 477 val = BADCHAR; 478 goto ret; 479 480 default: 481 val = ryylval; 482 goto ret; 483 } /*end of the switch*/ 484 /* 485 * here with one token, so stuff it 486 */ 487 ret: 488 oval = val; 489 ptoken(bufptr, val); 490 switch(val){ 491 case ILINESKIP: 492 pint(bufptr, ryylval); 493 break; 494 case SIZESPEC: 495 pchar(bufptr, ryylval); 496 break; 497 case BFINT: plong(bufptr, ryylval); 498 break; 499 case INT: plong(bufptr, ryylval); 500 break; 501 case BIGNUM: pnumber(bufptr, yybignum); 502 break; 503 case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 504 break; 505 case REG: pchar(bufptr, ryylval); 506 break; 507 case INST0: 508 case INSTn: 509 popcode(bufptr, opstruct); 510 break; 511 case IJXXX: 512 popcode(bufptr, opstruct); 513 pptr(bufptr, (int)(struct symtab *)symalloc()); 514 break; 515 case ISTAB: 516 case ISTABSTR: 517 case ISTABNONE: 518 case ISTABDOT: 519 case IALIGN: 520 pptr(bufptr, (int)(struct symtab *)symalloc()); 521 break; 522 /* 523 * default: 524 */ 525 } 526 builtval: ; 527 } /*end of the while to stuff the buffer*/ 528 done: 529 bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 530 531 /* 532 * This is a real kludge: 533 * 534 * We put the last token in the buffer to be a MINUS 535 * symbol. This last token will never be picked up 536 * in the normal way, but can be looked at during 537 * a peekahead look that the short circuit expression 538 * evaluator uses to see if an expression is complicated. 539 * 540 * Consider the following situation: 541 * 542 * .word 45 + 47 543 * buffer 1 | buffer 0 544 * the peekahead would want to look across the buffer, 545 * but will look in the buffer end zone, see the minus, and 546 * fail. 547 */ 548 ptoken(bufptr, MINUS); 549 InBufPtr = inbufptr; /*copy this back*/ 550 } 551