1 /* 2 * Copyright (c) 1982 Regents of the University of California 3 */ 4 #ifndef lint 5 static char sccsid[] = "@(#)asscan2.c 4.3 02/14/82"; 6 #endif not lint 7 8 #include "asscanl.h" 9 static inttoktype oval = NL; 10 11 #define NINBUFFERS 2 12 #define INBUFLG NINBUFFERS*BUFSIZ + 2 13 /* 14 * We have two input buffers; the first one is reserved 15 * for catching the tail of a line split across a buffer 16 * boundary; the other one are used for snarfing a buffer 17 * worth of .s source. 18 */ 19 static char inbuffer[INBUFLG]; 20 static char *InBufPtr = 0; 21 22 /* 23 * fill the inbuffer from the standard input. 24 * Assert: there are always n COMPLETE! lines in the buffer area. 25 * Assert: there is always a \n terminating the last line 26 * in the buffer area. 27 * Assert: after the \n, there is an EOFCHAR (hard end of file) 28 * or a NEEDCHAR (end of buffer) 29 * Assert: fgets always null pads the string it reads. 30 * Assert: no ungetc's are done at the end of a line or at the 31 * beginning of a line. 32 * 33 * We read a complete buffer of characters in one single read. 34 * We then back scan within this buffer to find the end of the 35 * last complete line, and force the assertions, and save a pointer 36 * to the incomplete line. 37 * The next call to fillinbuffer will move the unread characters 38 * to the end of the first buffer, and then read another two buffers, 39 * completing the cycle. 40 */ 41 42 static char p_swapped = '\0'; 43 static char *p_start = &inbuffer[NINBUFFERS * BUFSIZ]; 44 static char *p_stop = &inbuffer[NINBUFFERS * BUFSIZ]; 45 char *fillinbuffer() 46 { 47 register char *to; 48 register char *from; 49 char *inbufptr; 50 int nread; 51 52 *p_start = p_swapped; 53 inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start); 54 55 for (to = inbufptr, from = p_start; from < p_stop;) 56 *to++ = *from++; 57 /* 58 * Now, go read two full buffers (hopefully) 59 */ 60 nread = read(stdin->_file, &inbuffer[1*BUFSIZ], (NINBUFFERS - 1)*BUFSIZ); 61 if (nread == 0) 62 return(0); 63 p_stop = from = &inbuffer[1*BUFSIZ + nread]; 64 *from = '\0'; 65 while (*--from != '\n') /* back over the partial line */ 66 continue; 67 from++; /* first char of partial line */ 68 p_start = from; 69 p_swapped = *p_start; 70 *p_start = NEEDCHAR; /* force assertion */ 71 return(inbufptr); 72 } 73 74 scan_dot_s(bufferbox) 75 struct tokbufdesc *bufferbox; 76 { 77 reg int ryylval; /* local copy of lexical value */ 78 extern int yylval; /* global copy of lexical value */ 79 reg int val; /* the value returned */ 80 int i; /* simple counter */ 81 reg char *rcp; 82 char *cp; /* can have address taken */ 83 reg int ch; /* treated as a character */ 84 int ch1; /* shadow value */ 85 reg char *inbufptr; 86 struct symtab *op; 87 88 reg ptrall bufptr; /* where to stuff tokens */ 89 ptrall lgbackpatch; /* where to stuff a string length */ 90 ptrall bufub; /* where not to stuff tokens */ 91 int maxstrlg; /* how long a string can be */ 92 long intval; /* value of int */ 93 int linescrossed; /* when doing strings and comments */ 94 struct Opcode opstruct; 95 96 (bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]); 97 (bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]); 98 99 inbufptr = InBufPtr; 100 if (inbufptr == 0){ 101 inbufptr = fillinbuffer(); 102 if (inbufptr == 0){ /*end of file*/ 103 endoffile: 104 inbufptr = 0; 105 ptoken(bufptr, PARSEEOF); 106 goto done; 107 } 108 } 109 110 if (newfflag){ 111 ptoken(bufptr, IFILE); 112 ptoken(bufptr, STRING); 113 val = strlen(newfname) + 1; 114 movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val); 115 bstrlg(bufptr, val); 116 117 ptoken(bufptr, ILINENO); 118 ptoken(bufptr, INT); 119 pint(bufptr, 1); 120 newfflag = 0; 121 } 122 123 while (bufptr < bufub){ 124 loop: 125 switch(ryylval = (type+2)[ch = getchar()]) { 126 case SCANEOF: 127 inbufptr = 0; 128 goto endoffile; 129 130 case NEEDSBUF: 131 inbufptr = fillinbuffer(); 132 if (inbufptr == 0) 133 goto endoffile; 134 goto loop; 135 136 case DIV: /*process C style comments*/ 137 if ( (ch = getchar()) == '*') { /*comment prelude*/ 138 int incomment; 139 linescrossed = 0; 140 incomment = 1; 141 ch = getchar(); /*skip over the * */ 142 while(incomment){ 143 switch(ch){ 144 case '*': 145 ch = getchar(); 146 incomment = (ch != '/'); 147 break; 148 case '\n': 149 scanlineno++; 150 linescrossed++; 151 ch = getchar(); 152 break; 153 case EOFCHAR: 154 goto endoffile; 155 case NEEDCHAR: 156 inbufptr = fillinbuffer(); 157 if (inbufptr == 0) 158 goto endoffile; 159 lineno++; 160 ch = getchar(); 161 break; 162 default: 163 ch = getchar(); 164 break; 165 } 166 } 167 val = ILINESKIP; 168 ryylval = linescrossed; 169 goto ret; 170 } else { /*just an ordinary DIV*/ 171 ungetc(ch); 172 val = ryylval = DIV; 173 goto ret; 174 } 175 case SH: 176 if (oval == NL){ 177 /* 178 * Attempt to recognize a C preprocessor 179 * style comment '^#[ \t]*[0-9]*[ \t]*".*" 180 */ 181 ch = getchar(); /*bump the #*/ 182 while (INCHARSET(ch, SPACE)) 183 ch = getchar();/*bump white */ 184 if (INCHARSET(ch, DIGIT)){ 185 intval = 0; 186 while(INCHARSET(ch, DIGIT)){ 187 intval = intval*10 + ch - '0'; 188 ch = getchar(); 189 } 190 while (INCHARSET(ch, SPACE)) 191 ch = getchar(); 192 if (ch == '"'){ 193 ptoken(bufptr, ILINENO); 194 ptoken(bufptr, INT); 195 pint(bufptr, intval - 1); 196 ptoken(bufptr, IFILE); 197 /* 198 * The '"' has already been 199 * munched 200 * 201 * eatstr will not eat 202 * the trailing \n, so 203 * it is given to the parser 204 * and counted. 205 */ 206 goto eatstr; 207 } 208 } 209 } 210 /* 211 * Well, its just an ordinary decadent comment 212 */ 213 while ((ch != '\n') && (ch != EOFCHAR)) 214 ch = getchar(); 215 if (ch == EOFCHAR) 216 goto endoffile; 217 val = ryylval = oval = NL; 218 scanlineno++; 219 goto ret; 220 221 case NL: 222 scanlineno++; 223 val = ryylval; 224 goto ret; 225 226 case SP: 227 oval = SP; /*invalidate ^# meta comments*/ 228 goto loop; 229 230 case REGOP: /* % , could be used as modulo, or register*/ 231 ch = getchar(); 232 if (INCHARSET(ch, DIGIT)){ 233 ryylval = ch-'0'; 234 if (ch=='1') { 235 if (INCHARSET( (ch = getchar()), REGDIGIT)) 236 ryylval = 10+ch-'0'; 237 else 238 ungetc(ch); 239 } 240 /* 241 * God only knows what the original author 242 * wanted this undocumented feature to 243 * do. 244 * %5++ is really r7 245 */ 246 while(INCHARSET( (ch = getchar()), SIGN)) { 247 if (ch=='+') 248 ryylval++; 249 else 250 ryylval--; 251 } 252 ungetc(ch); 253 val = REG; 254 } else { 255 ungetc(ch); 256 val = REGOP; 257 } 258 goto ret; 259 260 case ALPH: 261 ch1 = ch; 262 if (INCHARSET(ch, SZSPECBEGIN)){ 263 if( (ch = getchar()) == '`' || ch == '^'){ 264 ch1 |= 0100; /*convert to lower*/ 265 switch(ch1){ 266 case 'b': ryylval = 1; break; 267 case 'w': ryylval = 2; break; 268 case 'l': ryylval = 4; break; 269 default: ryylval = d124; break; 270 } 271 val = SIZESPEC; 272 goto ret; 273 } else { 274 ungetc(ch); 275 ch = ch1; /*restore first character*/ 276 } 277 } 278 rcp = yytext; 279 do { 280 if (rcp < &yytext[NCPS]) 281 *rcp++ = ch; 282 } while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT)); 283 *rcp = '\0'; 284 while (INCHARSET(ch, SPACE)) 285 ch = getchar(); 286 ungetc(ch); 287 288 switch((op = *lookup(1))->s_tag){ 289 case 0: 290 case LABELID: 291 /* 292 * Its a name... (Labels are subsets ofname) 293 */ 294 ryylval = (int)op; 295 val = NAME; 296 break; 297 case INST0: 298 case INSTn: 299 case IJXXX: 300 opstruct.Op_popcode = ( (struct instab *)op)->i_popcode; 301 opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode; 302 val = op->s_tag; 303 break; 304 default: 305 ryylval = ( (struct instab *)op)->i_popcode; 306 val = op->s_tag; 307 break; 308 } 309 goto ret; 310 311 case DIG: 312 /* 313 * Implement call by reference on a reg variable 314 */ 315 cp = inbufptr; 316 val = number(ch, &cp); 317 /* 318 * yylval or yybignum has been stuffed as a side 319 * effect to number(); get the global yylval 320 * into our fast local copy in case it was an INT. 321 */ 322 ryylval = yylval; 323 inbufptr = cp; 324 goto ret; 325 326 case LSH: 327 case RSH: 328 /* 329 * We allow the C style operators 330 * << and >>, as well as < and > 331 */ 332 if ( (ch1 = getchar()) != ch) 333 ungetc(ch1); 334 val = ryylval; 335 goto ret; 336 337 case MINUS: 338 if ( (ch = getchar()) =='(') 339 ryylval=val=MP; 340 else { 341 ungetc(ch); 342 val=MINUS; 343 } 344 goto ret; 345 346 case SQ: 347 if ((ryylval = getchar()) == '\n') 348 scanlineno++; /*not entirely correct*/ 349 val = INT; 350 goto ret; 351 352 case DQ: 353 eatstr: 354 linescrossed = 0; 355 maxstrlg = (char *)bufub - (char *)bufptr; 356 357 if (maxstrlg < MAXSTRLG) { 358 ungetc('"'); 359 *(bytetoktype *)bufptr = VOID ; 360 bufub = bufptr; 361 goto done; 362 } 363 if (maxstrlg > MAXSTRLG) 364 maxstrlg = MAXSTRLG; 365 366 ptoken(bufptr, STRING); 367 lgbackpatch = bufptr; /*this is where the size goes*/ 368 bufptr += sizeof(lgtype); 369 /* 370 * bufptr is now set to 371 * be stuffed with characters from 372 * the input 373 */ 374 375 while ( (maxstrlg > 0) 376 && !(INCHARSET( (ch = getchar()), STRESCAPE)) 377 ){ 378 stuff: 379 maxstrlg-= 1; 380 pchar(bufptr, ch); 381 } 382 if (maxstrlg <= 0){ /*enough characters to fill a string buffer*/ 383 ungetc('"'); /*will read it next*/ 384 } 385 else if (ch == '"'); /*done*/ 386 else if (ch == '\n'){ 387 yywarning("New line embedded in a string constant."); 388 scanlineno++; 389 linescrossed++; 390 ch = getchar(); 391 if (ch == EOFCHAR){ 392 do_eof: 393 pchar(bufptr, '\n'); 394 ungetc(EOFCHAR); 395 } else 396 if (ch == NEEDCHAR){ 397 if ( (inbufptr = fillinbuffer()) == 0) 398 goto do_eof; 399 ch = '\n'; 400 goto stuff; 401 } else { /* simple case */ 402 ungetc(ch); 403 ch = '\n'; 404 goto stuff; 405 } 406 } else { 407 ch = getchar(); /*skip the '\\'*/ 408 if ( INCHARSET(ch, BSESCAPE)){ 409 switch (ch){ 410 case 'b': ch = '\b'; goto stuff; 411 case 'f': ch = '\f'; goto stuff; 412 case 'n': ch = '\n'; goto stuff; 413 case 'r': ch = '\r'; goto stuff; 414 case 't': ch = '\t'; goto stuff; 415 } 416 } 417 if ( !(INCHARSET(ch,OCTDIGIT)) ) goto stuff; 418 i = 0; 419 intval = 0; 420 while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){ 421 i++;intval <<= 3;intval += ch - '0'; 422 ch = getchar(); 423 } 424 ungetc(ch); 425 val = (char)intval; 426 goto stuff; 427 } 428 /* 429 * bufptr now points at the next free slot 430 */ 431 bstrfromto(lgbackpatch, bufptr); 432 if (linescrossed){ 433 val = ILINESKIP; 434 ryylval = linescrossed; 435 goto ret; 436 } else 437 goto builtval; 438 439 case BADCHAR: 440 linescrossed = lineno; 441 lineno = scanlineno; 442 yyerror("Illegal character mapped: %d, char read:(octal) %o", 443 ryylval, ch); 444 lineno = linescrossed; 445 val = BADCHAR; 446 goto ret; 447 448 default: 449 val = ryylval; 450 goto ret; 451 } /*end of the switch*/ 452 /* 453 * here with one token, so stuff it 454 */ 455 ret: 456 oval = val; 457 ptoken(bufptr, val); 458 switch(val){ 459 case ILINESKIP: 460 pint(bufptr, ryylval); 461 break; 462 case SIZESPEC: 463 pchar(bufptr, ryylval); 464 break; 465 case BFINT: plong(bufptr, ryylval); 466 break; 467 case INT: plong(bufptr, ryylval); 468 break; 469 case BIGNUM: pnumber(bufptr, yybignum); 470 break; 471 case NAME: pptr(bufptr, (int)(struct symtab *)ryylval); 472 break; 473 case REG: pchar(bufptr, ryylval); 474 break; 475 case INST0: 476 case INSTn: 477 popcode(bufptr, opstruct); 478 break; 479 case IJXXX: 480 popcode(bufptr, opstruct); 481 pptr(bufptr, (int)(struct symtab *)symalloc()); 482 break; 483 case ISTAB: 484 case ISTABSTR: 485 case ISTABNONE: 486 case ISTABDOT: 487 case IALIGN: 488 pptr(bufptr, (int)(struct symtab *)symalloc()); 489 break; 490 /* 491 * default: 492 */ 493 } 494 builtval: ; 495 } /*end of the while to stuff the buffer*/ 496 done: 497 bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]); 498 499 /* 500 * This is a real kludge: 501 * 502 * We put the last token in the buffer to be a MINUS 503 * symbol. This last token will never be picked up 504 * in the normal way, but can be looked at during 505 * a peekahead look that the short circuit expression 506 * evaluator uses to see if an expression is complicated. 507 * 508 * Consider the following situation: 509 * 510 * .word 45 + 47 511 * buffer 1 | buffer 0 512 * the peekahead would want to look across the buffer, 513 * but will look in the buffer end zone, see the minus, and 514 * fail. 515 */ 516 ptoken(bufptr, MINUS); 517 InBufPtr = inbufptr; /*copy this back*/ 518 } 519