xref: /original-bsd/old/as.vax/asscan2.c (revision f0fd5f8a)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
4 #ifndef lint
5 static char sccsid[] = "@(#)asscan2.c 4.6 12/15/82";
6 #endif not lint
7 
8 #include "asscanl.h"
9 static	inttoktype	oval = NL;
10 
11 #ifdef BUFSIZ
12 #undef BUFSIZ
13 #endif
14 
15 #define BUFSIZ 4096
16 
17 #define	NINBUFFERS	2
18 #define	INBUFLG		NINBUFFERS*BUFSIZ + 2
19 	/*
20 	 *	We have two input buffers; the first one is reserved
21 	 *	for catching the tail of a line split across a buffer
22 	 *	boundary; the other one are used for snarfing a buffer
23 	 *	worth of .s source.
24 	 */
25 static	char	inbuffer[INBUFLG];
26 static	char	*InBufPtr = 0;
27 
28 /*
29  *	fill the inbuffer from the standard input.
30  *	Assert: there are always n COMPLETE! lines in the buffer area.
31  *	Assert: there is always a \n terminating the last line
32  *		in the buffer area.
33  *	Assert: after the \n, there is an EOFCHAR (hard end of file)
34  *		or a NEEDCHAR (end of buffer)
35  *	Assert:	fgets always null pads the string it reads.
36  *	Assert:	no ungetc's are done at the end of a line or at the
37  *		beginning of a line.
38  *
39  *	We read a complete buffer of characters in one single read.
40  *	We then back scan within this buffer to find the end of the
41  *	last complete line, and force the assertions, and save a pointer
42  *	to the incomplete line.
43  *	The next call to fillinbuffer will move the unread characters
 *	to the end of the first buffer, and then read another buffer's worth,
45  *	completing the cycle.
46  */
47 
48 static	char	p_swapped = '\0';
49 static	char	*p_start = &inbuffer[NINBUFFERS * BUFSIZ];
50 static	char	*p_stop = &inbuffer[NINBUFFERS * BUFSIZ];
51 
52 char *fillinbuffer()
53 {
54 	register	char	*to;
55 	register	char	*from;
56 			char	*inbufptr;
57 	int		nread;
58 	static		int	hadeof;
59 	int		goal;
60 	int		got;
61 
62 	*p_start = p_swapped;
63 	inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start);
64 
65 	for (to = inbufptr, from = p_start; from < p_stop;)
66 		*to++ = *from++;
67 	/*
68 	 *	Now, go read two full buffers (hopefully)
69 	 */
70 	if (hadeof){
71 		hadeof = 0;
72 		return (0);
73 	}
74 	goal = (NINBUFFERS - 1)*BUFSIZ;
75 	nread = 0;
76 	do {
77 		got = read(stdin->_file, &inbuffer[1*BUFSIZ + nread], goal);
78 		if (got == 0)
79 			hadeof = 1;
80 		if (got <= 0)
81 			break;
82 		nread += got;
83 		goal -= got;
84 	} while (goal);
85 
86 	if (nread == 0)
87 		return(0);
88 	p_stop = from = &inbuffer[1*BUFSIZ + nread];
89 	*from = '\0';
90 
91 	while (*--from != '\n'){
92 		/*
93 		 *	back over the partial line
94 		 */
95 		if (from == &inbuffer[1*BUFSIZ]) {
96 			from = p_stop;
97 			*p_stop++ = '\n';
98 			break;
99 		} else {
100 			continue;
101 		}
102 	}
103 
104 	from++;				/* first char of partial line */
105 	p_start = from;
106 	p_swapped = *p_start;
107 	*p_start = NEEDCHAR;		/* force assertion */
108 	return(inbufptr);
109 }
110 
111 scan_dot_s(bufferbox)
112 	struct tokbufdesc *bufferbox;
113 {
114 	reg	int	ryylval;	/* local copy of lexical value */
115 	extern	int	yylval;		/* global copy of lexical value */
116 	reg	int	val;		/* the value returned */
117 		int	i;		/* simple counter */
118 	reg	char	*rcp;
119 		char	*cp;		/* can have address taken */
120 	reg	int	ch;		/* treated as a character */
121 		int	ch1;		/* shadow value */
122 	reg	char	*inbufptr;
123 		struct 	symtab	*op;
124 
125 	reg	ptrall	bufptr;		/* where to stuff tokens */
126 		ptrall	lgbackpatch;	/* where to stuff a string length */
127 		ptrall	bufub;		/* where not to stuff tokens */
128 		int	maxstrlg;	/* how long a string can be */
129 		long	intval;		/* value of int */
130 		int	linescrossed;	/* when doing strings and comments */
131 		struct	Opcode		opstruct;
132 
133 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
134 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
135 
136 	inbufptr = InBufPtr;
137 	if (inbufptr == 0){
138 		inbufptr = fillinbuffer();
139 		if (inbufptr == 0){	/*end of file*/
140    		  endoffile:
141 			inbufptr = 0;
142 			ptoken(bufptr, PARSEEOF);
143 			goto done;
144 		}
145 	}
146 
147 	if (newfflag){
148 		ptoken(bufptr, IFILE);
149 		ptoken(bufptr, STRING);
150 		val = strlen(newfname) + 1;
151 		movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val);
152 		bstrlg(bufptr, val);
153 
154 		ptoken(bufptr, ILINENO);
155 		ptoken(bufptr, INT);
156 		pint(bufptr,  1);
157 		newfflag = 0;
158 	}
159 
160 	while (bufptr < bufub){
161    loop:
162         switch(ryylval = (type+2)[ch = getchar()]) {
163 	case SCANEOF:
164 		inbufptr = 0;
165 		goto endoffile;
166 
167 	case NEEDSBUF:
168 		inbufptr = fillinbuffer();
169 		if (inbufptr == 0)
170 			goto endoffile;
171 		goto loop;
172 
173 	case DIV:		/*process C style comments*/
174 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
175 			int	incomment;
176 			linescrossed = 0;
177 			incomment = 1;
178 			ch = getchar();	/*skip over the * */
179 			while(incomment){
180 				switch(ch){
181 				case '*':
182 					ch = getchar();
183 					incomment = (ch != '/');
184 					break;
185 				case '\n':
186 					scanlineno++;
187 					linescrossed++;
188 					ch = getchar();
189 					break;
190 				case EOFCHAR:
191 					goto endoffile;
192 				case NEEDCHAR:
193 					inbufptr = fillinbuffer();
194 					if (inbufptr == 0)
195 						goto endoffile;
196 					lineno++;
197 					ch = getchar();
198 					break;
199 				default:
200 					ch = getchar();
201 					break;
202 				}
203 			}
204 			val = ILINESKIP;
205 			ryylval = linescrossed;
206 			goto ret;
207 		} else {	/*just an ordinary DIV*/
208 			ungetc(ch);
209 			val = ryylval = DIV;
210 			goto ret;
211 		}
212 	case SH:
213 		if (oval == NL){
214 			/*
215 			 *	Attempt to recognize a C preprocessor
216 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
217 			 */
218 			ch = getchar();	/*bump the #*/
219 			while (INCHARSET(ch, SPACE))
220 				ch = getchar();/*bump white */
221 			if (INCHARSET(ch, DIGIT)){
222 				intval = 0;
223 				while(INCHARSET(ch, DIGIT)){
224 					intval = intval*10 + ch - '0';
225 					ch = getchar();
226 				}
227 				while (INCHARSET(ch, SPACE))
228 					ch = getchar();
229 				if (ch == '"'){
230 					ptoken(bufptr, ILINENO);
231 					ptoken(bufptr, INT);
232 					pint(bufptr, intval - 1);
233 					ptoken(bufptr, IFILE);
234 					/*
235 					 *	The '"' has already been
236 					 *	munched
237 					 *
238 					 *	eatstr will not eat
239 					 *	the trailing \n, so
240 					 *	it is given to the parser
241 					 *	and counted.
242 					 */
243 					goto eatstr;
244 				}
245 			}
246 		}
247 		/*
248 		 *	Well, its just an ordinary decadent comment
249 		 */
250 		while ((ch != '\n') && (ch != EOFCHAR))
251 			ch = getchar();
252 		if (ch == EOFCHAR)
253 			goto endoffile;
254 		val = ryylval = oval = NL;
255 		scanlineno++;
256 		goto ret;
257 
258 	case NL:
259 		scanlineno++;
260 		val = ryylval;
261 		goto ret;
262 
263 	case SP:
264 		oval = SP;	/*invalidate ^# meta comments*/
265 		goto loop;
266 
267 	case REGOP:		/* % , could be used as modulo, or register*/
268 		ch = getchar();
269 		if (INCHARSET(ch, DIGIT)){
270 			ryylval = ch-'0';
271 			if (ch=='1') {
272 				if (INCHARSET( (ch = getchar()), REGDIGIT))
273 					ryylval = 10+ch-'0';
274 				else
275 					ungetc(ch);
276 			}
277 			/*
278 			 *	God only knows what the original author
279 			 *	wanted this undocumented feature to
280 			 *	do.
281 			 *		%5++ is really  r7
282 			 */
283 			while(INCHARSET( (ch = getchar()), SIGN)) {
284 				if (ch=='+')
285 					ryylval++;
286 				else
287 					ryylval--;
288 			}
289 			ungetc(ch);
290 			val = REG;
291 		} else {
292 			ungetc(ch);
293 			val = REGOP;
294 		}
295 		goto ret;
296 
297 	case ALPH:
298 		ch1 = ch;
299 		if (INCHARSET(ch, SZSPECBEGIN)){
300 			if( (ch = getchar()) == '`' || ch == '^'){
301 				ch1 |= 0100;	/*convert to lower*/
302 				switch(ch1){
303 				case 'b':	ryylval = 1;	break;
304 				case 'w':	ryylval = 2;	break;
305 				case 'l':	ryylval = 4;	break;
306 				default:	ryylval = d124;	break;
307 				}
308 				val = SIZESPEC;
309 				goto ret;
310 			} else {
311 				ungetc(ch);
312 				ch = ch1;	/*restore first character*/
313 			}
314 		}
315 		rcp = yytext;
316 		do {
317 			if (rcp < &yytext[NCPS])
318 				*rcp++ = ch;
319 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
320 		*rcp = '\0';
321 		while (INCHARSET(ch, SPACE))
322 			ch = getchar();
323 		ungetc(ch);
324 
325 		switch((op = *lookup(1))->s_tag){
326 		case 0:
327 		case LABELID:
328 			/*
329 			 *	Its a name... (Labels are subsets ofname)
330 			 */
331 			ryylval = (int)op;
332 			val = NAME;
333 			break;
334 		case INST0:
335 		case INSTn:
336 		case IJXXX:
337 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
338 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
339 			val = op->s_tag;
340 			break;
341 		default:
342 			ryylval = ( (struct instab *)op)->i_popcode;
343 			val = op->s_tag;
344 			break;
345 		}
346 		goto ret;
347 
348 	case DIG:
349 		/*
350 		 *	Implement call by reference on a reg variable
351 		 */
352 		cp = inbufptr;
353 		val = number(ch, &cp);
354 		/*
355 		 *	yylval or yybignum has been stuffed as a side
356 		 *	effect to number(); get the global yylval
357 		 *	into our fast local copy in case it was an INT.
358 		 */
359 		ryylval = yylval;
360 		inbufptr = cp;
361 		goto ret;
362 
363 	case LSH:
364 	case RSH:
365 		/*
366 		 *	We allow the C style operators
367 		 *	<< and >>, as well as < and >
368 		 */
369 		if ( (ch1 = getchar()) != ch)
370 			ungetc(ch1);
371 		val = ryylval;
372 		goto ret;
373 
374 	case MINUS:
375 		if ( (ch = getchar()) =='(')
376 			ryylval=val=MP;
377 		else {
378 			ungetc(ch);
379 			val=MINUS;
380 		}
381 		goto ret;
382 
383 	case SQ:
384 		if ((ryylval = getchar()) == '\n')
385 			scanlineno++;		/*not entirely correct*/
386 		val = INT;
387 		goto ret;
388 
389 	case DQ:
390 	   eatstr:
391 		linescrossed = 0;
392 		maxstrlg = (char *)bufub - (char *)bufptr;
393 
394 		if (maxstrlg < MAXSTRLG) {
395 			ungetc('"');
396 			*(bytetoktype *)bufptr = VOID ;
397 			bufub = bufptr;
398 			goto done;
399 		}
400 		if (maxstrlg > MAXSTRLG)
401 			maxstrlg = MAXSTRLG;
402 
403 		ptoken(bufptr, STRING);
404 		lgbackpatch = bufptr;	/*this is where the size goes*/
405 		bufptr += sizeof(lgtype);
406 		/*
407 		 *	bufptr is now set to
408 		 *	be stuffed with characters from
409 		 *	the input
410 		 */
411 
412 		while (   (maxstrlg > 0)
413 		       && !(INCHARSET( (ch = getchar()), STRESCAPE))
414 		      ){
415 			stuff:
416 				maxstrlg -= 1;
417 				pchar(bufptr, ch);
418 			}
419 		if (maxstrlg <= 0){	/*enough characters to fill a string buffer*/
420 			ungetc('"');		/*will read it next*/
421 		}
422 		else if (ch == '"')
423 			/*VOID*/ ;		/*done*/
424 		else if (ch == '\n'){
425 			yywarning("New line embedded in a string constant.");
426 			scanlineno++;
427 			linescrossed++;
428 			ch = getchar();
429 			if (ch == EOFCHAR){
430 			  do_eof:
431 				pchar(bufptr, '\n');
432 				ungetc(EOFCHAR);
433 			} else
434 			if (ch == NEEDCHAR){
435 				if ( (inbufptr = fillinbuffer()) == 0)
436 					goto do_eof;
437 				ch = '\n';
438 				goto stuff;
439 			} else {	/* simple case */
440 				ungetc(ch);
441 				ch = '\n';
442 				goto stuff;
443 			}
444 		} else {
445 			ch = getchar();		/*skip the '\\'*/
446 			if ( INCHARSET(ch, BSESCAPE)){
447 				switch (ch){
448 				  case 'b':  ch = '\b'; goto stuff;
449 				  case 'f':  ch = '\f'; goto stuff;
450 				  case 'n':  ch = '\n'; goto stuff;
451 				  case 'r':  ch = '\r'; goto stuff;
452 				  case 't':  ch = '\t'; goto stuff;
453 				}
454 			}
455 			if ( !(INCHARSET(ch,OCTDIGIT)) )  goto stuff;
456 			i = 0;
457 			intval = 0;
458 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
459 				i++;intval <<= 3;intval += ch - '0';
460 				ch = getchar();
461 			}
462 			ungetc(ch);
463 			ch = (char)intval;
464 			goto stuff;
465 		}
466 		/*
467 		 *	bufptr now points at the next free slot
468 		 */
469 		bstrfromto(lgbackpatch, bufptr);
470 		if (linescrossed){
471 			val = ILINESKIP;
472 			ryylval = linescrossed;
473 			goto ret;
474 		} else
475 			goto builtval;
476 
477 	case BADCHAR:
478 		linescrossed = lineno;
479 		lineno = scanlineno;
480 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
481 			ryylval, ch);
482 		lineno = linescrossed;
483 		val = BADCHAR;
484 		goto ret;
485 
486 	default:
487 		val = ryylval;
488 		goto ret;
489 	}	/*end of the switch*/
490 	/*
491 	 *	here with one token, so stuff it
492 	 */
493    ret:
494 	oval = val;
495 	ptoken(bufptr, val);
496 	switch(val){
497 		case	ILINESKIP:
498 				pint(bufptr, ryylval);
499 				break;
500 		case	SIZESPEC:
501 				pchar(bufptr, ryylval);
502 				break;
503 		case	BFINT:	plong(bufptr, ryylval);
504 				break;
505 		case	INT:	plong(bufptr, ryylval);
506 				break;
507 		case 	BIGNUM:	pnumber(bufptr, yybignum);
508 				break;
509 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
510 				break;
511 		case	REG:	pchar(bufptr, ryylval);
512 				break;
513 		case	INST0:
514 		case	INSTn:
515 				popcode(bufptr, opstruct);
516 				break;
517 		case 	IJXXX:
518 				popcode(bufptr, opstruct);
519 				pptr(bufptr, (int)(struct symtab *)symalloc());
520 				break;
521 		case	ISTAB:
522 		case	ISTABSTR:
523 		case	ISTABNONE:
524 		case	ISTABDOT:
525 		case	IALIGN:
526 				pptr(bufptr, (int)(struct symtab *)symalloc());
527 				break;
528 	/*
529 	 *	default:
530 	 */
531 	 }
532 	 builtval: ;
533    }			/*end of the while to stuff the buffer*/
534    done:
535 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
536 
537 	/*
538 	 *	This is a real kludge:
539 	 *
540 	 *	We put the last token in the buffer to be  a MINUS
541 	 *	symbol.  This last token will never be picked up
542 	 *	in the normal way, but can be looked at during
543 	 *	a peekahead look that the short circuit expression
544 	 *	evaluator uses to see if an expression is complicated.
545 	 *
546 	 *	Consider the following situation:
547 	 *
548 	 *	.word	45		+	47
549 	 *        buffer 1      |  buffer 0
550 	 *	the peekahead would want to look across the buffer,
551 	 *	but will look in the buffer end zone, see the minus, and
552 	 *	fail.
553 	 */
554 	ptoken(bufptr, MINUS);
555 	InBufPtr = inbufptr;		/*copy this back*/
556 }
557