xref: /original-bsd/old/as.vax/asscan2.c (revision 5998a314)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
#ifndef lint
/* SCCS version identification string; compiled out when lint is defined. */
static char sccsid[] = "@(#)asscan2.c 4.4 04/16/82";
#endif /* not lint */
7 
8 #include "asscanl.h"
/*
 *	Type of the token most recently produced by scan_dot_s (or SP
 *	after white space was skipped).  It starts out as NL, and the '#'
 *	handling in scan_dot_s only tries to recognize a C preprocessor
 *	style line directive while this is still NL.
 */
static	inttoktype	oval = NL;
10 
#define	NINBUFFERS	2
	/*
	 *	Total size of inbuffer: NINBUFFERS regions of BUFSIZ
	 *	characters each, plus two spare characters so that the
	 *	markers fillinbuffer plants just past the data it read
	 *	still fit when a read fills the last region completely.
	 *	The expansion is parenthesized so that the macro is safe
	 *	to use inside a larger expression.
	 */
#define	INBUFLG		(NINBUFFERS*BUFSIZ + 2)
	/*
	 *	We have two input buffers; the first one is reserved
	 *	for catching the tail of a line split across a buffer
	 *	boundary; the other one is used for snarfing a buffer
	 *	worth of .s source.
	 */
static	char	inbuffer[INBUFLG];
	/*
	 *	Scan position saved between calls to scan_dot_s;
	 *	0 means the buffer has to be (re)filled before scanning.
	 */
static	char	*InBufPtr = 0;
21 
22 /*
23  *	fill the inbuffer from the standard input.
24  *	Assert: there are always n COMPLETE! lines in the buffer area.
25  *	Assert: there is always a \n terminating the last line
26  *		in the buffer area.
27  *	Assert: after the \n, there is an EOFCHAR (hard end of file)
28  *		or a NEEDCHAR (end of buffer)
29  *	Assert:	fgets always null pads the string it reads.
30  *	Assert:	no ungetc's are done at the end of a line or at the
31  *		beginning of a line.
32  *
33  *	We read a complete buffer of characters in one single read.
34  *	We then back scan within this buffer to find the end of the
35  *	last complete line, and force the assertions, and save a pointer
36  *	to the incomplete line.
37  *	The next call to fillinbuffer will move the unread characters
38  *	to the end of the first buffer, and then read another two buffers,
39  *	completing the cycle.
40  */
41 
42 static	char	p_swapped = '\0';
43 static	char	*p_start = &inbuffer[NINBUFFERS * BUFSIZ];
44 static	char	*p_stop = &inbuffer[NINBUFFERS * BUFSIZ];
45 
46 char *fillinbuffer()
47 {
48 	register	char	*to;
49 	register	char	*from;
50 			char	*inbufptr;
51 	int		nread;
52 	static		int	hadeof;
53 	int		goal;
54 	int		got;
55 
56 	*p_start = p_swapped;
57 	inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start);
58 
59 	for (to = inbufptr, from = p_start; from < p_stop;)
60 		*to++ = *from++;
61 	/*
62 	 *	Now, go read two full buffers (hopefully)
63 	 */
64 	if (hadeof){
65 		hadeof = 0;
66 		return (0);
67 	}
68 	goal = (NINBUFFERS - 1)*BUFSIZ;
69 	nread = 0;
70 	do {
71 		got = read(stdin->_file, &inbuffer[1*BUFSIZ + nread], goal);
72 		if (got == 0)
73 			hadeof = 1;
74 		if (got <= 0)
75 			break;
76 		nread += got;
77 		goal -= got;
78 	} while (goal);
79 
80 	if (nread == 0)
81 		return(0);
82 	p_stop = from = &inbuffer[1*BUFSIZ + nread];
83 	*from = '\0';
84 
85 	while (*--from != '\n'){
86 		/*
87 		 *	back over the partial line
88 		 */
89 		if (from == &inbuffer[1*BUFSIZ]) {
90 			from = p_stop;
91 			*p_stop++ = '\n';
92 			break;
93 		} else {
94 			continue;
95 		}
96 	}
97 
98 	from++;				/* first char of partial line */
99 	p_start = from;
100 	p_swapped = *p_start;
101 	*p_start = NEEDCHAR;		/* force assertion */
102 	return(inbufptr);
103 }
104 
105 scan_dot_s(bufferbox)
106 	struct tokbufdesc *bufferbox;
107 {
108 	reg	int	ryylval;	/* local copy of lexical value */
109 	extern	int	yylval;		/* global copy of lexical value */
110 	reg	int	val;		/* the value returned */
111 		int	i;		/* simple counter */
112 	reg	char	*rcp;
113 		char	*cp;		/* can have address taken */
114 	reg	int	ch;		/* treated as a character */
115 		int	ch1;		/* shadow value */
116 	reg	char	*inbufptr;
117 		struct 	symtab	*op;
118 
119 	reg	ptrall	bufptr;		/* where to stuff tokens */
120 		ptrall	lgbackpatch;	/* where to stuff a string length */
121 		ptrall	bufub;		/* where not to stuff tokens */
122 		int	maxstrlg;	/* how long a string can be */
123 		long	intval;		/* value of int */
124 		int	linescrossed;	/* when doing strings and comments */
125 		struct	Opcode		opstruct;
126 
127 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
128 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
129 
130 	inbufptr = InBufPtr;
131 	if (inbufptr == 0){
132 		inbufptr = fillinbuffer();
133 		if (inbufptr == 0){	/*end of file*/
134    		  endoffile:
135 			inbufptr = 0;
136 			ptoken(bufptr, PARSEEOF);
137 			goto done;
138 		}
139 	}
140 
141 	if (newfflag){
142 		ptoken(bufptr, IFILE);
143 		ptoken(bufptr, STRING);
144 		val = strlen(newfname) + 1;
145 		movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val);
146 		bstrlg(bufptr, val);
147 
148 		ptoken(bufptr, ILINENO);
149 		ptoken(bufptr, INT);
150 		pint(bufptr,  1);
151 		newfflag = 0;
152 	}
153 
154 	while (bufptr < bufub){
155    loop:
156         switch(ryylval = (type+2)[ch = getchar()]) {
157 	case SCANEOF:
158 		inbufptr = 0;
159 		goto endoffile;
160 
161 	case NEEDSBUF:
162 		inbufptr = fillinbuffer();
163 		if (inbufptr == 0)
164 			goto endoffile;
165 		goto loop;
166 
167 	case DIV:		/*process C style comments*/
168 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
169 			int	incomment;
170 			linescrossed = 0;
171 			incomment = 1;
172 			ch = getchar();	/*skip over the * */
173 			while(incomment){
174 				switch(ch){
175 				case '*':
176 					ch = getchar();
177 					incomment = (ch != '/');
178 					break;
179 				case '\n':
180 					scanlineno++;
181 					linescrossed++;
182 					ch = getchar();
183 					break;
184 				case EOFCHAR:
185 					goto endoffile;
186 				case NEEDCHAR:
187 					inbufptr = fillinbuffer();
188 					if (inbufptr == 0)
189 						goto endoffile;
190 					lineno++;
191 					ch = getchar();
192 					break;
193 				default:
194 					ch = getchar();
195 					break;
196 				}
197 			}
198 			val = ILINESKIP;
199 			ryylval = linescrossed;
200 			goto ret;
201 		} else {	/*just an ordinary DIV*/
202 			ungetc(ch);
203 			val = ryylval = DIV;
204 			goto ret;
205 		}
206 	case SH:
207 		if (oval == NL){
208 			/*
209 			 *	Attempt to recognize a C preprocessor
210 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
211 			 */
212 			ch = getchar();	/*bump the #*/
213 			while (INCHARSET(ch, SPACE))
214 				ch = getchar();/*bump white */
215 			if (INCHARSET(ch, DIGIT)){
216 				intval = 0;
217 				while(INCHARSET(ch, DIGIT)){
218 					intval = intval*10 + ch - '0';
219 					ch = getchar();
220 				}
221 				while (INCHARSET(ch, SPACE))
222 					ch = getchar();
223 				if (ch == '"'){
224 					ptoken(bufptr, ILINENO);
225 					ptoken(bufptr, INT);
226 					pint(bufptr, intval - 1);
227 					ptoken(bufptr, IFILE);
228 					/*
229 					 *	The '"' has already been
230 					 *	munched
231 					 *
232 					 *	eatstr will not eat
233 					 *	the trailing \n, so
234 					 *	it is given to the parser
235 					 *	and counted.
236 					 */
237 					goto eatstr;
238 				}
239 			}
240 		}
241 		/*
242 		 *	Well, its just an ordinary decadent comment
243 		 */
244 		while ((ch != '\n') && (ch != EOFCHAR))
245 			ch = getchar();
246 		if (ch == EOFCHAR)
247 			goto endoffile;
248 		val = ryylval = oval = NL;
249 		scanlineno++;
250 		goto ret;
251 
252 	case NL:
253 		scanlineno++;
254 		val = ryylval;
255 		goto ret;
256 
257 	case SP:
258 		oval = SP;	/*invalidate ^# meta comments*/
259 		goto loop;
260 
261 	case REGOP:		/* % , could be used as modulo, or register*/
262 		ch = getchar();
263 		if (INCHARSET(ch, DIGIT)){
264 			ryylval = ch-'0';
265 			if (ch=='1') {
266 				if (INCHARSET( (ch = getchar()), REGDIGIT))
267 					ryylval = 10+ch-'0';
268 				else
269 					ungetc(ch);
270 			}
271 			/*
272 			 *	God only knows what the original author
273 			 *	wanted this undocumented feature to
274 			 *	do.
275 			 *		%5++ is really  r7
276 			 */
277 			while(INCHARSET( (ch = getchar()), SIGN)) {
278 				if (ch=='+')
279 					ryylval++;
280 				else
281 					ryylval--;
282 			}
283 			ungetc(ch);
284 			val = REG;
285 		} else {
286 			ungetc(ch);
287 			val = REGOP;
288 		}
289 		goto ret;
290 
291 	case ALPH:
292 		ch1 = ch;
293 		if (INCHARSET(ch, SZSPECBEGIN)){
294 			if( (ch = getchar()) == '`' || ch == '^'){
295 				ch1 |= 0100;	/*convert to lower*/
296 				switch(ch1){
297 				case 'b':	ryylval = 1;	break;
298 				case 'w':	ryylval = 2;	break;
299 				case 'l':	ryylval = 4;	break;
300 				default:	ryylval = d124;	break;
301 				}
302 				val = SIZESPEC;
303 				goto ret;
304 			} else {
305 				ungetc(ch);
306 				ch = ch1;	/*restore first character*/
307 			}
308 		}
309 		rcp = yytext;
310 		do {
311 			if (rcp < &yytext[NCPS])
312 				*rcp++ = ch;
313 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
314 		*rcp = '\0';
315 		while (INCHARSET(ch, SPACE))
316 			ch = getchar();
317 		ungetc(ch);
318 
319 		switch((op = *lookup(1))->s_tag){
320 		case 0:
321 		case LABELID:
322 			/*
323 			 *	Its a name... (Labels are subsets ofname)
324 			 */
325 			ryylval = (int)op;
326 			val = NAME;
327 			break;
328 		case INST0:
329 		case INSTn:
330 		case IJXXX:
331 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
332 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
333 			val = op->s_tag;
334 			break;
335 		default:
336 			ryylval = ( (struct instab *)op)->i_popcode;
337 			val = op->s_tag;
338 			break;
339 		}
340 		goto ret;
341 
342 	case DIG:
343 		/*
344 		 *	Implement call by reference on a reg variable
345 		 */
346 		cp = inbufptr;
347 		val = number(ch, &cp);
348 		/*
349 		 *	yylval or yybignum has been stuffed as a side
350 		 *	effect to number(); get the global yylval
351 		 *	into our fast local copy in case it was an INT.
352 		 */
353 		ryylval = yylval;
354 		inbufptr = cp;
355 		goto ret;
356 
357 	case LSH:
358 	case RSH:
359 		/*
360 		 *	We allow the C style operators
361 		 *	<< and >>, as well as < and >
362 		 */
363 		if ( (ch1 = getchar()) != ch)
364 			ungetc(ch1);
365 		val = ryylval;
366 		goto ret;
367 
368 	case MINUS:
369 		if ( (ch = getchar()) =='(')
370 			ryylval=val=MP;
371 		else {
372 			ungetc(ch);
373 			val=MINUS;
374 		}
375 		goto ret;
376 
377 	case SQ:
378 		if ((ryylval = getchar()) == '\n')
379 			scanlineno++;		/*not entirely correct*/
380 		val = INT;
381 		goto ret;
382 
383 	case DQ:
384 	   eatstr:
385 		linescrossed = 0;
386 		maxstrlg = (char *)bufub - (char *)bufptr;
387 
388 		if (maxstrlg < MAXSTRLG) {
389 			ungetc('"');
390 			*(bytetoktype *)bufptr = VOID ;
391 			bufub = bufptr;
392 			goto done;
393 		}
394 		if (maxstrlg > MAXSTRLG)
395 			maxstrlg = MAXSTRLG;
396 
397 		ptoken(bufptr, STRING);
398 		lgbackpatch = bufptr;	/*this is where the size goes*/
399 		bufptr += sizeof(lgtype);
400 		/*
401 		 *	bufptr is now set to
402 		 *	be stuffed with characters from
403 		 *	the input
404 		 */
405 
406 		while (   (maxstrlg > 0)
407 		       && !(INCHARSET( (ch = getchar()), STRESCAPE))
408 		      ){
409 			stuff:
410 				maxstrlg -= 1;
411 				pchar(bufptr, ch);
412 			}
413 		if (maxstrlg <= 0){	/*enough characters to fill a string buffer*/
414 			ungetc('"');		/*will read it next*/
415 		}
416 		else if (ch == '"')
417 			/*VOID*/ ;		/*done*/
418 		else if (ch == '\n'){
419 			yywarning("New line embedded in a string constant.");
420 			scanlineno++;
421 			linescrossed++;
422 			ch = getchar();
423 			if (ch == EOFCHAR){
424 			  do_eof:
425 				pchar(bufptr, '\n');
426 				ungetc(EOFCHAR);
427 			} else
428 			if (ch == NEEDCHAR){
429 				if ( (inbufptr = fillinbuffer()) == 0)
430 					goto do_eof;
431 				ch = '\n';
432 				goto stuff;
433 			} else {	/* simple case */
434 				ungetc(ch);
435 				ch = '\n';
436 				goto stuff;
437 			}
438 		} else {
439 			ch = getchar();		/*skip the '\\'*/
440 			if ( INCHARSET(ch, BSESCAPE)){
441 				switch (ch){
442 				  case 'b':  ch = '\b'; goto stuff;
443 				  case 'f':  ch = '\f'; goto stuff;
444 				  case 'n':  ch = '\n'; goto stuff;
445 				  case 'r':  ch = '\r'; goto stuff;
446 				  case 't':  ch = '\t'; goto stuff;
447 				}
448 			}
449 			if ( !(INCHARSET(ch,OCTDIGIT)) )  goto stuff;
450 			i = 0;
451 			intval = 0;
452 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
453 				i++;intval <<= 3;intval += ch - '0';
454 				ch = getchar();
455 			}
456 			ungetc(ch);
457 			ch = (char)intval;
458 			goto stuff;
459 		}
460 		/*
461 		 *	bufptr now points at the next free slot
462 		 */
463 		bstrfromto(lgbackpatch, bufptr);
464 		if (linescrossed){
465 			val = ILINESKIP;
466 			ryylval = linescrossed;
467 			goto ret;
468 		} else
469 			goto builtval;
470 
471 	case BADCHAR:
472 		linescrossed = lineno;
473 		lineno = scanlineno;
474 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
475 			ryylval, ch);
476 		lineno = linescrossed;
477 		val = BADCHAR;
478 		goto ret;
479 
480 	default:
481 		val = ryylval;
482 		goto ret;
483 	}	/*end of the switch*/
484 	/*
485 	 *	here with one token, so stuff it
486 	 */
487    ret:
488 	oval = val;
489 	ptoken(bufptr, val);
490 	switch(val){
491 		case	ILINESKIP:
492 				pint(bufptr, ryylval);
493 				break;
494 		case	SIZESPEC:
495 				pchar(bufptr, ryylval);
496 				break;
497 		case	BFINT:	plong(bufptr, ryylval);
498 				break;
499 		case	INT:	plong(bufptr, ryylval);
500 				break;
501 		case 	BIGNUM:	pnumber(bufptr, yybignum);
502 				break;
503 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
504 				break;
505 		case	REG:	pchar(bufptr, ryylval);
506 				break;
507 		case	INST0:
508 		case	INSTn:
509 				popcode(bufptr, opstruct);
510 				break;
511 		case 	IJXXX:
512 				popcode(bufptr, opstruct);
513 				pptr(bufptr, (int)(struct symtab *)symalloc());
514 				break;
515 		case	ISTAB:
516 		case	ISTABSTR:
517 		case	ISTABNONE:
518 		case	ISTABDOT:
519 		case	IALIGN:
520 				pptr(bufptr, (int)(struct symtab *)symalloc());
521 				break;
522 	/*
523 	 *	default:
524 	 */
525 	 }
526 	 builtval: ;
527    }			/*end of the while to stuff the buffer*/
528    done:
529 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
530 
531 	/*
532 	 *	This is a real kludge:
533 	 *
534 	 *	We put the last token in the buffer to be  a MINUS
535 	 *	symbol.  This last token will never be picked up
536 	 *	in the normal way, but can be looked at during
537 	 *	a peekahead look that the short circuit expression
538 	 *	evaluator uses to see if an expression is complicated.
539 	 *
540 	 *	Consider the following situation:
541 	 *
542 	 *	.word	45		+	47
543 	 *        buffer 1      |  buffer 0
544 	 *	the peekahead would want to look across the buffer,
545 	 *	but will look in the buffer end zone, see the minus, and
546 	 *	fail.
547 	 */
548 	ptoken(bufptr, MINUS);
549 	InBufPtr = inbufptr;		/*copy this back*/
550 }
551