xref: /original-bsd/old/as.vax/asscan2.c (revision 0b685140)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
4 #ifndef lint
5 static char sccsid[] = "@(#)asscan2.c 4.3 02/14/82";
6 #endif not lint
7 
8 #include "asscanl.h"
/*
 *	Value of the token most recently stuffed by scan_dot_s (it starts
 *	out as NL, and is also set to SP when white space is skipped).
 *	scan_dot_s consults it so that a '#' is only tried as a
 *	preprocessor style line directive when the previous token was NL.
 */
static	inttoktype	oval = NL;
10 
#define	NINBUFFERS	2
	/*
	 *	Total size of the input area: NINBUFFERS buffers of BUFSIZ
	 *	characters each, plus 2 spare characters so that sentinel
	 *	characters can be stored just past the last character read.
	 *	The expansion is parenthesized so INBUFLG can be used safely
	 *	inside a larger expression.
	 */
#define	INBUFLG		(NINBUFFERS*BUFSIZ + 2)
	/*
	 *	We have two input buffers; the first one is reserved
	 *	for catching the tail of a line split across a buffer
	 *	boundary; the other one is used for snarfing a buffer
	 *	worth of .s source.
	 */
static	char	inbuffer[INBUFLG];
	/*
	 *	Saved scanning position inside inbuffer between calls
	 *	to scan_dot_s; 0 means the buffer must be (re)filled first.
	 */
static	char	*InBufPtr = 0;
21 
22 /*
23  *	fill the inbuffer from the standard input.
24  *	Assert: there are always n COMPLETE! lines in the buffer area.
25  *	Assert: there is always a \n terminating the last line
26  *		in the buffer area.
27  *	Assert: after the \n, there is an EOFCHAR (hard end of file)
28  *		or a NEEDCHAR (end of buffer)
29  *	Assert:	fgets always null pads the string it reads.
30  *	Assert:	no ungetc's are done at the end of a line or at the
31  *		beginning of a line.
32  *
33  *	We read a complete buffer of characters in one single read.
34  *	We then back scan within this buffer to find the end of the
35  *	last complete line, and force the assertions, and save a pointer
36  *	to the incomplete line.
37  *	The next call to fillinbuffer will move the unread characters
38  *	to the end of the first buffer, and then read another two buffers,
39  *	completing the cycle.
40  */
41 
42 static	char	p_swapped = '\0';
43 static	char	*p_start = &inbuffer[NINBUFFERS * BUFSIZ];
44 static	char	*p_stop = &inbuffer[NINBUFFERS * BUFSIZ];
45 char *fillinbuffer()
46 {
47 	register	char	*to;
48 	register	char	*from;
49 			char	*inbufptr;
50 	int		nread;
51 
52 	*p_start = p_swapped;
53 	inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start);
54 
55 	for (to = inbufptr, from = p_start; from < p_stop;)
56 		*to++ = *from++;
57 	/*
58 	 *	Now, go read two full buffers (hopefully)
59 	 */
60 	nread = read(stdin->_file, &inbuffer[1*BUFSIZ], (NINBUFFERS - 1)*BUFSIZ);
61 	if (nread == 0)
62 		return(0);
63 	p_stop = from = &inbuffer[1*BUFSIZ + nread];
64 	*from = '\0';
65 	while (*--from != '\n')		/* back over the partial line */
66 		continue;
67 	from++;				/* first char of partial line */
68 	p_start = from;
69 	p_swapped = *p_start;
70 	*p_start = NEEDCHAR;		/* force assertion */
71 	return(inbufptr);
72 }
73 
74 scan_dot_s(bufferbox)
75 	struct tokbufdesc *bufferbox;
76 {
77 	reg	int	ryylval;	/* local copy of lexical value */
78 	extern	int	yylval;		/* global copy of lexical value */
79 	reg	int	val;		/* the value returned */
80 		int	i;		/* simple counter */
81 	reg	char	*rcp;
82 		char	*cp;		/* can have address taken */
83 	reg	int	ch;		/* treated as a character */
84 		int	ch1;		/* shadow value */
85 	reg	char	*inbufptr;
86 		struct 	symtab	*op;
87 
88 	reg	ptrall	bufptr;		/* where to stuff tokens */
89 		ptrall	lgbackpatch;	/* where to stuff a string length */
90 		ptrall	bufub;		/* where not to stuff tokens */
91 		int	maxstrlg;	/* how long a string can be */
92 		long	intval;		/* value of int */
93 		int	linescrossed;	/* when doing strings and comments */
94 		struct	Opcode		opstruct;
95 
96 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
97 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
98 
99 	inbufptr = InBufPtr;
100 	if (inbufptr == 0){
101 		inbufptr = fillinbuffer();
102 		if (inbufptr == 0){	/*end of file*/
103    		  endoffile:
104 			inbufptr = 0;
105 			ptoken(bufptr, PARSEEOF);
106 			goto done;
107 		}
108 	}
109 
110 	if (newfflag){
111 		ptoken(bufptr, IFILE);
112 		ptoken(bufptr, STRING);
113 		val = strlen(newfname) + 1;
114 		movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val);
115 		bstrlg(bufptr, val);
116 
117 		ptoken(bufptr, ILINENO);
118 		ptoken(bufptr, INT);
119 		pint(bufptr,  1);
120 		newfflag = 0;
121 	}
122 
123 	while (bufptr < bufub){
124    loop:
125         switch(ryylval = (type+2)[ch = getchar()]) {
126 	case SCANEOF:
127 		inbufptr = 0;
128 		goto endoffile;
129 
130 	case NEEDSBUF:
131 		inbufptr = fillinbuffer();
132 		if (inbufptr == 0)
133 			goto endoffile;
134 		goto loop;
135 
136 	case DIV:		/*process C style comments*/
137 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
138 			int	incomment;
139 			linescrossed = 0;
140 			incomment = 1;
141 			ch = getchar();	/*skip over the * */
142 			while(incomment){
143 				switch(ch){
144 				case '*':
145 					ch = getchar();
146 					incomment = (ch != '/');
147 					break;
148 				case '\n':
149 					scanlineno++;
150 					linescrossed++;
151 					ch = getchar();
152 					break;
153 				case EOFCHAR:
154 					goto endoffile;
155 				case NEEDCHAR:
156 					inbufptr = fillinbuffer();
157 					if (inbufptr == 0)
158 						goto endoffile;
159 					lineno++;
160 					ch = getchar();
161 					break;
162 				default:
163 					ch = getchar();
164 					break;
165 				}
166 			}
167 			val = ILINESKIP;
168 			ryylval = linescrossed;
169 			goto ret;
170 		} else {	/*just an ordinary DIV*/
171 			ungetc(ch);
172 			val = ryylval = DIV;
173 			goto ret;
174 		}
175 	case SH:
176 		if (oval == NL){
177 			/*
178 			 *	Attempt to recognize a C preprocessor
179 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
180 			 */
181 			ch = getchar();	/*bump the #*/
182 			while (INCHARSET(ch, SPACE))
183 				ch = getchar();/*bump white */
184 			if (INCHARSET(ch, DIGIT)){
185 				intval = 0;
186 				while(INCHARSET(ch, DIGIT)){
187 					intval = intval*10 + ch - '0';
188 					ch = getchar();
189 				}
190 				while (INCHARSET(ch, SPACE))
191 					ch = getchar();
192 				if (ch == '"'){
193 					ptoken(bufptr, ILINENO);
194 					ptoken(bufptr, INT);
195 					pint(bufptr, intval - 1);
196 					ptoken(bufptr, IFILE);
197 					/*
198 					 *	The '"' has already been
199 					 *	munched
200 					 *
201 					 *	eatstr will not eat
202 					 *	the trailing \n, so
203 					 *	it is given to the parser
204 					 *	and counted.
205 					 */
206 					goto eatstr;
207 				}
208 			}
209 		}
210 		/*
211 		 *	Well, its just an ordinary decadent comment
212 		 */
213 		while ((ch != '\n') && (ch != EOFCHAR))
214 			ch = getchar();
215 		if (ch == EOFCHAR)
216 			goto endoffile;
217 		val = ryylval = oval = NL;
218 		scanlineno++;
219 		goto ret;
220 
221 	case NL:
222 		scanlineno++;
223 		val = ryylval;
224 		goto ret;
225 
226 	case SP:
227 		oval = SP;	/*invalidate ^# meta comments*/
228 		goto loop;
229 
230 	case REGOP:		/* % , could be used as modulo, or register*/
231 		ch = getchar();
232 		if (INCHARSET(ch, DIGIT)){
233 			ryylval = ch-'0';
234 			if (ch=='1') {
235 				if (INCHARSET( (ch = getchar()), REGDIGIT))
236 					ryylval = 10+ch-'0';
237 				else
238 					ungetc(ch);
239 			}
240 			/*
241 			 *	God only knows what the original author
242 			 *	wanted this undocumented feature to
243 			 *	do.
244 			 *		%5++ is really  r7
245 			 */
246 			while(INCHARSET( (ch = getchar()), SIGN)) {
247 				if (ch=='+')
248 					ryylval++;
249 				else
250 					ryylval--;
251 			}
252 			ungetc(ch);
253 			val = REG;
254 		} else {
255 			ungetc(ch);
256 			val = REGOP;
257 		}
258 		goto ret;
259 
260 	case ALPH:
261 		ch1 = ch;
262 		if (INCHARSET(ch, SZSPECBEGIN)){
263 			if( (ch = getchar()) == '`' || ch == '^'){
264 				ch1 |= 0100;	/*convert to lower*/
265 				switch(ch1){
266 				case 'b':	ryylval = 1;	break;
267 				case 'w':	ryylval = 2;	break;
268 				case 'l':	ryylval = 4;	break;
269 				default:	ryylval = d124;	break;
270 				}
271 				val = SIZESPEC;
272 				goto ret;
273 			} else {
274 				ungetc(ch);
275 				ch = ch1;	/*restore first character*/
276 			}
277 		}
278 		rcp = yytext;
279 		do {
280 			if (rcp < &yytext[NCPS])
281 				*rcp++ = ch;
282 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
283 		*rcp = '\0';
284 		while (INCHARSET(ch, SPACE))
285 			ch = getchar();
286 		ungetc(ch);
287 
288 		switch((op = *lookup(1))->s_tag){
289 		case 0:
290 		case LABELID:
291 			/*
292 			 *	Its a name... (Labels are subsets ofname)
293 			 */
294 			ryylval = (int)op;
295 			val = NAME;
296 			break;
297 		case INST0:
298 		case INSTn:
299 		case IJXXX:
300 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
301 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
302 			val = op->s_tag;
303 			break;
304 		default:
305 			ryylval = ( (struct instab *)op)->i_popcode;
306 			val = op->s_tag;
307 			break;
308 		}
309 		goto ret;
310 
311 	case DIG:
312 		/*
313 		 *	Implement call by reference on a reg variable
314 		 */
315 		cp = inbufptr;
316 		val = number(ch, &cp);
317 		/*
318 		 *	yylval or yybignum has been stuffed as a side
319 		 *	effect to number(); get the global yylval
320 		 *	into our fast local copy in case it was an INT.
321 		 */
322 		ryylval = yylval;
323 		inbufptr = cp;
324 		goto ret;
325 
326 	case LSH:
327 	case RSH:
328 		/*
329 		 *	We allow the C style operators
330 		 *	<< and >>, as well as < and >
331 		 */
332 		if ( (ch1 = getchar()) != ch)
333 			ungetc(ch1);
334 		val = ryylval;
335 		goto ret;
336 
337 	case MINUS:
338 		if ( (ch = getchar()) =='(')
339 			ryylval=val=MP;
340 		else {
341 			ungetc(ch);
342 			val=MINUS;
343 		}
344 		goto ret;
345 
346 	case SQ:
347 		if ((ryylval = getchar()) == '\n')
348 			scanlineno++;		/*not entirely correct*/
349 		val = INT;
350 		goto ret;
351 
352 	case DQ:
353 	   eatstr:
354 		linescrossed = 0;
355 		maxstrlg = (char *)bufub - (char *)bufptr;
356 
357 		if (maxstrlg < MAXSTRLG) {
358 			ungetc('"');
359 			*(bytetoktype *)bufptr = VOID ;
360 			bufub = bufptr;
361 			goto done;
362 		}
363 		if (maxstrlg > MAXSTRLG)
364 			maxstrlg = MAXSTRLG;
365 
366 		ptoken(bufptr, STRING);
367 		lgbackpatch = bufptr;	/*this is where the size goes*/
368 		bufptr += sizeof(lgtype);
369 		/*
370 		 *	bufptr is now set to
371 		 *	be stuffed with characters from
372 		 *	the input
373 		 */
374 
375 		while (   (maxstrlg > 0)
376 		       && !(INCHARSET( (ch = getchar()), STRESCAPE))
377 		      ){
378 			stuff:
379 				maxstrlg-= 1;
380 				pchar(bufptr, ch);
381 			}
382 		if (maxstrlg <= 0){	/*enough characters to fill a string buffer*/
383 			ungetc('"');		/*will read it next*/
384 		}
385 		else if (ch == '"');		/*done*/
386 		else if (ch == '\n'){
387 			yywarning("New line embedded in a string constant.");
388 			scanlineno++;
389 			linescrossed++;
390 			ch = getchar();
391 			if (ch == EOFCHAR){
392 			  do_eof:
393 				pchar(bufptr, '\n');
394 				ungetc(EOFCHAR);
395 			} else
396 			if (ch == NEEDCHAR){
397 				if ( (inbufptr = fillinbuffer()) == 0)
398 					goto do_eof;
399 				ch = '\n';
400 				goto stuff;
401 			} else {	/* simple case */
402 				ungetc(ch);
403 				ch = '\n';
404 				goto stuff;
405 			}
406 		} else {
407 			ch = getchar();		/*skip the '\\'*/
408 			if ( INCHARSET(ch, BSESCAPE)){
409 				switch (ch){
410 				  case 'b':  ch = '\b'; goto stuff;
411 				  case 'f':  ch = '\f'; goto stuff;
412 				  case 'n':  ch = '\n'; goto stuff;
413 				  case 'r':  ch = '\r'; goto stuff;
414 				  case 't':  ch = '\t'; goto stuff;
415 				}
416 			}
417 			if ( !(INCHARSET(ch,OCTDIGIT)) )  goto stuff;
418 			i = 0;
419 			intval = 0;
420 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
421 				i++;intval <<= 3;intval += ch - '0';
422 				ch = getchar();
423 			}
424 			ungetc(ch);
425 			val = (char)intval;
426 			goto stuff;
427 		}
428 		/*
429 		 *	bufptr now points at the next free slot
430 		 */
431 		bstrfromto(lgbackpatch, bufptr);
432 		if (linescrossed){
433 			val = ILINESKIP;
434 			ryylval = linescrossed;
435 			goto ret;
436 		} else
437 			goto builtval;
438 
439 	case BADCHAR:
440 		linescrossed = lineno;
441 		lineno = scanlineno;
442 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
443 			ryylval, ch);
444 		lineno = linescrossed;
445 		val = BADCHAR;
446 		goto ret;
447 
448 	default:
449 		val = ryylval;
450 		goto ret;
451 	}	/*end of the switch*/
452 	/*
453 	 *	here with one token, so stuff it
454 	 */
455    ret:
456 	oval = val;
457 	ptoken(bufptr, val);
458 	switch(val){
459 		case	ILINESKIP:
460 				pint(bufptr, ryylval);
461 				break;
462 		case	SIZESPEC:
463 				pchar(bufptr, ryylval);
464 				break;
465 		case	BFINT:	plong(bufptr, ryylval);
466 				break;
467 		case	INT:	plong(bufptr, ryylval);
468 				break;
469 		case 	BIGNUM:	pnumber(bufptr, yybignum);
470 				break;
471 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
472 				break;
473 		case	REG:	pchar(bufptr, ryylval);
474 				break;
475 		case	INST0:
476 		case	INSTn:
477 				popcode(bufptr, opstruct);
478 				break;
479 		case 	IJXXX:
480 				popcode(bufptr, opstruct);
481 				pptr(bufptr, (int)(struct symtab *)symalloc());
482 				break;
483 		case	ISTAB:
484 		case	ISTABSTR:
485 		case	ISTABNONE:
486 		case	ISTABDOT:
487 		case	IALIGN:
488 				pptr(bufptr, (int)(struct symtab *)symalloc());
489 				break;
490 	/*
491 	 *	default:
492 	 */
493 	 }
494 	 builtval: ;
495    }			/*end of the while to stuff the buffer*/
496    done:
497 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
498 
499 	/*
500 	 *	This is a real kludge:
501 	 *
502 	 *	We put the last token in the buffer to be  a MINUS
503 	 *	symbol.  This last token will never be picked up
504 	 *	in the normal way, but can be looked at during
505 	 *	a peekahead look that the short circuit expression
506 	 *	evaluator uses to see if an expression is complicated.
507 	 *
508 	 *	Consider the following situation:
509 	 *
510 	 *	.word	45		+	47
511 	 *        buffer 1      |  buffer 0
512 	 *	the peekahead would want to look across the buffer,
513 	 *	but will look in the buffer end zone, see the minus, and
514 	 *	fail.
515 	 */
516 	ptoken(bufptr, MINUS);
517 	InBufPtr = inbufptr;		/*copy this back*/
518 }
519