xref: /original-bsd/old/as.vax/asscan2.c (revision 1b4ef7de)
1 /*
2  * Copyright (c) 1982 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  */
6 
/*
 *	SCCS identification string; left out when the file is run through lint.
 */
#ifndef lint
static char sccsid[] = "@(#)asscan2.c	5.1 (Berkeley) 04/30/85";
#endif /* not lint */
10 
11 #include "asscanl.h"
12 
13 static	inttoktype	oval = NL;
14 #define	ASINBUFSIZ	4096
15 char	inbufunget[8];
16 char	inbuffer[ASINBUFSIZ];
17 char	*Ginbufptr = inbuffer;
18 int	Ginbufcnt = 0;
19 int	scannerhadeof;
20 
21 fillinbuffer()
22 {
23 		int	nread;
24 		int	goal;
25 		int	got;
26 
27 	nread = 0;
28 	if (scannerhadeof == 0){
29 		goal = sizeof(inbuffer);
30 		do {
31 			got = read(stdin->_file, inbuffer + nread, goal);
32 			if (got == 0)
33 				scannerhadeof = 1;
34 			if (got <= 0)
35 				break;
36 			nread += got;
37 			goal -= got;
38 		} while (goal);
39 	} else {
40 		scannerhadeof = 0;
41 	}
42 	/*
43 	 *	getchar assumes that Ginbufcnt and Ginbufptr
44 	 *	are adjusted as if one character has been removed
45 	 *	from the input.
46 	 */
47 	if (nread == 0){
48 		inbuffer[0] = EOFCHAR;
49 		nread = 1;
50 	}
51 	Ginbufcnt = nread - 1;
52 	Ginbufptr = inbuffer + 1;
53 }
54 
55 scan_dot_s(bufferbox)
56 	struct tokbufdesc *bufferbox;
57 {
58 	reg	char	*inbufptr;
59 	reg	int	inbufcnt;
60 	reg	int	ryylval;	/* local copy of lexical value */
61 	extern	int	yylval;		/* global copy of lexical value */
62 	reg	int	val;		/* the value returned */
63 		int	i;		/* simple counter */
64 	reg	char	*rcp;
65 		int	ch;		/* treated as a character */
66 		int	ch1;		/* shadow value */
67 		struct 	symtab	*op;
68 		ptrall	lgbackpatch;	/* where to stuff a string length */
69 	reg	ptrall	bufptr;		/* where to stuff tokens */
70 		ptrall	bufub;		/* where not to stuff tokens */
71 		long	intval;		/* value of int */
72 		int	linescrossed;	/* when doing strings and comments */
73 		struct	Opcode		opstruct;
74 	reg	int	strlg;		/* the length of a string */
75 
76 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
77 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
78 
79 	MEMTOREGBUF;
80 	if (newfflag){
81 		newfflag = 0;
82 		ryylval = (int)savestr(newfname, strlen(newfname)+1, STR_BOTH);
83 
84 		ptoken(bufptr, IFILE);
85 		ptoken(bufptr, STRING);
86 		pptr(bufptr, ryylval);
87 
88 		ptoken(bufptr, ILINENO);
89 		ptoken(bufptr, INT);
90 		pint(bufptr,  1);
91 	}
92 
93 	while (bufptr < bufub){
94    loop:
95         switch(ryylval = (type+1)[ch = getchar()]) {
96 	case SCANEOF:
97 	endoffile: ;
98 		inbufptr = 0;
99 		ptoken(bufptr, PARSEEOF);
100 		goto done;
101 
102 	case DIV:		/*process C style comments*/
103 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
104 			int	incomment;
105 			linescrossed = 0;
106 			incomment = 1;
107 			ch = getchar();	/*skip over the * */
108 			while(incomment){
109 				switch(ch){
110 				case '*':
111 					ch = getchar();
112 					incomment = (ch != '/');
113 					break;
114 				case '\n':
115 					scanlineno++;
116 					linescrossed++;
117 					ch = getchar();
118 					break;
119 				case EOFCHAR:
120 					goto endoffile;
121 				default:
122 					ch = getchar();
123 					break;
124 				}
125 			}
126 			val = ILINESKIP;
127 			ryylval = linescrossed;
128 			goto ret;
129 		} else {	/*just an ordinary DIV*/
130 			ungetc(ch);
131 			val = ryylval = DIV;
132 			goto ret;
133 		}
134 	case SH:
135 		if (oval == NL){
136 			/*
137 			 *	Attempt to recognize a C preprocessor
138 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
139 			 */
140 			ch = getchar();	/*bump the #*/
141 			while (INCHARSET(ch, SPACE))
142 				ch = getchar();/*bump white */
143 			if (INCHARSET(ch, DIGIT)){
144 				intval = 0;
145 				while(INCHARSET(ch, DIGIT)){
146 					intval = intval*10 + ch - '0';
147 					ch = getchar();
148 				}
149 				while (INCHARSET(ch, SPACE))
150 					ch = getchar();
151 				if (ch == '"'){
152 					ptoken(bufptr, ILINENO);
153 					ptoken(bufptr, INT);
154 					pint(bufptr, intval - 1);
155 					ptoken(bufptr, IFILE);
156 					/*
157 					 *	The '"' has already been
158 					 *	munched
159 					 *
160 					 *	eatstr will not eat
161 					 *	the trailing \n, so
162 					 *	it is given to the parser
163 					 *	and counted.
164 					 */
165 					goto eatstr;
166 				}
167 			}
168 		}
169 		/*
170 		 *	Well, its just an ordinary decadent comment
171 		 */
172 		while ((ch != '\n') && (ch != EOFCHAR))
173 			ch = getchar();
174 		if (ch == EOFCHAR)
175 			goto endoffile;
176 		val = ryylval = oval = NL;
177 		scanlineno++;
178 		goto ret;
179 
180 	case NL:
181 		scanlineno++;
182 		val = ryylval;
183 		goto ret;
184 
185 	case SP:
186 		oval = SP;	/*invalidate ^# meta comments*/
187 		goto loop;
188 
189 	case REGOP:		/* % , could be used as modulo, or register*/
190 		ch = getchar();
191 		if (INCHARSET(ch, DIGIT)){
192 			ryylval = ch-'0';
193 			if (ch=='1') {
194 				if (INCHARSET( (ch = getchar()), REGDIGIT))
195 					ryylval = 10+ch-'0';
196 				else
197 					ungetc(ch);
198 			}
199 			/*
200 			 *	God only knows what the original author
201 			 *	wanted this undocumented feature to
202 			 *	do.
203 			 *		%5++ is really  r7
204 			 */
205 			while(INCHARSET( (ch = getchar()), SIGN)) {
206 				if (ch=='+')
207 					ryylval++;
208 				else
209 					ryylval--;
210 			}
211 			ungetc(ch);
212 			val = REG;
213 		} else {
214 			ungetc(ch);
215 			val = REGOP;
216 		}
217 		goto ret;
218 
219 	case ALPH:
220 		ch1 = ch;
221 		if (INCHARSET(ch, SZSPECBEGIN)){
222 			if( (ch = getchar()) == '`' || ch == '^'){
223 				ch1 |= 0100;	/*convert to lower*/
224 				switch(ch1){
225 				case 'b':	ryylval = 1;	break;
226 				case 'w':	ryylval = 2;	break;
227 				case 'l':	ryylval = 4;	break;
228 				default:	ryylval = d124;	break;
229 				}
230 				val = SIZESPEC;
231 				goto ret;
232 			} else {
233 				ungetc(ch);
234 				ch = ch1;	/*restore first character*/
235 			}
236 		}
237 		rcp = yytext;
238 		do {
239 			if (rcp < &yytext[NCPName])
240 				*rcp++ = ch;
241 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
242 		*rcp = '\0';
243 		while (INCHARSET(ch, SPACE))
244 			ch = getchar();
245 		ungetc(ch);
246 
247 		switch((op = *lookup(1))->s_tag){
248 		case 0:
249 		case LABELID:
250 			/*
251 			 *	Its a name... (Labels are subsets of name)
252 			 */
253 			ryylval = (int)op;
254 			val = NAME;
255 			break;
256 		case INST0:
257 		case INSTn:
258 		case IJXXX:
259 			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
260 			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
261 			val = op->s_tag;
262 			break;
263 		default:
264 			ryylval = ( (struct instab *)op)->i_popcode;
265 			val = op->s_tag;
266 			break;
267 		}
268 		goto ret;
269 
270 	case DIG:
271 		/*
272 		 *	restore local inbufptr and inbufcnt
273 		 */
274 		REGTOMEMBUF;
275 		val = number(ch);
276 		MEMTOREGBUF;
277 		/*
278 		 *	yylval or yybignum has been stuffed as a side
279 		 *	effect to number(); get the global yylval
280 		 *	into our fast local copy in case it was an INT.
281 		 */
282 		ryylval = yylval;
283 		goto ret;
284 
285 	case LSH:
286 	case RSH:
287 		/*
288 		 *	We allow the C style operators
289 		 *	<< and >>, as well as < and >
290 		 */
291 		if ( (ch1 = getchar()) != ch)
292 			ungetc(ch1);
293 		val = ryylval;
294 		goto ret;
295 
296 	case MINUS:
297 		if ( (ch = getchar()) =='(')
298 			ryylval=val=MP;
299 		else {
300 			ungetc(ch);
301 			val=MINUS;
302 		}
303 		goto ret;
304 
305 	case SQ:
306 		if ((ryylval = getchar()) == '\n')
307 			scanlineno++;		/*not entirely correct*/
308 		val = INT;
309 		goto ret;
310 
311 	case DQ:
312 	   eatstr:
313 		linescrossed = 0;
314 		for (strlg = 0; /*VOID*/; strlg++){
315 		    switch(ch = getchar()){
316 		    case '"':
317 			goto tailDQ;
318 		    default:
319 		    stuff:
320 			putc(ch, strfile);
321 			break;
322 		    case '\n':
323 			yywarning("New line in a string constant");
324 			scanlineno++;
325 			linescrossed++;
326 			ch = getchar();
327 			switch(ch){
328 			case EOFCHAR:
329 				putc('\n', strfile);
330 				ungetc(EOFCHAR);
331 				goto tailDQ;
332 			default:
333 				ungetc(ch);
334 				ch = '\n';
335 				goto stuff;
336 			}
337 			break;
338 
339 		    case '\\':
340 			ch = getchar();		/*skip the '\\'*/
341 			if ( INCHARSET(ch, BSESCAPE)){
342 				switch (ch){
343 				  case 'b':  ch = '\b'; goto stuff;
344 				  case 'f':  ch = '\f'; goto stuff;
345 				  case 'n':  ch = '\n'; goto stuff;
346 				  case 'r':  ch = '\r'; goto stuff;
347 				  case 't':  ch = '\t'; goto stuff;
348 				}
349 			}
350 			if ( !(INCHARSET(ch, OCTDIGIT)) )
351 				goto stuff;
352 			i = 0;
353 			intval = 0;
354 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
355 				i++;
356 				intval <<= 3;
357 				intval += ch - '0';
358 				ch = getchar();
359 			}
360 			ungetc(ch);
361 			ch = (char)intval;
362 			goto stuff;
363 		    }
364 		}
365 	tailDQ: ;
366 		/*
367 		 *	account for any lines that were crossed
368 		 */
369 		if (linescrossed){
370 			ptoken(bufptr, ILINESKIP);
371 			pint(bufptr, linescrossed);
372 		}
373 		/*
374 		 *	Cheat: append a trailing null to the string
375 		 *	and then adjust the string length to ignore
376 		 *	the trailing null.  If any STRING client requires
377 		 *	the trailing null, the client can just change STRLEN
378 		 */
379 		putc(0, strfile);
380 		ryylval = (int)savestr((char *)0, strlg + 1, STR_FILE);
381 		val = STRING;
382 		((struct strdesc *)ryylval)->sd_strlen -= 1;
383 		goto ret;
384 
385 	case BADCHAR:
386 		linescrossed = lineno;
387 		lineno = scanlineno;
388 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
389 			ryylval, ch);
390 		lineno = linescrossed;
391 		val = BADCHAR;
392 		goto ret;
393 
394 	default:
395 		val = ryylval;
396 		goto ret;
397 	}	/*end of the switch*/
398 	/*
399 	 *	here with one token, so stuff it
400 	 */
401    ret:
402 	oval = val;
403 	ptoken(bufptr, val);
404 	switch(val){
405 		case	ILINESKIP:
406 				pint(bufptr, ryylval);
407 				break;
408 		case	SIZESPEC:
409 				pchar(bufptr, ryylval);
410 				break;
411 		case	BFINT:	plong(bufptr, ryylval);
412 				break;
413 		case	INT:	plong(bufptr, ryylval);
414 				break;
415 		case 	BIGNUM:	pnumber(bufptr, yybignum);
416 				break;
417 		case	STRING:	pptr(bufptr, (int)(char *)ryylval);
418 				break;
419 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
420 				break;
421 		case	REG:	pchar(bufptr, ryylval);
422 				break;
423 		case	INST0:
424 		case	INSTn:
425 				popcode(bufptr, opstruct);
426 				break;
427 		case 	IJXXX:
428 				popcode(bufptr, opstruct);
429 				pptr(bufptr, (int)(struct symtab *)symalloc());
430 				break;
431 		case	ISTAB:
432 		case	ISTABSTR:
433 		case	ISTABNONE:
434 		case	ISTABDOT:
435 		case	IALIGN:
436 				pptr(bufptr, (int)(struct symtab *)symalloc());
437 				break;
438 	/*
439 	 *	default:
440 	 */
441 	 }
442 	 builtval: ;
443    }			/*end of the while to stuff the buffer*/
444    done:
445 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
446 	/*
447 	 *	This is a real kludge:
448 	 *
449 	 *	We put the last token in the buffer to be  a MINUS
450 	 *	symbol.  This last token will never be picked up
451 	 *	in the normal way, but can be looked at during
452 	 *	a peekahead look that the short circuit expression
453 	 *	evaluator uses to see if an expression is complicated.
454 	 *
455 	 *	Consider the following situation:
456 	 *
457 	 *	.word	45		+	47
458 	 *        buffer 1      |  buffer 0
459 	 *	the peekahead would want to look across the buffer,
460 	 *	but will look in the buffer end zone, see the minus, and
461 	 *	fail.
462 	 */
463 	ptoken(bufptr, MINUS);
464 	REGTOMEMBUF;
465 }
466