xref: /original-bsd/old/as.tahoe/asscan2.c (revision 77936e01)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
#ifndef lint
/*
 *	SCCS identification string; left out when the file is
 *	processed with `lint' defined.
 */
static char sccsid[] = "@(#)asscan2.c 4.14 7/6/83";
#endif /* not lint */
7 
8 #include "asscanl.h"
9 
/*
 *	Type of the token most recently produced by scan_dot_s().
 *	It starts out as NL; scan_dot_s() tests it against NL when it
 *	meets a '#' and overwrites it each time a token is emitted.
 */
10 static	inttoktype	oval = NL;
/*
 *	Size in bytes of the raw input buffer.
 */
11 #define	ASINBUFSIZ	4096
/*
 *	NOTE(review): inbufunget is not referenced by any code visible
 *	in this file.  It is declared directly in front of inbuffer and
 *	is presumably slack for characters pushed back below the start
 *	of inbuffer -- confirm against the ungetc macro in asscanl.h
 *	before reordering or resizing these declarations.
 */
12 char	inbufunget[8];
/*
 *	Raw input characters, refilled by fillinbuffer().
 */
13 char	inbuffer[ASINBUFSIZ];
/*
 *	Global scan position and remaining-character count.
 *	fillinbuffer() leaves them at inbuffer + 1 and (bytes read - 1),
 *	i.e. as if the first character had already been taken.
 */
14 char	*Ginbufptr = inbuffer;
15 int	Ginbufcnt = 0;
/*
 *	Set by fillinbuffer() when read() returns 0; the next call to
 *	fillinbuffer() clears it instead of reading.
 */
16 int	scannerhadeof;
17 
18 fillinbuffer()
19 {
20 		int	nread;
21 		int	goal;
22 		int	got;
23 
24 	nread = 0;
25 	if (scannerhadeof == 0){
26 		goal = sizeof(inbuffer);
27 		do {
28 			got = read(stdin->_file, inbuffer + nread, goal);
29 			if (got == 0)
30 				scannerhadeof = 1;
31 			if (got <= 0)
32 				break;
33 			nread += got;
34 			goal -= got;
35 		} while (goal);
36 	} else {
37 		scannerhadeof = 0;
38 	}
39 	/*
40 	 *	getchar assumes that Ginbufcnt and Ginbufptr
41 	 *	are adjusted as if one character has been removed
42 	 *	from the input.
43 	 */
44 	if (nread == 0){
45 		inbuffer[0] = EOFCHAR;
46 		nread = 1;
47 	}
48 	Ginbufcnt = nread - 1;
49 	Ginbufptr = inbuffer + 1;
50 }
51 
52 scan_dot_s(bufferbox)
53 	struct tokbufdesc *bufferbox;
54 {
55 	reg	char	*inbufptr;
56 	reg	int	inbufcnt;
57 	reg	int	ryylval;	/* local copy of lexical value */
58 	extern	int	yylval;		/* global copy of lexical value */
59 	reg	int	val;		/* the value returned */
60 		int	i;		/* simple counter */
61 	reg	char	*rcp;
62 		int	ch;		/* treated as a character */
63 		int	ch1;		/* shadow value */
64 		struct 	symtab	*op;
65 	reg	ptrall	bufptr;		/* where to stuff tokens */
66 		ptrall	bufub;		/* where not to stuff tokens */
67 		long	intval;		/* value of int */
68 		int	linescrossed;	/* when doing strings and comments */
69 		u_char	opstruct;
70 	reg	int	strlg;		/* the length of a string */
71 
72 	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);
73 	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);
74 
75 	MEMTOREGBUF;
76 	if (newfflag){
77 		newfflag = 0;
78 		ryylval = (int)savestr(newfname, strlen(newfname)+1, STR_BOTH);
79 
80 		ptoken(bufptr, IFILE);
81 		ptoken(bufptr, STRING);
82 		pptr(bufptr, ryylval);
83 
84 		ptoken(bufptr, ILINENO);
85 		ptoken(bufptr, INT);
86 		pint(bufptr,  1);
87 	}
88 
89 	while (bufptr < bufub){
90    loop:
91         switch(ryylval = (type+1)[ch = getchar()]) {
92 	case SCANEOF:
93 	endoffile: ;
94 		inbufptr = 0;
95 		ptoken(bufptr, PARSEEOF);
96 		goto done;
97 
98 	case DIV:		/*process C style comments*/
99 		if ( (ch = getchar()) == '*') {  /*comment prelude*/
100 			int	incomment;
101 			linescrossed = 0;
102 			incomment = 1;
103 			ch = getchar();	/*skip over the * */
104 			while(incomment){
105 				switch(ch){
106 				case '*':
107 					ch = getchar();
108 					incomment = (ch != '/');
109 					break;
110 				case '\n':
111 					scanlineno++;
112 					linescrossed++;
113 					ch = getchar();
114 					break;
115 				case EOFCHAR:
116 					goto endoffile;
117 				default:
118 					ch = getchar();
119 					break;
120 				}
121 			}
122 			val = ILINESKIP;
123 			ryylval = linescrossed;
124 			goto ret;
125 		} else {	/*just an ordinary DIV*/
126 			ungetc(ch);
127 			val = ryylval = DIV;
128 			goto ret;
129 		}
130 	case SH:
131 		if (oval == NL){
132 			/*
133 			 *	Attempt to recognize a C preprocessor
134 			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
135 			 */
136 			ch = getchar();	/*bump the #*/
137 			while (INCHARSET(ch, SPACE))
138 				ch = getchar();/*bump white */
139 			if (INCHARSET(ch, DIGIT)){
140 				intval = 0;
141 				while(INCHARSET(ch, DIGIT)){
142 					intval = intval*10 + ch - '0';
143 					ch = getchar();
144 				}
145 				while (INCHARSET(ch, SPACE))
146 					ch = getchar();
147 				if (ch == '"' || ch == '\n'){
148 					ptoken(bufptr, ILINENO);
149 					ptoken(bufptr, INT);
150 					pint(bufptr, intval - 1);
151 					if (ch == '"')
152 					{
153 						ptoken(bufptr, IFILE);
154 					/*
155 					 *	The '"' has already been
156 					 *	munched
157 					 *
158 					 *	eatstr will not eat
159 					 *	the trailing \n, so
160 					 *	it is given to the parser
161 					 *	and counted.
162 					 */
163 					goto eatstr;
164 					}
165 				}
166 			}
167 		}
168 		/*
169 		 *	Well, its just an ordinary decadent comment
170 		 */
171 		while ((ch != '\n') && (ch != EOFCHAR))
172 			ch = getchar();
173 		if (ch == EOFCHAR)
174 			goto endoffile;
175 		val = ryylval = oval = NL;
176 		scanlineno++;
177 		goto ret;
178 
179 	case NL:
180 		scanlineno++;
181 		val = ryylval;
182 		goto ret;
183 
184 	case SP:
185 		oval = SP;	/*invalidate ^# meta comments*/
186 		goto loop;
187 
188 	case REGOP:		/* % , could be used as modulo, or register*/
189 		ch = getchar();
190 		if (INCHARSET(ch, DIGIT)){
191 			ryylval = ch-'0';
192 			if (ch=='1') {
193 				if (INCHARSET( (ch = getchar()), REGDIGIT))
194 					ryylval = 10+ch-'0';
195 				else
196 					ungetc(ch);
197 			}
198 			/*
199 			 *	God only knows what the original author
200 			 *	wanted this undocumented feature to
201 			 *	do.
202 			 *		%5++ is really  r7
203 			 */
204 			while(INCHARSET( (ch = getchar()), SIGN)) {
205 				if (ch=='+')
206 					ryylval++;
207 				else
208 					ryylval--;
209 			}
210 			ungetc(ch);
211 			val = REG;
212 		} else {
213 			ungetc(ch);
214 			val = REGOP;
215 		}
216 		goto ret;
217 
218 	case ALPH:
219 		ch1 = ch;
220 		if (INCHARSET(ch, SZSPECBEGIN)){
221 			if( (ch = getchar()) == '`' || ch == '^'){
222 				ch1 |= 0100;	/*convert to lower*/
223 				switch(ch1){
224 				case 'b':	ryylval = 1;	break;
225 				case 'w':	ryylval = 2;	break;
226 				case 'l':	ryylval = 4;	break;
227 				default:	ryylval = d124;	break;
228 				}
229 				val = SIZESPEC;
230 				goto ret;
231 			} else {
232 				ungetc(ch);
233 				ch = ch1;	/*restore first character*/
234 			}
235 		}
236 		rcp = yytext;
237 		do {
238 			if (rcp < &yytext[NCPName])
239 				*rcp++ = ch;
240 		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
241 		*rcp = '\0';
242 		while (INCHARSET(ch, SPACE))
243 			ch = getchar();
244 		ungetc(ch);
245 
246 		switch((op = *lookup(1))->s_tag){
247 		case 0:
248 		case LABELID:
249 			/*
250 			 *	Its a name... (Labels are subsets of name)
251 			 */
252 			ryylval = (int)op;
253 			val = NAME;
254 			break;
255 		case INST0:
256 		case INSTn:
257 		case IJXXX:
258 			opstruct = ( (struct instab *)op)->i_opcode;
259 			val = op->s_tag;
260 			break;
261 		default:
262 			ryylval = ( (struct instab *)op)->i_opcode;
263 			val = op->s_tag;
264 			break;
265 		}
266 		goto ret;
267 
268 	case DIG:
269 		/*
270 		 *	restore local inbufptr and inbufcnt
271 		 */
272 		REGTOMEMBUF;
273 		val = number(ch);
274 		MEMTOREGBUF;
275 		/*
276 		 *	yylval or yybignum has been stuffed as a side
277 		 *	effect to number(); get the global yylval
278 		 *	into our fast local copy in case it was an INT.
279 		 */
280 		ryylval = yylval;
281 		goto ret;
282 
283 	case LSH:
284 	case RSH:
285 		/*
286 		 *	We allow the C style operators
287 		 *	<< and >>, as well as < and >
288 		 */
289 		if ( (ch1 = getchar()) != ch)
290 			ungetc(ch1);
291 		val = ryylval;
292 		goto ret;
293 
294 	case MINUS:
295 		if ( (ch = getchar()) =='(')
296 			ryylval=val=MP;
297 		else {
298 			ungetc(ch);
299 			val=MINUS;
300 		}
301 		goto ret;
302 
303 	case SQ:
304 		if ((ryylval = getchar()) == '\n')
305 			scanlineno++;		/*not entirely correct*/
306 		val = INT;
307 		goto ret;
308 
309 	case DQ:
310 	   eatstr:
311 		linescrossed = 0;
312 		for (strlg = 0; /*VOID*/; strlg++){
313 		    switch(ch = getchar()){
314 		    case '"':
315 			goto tailDQ;
316 		    default:
317 		    stuff:
318 			putc(ch, strfile);
319 			break;
320 		    case '\n':
321 			yywarning("New line in a string constant");
322 			scanlineno++;
323 			linescrossed++;
324 			ch = getchar();
325 			switch(ch){
326 			case EOFCHAR:
327 				putc('\n', strfile);
328 				ungetc(EOFCHAR);
329 				goto tailDQ;
330 			default:
331 				ungetc(ch);
332 				ch = '\n';
333 				goto stuff;
334 			}
335 			break;
336 
337 		    case '\\':
338 			ch = getchar();		/*skip the '\\'*/
339 			if ( INCHARSET(ch, BSESCAPE)){
340 				switch (ch){
341 				  case 'b':  ch = '\b'; goto stuff;
342 				  case 'f':  ch = '\f'; goto stuff;
343 				  case 'n':  ch = '\n'; goto stuff;
344 				  case 'r':  ch = '\r'; goto stuff;
345 				  case 't':  ch = '\t'; goto stuff;
346 				}
347 			}
348 			if ( !(INCHARSET(ch, OCTDIGIT)) )
349 				goto stuff;
350 			i = 0;
351 			intval = 0;
352 			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
353 				i++;
354 				intval <<= 3;
355 				intval += ch - '0';
356 				ch = getchar();
357 			}
358 			ungetc(ch);
359 			ch = (char)intval;
360 			goto stuff;
361 		    }
362 		}
363 	tailDQ: ;
364 		/*
365 		 *	account for any lines that were crossed
366 		 */
367 		if (linescrossed){
368 			ptoken(bufptr, ILINESKIP);
369 			pint(bufptr, linescrossed);
370 		}
371 		/*
372 		 *	Cheat: append a trailing null to the string
373 		 *	and then adjust the string length to ignore
374 		 *	the trailing null.  If any STRING client requires
375 		 *	the trailing null, the client can just change STRLEN
376 		 */
377 		putc(0, strfile);
378 		ryylval = (int)savestr((char *)0, strlg + 1, STR_FILE);
379 		val = STRING;
380 		((struct strdesc *)ryylval)->sd_strlen -= 1;
381 		goto ret;
382 
383 	case BADCHAR:
384 		linescrossed = lineno;
385 		lineno = scanlineno;
386 		yyerror("Illegal character mapped: %d, char read:(octal) %o",
387 			ryylval, ch);
388 		lineno = linescrossed;
389 		val = BADCHAR;
390 		goto ret;
391 
392 	default:
393 		val = ryylval;
394 		goto ret;
395 	}	/*end of the switch*/
396 	/*
397 	 *	here with one token, so stuff it
398 	 */
399    ret:
400 	oval = val;
401 	ptoken(bufptr, val);
402 	switch(val){
403 		case	ILINESKIP:
404 				pint(bufptr, ryylval);
405 				break;
406 		case	SIZESPEC:
407 				pchar(bufptr, ryylval);
408 				break;
409 		case	BFINT:	plong(bufptr, ryylval);
410 				break;
411 		case	INT:	plong(bufptr, ryylval);
412 				break;
413 		case 	BIGNUM:	pnumber(bufptr, yybignum);
414 				break;
415 		case	STRING:	pptr(bufptr, (int)(char *)ryylval);
416 				break;
417 		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
418 				break;
419 		case	REG:	pchar(bufptr, ryylval);
420 				break;
421 		case	INST0:
422 		case	INSTn:
423 				popcode(bufptr, opstruct);
424 				break;
425 		case 	IJXXX:
426 				popcode(bufptr, opstruct);
427 				pptr(bufptr, (int)(struct symtab *)symalloc());
428 				break;
429 		case	ISTAB:
430 		case	ISTABSTR:
431 		case	ISTABNONE:
432 		case	ISTABDOT:
433 		case	IALIGN:
434 				pptr(bufptr, (int)(struct symtab *)symalloc());
435 				break;
436 	/*
437 	 *	default:
438 	 */
439 	 }
440 	 builtval: ;
441    }			/*end of the while to stuff the buffer*/
442    done:
443 	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);
444 	/*
445 	 *	This is a real kludge:
446 	 *
447 	 *	We put the last token in the buffer to be  a MINUS
448 	 *	symbol.  This last token will never be picked up
449 	 *	in the normal way, but can be looked at during
450 	 *	a peekahead look that the short circuit expression
451 	 *	evaluator uses to see if an expression is complicated.
452 	 *
453 	 *	Consider the following situation:
454 	 *
455 	 *	.word	45		+	47
456 	 *        buffer 1      |  buffer 0
457 	 *	the peekahead would want to look across the buffer,
458 	 *	but will look in the buffer end zone, see the minus, and
459 	 *	fail.
460 	 */
461 	ptoken(bufptr, MINUS);
462 	REGTOMEMBUF;
463 }
464