/*
 *	Copyright (c) 1982 Regents of the University of California
 */
#ifndef lint
static char sccsid[] = "@(#)asscan2.c 4.4 04/16/82";
#endif not lint

#include "asscanl.h"
static	inttoktype	oval = NL;

#define	NINBUFFERS	2
#define	INBUFLG		NINBUFFERS*BUFSIZ + 2
	/*
	 *	We have two input buffers; the first one is reserved
	 *	for catching the tail of a line split across a buffer
	 *	boundary; the other one are used for snarfing a buffer
	 *	worth of .s source.
	 */
static	char	inbuffer[INBUFLG];
static	char	*InBufPtr = 0;

/*
 *	fill the inbuffer from the standard input.
 *	Assert: there are always n COMPLETE! lines in the buffer area.
 *	Assert: there is always a \n terminating the last line
 *		in the buffer area.
 *	Assert: after the \n, there is an EOFCHAR (hard end of file)
 *		or a NEEDCHAR (end of buffer)
 *	Assert:	fgets always null pads the string it reads.
 *	Assert:	no ungetc's are done at the end of a line or at the
 *		beginning of a line.
 *	
 *	We read a complete buffer of characters in one single read.
 *	We then back scan within this buffer to find the end of the
 *	last complete line, and force the assertions, and save a pointer
 *	to the incomplete line.
 *	The next call to fillinbuffer will move the unread characters
 *	to the end of the first buffer, and then read another two buffers,
 *	completing the cycle.
 */

static	char	p_swapped = '\0';			
static	char	*p_start = &inbuffer[NINBUFFERS * BUFSIZ];
static	char	*p_stop = &inbuffer[NINBUFFERS * BUFSIZ];

char *fillinbuffer()
{
	register	char	*to;
	register	char	*from;
			char	*inbufptr;
	int		nread;
	static		int	hadeof;
	int		goal;
	int		got;

	*p_start = p_swapped;
	inbufptr = &inbuffer[1*BUFSIZ] - (p_stop - p_start);

	for (to = inbufptr, from = p_start; from < p_stop;)
		*to++ = *from++;
	/*
	 *	Now, go read two full buffers (hopefully)
	 */
	if (hadeof){
		hadeof = 0;
		return (0);
	}
	goal = (NINBUFFERS - 1)*BUFSIZ;
	nread = 0;
	do {
		got = read(stdin->_file, &inbuffer[1*BUFSIZ + nread], goal);
		if (got == 0)
			hadeof = 1;
		if (got <= 0)
			break;
		nread += got;
		goal -= got;
	} while (goal);

	if (nread == 0)
		return(0);
	p_stop = from = &inbuffer[1*BUFSIZ + nread];
	*from = '\0';

	while (*--from != '\n'){
		/*
		 *	back over the partial line
		 */
		if (from == &inbuffer[1*BUFSIZ]) {
			from = p_stop;
			*p_stop++ = '\n';
			break;
		} else {
			continue;
		}
	}

	from++;				/* first char of partial line */
	p_start = from;
	p_swapped = *p_start;
	*p_start = NEEDCHAR;		/* force assertion */
	return(inbufptr);
}

scan_dot_s(bufferbox)
	struct tokbufdesc *bufferbox;
{
	reg	int	ryylval;	/* local copy of lexical value */
	extern	int	yylval;		/* global copy of lexical value */
	reg	int	val;		/* the value returned */
		int	i;		/* simple counter */
	reg	char	*rcp;	
		char	*cp;		/* can have address taken */
	reg	int	ch;		/* treated as a character */
		int	ch1;		/* shadow value */
	reg	char	*inbufptr;
		struct 	symtab	*op;

	reg	ptrall	bufptr;		/* where to stuff tokens */
		ptrall	lgbackpatch;	/* where to stuff a string length */
		ptrall	bufub;		/* where not to stuff tokens */
		int	maxstrlg;	/* how long a string can be */
		long	intval;		/* value of int */
		int	linescrossed;	/* when doing strings and comments */
		struct	Opcode		opstruct;

	(bytetoktype *)bufptr = (bytetoktype *) & (bufferbox->toks[0]);	
	(bytetoktype *)bufub = &(bufferbox->toks[AVAILTOKS]);

	inbufptr = InBufPtr;
	if (inbufptr == 0){
		inbufptr = fillinbuffer();
		if (inbufptr == 0){	/*end of file*/
   		  endoffile:
			inbufptr = 0;
			ptoken(bufptr, PARSEEOF);
			goto done;
		}
	}

	if (newfflag){
		ptoken(bufptr, IFILE);
		ptoken(bufptr, STRING);
		val = strlen(newfname) + 1;
		movestr( (char *)&( ( (lgtype *)bufptr)[1]), newfname, val);
		bstrlg(bufptr, val);

		ptoken(bufptr, ILINENO);
		ptoken(bufptr, INT);
		pint(bufptr,  1);
		newfflag = 0;
	}

	while (bufptr < bufub){
   loop:
        switch(ryylval = (type+2)[ch = getchar()]) {
	case SCANEOF:
		inbufptr = 0;
		goto endoffile;

	case NEEDSBUF:
		inbufptr = fillinbuffer();
		if (inbufptr == 0)
			goto endoffile;
		goto loop;

	case DIV:		/*process C style comments*/
		if ( (ch = getchar()) == '*') {  /*comment prelude*/
			int	incomment;
			linescrossed = 0;
			incomment = 1;
			ch = getchar();	/*skip over the * */
			while(incomment){
				switch(ch){
				case '*':
					ch = getchar();
					incomment = (ch != '/');
					break;
				case '\n':
					scanlineno++;
					linescrossed++;
					ch = getchar();
					break;
				case EOFCHAR:
					goto endoffile;
				case NEEDCHAR:
					inbufptr = fillinbuffer();
					if (inbufptr == 0)
						goto endoffile;
					lineno++;
					ch = getchar();
					break;
				default:
					ch = getchar();
					break;
				}
			}
			val = ILINESKIP;
			ryylval = linescrossed;
			goto ret;
		} else {	/*just an ordinary DIV*/
			ungetc(ch);
			val = ryylval = DIV;
			goto ret;
		}
	case SH:
		if (oval == NL){
			/*
			 *	Attempt to recognize a C preprocessor
			 *	style comment '^#[ \t]*[0-9]*[ \t]*".*"
			 */
			ch = getchar();	/*bump the #*/
			while (INCHARSET(ch, SPACE))
				ch = getchar();/*bump white */
			if (INCHARSET(ch, DIGIT)){
				intval = 0;
				while(INCHARSET(ch, DIGIT)){
					intval = intval*10 + ch - '0';
					ch = getchar();
				}
				while (INCHARSET(ch, SPACE))
					ch = getchar();
				if (ch == '"'){
					ptoken(bufptr, ILINENO);
					ptoken(bufptr, INT);
					pint(bufptr, intval - 1);
					ptoken(bufptr, IFILE);
					/*
					 *	The '"' has already been
					 *	munched
					 *	
					 *	eatstr will not eat
					 *	the trailing \n, so
					 *	it is given to the parser
					 *	and counted.
					 */
					goto eatstr;
				}
			}
		}
		/*
		 *	Well, its just an ordinary decadent comment
		 */
		while ((ch != '\n') && (ch != EOFCHAR)) 
			ch = getchar();
		if (ch == EOFCHAR)
			goto endoffile;
		val = ryylval = oval = NL;
		scanlineno++;
		goto ret;

	case NL:
		scanlineno++;
		val = ryylval;
		goto ret;

	case SP:
		oval = SP;	/*invalidate ^# meta comments*/
		goto loop;

	case REGOP:		/* % , could be used as modulo, or register*/
		ch = getchar();
		if (INCHARSET(ch, DIGIT)){
			ryylval = ch-'0';
			if (ch=='1') {
				if (INCHARSET( (ch = getchar()), REGDIGIT))
					ryylval = 10+ch-'0';
				else
					ungetc(ch);
			}
			/*
			 *	God only knows what the original author
			 *	wanted this undocumented feature to
			 *	do.
			 *		%5++ is really  r7
			 */
			while(INCHARSET( (ch = getchar()), SIGN)) {
				if (ch=='+')
					ryylval++;
				else
					ryylval--;
			}
			ungetc(ch);
			val = REG;
		} else {
			ungetc(ch);
			val = REGOP;
		}
		goto ret;

	case ALPH:
		ch1 = ch;
		if (INCHARSET(ch, SZSPECBEGIN)){
			if( (ch = getchar()) == '`' || ch == '^'){
				ch1 |= 0100;	/*convert to lower*/
				switch(ch1){
				case 'b':	ryylval = 1;	break;
				case 'w':	ryylval = 2;	break;
				case 'l':	ryylval = 4;	break;
				default:	ryylval = d124;	break;
				}
				val = SIZESPEC;
				goto ret;
			} else {
				ungetc(ch);
				ch = ch1;	/*restore first character*/
			}
		}
		rcp = yytext;
		do {
			if (rcp < &yytext[NCPS])
				*rcp++ = ch;
		} while (INCHARSET ( (ch = getchar()), ALPHA | DIGIT));
		*rcp = '\0';
		while (INCHARSET(ch, SPACE))
			ch = getchar();
		ungetc(ch);
	
		switch((op = *lookup(1))->s_tag){
		case 0:
		case LABELID:
			/*
			 *	Its a name... (Labels are subsets ofname)
			 */
			ryylval = (int)op;
			val = NAME;
			break;
		case INST0:
		case INSTn:
		case IJXXX:
			opstruct.Op_popcode = ( (struct instab *)op)->i_popcode;
			opstruct.Op_eopcode = ( (struct instab *)op)->i_eopcode;
			val = op->s_tag;
			break;
		default:
			ryylval = ( (struct instab *)op)->i_popcode;
			val = op->s_tag;
			break;
		}
		goto ret;

	case DIG:
		/*
		 *	Implement call by reference on a reg variable
		 */
		cp = inbufptr;
		val = number(ch, &cp);
		/*
		 *	yylval or yybignum has been stuffed as a side
		 *	effect to number(); get the global yylval
		 *	into our fast local copy in case it was an INT.
		 */
		ryylval = yylval;
		inbufptr = cp;
		goto ret;

	case LSH:
	case RSH:
		/*
		 *	We allow the C style operators
		 *	<< and >>, as well as < and >
		 */
		if ( (ch1 = getchar()) != ch)
			ungetc(ch1);
		val = ryylval;
		goto ret;

	case MINUS:
		if ( (ch = getchar()) =='(')
			ryylval=val=MP;
		else {
			ungetc(ch);
			val=MINUS;
		}
		goto ret;

	case SQ:
		if ((ryylval = getchar()) == '\n')
			scanlineno++;		/*not entirely correct*/
		val = INT;
		goto ret;

	case DQ:
	   eatstr:
		linescrossed = 0;
		maxstrlg = (char *)bufub - (char *)bufptr;

		if (maxstrlg < MAXSTRLG) {
			ungetc('"');
			*(bytetoktype *)bufptr = VOID ;
			bufub = bufptr;
			goto done;
		}
		if (maxstrlg > MAXSTRLG)
			maxstrlg = MAXSTRLG;
		
		ptoken(bufptr, STRING);
		lgbackpatch = bufptr;	/*this is where the size goes*/
		bufptr += sizeof(lgtype);
		/*
		 *	bufptr is now set to
		 *	be stuffed with characters from
		 *	the input
		 */

		while (   (maxstrlg > 0)
		       && !(INCHARSET( (ch = getchar()), STRESCAPE))
		      ){
			stuff:
				maxstrlg -= 1;
				pchar(bufptr, ch);
			}
		if (maxstrlg <= 0){	/*enough characters to fill a string buffer*/
			ungetc('"');		/*will read it next*/
		}
		else if (ch == '"')
			/*VOID*/ ;		/*done*/
		else if (ch == '\n'){
			yywarning("New line embedded in a string constant.");
			scanlineno++;
			linescrossed++;
			ch = getchar();
			if (ch == EOFCHAR){
			  do_eof:
				pchar(bufptr, '\n');
				ungetc(EOFCHAR);
			} else
			if (ch == NEEDCHAR){
				if ( (inbufptr = fillinbuffer()) == 0)
					goto do_eof;
				ch = '\n';
				goto stuff;
			} else {	/* simple case */
				ungetc(ch);
				ch = '\n';
				goto stuff;
			}
		} else {
			ch = getchar();		/*skip the '\\'*/
			if ( INCHARSET(ch, BSESCAPE)){
				switch (ch){
				  case 'b':  ch = '\b'; goto stuff;
				  case 'f':  ch = '\f'; goto stuff;
				  case 'n':  ch = '\n'; goto stuff;
				  case 'r':  ch = '\r'; goto stuff;
				  case 't':  ch = '\t'; goto stuff;
				}
			}
			if ( !(INCHARSET(ch,OCTDIGIT)) )  goto stuff;
			i = 0;
			intval = 0;
			while ( (i < 3) && (INCHARSET(ch, OCTDIGIT))){
				i++;intval <<= 3;intval += ch - '0';
				ch = getchar();
			}
			ungetc(ch);
			ch = (char)intval;
			goto stuff;
		}
		/*
		 *	bufptr now points at the next free slot
		 */
		bstrfromto(lgbackpatch, bufptr);
		if (linescrossed){
			val = ILINESKIP;
			ryylval = linescrossed;
			goto ret;
		} else
			goto builtval;

	case BADCHAR:
		linescrossed = lineno;
		lineno = scanlineno;
		yyerror("Illegal character mapped: %d, char read:(octal) %o",
			ryylval, ch);
		lineno = linescrossed;
		val = BADCHAR;
		goto ret;

	default:
		val = ryylval;
		goto ret;
	}	/*end of the switch*/
	/*
	 *	here with one token, so stuff it
	 */
   ret:	
	oval = val;
	ptoken(bufptr, val);
	switch(val){
		case	ILINESKIP:
				pint(bufptr, ryylval);
				break;
		case	SIZESPEC:
				pchar(bufptr, ryylval);
				break;
		case	BFINT:	plong(bufptr, ryylval);
				break;
		case	INT:	plong(bufptr, ryylval);
				break;
		case 	BIGNUM:	pnumber(bufptr, yybignum);
				break;
		case	NAME:	pptr(bufptr, (int)(struct symtab *)ryylval);
				break;
		case	REG:	pchar(bufptr, ryylval);
				break;	
		case	INST0:
		case	INSTn:
				popcode(bufptr, opstruct);
				break;
		case 	IJXXX:
				popcode(bufptr, opstruct);
				pptr(bufptr, (int)(struct symtab *)symalloc());
				break;
		case	ISTAB:
		case	ISTABSTR:
		case	ISTABNONE:
		case	ISTABDOT:
		case	IALIGN:
				pptr(bufptr, (int)(struct symtab *)symalloc());
				break;
	/*
	 *	default:
	 */
	 }
	 builtval: ;
   }			/*end of the while to stuff the buffer*/
   done:
	bufferbox->tok_count = (bytetoktype *)bufptr - &(bufferbox->toks[0]);

	/*
	 *	This is a real kludge:
	 *
	 *	We put the last token in the buffer to be  a MINUS
	 *	symbol.  This last token will never be picked up
	 *	in the normal way, but can be looked at during
	 *	a peekahead look that the short circuit expression
	 *	evaluator uses to see if an expression is complicated.
	 *
	 *	Consider the following situation:
	 *
	 *	.word	45		+	47
	 *        buffer 1      |  buffer 0
	 *	the peekahead would want to look across the buffer,
	 *	but will look in the buffer end zone, see the minus, and
	 *	fail.
	 */
	ptoken(bufptr, MINUS);
	InBufPtr = inbufptr;		/*copy this back*/
}