1 // -*- C++ -*-
2 //*************************************************************************
3 //
4 // Copyright 2000-2021 by Wilson Snyder.  This program is free software;
5 // you can redistribute it and/or modify it under the terms of either the GNU
6 // Lesser General Public License Version 3 or the Perl Artistic License Version 2.0.
7 //
8 // This program is distributed in the hope that it will be useful,
9 // but WITHOUT ANY WARRANTY; without even the implied warranty of
10 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 // GNU General Public License for more details.
12 //
13 //*************************************************************************
14 /// \file
15 /// \brief Verilog::Preproc: Internal implementation of default preprocessor
16 ///
17 /// Authors: Wilson Snyder
18 ///
19 /// Code available from: https://www.veripool.org/verilog-perl
20 ///
21 //*************************************************************************
22 
23 #include <cstdio>
24 #include <cstdlib>
25 #include <fstream>
26 #include <cstring>
27 #include <stack>
28 #include <vector>
29 #include <map>
30 #include <list>
31 #include <cassert>
32 #include <cerrno>
33 
34 #include <sys/types.h>
35 #include <sys/stat.h>
36 #include <fcntl.h>
37 
38 #if defined(_WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
39 # include <io.h>
40 #else
41 # include <unistd.h>
42 #endif
43 
44 #include "VPreProc.h"
45 #include "VPreLex.h"
46 
47 //#undef yyFlexLexer
48 //#define yyFlexLexer xxFlexLexer
49 //#include <FlexLexer.h>
50 
51 //*************************************************************************
52 
/// Bookkeeping for one pending define substitution: the define's name, its
/// formal parameter text, and the actual arguments collected so far.
class VPreDefRef {
    // MEMBERS
    string m_name;  // Name of the define being referenced
    string m_params;  // Parameter list text used for the next expansion
    string m_nextarg;  // Argument text accumulated so far
    int m_parenLevel;  // Parenthesis count inside def args (for PARENT not child)

    vector<string> m_args;  // Arguments gathered for this reference
public:
    // CONSTRUCTORS
    /// Start a reference to \a name with formals \a params; no arguments yet
    /// and the parenthesis level at zero.
    VPreDefRef(const string& name, const string& params)
        : m_name(name)
        , m_params(params)
        , m_parenLevel(0) {}
    ~VPreDefRef() {}

    // ACCESSORS
    string name() const { return m_name; }
    string params() const { return m_params; }
    string nextarg() const { return m_nextarg; }
    int parenLevel() const { return m_parenLevel; }

    // MUTATORS
    void nextarg(const string& value) { m_nextarg = value; }
    void parenLevel(int value) { m_parenLevel = value; }
    /// Mutable access to the collected argument list
    vector<string>& args() { return m_args; }
};
73 
74 //*************************************************************************
75 /// Data for parsing on/off
76 
/// State of one pending `ifdef/`ifndef level.
class VPreIfEntry {
    // MEMBERS
    bool m_on;  // Text at this ifdef level is currently being emitted
    bool m_everOn;  // Some branch of this if/elsif tree has already been on
public:
    // CONSTRUCTORS
    /// \a everOn is the history from earlier branches; the new branch's own
    /// state \a on is folded into it.
    VPreIfEntry(bool on, bool everOn)
        : m_on(on)
        , m_everOn(on || everOn) {}
    ~VPreIfEntry() {}

    // ACCESSORS
    bool on() const { return m_on; }
    bool everOn() const { return m_everOn; }
};
88 
89 //*************************************************************************
90 /// Data for a preprocessor instantiation.
91 
92 class VPreProcImp : public VPreProcOpaque {
93 public:
94     typedef list<string> StrList;
95 
96     VPreProc*	m_preprocp;	///< Object we're holding data for
97     int		m_debug;	///< Debugging level
98     VPreLex*	m_lexp;		///< Current lexer state (NULL = closed)
99 
100     enum ProcState { ps_TOP,
101 		     ps_DEFNAME_UNDEF, ps_DEFNAME_DEFINE,
102 		     ps_DEFNAME_IFDEF, ps_DEFNAME_IFNDEF, ps_DEFNAME_ELSIF,
103 		     ps_DEFFORM, ps_DEFVALUE, ps_DEFPAREN, ps_DEFARG,
104 		     ps_INCNAME, ps_ERRORNAME, ps_JOIN, ps_STRIFY };
procStateName(ProcState s)105     static const char* procStateName(ProcState s) {
106 	static const char* states[]
107 	    = {"ps_TOP",
108 	       "ps_DEFNAME_UNDEF", "ps_DEFNAME_DEFINE",
109 	       "ps_DEFNAME_IFDEF", "ps_DEFNAME_IFNDEF", "ps_DEFNAME_ELSIF",
110 	       "ps_DEFFORM", "ps_DEFVALUE", "ps_DEFPAREN", "ps_DEFARG",
111 	       "ps_INCNAME", "ps_ERRORNAME", "ps_JOIN", "ps_STRIFY" };
112 	return states[s];
113     };
114 
115     stack<ProcState>	m_states; ///< Current state of parser
116     int		m_off;		///< If non-zero, ifdef level is turned off, don't dump text
117     string	m_lastSym;	///< Last symbol name found.
118     string	m_formals;	///< Last formals found
119 
120     // For getRawToken/ `line insertion
121     string	m_lineCmt;	///< Line comment(s) to be returned
122     bool	m_lineCmtNl;	///< Newline needed before inserting lineCmt
123     int		m_lineAdd;	///< Empty lines to return to maintain line count
124     bool	m_rawAtBol;	///< Last rawToken left us at beginning of line
125 
126     // For getFinalToken
127     bool	m_finAhead;	///< Have read a token ahead
128     int		m_finToken;	///< Last token read
129     string	m_finBuf;	///< Last yytext read
130     bool	m_finAtBol;	///< Last getFinalToken left us at beginning of line
131     VFileLine*	m_finFilelinep;	///< Location of last returned token (internal only)
132 
133     // For stringification
134     string	m_strify;	///< Text to be stringified
135 
136     // For defines
137     stack<VPreDefRef> m_defRefs; // Pending definine substitution
138     stack<VPreIfEntry> m_ifdefStack;	///< Stack of true/false emitting evaluations
139     unsigned	m_defDepth;	///< How many `defines deep
140     bool	m_defPutJoin;	///< Insert `` after substitution
141 
142     // For `` join
143     stack<string> m_joinStack;	///< Text on lhs of join
144 
145     // For getline()
146     string	m_lineChars;	///< Characters left for next line
147 
VPreProcImp()148     VPreProcImp() {
149 	m_debug = 0;
150 	m_states.push(ps_TOP);
151 	m_off = 0;
152 	m_lineChars = "";
153 	m_lastSym = "";
154 	m_lineAdd = 0;
155 	m_lineCmtNl = false;
156 	m_rawAtBol = true;
157 	m_finAhead = false;
158 	m_finAtBol = true;
159 	m_defDepth = 0;
160 	m_defPutJoin = false;
161 	m_finToken = 0;
162 	m_finFilelinep = NULL;
163 	m_lexp = NULL;
164 	m_preprocp = NULL;
165     }
configure(VFileLine * filelinep,VPreProc * preprocp)166     void configure(VFileLine* filelinep, VPreProc* preprocp) {
167 	// configure() separate from constructor to avoid calling abstract functions
168 	m_preprocp = preprocp;
169 	m_finFilelinep = filelinep->create(1);
170 	// Create lexer
171 	m_lexp = new VPreLex(this, filelinep);
172 	m_lexp->m_keepComments = m_preprocp->keepComments();
173 	m_lexp->m_keepWhitespace = m_preprocp->keepWhitespace();
174 	m_lexp->m_pedantic = m_preprocp->pedantic();
175 	m_lexp->m_synthesis = m_preprocp->synthesis();
176 	m_lexp->debug(debug()>=10 ? debug() : 0);  // See also VPreProc::debug() method
177     }
~VPreProcImp()178     ~VPreProcImp() {
179 	if (m_lexp) { delete m_lexp; m_lexp = NULL; }
180     }
181     const char* tokenName(int tok);
182     void debugToken(int tok, const char* cmtp);
183     void parseTop();
184     void parseUndef();
185     string getparseline(bool stop_at_eol, size_t approx_chunk);
isEof() const186     bool isEof() const { return m_lexp->curStreamp()->m_eof; }
187     bool readWholefile(const string& filename, StrList& outl);
188     void openFile(string filename, VFileLine* filelinep);
insertUnreadback(const string & text)189     void insertUnreadback(const string& text) { m_lineCmt += text; }
190     void insertUnreadbackAtBol(const string& text);
191     void addLineComment(int enter_exit_level);
192 private:
error(string msg)193     void error(string msg) { m_lexp->m_tokFilelinep->error(msg); }
fatal(string msg)194     void fatal(string msg) { m_lexp->m_tokFilelinep->fatal(msg); }
debug() const195     int debug() const { return m_debug; }
196     void endOfOneFile();
197     string defineSubst(VPreDefRef* refp);
198     string trimWhitespace(const string& strg, bool trailing);
199     void unputString(const string& strg);
200     void unputDefrefString(const string& strg);
201 
parsingOn()202     void parsingOn() {
203 	m_off--;
204 	if (m_off<0) fatalSrc("Underflow of parsing cmds");
205 	// addLineComment no longer needed; getFinalToken will correct.
206     }
parsingOff()207     void parsingOff() { m_off++; }
208 
209     int getRawToken();
210     int getStateToken(string& buf);
211     int getFinalToken(string& buf);
212 
state() const213     ProcState state() const { return m_states.top(); }
stateIsDefname() const214     bool stateIsDefname() const {
215         return state()==ps_DEFNAME_UNDEF
216             || state()==ps_DEFNAME_DEFINE
217             || state()==ps_DEFNAME_IFDEF
218             || state()==ps_DEFNAME_IFNDEF
219             || state()==ps_DEFNAME_ELSIF;
220     }
statePush(ProcState state)221     void statePush(ProcState state) {
222 	m_states.push(state);
223     }
statePop()224     void statePop() {
225 	m_states.pop();
226 	if (m_states.empty()) {
227 	    error("InternalError: Pop of parser state with nothing on stack");
228 	    m_states.push(ps_TOP);
229 	}
230     }
stateChange(ProcState state)231     void stateChange(ProcState state) {
232 	statePop(); statePush(state);
233     }
234 
235 };
236 
237 //*************************************************************************
238 // Creation
239 
VPreProc()240 VPreProc::VPreProc() {
241     VPreProcImp* idatap = new VPreProcImp();
242     m_opaquep = idatap;
243     // Below overridden by configure()
244     m_keepComments = true;
245     m_keepWhitespace = true;
246     m_lineDirectives = true;
247     m_pedantic = false;
248     m_synthesis = false;
249 }
250 
configure(VFileLine * filelinep)251 void VPreProc::configure(VFileLine* filelinep) {
252     VPreProcImp* idatap = static_cast<VPreProcImp*>(m_opaquep);
253     idatap->configure(filelinep, this);
254 }
255 
~VPreProc()256 VPreProc::~VPreProc() {
257     if (m_opaquep) { delete m_opaquep; m_opaquep = NULL; }
258 }
259 
260 //*************************************************************************
261 // VPreProc Methods.  Just call the implementation functions.
262 
comment(string cmt)263 void VPreProc::comment(string cmt) { }
openFile(string filename,VFileLine * filelinep)264 void VPreProc::openFile(string filename, VFileLine* filelinep) {
265     VPreProcImp* idatap = static_cast<VPreProcImp*>(m_opaquep);
266     idatap->openFile(filename,filelinep);
267 }
getline()268 string VPreProc::getline() {
269     VPreProcImp* idatap = static_cast<VPreProcImp*>(m_opaquep);
270     return idatap->getparseline(true,0);
271 }
getall(size_t approx_chunk)272 string VPreProc::getall(size_t approx_chunk) {
273     VPreProcImp* idatap = static_cast<VPreProcImp*>(m_opaquep);
274     return idatap->getparseline(false,approx_chunk);
275 }
debug(int level)276 void VPreProc::debug(int level) {
277     VPreProcImp* idatap = static_cast<VPreProcImp*>(m_opaquep);
278     idatap->m_debug = level;
279     // To see "accepting rule" debug, Makefile.PL must be changed to enable flex debug
280     // export VERILOGPERL_FLEX_DEBUG=1
281     idatap->m_lexp->debug(level>=10 ? level : 0);
282 }
isEof()283 bool VPreProc::isEof() {
284     VPreProcImp* idatap = static_cast<VPreProcImp*>(m_opaquep);
285     return idatap->isEof();
286 }
fileline()287 VFileLine* VPreProc::fileline() {
288     VPreProcImp* idatap = static_cast<VPreProcImp*>(m_opaquep);
289     return idatap->m_lexp->m_tokFilelinep;
290 }
insertUnreadback(string text)291 void VPreProc::insertUnreadback(string text) {
292     VPreProcImp* idatap = static_cast<VPreProcImp*>(m_opaquep);
293     return idatap->insertUnreadback(text);
294 }
295 
296 //**********************************************************************
297 // Parser Utilities
298 
tokenName(int tok)299 const char* VPreProcImp::tokenName(int tok) {
300     switch (tok) {
301     case VP_BACKQUOTE	: return("BACKQUOTE");
302     case VP_COMMENT	: return("COMMENT");
303     case VP_DEFARG	: return("DEFARG");
304     case VP_DEFFORM	: return("DEFFORM");
305     case VP_DEFINE	: return("DEFINE");
306     case VP_DEFREF	: return("DEFREF");
307     case VP_DEFREF_JOIN	: return("DEFREF_JOIN");
308     case VP_DEFVALUE	: return("DEFVALUE");
309     case VP_ELSE	: return("ELSE");
310     case VP_ELSIF	: return("ELSIF");
311     case VP_ENDIF	: return("ENDIF");
312     case VP_EOF		: return("EOF");
313     case VP_ERROR	: return("ERROR");
314     case VP_IFDEF	: return("IFDEF");
315     case VP_IFNDEF	: return("IFNDEF");
316     case VP_JOIN	: return("JOIN");
317     case VP_INCLUDE	: return("INCLUDE");
318     case VP_LINE	: return("LINE");
319     case VP_PSL		: return("PSL");
320     case VP_STRIFY	: return("STRIFY");
321     case VP_STRING	: return("STRING");
322     case VP_SYMBOL	: return("SYMBOL");
323     case VP_SYMBOL_JOIN	: return("SYMBOL_JOIN");
324     case VP_TEXT	: return("TEXT");
325     case VP_UNDEF	: return("UNDEF");
326     case VP_UNDEFINEALL	: return("UNDEFINEALL");
327     case VP_WHITE	: return("WHITE");
328     default: return("?");
329     }
330 }
331 
unputString(const string & strg)332 void VPreProcImp::unputString(const string& strg) {
333     // Note: The preliminary call in ::openFile bypasses this function
334     // We used to just m_lexp->unputString(strg.c_str());
335     // However this can lead to "flex scanner push-back overflow"
336     // so instead we scan from a temporary buffer, then on EOF return.
337     // This is also faster than the old scheme, amazingly.
338     if (m_lexp->m_bufferState!=m_lexp->currentBuffer()) {
339 	fatalSrc("bufferStack missing current buffer; will return incorrectly");
340 	// Hard to debug lost text as won't know till much later
341     }
342     m_lexp->scanBytes(strg);
343 }
344 
unputDefrefString(const string & strg)345 void VPreProcImp::unputDefrefString(const string& strg) {
346     int multiline = 0;
347     for (size_t i=0; i<strg.length(); i++) {
348 	if (strg[i] == '\n') multiline++;
349     }
350     unputString(strg);
351     // A define that inserts multiple newlines are really attributed to one source line,
352     // so temporarily don't increment lineno.
353     m_lexp->curStreamp()->m_ignNewlines += multiline;  // Must be after unput - applies to new stream
354 }
355 
/// Return a copy of \a strg with leading whitespace removed and, when
/// \a trailing is set, trailing whitespace removed as well.  If the trailing
/// whitespace directly follows a backslash, one whitespace character is
/// kept so the escaped space/newline survives.
string VPreProcImp::trimWhitespace(const string& strg, bool trailing) {
    // Characters are cast to unsigned char before calling isspace(): passing
    // a negative plain char (a byte >= 0x80 where char is signed) is
    // undefined behavior.
    string out = strg;
    // Remove leading whitespace
    string::size_type leadspace = 0;
    while (out.length() > leadspace
           && isspace(static_cast<unsigned char>(out[leadspace]))) leadspace++;
    if (leadspace) out.erase(0,leadspace);
    // Remove trailing whitespace
    if (trailing) {
        string::size_type trailspace = 0;
        while (out.length() > trailspace
               && isspace(static_cast<unsigned char>(out[out.length()-1-trailspace]))) trailspace++;
        // Don't remove \{space_or_newline}
        if (trailspace && out.length() > trailspace && out[out.length()-1-trailspace]=='\\')
            trailspace--;
        if (trailspace) out.erase(out.length()-trailspace,trailspace);
    }
    return out;
}
375 
defineSubst(VPreDefRef * refp)376 string VPreProcImp::defineSubst(VPreDefRef* refp) {
377     // Substitute out defines in a define reference.
378     // (We also need to call here on non-param defines to handle `")
379     // We could push the define text back into the lexer, but that's slow
380     // and would make recursive definitions and parameter handling nasty.
381     //
382     // Note we parse the definition parameters and value here.  If a
383     // parametrized define is used many, many times, we could cache the
384     // parsed result.
385     if (debug()>=5) {
386 	cout<<"defineSubstIn  `"<<refp->name()<<" "<<refp->params()<<endl;
387 	for (unsigned i=0; i<refp->args().size(); i++) {
388 	    cout<<"defineArg["<<i<<"] = '"<<refp->args()[i]<<"'"<<endl;
389 	}
390     }
391     // Grab value
392     string value = m_preprocp->defValue(refp->name());
393     if (debug()>=5) cout<<"defineValue    '"<<VPreLex::cleanDbgStrg(value)<<"'"<<endl;
394 
395     map<string,string> argValueByName;
396     {   // Parse argument list into map
397 	unsigned numArgs=0;
398 	string argName;
399 	int paren = 1;  // (), {} and [] can use same counter, as must be matched pair per spec
400 	string token;
401 	bool quote = false;
402 	bool haveDefault = false;
403 	// Note there's a leading ( and trailing ), so parens==1 is the base parsing level
404 	string params = refp->params();  // Must keep str in scope to get pointer
405 	const char* cp=params.c_str();
406 	if (*cp == '(') cp++;
407 	for (; *cp; cp++) {
408 	    //if (debug()>=5) cout <<"   Parse  Paren="<<paren<<"  Arg="<<numArgs<<"  token='"<<token<<"'  Parse="<<cp<<endl;
409 	    if (!quote && paren==1) {
410 		if (*cp==')' || *cp==',') {
411 		    string valueDef;
412 		    if (haveDefault) { valueDef=token; } else { argName=token; }
413 		    argName = trimWhitespace(argName,true);
414 		    if (debug()>=5) cout<<"    Got Arg="<<numArgs<<"  argName='"<<argName<<"'  default='"<<valueDef<<"'"<<endl;
415 		    // Parse it
416 		    if (argName!="") {
417 			if (refp->args().size() > numArgs) {
418 			    // A call `def( a ) must be equivelent to `def(a ), so trimWhitespace
419 			    // At one point we didn't trim trailing whitespace, but this confuses `"
420 			    string arg = trimWhitespace(refp->args()[numArgs], true);
421 			    if (arg != "") valueDef = arg;
422 			} else if (!haveDefault) {
423 			    error("Define missing argument '"+argName+"' for: "+refp->name()+"\n");
424 			    return " `"+refp->name()+" ";
425 			}
426 			numArgs++;
427 		    }
428 		    argValueByName[argName] = valueDef;
429 		    // Prepare for next
430 		    argName = "";
431 		    token = "";
432 		    haveDefault = false;
433 		    continue;
434 		}
435 		else if (*cp=='=') {
436 		    haveDefault = true;
437 		    argName = token;
438 		    token = "";
439 		    continue;
440 		}
441 	    }
442 	    if (cp[0]=='\\' && cp[1]) {
443 		token += cp[0]; // \{any} Put out literal next character
444 		token += cp[1];
445 		cp++;
446 		continue;
447 	    }
448 	    if (!quote) {
449 		if (*cp=='(' || *cp=='{' || *cp=='[') paren++;
450 		else if (*cp==')' || *cp=='}' || *cp==']') paren--;
451 	    }
452 	    if (*cp=='"') quote=!quote;
453 	    if (*cp) token += *cp;
454 	}
455 	if (refp->args().size() > numArgs
456 	    // `define X() is ok to call with nothing
457 	    && !(refp->args().size()==1 && numArgs==0 && trimWhitespace(refp->args()[0],false)=="")) {
458 	    error("Define passed too many arguments: "+refp->name()+"\n");
459 	    return " `"+refp->name()+" ";
460 	}
461     }
462 
463     string out = "";
464     {   // Parse substitution define using arguments
465 	string argName;
466 	bool quote = false;
467 	bool backslashesc = false;  // In \.....{space} block
468 	// Note we go through the loop once more at the NULL end-of-string
469 	for (const char* cp=value.c_str(); (*cp) || argName!=""; cp=(*cp?cp+1:cp)) {
470 	    //cout << "CH "<<*cp<<"  an "<<argName<<"\n";
471 	    if (!quote && *cp == '\\') { backslashesc = true; }
472 	    else if (isspace(*cp)) { backslashesc = false; }
473 	    // We don't check for quotes; some simulators expand even inside quotes
474 	    if ( isalpha(*cp) || *cp=='_'
475 		 || *cp=='$' // Won't replace system functions, since no $ in argValueByName
476 		 || (argName!="" && (isdigit(*cp) || *cp=='$'))) {
477 		argName += *cp;
478 		continue;
479 	    }
480 	    if (argName != "") {
481 		// Found a possible variable substitution
482 		map<string,string>::iterator iter = argValueByName.find(argName);
483 		if (iter != argValueByName.end()) {
484 		    // Substitute
485 		    string subst = iter->second;
486 		    if (subst == "") {
487 			// Normally `` is removed later, but with no token after, we're otherwise
488 			// stuck, so remove proceeding ``
489 			if (out.size()>=2 && out.substr(out.size()-2) == "``") {
490 			    out = out.substr(0, out.size()-2);
491 			}
492 		    } else {
493 			out += subst;
494 		    }
495 		} else {
496 		    out += argName;
497 		}
498 		argName = "";
499 	    }
500 	    if (!quote) {
501 		// Check for `` only after we've detected end-of-argname
502 		if (cp[0]=='`' && cp[1]=='`') {
503 		    if (backslashesc) {
504 			// Don't put out the ``, we're forming an escape which will not expand further later
505 		    } else {
506 			out += "``";   // `` must get removed later, as `FOO```BAR must pre-expand FOO and BAR
507 			// See also removal in empty substitutes above
508 		    }
509 		    cp++;
510 		    continue;
511 		}
512 		else if (cp[0]=='`' && cp[1]=='"') {
513 		    out += "`\"";  // `" means to put out a " without enabling quote mode (sort of)
514 		    // however we must expand any macro calls inside it first.
515 		    // So keep it `", so we don't enter quote mode.
516 		    cp++;
517 		    continue;
518 		}
519 		else if (cp[0]=='`' && cp[1]=='\\' && cp[2]=='`' && cp[3]=='"') {
520 		    out += "`\\`\"";   // `\`" means to put out a backslash quote
521 		    // Leave it literal until we parse the VP_STRIFY string
522 		    cp+=3;
523 		    continue;
524 		}
525 		else if (cp[0]=='`' && cp[1]=='\\') {
526 		    out += '\\';   // `\ means to put out a backslash
527 		    cp++;
528 		    continue;
529 		}
530 		else if (cp[0]=='\\' && cp[1]=='\n') {
531 		    // We kept the \\n when we lexed because we don't want whitespace
532 		    // trimming to mis-drop the final \\n
533 		    // At replacement time we need the standard newline.
534 		    out += "\n";	 // \\n newline
535 		    cp++;
536 		    continue;
537 		}
538 	    }
539 	    if (cp[0]=='\\' && cp[1]=='\"') {
540 		out += cp[0]; // \{any} Put out literal next character
541 		out += cp[1];
542 		cp++;
543 		continue;
544 	    }
545 	    else if (cp[0]=='\\') {
546 		// Normally \{any} would put out literal next character
547 		// Instead we allow "`define A(nm) \nm" to expand, per proposed mantis1537
548 		out += cp[0];
549 		continue;
550 	    }
551 	    if (*cp=='"') quote=!quote;
552 	    if (*cp) out += *cp;
553 	}
554     }
555 
556     if (debug()>=5) cout<<"defineSubstOut '"<<VPreLex::cleanDbgStrg(out)<<"'"<<endl;
557     return out;
558 }
559 
560 //**********************************************************************
561 // Parser routines
562 
readWholefile(const string & filename,StrList & outl)563 bool VPreProcImp::readWholefile(const string& filename, StrList& outl) {
564 // If change this code, run a test with the below size set very small
565 //#define INFILTER_IPC_BUFSIZ 16
566 #define INFILTER_IPC_BUFSIZ 64*1024
567     char buf[INFILTER_IPC_BUFSIZ];
568 
569     FILE* fp = NULL;
570     int fd;
571     bool eof = false;
572 
573     if (filename.length()>3 && 0==filename.compare(filename.length()-3, 3, ".gz")) {
574 	string cmd = "gunzip -c "+filename;
575         if ((fp = popen(cmd.c_str(), "r")) == NULL) {
576             return false;
577         }
578         fd = fileno(fp);
579     } else {
580         fd = open(filename.c_str(), O_RDONLY);
581         if (fd<0) return false;
582     }
583     while (!eof) {
584 	ssize_t todo = INFILTER_IPC_BUFSIZ;
585 	errno = 0;
586 	ssize_t got = read(fd, buf, todo);
587 	if (got>0) {
588 	    outl.push_back(string(buf, got));
589 	}
590 	else if (errno == EINTR || errno == EAGAIN
591 #ifdef EWOULDBLOCK
592 		 || errno == EWOULDBLOCK
593 #endif
594 	    ) {
595 	} else { eof = true; break; }
596     }
597 
598     if (fp) { pclose(fp); fp=NULL; }
599     else close(fd);
600     return true;
601 }
602 
openFile(string filename,VFileLine * filelinep)603 void VPreProcImp::openFile(string filename, VFileLine* filelinep) {
604     // Open a new file, possibly overriding the current one which is active.
605 
606     // Read a list<string> with the whole file.
607     StrList wholefile;
608     bool ok = readWholefile(filename, wholefile/*ref*/);
609     if (!ok) {
610 	error("File not found: "+filename+"\n");
611 	return;
612     }
613 
614     if (!m_preprocp->isEof()) {  // IE not the first file.
615 	// We allow the same include file twice, because occasionally it pops
616 	// up, with guards preventing a real recursion.
617 	if (m_lexp->m_streampStack.size()>VPreProc::INCLUDE_DEPTH_MAX) {
618 	    error("Recursive inclusion of file: "+filename);
619 	    return;
620 	}
621 	// There's already a file active.  Push it to work on the new one.
622 	addLineComment(0);
623     }
624 
625     // Create new stream structure
626     m_lexp->scanNewFile(m_preprocp->fileline()->create(filename, 1));
627     addLineComment(1); // Enter
628 
629     // Filter all DOS CR's en-mass.  This avoids bugs with lexing CRs in the wrong places.
630     // This will also strip them from strings, but strings aren't supposed to be multi-line without a "\"
631     for (StrList::iterator it=wholefile.begin(); it!=wholefile.end(); ++it) {
632 	// We don't end-loop at \0 as we allow and strip mid-string '\0's (for now).
633 	bool strip = false;
634 	const char* sp = it->data();
635 	const char* ep = sp + it->length();
636 	// Only process if needed, as saves extra string allocations
637 	for (const char* cp=sp; cp<ep; cp++) {
638 	    if (*cp == '\r' || *cp == '\0') {
639 		strip = true; break;
640 	    }
641 	}
642 	if (strip) {
643 	    string out;  out.reserve(it->length());
644 	    for (const char* cp=sp; cp<ep; cp++) {
645 		if (!(*cp == '\r' || *cp == '\0')) {
646 		    out += *cp;
647 		}
648 	    }
649 	    *it = out;
650 	}
651 
652 	// Push the data to an internal buffer.
653 	m_lexp->scanBytesBack(*it);
654 	// Reclaim memory; the push saved the string contents for us
655 	*it = "";
656     }
657 }
658 
insertUnreadbackAtBol(const string & text)659 void VPreProcImp::insertUnreadbackAtBol(const string& text) {
660     // Insert insuring we're at the beginning of line, for `line
661     // We don't always add a leading newline, as it may result in extra unreadback(newlines).
662     if (m_lineCmt == "") { m_lineCmtNl = true; }
663     else if (m_lineCmt[m_lineCmt.length()-1]!='\n') {
664 	insertUnreadback("\n");
665     }
666     insertUnreadback(text);
667 }
668 
addLineComment(int enter_exit_level)669 void VPreProcImp::addLineComment(int enter_exit_level) {
670     if (m_preprocp->lineDirectives()) {
671 	insertUnreadbackAtBol(m_lexp->curFilelinep()->lineDirectiveStrg(enter_exit_level));
672     }
673 }
674 
getRawToken()675 int VPreProcImp::getRawToken() {
676     // Get a token from the file, whatever it may be.
677     while (1) {
678       next_tok:
679 	if (m_lineAdd) {
680 	    m_lineAdd--;
681 	    m_rawAtBol = true;
682 	    yyourtext("\n",1);
683 	    if (debug()>=5) debugToken(VP_WHITE, "LNA");
684 	    return (VP_WHITE);
685 	}
686 	if (m_lineCmt!="") {
687 	    // We have some `line directive or other processed data to return to the user.
688 	    static string rtncmt;  // Keep the c string till next call
689 	    rtncmt = m_lineCmt;
690 	    if (m_lineCmtNl) {
691 		if (!m_rawAtBol) rtncmt = "\n"+rtncmt;
692 		m_lineCmtNl = false;
693 	    }
694 	    yyourtext(rtncmt.c_str(), rtncmt.length());
695 	    m_lineCmt = "";
696 	    if (yyourleng()) m_rawAtBol = (yyourtext()[yyourleng()-1]=='\n');
697 	    if (state()==ps_DEFVALUE) {
698 		VPreLex::s_currentLexp->appendDefValue(yyourtext(),yyourleng());
699 		goto next_tok;
700 	    } else {
701 		if (debug()>=5) debugToken(VP_TEXT, "LCM");
702 		return (VP_TEXT);
703 	    }
704 	}
705 	if (isEof()) return (VP_EOF);
706 
707 	// Snarf next token from the file
708 	int tok = m_lexp->lex();
709 
710 	if (debug()>=5) debugToken(tok, "RAW");
711 
712 	// A EOF on an include, so we can print `line and detect mis-matched "s
713 	if (tok==VP_EOF) {
714 	    goto next_tok;  // find the EOF, after adding needed lines
715 	}
716 
717 	if (yyourleng()) m_rawAtBol = (yyourtext()[yyourleng()-1]=='\n');
718 	return tok;
719     }
720 }
721 
debugToken(int tok,const char * cmtp)722 void VPreProcImp::debugToken(int tok, const char* cmtp) {
723     if (debug()>=5) {
724 	string buf = string(yyourtext(), yyourleng());
725 	string::size_type pos;
726 	while ((pos=buf.find("\n")) != string::npos) { buf.replace(pos, 1, "\\n"); }
727 	while ((pos=buf.find("\r")) != string::npos) { buf.replace(pos, 1, "\\r"); }
728 	fprintf(stderr, "%d: %s %s %s(%d) dr%d:  <%d>%-10s: %s\n",
729 		m_lexp->m_tokFilelinep->lineno(), cmtp, m_off?"of":"on",
730 		procStateName(state()), (int)m_states.size(), (int)m_defRefs.size(),
731 		m_lexp->currentStartState(), tokenName(tok), buf.c_str());
732     }
733 }
734 
735 // Sorry, we're not using bison/yacc. It doesn't handle returning white space
736 // in the middle of parsing other tokens.
737 
getStateToken(string & buf)738 int VPreProcImp::getStateToken(string& buf) {
739     // Return the next state-determined token
740     while (1) {
741       next_tok:
742 	if (isEof()) {
743 	    buf = string(yyourtext(), yyourleng());
744 	    return VP_EOF;
745 	}
746 	int tok = getRawToken();
747 
748 	// Most states emit white space and comments between tokens. (Unless collecting a string)
749 	if (tok==VP_WHITE && state() !=ps_STRIFY) {
750 	    buf = string(yyourtext(), yyourleng());
751 	    return (tok);
752 	}
753 	if (tok==VP_BACKQUOTE && state() !=ps_STRIFY) { tok = VP_TEXT; }
754 	if (tok==VP_COMMENT) {
755 	    if (!m_off) {
756 		if (m_lexp->m_keepComments == KEEPCMT_SUB
757 		    || m_lexp->m_keepComments == KEEPCMT_EXP) {
758 		    string rtn; rtn.assign(yyourtext(),yyourleng());
759 		    m_preprocp->comment(rtn);
760 		    // Need to insure "foo/**/bar" becomes two tokens
761 		    insertUnreadback(" ");
762 		} else if (m_lexp->m_keepComments) {
763 		    buf = string(yyourtext(), yyourleng());
764 		    return (tok);
765 		} else {
766 		    // Need to insure "foo/**/bar" becomes two tokens
767 		    insertUnreadback(" ");
768 		}
769 	    }
770 	    // We're off or processed the comment specially.  If there are newlines
771 	    // in it, we also return the newlines as TEXT so that the linenumber
772 	    // count is maintained for downstream tools
773 	    for (size_t len=0; len<(size_t)yyourleng(); len++) { if (yyourtext()[len]=='\n') m_lineAdd++; }
774 	    goto next_tok;
775 	}
776 	if (tok==VP_LINE) {
777 	    addLineComment(m_lexp->m_enterExit);
778 	    goto next_tok;
779 	}
780 
781 	if (tok==VP_DEFREF_JOIN) {
782 	    // Here's something fun and unspecified as yet:
783 	    // The existance of non-existance of a base define changes `` expansion
784 	    //	`define QA_b zzz
785 	    //	`define Q1 `QA``_b
786 	    //	 1Q1 -> zzz
787 	    //	`define QA a
788 	    //	 `Q1 -> a_b
789 	    // Note parenthesis make this unambiguous
790 	    //	`define Q1 `QA()``_b  // -> a_b
791 	    // This may be a side effect of how `UNDEFINED remains as `UNDEFINED,
792 	    // but it screws up our method here.  So hardcode it.
793 	    string name(yyourtext()+1,yyourleng()-1);
794 	    if (m_preprocp->defExists(name)) {   // JOIN(DEFREF)
795 		// Put back the `` and process the defref
796 		if (debug()>=5) cout<<"```: define "<<name<<" exists, expand first\n";
797 		m_defPutJoin = true;  // After define, unputString("``").  Not now as would lose yyourtext()
798 		if (debug()>=5) cout<<"TOKEN now DEFREF\n";
799 		tok = VP_DEFREF;
800 	    } else {  // DEFREF(JOIN)
801 		if (debug()>=5) cout<<"```: define "<<name<<" doesn't exist, join first\n";
802 		// FALLTHRU, handle as with VP_SYMBOL_JOIN
803 	    }
804 	}
805 	if (tok==VP_SYMBOL_JOIN || tok==VP_DEFREF_JOIN || tok==VP_JOIN) {  // not else if, can fallthru from above if()
806 	    // a`` -> string doesn't include the ``, so can just grab next and continue
807 	    string out(yyourtext(),yyourleng());
808 	    if (debug()>=5) cout<<"`` LHS:"<<out<<endl;
809 	    // a``b``c can have multiple joins, so we need a stack
810 	    m_joinStack.push(out);
811 	    statePush(ps_JOIN);
812 	    goto next_tok;
813 	}
814 
815 	// Deal with some special parser states
816 	switch (state()) {
817 	case ps_TOP: {
818 	    break;
819 	}
820 	case ps_DEFNAME_UNDEF:	// FALLTHRU
821 	case ps_DEFNAME_DEFINE:	// FALLTHRU
822 	case ps_DEFNAME_IFDEF:	// FALLTHRU
823 	case ps_DEFNAME_IFNDEF:	// FALLTHRU
824 	case ps_DEFNAME_ELSIF: {
825 	    if (tok==VP_SYMBOL) {
826 		m_lastSym.assign(yyourtext(),yyourleng());
827 		if (state()==ps_DEFNAME_IFDEF
828 		    || state()==ps_DEFNAME_IFNDEF) {
829 		    bool enable = m_preprocp->defExists(m_lastSym);
830 		    if (debug()>=5) cout<<"Ifdef "<<m_lastSym<<(enable?" ON":" OFF")<<endl;
831 		    if (state()==ps_DEFNAME_IFNDEF) enable = !enable;
832 		    m_ifdefStack.push(VPreIfEntry(enable,false));
833 		    if (!enable) parsingOff();
834 		    statePop();
835 		    goto next_tok;
836 		}
837 		else if (state()==ps_DEFNAME_ELSIF) {
838 		    if (m_ifdefStack.empty()) {
839 			error("`elsif with no matching `if\n");
840 		    } else {
841 			// Handle `else portion
842 			VPreIfEntry lastIf = m_ifdefStack.top(); m_ifdefStack.pop();
843 			if (!lastIf.on()) parsingOn();
844 			// Handle `if portion
845 			bool enable = !lastIf.everOn() && m_preprocp->defExists(m_lastSym);
846 			if (debug()>=5) cout<<"Elsif "<<m_lastSym<<(enable?" ON":" OFF")<<endl;
847 			m_ifdefStack.push(VPreIfEntry(enable, lastIf.everOn()));
848 			if (!enable) parsingOff();
849 		    }
850 		    statePop();
851 		    goto next_tok;
852 		}
853 		else if (state()==ps_DEFNAME_UNDEF) {
854 		    if (!m_off) {
855 			if (debug()>=5) cout<<"Undef "<<m_lastSym<<endl;
856 			m_preprocp->undef(m_lastSym);
857 		    }
858 		    statePop();
859 		    goto next_tok;
860 		}
861 		else if (state()==ps_DEFNAME_DEFINE) {
862 		    // m_lastSym already set.
863 		    stateChange(ps_DEFFORM);
864 		    m_lexp->pushStateDefForm();
865 		    goto next_tok;
866 		}
867 		else fatalSrc("Bad case\n");
868 		goto next_tok;
869 	    }
870 	    else if (tok==VP_TEXT) {
871 		// IE, something like comment between define and symbol
872 		if (!m_off) {
873 		    buf = string(yyourtext(), yyourleng());
874 		    return tok;
875 		}
876 		else goto next_tok;
877 	    }
878 	    else if (tok==VP_DEFREF) {
879 		// IE, `ifdef `MACRO(x): Substitue and come back here when state pops.
880 		break;
881 	    }
882 	    else {
883 		error((string)"Expecting define name. Found: "+tokenName(tok)+"\n");
884 		goto next_tok;
885 	    }
886 	}
887 	case ps_DEFFORM: {
888 	    if (tok==VP_DEFFORM) {
889 		m_formals = m_lexp->m_defValue;
890 		if (debug()>=5) cout<<"DefFormals='"<<VPreLex::cleanDbgStrg(m_formals)<<"'\n";
891 		stateChange(ps_DEFVALUE);
892 		m_lexp->pushStateDefValue();
893 		goto next_tok;
894 	    } else if (tok==VP_TEXT) {
895 		// IE, something like comment in formals
896 		if (!m_off) {
897 		    buf = string(yyourtext(), yyourleng());
898 		    return tok;
899 		}
900 		else goto next_tok;
901 	    } else {
902 		error((string)"Expecting define formal arguments. Found: "+tokenName(tok)+"\n");
903 		goto next_tok;
904 	    }
905 	}
906 	case ps_DEFVALUE: {
907 	    static string newlines;
908 	    newlines = "\n";  // Always start with trailing return
909 	    if (tok == VP_DEFVALUE) {
910 		if (debug()>=5) cout<<"DefValue='"<<VPreLex::cleanDbgStrg(m_lexp->m_defValue)
911 				    <<"'  formals='"<<VPreLex::cleanDbgStrg(m_formals)<<"'\n";
912 		// Add any formals
913 		string formals = m_formals;
914 		string value = m_lexp->m_defValue;
915 		// Remove returns
916 		// Not removing returns in values has two problems,
917 		// 1. we need to correct line numbers with `line after each substitution
918 		// 2. Substituting in " .... " with embedded returns requires \ escape.
919 		//    This is very difficult in the presence of `", so we keep the \ before the newline.
920 		for (size_t i=0; i<formals.length(); i++) {
921 		    if (formals[i] == '\n') {
922 			newlines += "\n";
923 		    }
924 		}
925 		for (size_t i=0; i<value.length(); i++) {
926 		    if (value[i] == '\n') {
927 			newlines += "\n";
928 		    }
929 		}
930 		if (!m_off) {
931 		    // Remove leading and trailing whitespace
932 		    value = trimWhitespace(value, true);
933 		    // Define it
934 		    if (debug()>=5) cout<<"Define "<<m_lastSym<<" "<<formals
935 					<<" = '"<<VPreLex::cleanDbgStrg(value)<<"'"<<endl;
936 		    m_preprocp->define(m_lastSym, value, formals);
937 		}
938 	    } else {
939 		string msg = string("Bad define text, unexpected ")+tokenName(tok)+"\n";
940 		fatalSrc(msg);
941 	    }
942 	    statePop();
943 	    // DEFVALUE is terminated by a return, but lex can't return both tokens.
944 	    // Thus, we emit a return here.
945 	    buf = newlines;
946 	    return(VP_WHITE);
947 	}
948 	case ps_DEFPAREN: {
949 	    if (tok==VP_TEXT && yyourleng()==1 && yyourtext()[0]=='(') {
950 		stateChange(ps_DEFARG);
951 		goto next_tok;
952 	    } else {
953 		if (m_defRefs.empty()) fatalSrc("Shouldn't be in DEFPAREN w/o active defref");
954 		VPreDefRef* refp = &(m_defRefs.top());
955 		error((string)"Expecting ( to begin argument list for define reference `"+refp->name()+"\n");
956 		statePop();
957 		goto next_tok;
958 	    }
959 	}
960 	case ps_DEFARG: {
961 	    if (m_defRefs.empty()) fatalSrc("Shouldn't be in DEFARG w/o active defref");
962 	    VPreDefRef* refp = &(m_defRefs.top());
963 	    refp->nextarg(refp->nextarg()+m_lexp->m_defValue); m_lexp->m_defValue="";
964 	    if (debug()>=5) cout<<"defarg++ "<<refp->nextarg()<<endl;
965 	    if (tok==VP_DEFARG && yyourleng()==1 && yyourtext()[0]==',') {
966 		refp->args().push_back(refp->nextarg());
967 		stateChange(ps_DEFARG);
968 		m_lexp->pushStateDefArg(1);
969 		refp->nextarg("");
970 		goto next_tok;
971 	    } else if (tok==VP_DEFARG && yyourleng()==1 && yyourtext()[0]==')') {
972 		// Substitute in and prepare for next action
973 		// Similar code in non-parenthesized define (Search for END_OF_DEFARG)
974 		refp->args().push_back(refp->nextarg());
975 		string out;
976 		if (!m_off) {
977 		    out = defineSubst(refp);
978 		    out = m_preprocp->defSubstitute(out);
979 		}
980 		m_defRefs.pop();  refp=NULL;
981 		if (m_defRefs.empty()) {
982 		    statePop();
983 		    if (state() == ps_JOIN) {  // Handle {left}```FOO(ARG) where `FOO(ARG) might be empty
984 			if (m_joinStack.empty()) fatalSrc("`` join stack empty, but in a ``");
985 			string lhs = m_joinStack.top(); m_joinStack.pop();
986 			out = lhs+out;
987 			if (debug()>=5) cout<<"``-end-defarg Out:"<<out<<endl;
988 			statePop();
989 		    }
990 		    if (!m_off) unputDefrefString(out);
991 		    // Prevent problem when EMPTY="" in `ifdef NEVER `define `EMPTY
992 		    else if (stateIsDefname()) unputDefrefString("__IF_OFF_IGNORED_DEFINE");
993 		    m_lexp->m_parenLevel = 0;
994 		}
995 		else {  // Finished a defref inside a upper defref
996 		    // Can't subst now, or
997 		    // `define a(ign) x,y
998 		    // foo(`a(ign),`b)  would break because a contains comma
999 		    refp = &(m_defRefs.top());  // We popped, so new top
1000 		    refp->nextarg(refp->nextarg()+m_lexp->m_defValue+out); m_lexp->m_defValue="";
1001 		    m_lexp->m_parenLevel = refp->parenLevel();
1002 		    statePop();  // Will go to ps_DEFARG, as we're under another define
1003 		}
1004 		goto next_tok;
1005 	    } else if (tok==VP_DEFREF) {
1006 		// Expand it, then state will come back here
1007 		// Value of building argument is data before the lower defref
1008 		// we'll append it when we push the argument.
1009 		break;
1010 	    } else if (tok==VP_SYMBOL || tok==VP_STRING || tok==VP_TEXT || tok==VP_WHITE || tok==VP_PSL) {
1011 		string rtn; rtn.assign(yyourtext(),yyourleng());
1012 		refp->nextarg(refp->nextarg()+rtn);
1013 		goto next_tok;
1014 	    } else if (tok==VP_STRIFY) {
1015 		// We must expand stringinfication, when done will return to this state
1016 		statePush(ps_STRIFY);
1017 		goto next_tok;
1018 	    } else {
1019 		error((string)"Expecting ) or , to end argument list for define reference. Found: "+tokenName(tok));
1020 		statePop();
1021 		goto next_tok;
1022 	    }
1023 	}
1024 	case ps_INCNAME: {
1025 	    if (tok==VP_STRING) {
1026 		statePop();
1027 		m_lastSym.assign(yyourtext(),yyourleng());
1028 		if (debug()>=5) cout<<"Include "<<m_lastSym<<endl;
1029 		// Drop leading and trailing quotes.
1030 		m_lastSym.erase(0,1);
1031 		m_lastSym.erase(m_lastSym.length()-1,1);
1032 		m_preprocp->include(m_lastSym);
1033 		goto next_tok;
1034 	    }
1035 	    else if (tok==VP_TEXT && yyourleng()==1 && yyourtext()[0]=='<') {
1036 		// include <filename>
1037 		stateChange(ps_INCNAME);  // Still
1038 		m_lexp->pushStateIncFilename();
1039 		goto next_tok;
1040 	    }
1041 	    else if (tok==VP_DEFREF
1042 		     || tok==VP_STRIFY) {
1043 		// Expand it, then state will come back here
1044 		break;
1045 	    }
1046 	    else {
1047 		statePop();
1048 		error((string)"Expecting include filename. Found: "+tokenName(tok)+"\n");
1049 		goto next_tok;
1050 	    }
1051 	}
1052 	case ps_ERRORNAME: {
1053 	    if (tok==VP_STRING) {
1054 		if (!m_off) {
1055 		    m_lastSym.assign(yyourtext(),yyourleng());
1056 		    error(m_lastSym);
1057 		}
1058 		statePop();
1059 		goto next_tok;
1060 	    }
1061 	    else {
1062 		error((string)"Expecting `error string. Found: "+tokenName(tok)+"\n");
1063 		statePop();
1064 		goto next_tok;
1065 	    }
1066 	}
1067 	case ps_JOIN: {
1068 	    if (tok==VP_SYMBOL || tok==VP_TEXT) {
1069 		if (m_joinStack.empty()) fatalSrc("`` join stack empty, but in a ``");
1070 		string lhs = m_joinStack.top(); m_joinStack.pop();
1071 		if (debug()>=5) cout<<"`` LHS:"<<lhs<<endl;
1072 		string rhs(yyourtext(),yyourleng());
1073 		if (debug()>=5) cout<<"`` RHS:"<<rhs<<endl;
1074 		string out = lhs+rhs;
1075 		if (debug()>=5) cout<<"`` Out:"<<out<<endl;
1076 		unputString(out);
1077 		statePop();
1078 		goto next_tok;
1079 	    } else if (tok==VP_EOF || tok==VP_WHITE || tok == VP_COMMENT || tok==VP_STRING) {
1080 		// Other compilers just ignore this, so no warning
1081 		// "Expecting symbol to terminate ``; whitespace etc cannot follow ``. Found: "+tokenName(tok)+"\n"
1082 		string lhs = m_joinStack.top(); m_joinStack.pop();
1083 		unputString(lhs);
1084 		statePop();
1085 		goto next_tok;
1086 	    } else {
1087 		// `define, etc, fall through and expand.  Pop back here.
1088 		break;
1089 	    }
1090 	}
1091 	case ps_STRIFY: {
1092 	    if (tok==VP_STRIFY) {
1093 		// Quote what's in the middle of the stringification
1094 		// Note a `" MACRO_WITH(`") `" doesn't need to be handled (we don't need a stack)
1095 		// That behavior isn't specified, and other simulators vary widely
1096 		string out = m_strify;
1097 		m_strify = "";
1098 		// Convert any newlines to spaces, so we don't get a multiline "..." without \ escapes
1099 		// The spec is silent about this either way; simulators vary
1100 		string::size_type pos;
1101 		while ((pos=out.find("\n")) != string::npos) {
1102 		    out.replace(pos, 1, " ");
1103 		}
1104 		unputString((string)"\""+out+"\"");
1105 		statePop();
1106 		goto next_tok;
1107 	    }
1108 	    else if (tok==VP_EOF) {
1109 		error("`\" not terminated at EOF\n");
1110 		break;
1111 	    }
1112 	    else if (tok==VP_BACKQUOTE) {
1113 		m_strify += "\\\"";
1114 		goto next_tok;
1115 	    }
1116 	    else if (tok==VP_DEFREF) {
1117 		// Spec says to expand macros inside `"
1118 		// Substitue it into the stream, then return here
1119 		break;
1120 	    }
1121 	    else {
1122 		// Append token to eventual string
1123 		m_strify.append(yyourtext(),yyourleng());
1124 		goto next_tok;
1125 	    }
1126 	}
1127 	default: fatalSrc("Bad case\n");
1128 	}
1129 	// Default is to do top level expansion of some tokens
1130 	switch (tok) {
1131 	case VP_INCLUDE:
1132 	    if (!m_off) {
1133 		statePush(ps_INCNAME);
1134 	    } // Else incname looks like normal text, that will be ignored
1135 	    goto next_tok;
1136 	case VP_UNDEF:
1137 	    statePush(ps_DEFNAME_UNDEF);
1138 	    goto next_tok;
1139 	case VP_DEFINE:
1140 	    // No m_off check here, as a `ifdef NEVER `define FOO(`endif)  should work
1141 	    statePush(ps_DEFNAME_DEFINE);
1142 	    goto next_tok;
1143 	case VP_IFDEF:
1144 	    statePush(ps_DEFNAME_IFDEF);
1145 	    goto next_tok;
1146 	case VP_IFNDEF:
1147 	    statePush(ps_DEFNAME_IFNDEF);
1148 	    goto next_tok;
1149 	case VP_ELSIF:
1150 	    statePush(ps_DEFNAME_ELSIF);
1151 	    goto next_tok;
1152 	case VP_ELSE:
1153 	    if (m_ifdefStack.empty()) {
1154 		error("`else with no matching `if\n");
1155 	    } else {
1156 		VPreIfEntry lastIf = m_ifdefStack.top(); m_ifdefStack.pop();
1157 		bool enable = !lastIf.everOn();
1158 		if (debug()>=5) cout<<"Else "<<(enable?" ON":" OFF")<<endl;
1159 		m_ifdefStack.push(VPreIfEntry(enable, lastIf.everOn()));
1160 		if (!lastIf.on()) parsingOn();
1161 		if (!enable) parsingOff();
1162 	    }
1163 	    goto next_tok;
1164 	case VP_ENDIF:
1165 	    if (debug()>=5) cout<<"Endif "<<endl;
1166 	    if (m_ifdefStack.empty()) {
1167 		error("`endif with no matching `if\n");
1168 	    } else {
1169 		VPreIfEntry lastIf = m_ifdefStack.top(); m_ifdefStack.pop();
1170 		if (!lastIf.on()) parsingOn();
1171 		// parsingOn() really only enables parsing if
1172 		// all ifdef's above this want it on
1173 	    }
1174 	    goto next_tok;
1175 
1176 	case VP_DEFREF: {
1177 	    // m_off not right here, but inside substitution, to make this work: `ifdef NEVER `DEFUN(`endif)
1178 	    string name(yyourtext()+1,yyourleng()-1);
1179 	    if (debug()>=5) cout<<"DefRef "<<name<<endl;
1180 	    if (m_defPutJoin) { m_defPutJoin = false; unputString("``"); }
1181 	    if (m_defDepth++ > VPreProc::DEFINE_RECURSION_LEVEL_MAX) {
1182 		error("Recursive `define substitution: `"+name);
1183 		goto next_tok;
1184 	    }
1185 	    // Substitute
1186 	    string params = m_preprocp->defParams(name);
1187 	    if (params=="") {   // Not found, return original string as-is
1188 		m_defDepth = 0;
1189 		if (debug()>=5) cout<<"Defref `"<<name<<" => not_defined"<<endl;
1190 		if (m_off) {
1191 		    goto next_tok;
1192 		} else {
1193                     unputDefrefString(string("`\032") + name);
1194                     goto next_tok;
1195 		}
1196 	    }
1197 	    else if (params=="0") {  // Found, as simple substitution
1198 		string out;
1199 		if (!m_off) {
1200 		    VPreDefRef tempref(name, "");
1201 		    out = defineSubst(&tempref);
1202 		}
1203 		// Similar code in parenthesized define (Search for END_OF_DEFARG)
1204 		out = m_preprocp->defSubstitute(out);
1205 		if (m_defRefs.empty()) {
1206 		    // Just output the substitution
1207 		    if (state() == ps_JOIN) {  // Handle {left}```FOO where `FOO might be empty
1208 			if (m_joinStack.empty()) fatalSrc("`` join stack empty, but in a ``");
1209 			string lhs = m_joinStack.top(); m_joinStack.pop();
1210 			out = lhs+out;
1211 			if (debug()>=5) cout<<"``-end-defref Out:"<<out<<endl;
1212 			statePop();
1213 		    }
1214 		    if (!m_off) unputDefrefString(out);
1215 		    // Prevent problem when EMPTY="" in `ifdef NEVER `define `EMPTY
1216 		    else if (stateIsDefname()) unputDefrefString("__IF_OFF_IGNORED_DEFINE");
1217 		} else {
1218 		    // Inside another define.
1219 		    // Can't subst now, or
1220 		    // `define a x,y
1221 		    // foo(`a,`b)  would break because a contains comma
1222 		    VPreDefRef* refp = &(m_defRefs.top());
1223 		    refp->nextarg(refp->nextarg()+m_lexp->m_defValue+out); m_lexp->m_defValue="";
1224 		}
1225 		goto next_tok;
1226 	    }
1227 	    else {  // Found, with parameters
1228 		if (debug()>=5) cout<<"Defref `"<<name<<" => parametrized"<<endl;
1229 		// The CURRENT macro needs the paren saved, it's not a property of the child macro
1230 		if (!m_defRefs.empty()) m_defRefs.top().parenLevel(m_lexp->m_parenLevel);
1231 		m_defRefs.push(VPreDefRef(name, params));
1232 		statePush(ps_DEFPAREN);
1233 		m_lexp->pushStateDefArg(0);
1234 		goto next_tok;
1235 	    }
1236 	    fatalSrc("Bad case\n");
1237 	}
1238 	case VP_ERROR: {
1239 	    statePush(ps_ERRORNAME);
1240 	    goto next_tok;
1241 	}
1242 	case VP_EOF:
1243 	    if (!m_ifdefStack.empty()) {
1244 		error("`ifdef not terminated at EOF\n");
1245 	    }
1246 	    buf = string(yyourtext(), yyourleng());
1247 	    return tok;
1248 	case VP_UNDEFINEALL:
1249 	    if (!m_off) {
1250 		if (debug()>=5) cout<<"Undefineall "<<endl;
1251 		m_preprocp->undefineall();
1252 	    }
1253 	    goto next_tok;
1254 	case VP_STRIFY:
1255 	    // We must expand macros in the body of the stringification
1256 	    // Then, when done, form a final string to return
1257 	    // (it could be used as a include filename, for example, so need the string token)
1258 	    statePush(ps_STRIFY);
1259 	    goto next_tok;
1260 	case VP_SYMBOL:
1261 	case VP_STRING:
1262 	case VP_PSL:
1263 	case VP_TEXT: {
1264 	    m_defDepth = 0;
1265 	    if (!m_off) {
1266 		buf = string(yyourtext(), yyourleng());
1267 		return tok;
1268 	    }
1269 	    else goto next_tok;
1270 	}
1271 	case VP_WHITE:		// Handled at top of loop
1272 	case VP_COMMENT:	// Handled at top of loop
1273 	case VP_DEFFORM:	// Handled by state=ps_DEFFORM;
1274 	case VP_DEFVALUE:	// Handled by state=ps_DEFVALUE;
1275 	default:
1276 	    fatalSrc((string)"Internal error: Unexpected token "+tokenName(tok)+"\n");
1277 	    break;
1278 	}
1279 	buf = string(yyourtext(), yyourleng());
1280 	return tok;
1281     }
1282 }
1283 
getFinalToken(string & buf)1284 int VPreProcImp::getFinalToken(string& buf) {
1285     // Return the next user-visible token in the input stream.
1286     // Includes and such are handled here, and are never seen by the caller.
1287     if (!m_finAhead) {
1288 	m_finAhead = true;
1289 	m_finToken = getStateToken(m_finBuf);
1290     }
1291     int tok = m_finToken;
1292     buf = m_finBuf;
1293     if (0 && debug()>=5) {
1294 	string bufcln = VPreLex::cleanDbgStrg(buf);
1295 	fprintf(stderr,"%d: FIN:      %-10s: %s\n",
1296 		m_lexp->m_tokFilelinep->lineno(), tokenName(tok), bufcln.c_str());
1297     }
1298     // Track `line
1299     const char* bufp = buf.c_str();
1300     while (*bufp == '\n') bufp++;
1301     if ((tok == VP_TEXT || tok == VP_LINE) && 0==strncmp(bufp,"`line ",6)) {
1302 	int enter;
1303 	m_finFilelinep = m_finFilelinep->lineDirective(bufp, enter/*ref*/);
1304     }
1305     else {
1306 	if (m_finAtBol && !(tok==VP_TEXT && buf=="\n")
1307 	    && m_preprocp->lineDirectives()) {
1308 	    if (int outBehind = m_lexp->m_tokFilelinep->lineno() - m_finFilelinep->lineno()) {
1309 		if (debug()>=5) fprintf(stderr,"%d: FIN: readjust, fin at %d  request at %d\n",
1310 					m_lexp->m_tokFilelinep->lineno(),
1311 					m_finFilelinep->lineno(), m_lexp->m_tokFilelinep->lineno());
1312 		m_finFilelinep = m_finFilelinep->create(m_lexp->m_tokFilelinep->filename(),m_lexp->m_tokFilelinep->lineno());
1313 		if (outBehind > 0 && outBehind <= (int)VPreProc::NEWLINES_VS_TICKLINE) {
1314 		    // Output stream is behind, send newlines to get back in sync
1315 		    // (Most likely because we're completing a disabled `endif)
1316 		    if (m_preprocp->keepWhitespace()) {
1317 			buf = string(outBehind,'\n');
1318 			return VP_TEXT;
1319 		    }
1320 		} else {
1321 		    // Need to backup, use `line
1322 		    buf = m_finFilelinep->lineDirectiveStrg(0);
1323 		    return VP_LINE;
1324 		}
1325 	    }
1326 	}
1327 	// Track newlines in prep for next token
1328 	for (string::iterator cp=buf.begin(); cp!=buf.end(); ++cp) {
1329 	    if (*cp == '\n') {
1330 		m_finAtBol = true;
1331 		m_finFilelinep->linenoIncInPlace();  // Increment in place to avoid new/delete calls.  It's private data.
1332 	    } else {
1333 		m_finAtBol = false;
1334 	    }
1335 	}
1336     }
1337     m_finAhead = false;  // Consumed the token
1338     return tok;
1339 }
1340 
getparseline(bool stop_at_eol,size_t approx_chunk)1341 string VPreProcImp::getparseline(bool stop_at_eol, size_t approx_chunk) {
1342     // Get a single line from the parse stream.  Buffer unreturned text until the newline.
1343     if (isEof()) return "";
1344     while (1) {
1345 	const char* rtnp = NULL;
1346 	bool gotEof = false;
1347 	while ((stop_at_eol
1348 		? (NULL==(rtnp=strchr(m_lineChars.c_str(),'\n')))
1349 		: (approx_chunk==0 || (m_lineChars.length() < approx_chunk)))
1350 	       && !gotEof) {
1351 	    string buf;
1352 	    int tok = getFinalToken(buf/*ref*/);
1353 	    if (debug()>=5) {
1354 		string bufcln = VPreLex::cleanDbgStrg(buf);
1355 		fprintf(stderr,"%d: GETFETC:  %-10s: %s\n",
1356 			m_lexp->m_tokFilelinep->lineno(), tokenName(tok), bufcln.c_str());
1357 	    }
1358 	    if (tok==VP_EOF) {
1359 		// Add a final newline, if the user forgot the final \n.
1360 		if (m_lineChars != "" && m_lineChars[m_lineChars.length()-1] != '\n') {
1361 		    m_lineChars.append("\n");
1362 		}
1363 		gotEof = true;
1364 	    }
1365 	    else if (tok==VP_PSL) {
1366 		m_lineChars.append(" psl ");
1367 	    }
1368 	    else {
1369 		m_lineChars.append(buf);
1370 	    }
1371 	}
1372 
1373 	// Make new string with data up to the newline.
1374 	size_t len = stop_at_eol ? (rtnp-m_lineChars.c_str()+1) : m_lineChars.length();
1375 	string theLine(m_lineChars, 0, len);
1376 	m_lineChars = m_lineChars.erase(0,len);	// Remove returned characters
1377 
1378 	if (!m_preprocp->keepWhitespace() && !gotEof) {
1379 	    const char* cp=theLine.c_str();
1380 	    for (; *cp && (isspace(*cp) || *cp=='\n'); cp++) {}
1381 	    if (!*cp) continue;
1382 	}
1383 
1384 	if (debug()>=4) {
1385 	    string lncln = VPreLex::cleanDbgStrg(theLine);
1386 	    fprintf(stderr,"%d: GETLINE:  %s\n",
1387 		    m_lexp->m_tokFilelinep->lineno(), lncln.c_str());
1388 	}
1389 	return theLine;
1390     }
1391 }
1392