xref: /original-bsd/usr.bin/m4/main.c (revision 4ec22e22)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Ozan Yigit.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 static char sccsid[] = "@(#)main.c	5.4 (Berkeley) 06/01/90";
13 #endif /* not lint */
14 
15 /*
16  * main.c
17  * Facility: m4 macro processor
18  * by: oz
19  */
20 
21 #include "mdef.h"
22 
23 /*
24  * m4 - macro processor
25  *
26  * PD m4 is based on the macro tool distributed with the software
27  * tools (VOS) package, and described in the "SOFTWARE TOOLS" and
28  * "SOFTWARE TOOLS IN PASCAL" books. It has been expanded to include
29  * most of the command set of SysV m4, the standard UN*X macro processor.
30  *
31  * Since both PD m4 and UN*X m4 are based on SOFTWARE TOOLS macro,
32  * there may be certain implementation similarities between
33  * the two. The PD m4 was produced without ANY references to m4
34  * sources.
35  *
36  * References:
37  *
38  *	Software Tools distribution: macro
39  *
40  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
41  *	TOOLS IN PASCAL, Addison-Wesley, Mass. 1981
42  *
43  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
44  *	TOOLS, Addison-Wesley, Mass. 1976
45  *
46  *	Kernighan, Brian W. and Dennis M. Ritchie,
47  *	THE M4 MACRO PROCESSOR, Unix Programmer's Manual,
48  *	Seventh Edition, Vol. 2, Bell Telephone Labs, 1979
49  *
50  *	System V man page for M4
51  *
52  * Modification History:
53  *
54  * Jan 28 1986 Oz	Break the whole thing into little
55  *			pieces, for easier (?) maintenance.
56  *
57  * Dec 12 1985 Oz	Optimize the code, try to squeeze
58  *			few microseconds out..
59  *
60  * Dec 05 1985 Oz	Add getopt interface, define (-D),
61  *			undefine (-U) options.
62  *
63  * Oct 21 1985 Oz	Clean up various bugs, add comment handling.
64  *
65  * June 7 1985 Oz	Add some of SysV m4 stuff (m4wrap, pushdef,
66  *			popdef, decr, shift etc.).
67  *
68  * June 5 1985 Oz	Initial cut.
69  *
70  * Implementation Notes:
71  *
72  * [1]	PD m4 uses a different (and simpler) stack mechanism than the one
73  *	described in Software Tools and Software Tools in Pascal books.
74  *	The triple stack nonsense is replaced with a single stack containing
75  *	the call frames and the arguments. Each frame is back-linked to a
76  * 	previous stack frame, which enables us to rewind the stack after
77  * 	each nested call is completed. Each argument is a character pointer
78  *	to the beginning of the argument string within the string space.
79  *	The only exceptions to this are (*) arg 0 and arg 1, which are
80  * 	the macro definition and macro name strings, stored dynamically
81  *	for the hash table.
82  *
83  *	    .					   .
84  *	|   .	|  <-- sp			|  .  |
85  *	+-------+				+-----+
86  *	| arg 3 ------------------------------->| str |
87  *	+-------+				|  .  |
88  *	| arg 2 --------------+ 		   .
89  *	+-------+	      |
90  *	    *		      |			|     |
91  *	+-------+	      | 		+-----+
92  *	| plev	|  <-- fp     +---------------->| str |
93  *	+-------+				|  .  |
94  *	| type	|				   .
95  *	+-------+
96  *	| prcf	-----------+		plev: paren level
97  *	+-------+  	   |		type: call type
98  *	|   .	| 	   |		prcf: prev. call frame
99  *	    .	   	   |
100  *	+-------+	   |
101  *	|	<----------+
102  *	+-------+
103  *
104  * [2]	We have three types of null values:
105  *
106  *		nil  - nodeblock pointer type 0
107  *		null - null string ("")
108  *		NULL - Stdio-defined NULL
109  *
110  */
111 
/*
 * Global state of the m4 machine.
 *
 * hashtab holds the macro/keyword chains; entries are linked through
 * nxtptr and selected by hash value (see inspect() and initkwds()).
 */
ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */

/*
 * Push-back buffer: bp is the next free slot, endpbb is one past the
 * last slot of buf.
 */
char buf[BUFSIZE];		/* push-back buffer	       */
char *bp = buf; 		/* first available character   */
char *endpbb = buf+BUFSIZE;	/* end of push-back buffer     */

/*
 * Evaluation stack and the string space its argument pointers refer to
 * (see Implementation Notes [1] above).  main() starts with sp == -1
 * and fp == 0; macro() treats sp < 0 as "not inside a macro call".
 */
stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
char strspace[STRSPMAX+1];	/* string space for evaluation */
char *ep = strspace;		/* first free char in strspace */
char *endest= strspace+STRSPMAX;/* end of string space	       */
int sp; 			/* current m4  stack pointer   */
int fp; 			/* m4 call frame pointer       */

/*
 * Input and output streams.  main() sets infile[0] to stdin and active
 * to stdout; at exit it undiverts outfile[1..MAXOUT-1] and, if
 * outfile[0] was opened, closes it and unlinks its temporary file.
 */
FILE *infile[MAXINP];		/* input file stack (0=stdin)  */
FILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
FILE *active;			/* active output file pointer  */
char *m4temp;			/* filename for diversions, set from mktemp(DIVNAM) in main() */
int ilevel = 0; 		/* input file stack pointer    */
int oindex = 0; 		/* diversion index..	       */

char *null = "";                /* the shared empty string (see note [2] above) */
char *m4wraps = "";             /* m4wrap text; if non-empty, main() rescans it after input ends */

/* Quote and comment delimiters; macro() compares input against these. */
char lquote = LQUOTE;		/* left quote character  (`)   */
char rquote = RQUOTE;		/* right quote character (')   */
char scommt = SCOMMT;		/* start character for comment */
char ecommt = ECOMMT;		/* end character for comment   */
134 struct keyblk keywrds[] = {	/* m4 keywords to be installed */
135 	"include",      INCLTYPE,
136 	"sinclude",     SINCTYPE,
137 	"define",       DEFITYPE,
138 	"defn",         DEFNTYPE,
139 	"divert",       DIVRTYPE,
140 	"expr",         EXPRTYPE,
141 	"eval",         EXPRTYPE,
142 	"substr",       SUBSTYPE,
143 	"ifelse",       IFELTYPE,
144 	"ifdef",        IFDFTYPE,
145 	"len",          LENGTYPE,
146 	"incr",         INCRTYPE,
147 	"decr",         DECRTYPE,
148 	"dnl",          DNLNTYPE,
149 	"changequote",  CHNQTYPE,
150 	"changecom",    CHNCTYPE,
151 	"index",        INDXTYPE,
152 #ifdef EXTENDED
153 	"paste",        PASTTYPE,
154 	"spaste",       SPASTYPE,
155 #endif
156 	"popdef",       POPDTYPE,
157 	"pushdef",      PUSDTYPE,
158 	"dumpdef",      DUMPTYPE,
159 	"shift",        SHIFTYPE,
160 	"translit",     TRNLTYPE,
161 	"undefine",     UNDFTYPE,
162 	"undivert",     UNDVTYPE,
163 	"divnum",       DIVNTYPE,
164 	"maketemp",     MKTMTYPE,
165 	"errprint",     ERRPTYPE,
166 	"m4wrap",       M4WRTYPE,
167 	"m4exit",       EXITTYPE,
168 	"syscmd",       SYSCTYPE,
169 	"sysval",       SYSVTYPE,
170 	"unix",         MACRTYPE,
171 };
172 
173 #define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
174 
175 extern ndptr lookup();
176 extern ndptr addent();
177 extern int onintr();
178 
179 extern char *malloc();
180 extern char *mktemp();
181 
182 extern int optind;
183 extern char *optarg;
184 
185 main(argc,argv)
186 char *argv[];
187 {
188 	register int c;
189 	register int n;
190 	char *p;
191 
192 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
193 		signal(SIGINT, onintr);
194 #ifdef NONZEROPAGES
195 	initm4();
196 #endif
197 	initkwds();
198 
199 	while ((c = getopt(argc, argv, "tD:U:o:")) != EOF)
200 		switch(c) {
201 
202 		case 'D':               /* define something..*/
203 			for (p = optarg; *p; p++)
204 				if (*p == '=')
205 					break;
206 			if (*p)
207 				*p++ = EOS;
208 			dodefine(optarg, p);
209 			break;
210 		case 'U':               /* undefine...       */
211 			remhash(optarg, TOP);
212 			break;
213 		case 'o':		/* specific output   */
214 		case '?':
215 		default:
216 			usage();
217 		}
218 
219 	infile[0] = stdin;		/* default input (naturally) */
220 	active = stdout;		/* default active output     */
221 	m4temp = mktemp(DIVNAM);	/* filename for diversions   */
222 
223 	sp = -1;			/* stack pointer initialized */
224 	fp = 0; 			/* frame pointer initialized */
225 
226 	macro();			/* get some work done here   */
227 
228 	if (*m4wraps) { 		/* anything for rundown ??   */
229 		ilevel = 0;		/* in case m4wrap includes.. */
230 		putback(EOF);		/* eof is a must !!	     */
231 		pbstr(m4wraps); 	/* user-defined wrapup act   */
232 		macro();		/* last will and testament   */
233 	}
234 
235 	if (active != stdout)
236 		active = stdout;	/* reset output just in case */
237 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
238 		if (outfile[n] != NULL)
239 			getdiv(n);
240 					/* remove bitbucket if used  */
241 	if (outfile[0] != NULL) {
242 		(void) fclose(outfile[0]);
243 		m4temp[UNIQUE] = '0';
244 		(void) unlink(m4temp);
245 	}
246 
247 	exit(0);
248 }
249 
250 ndptr inspect();	/* forward ... */
251 
252 /*
253  * macro - the work horse..
254  *
255  */
256 macro() {
257 	char token[MAXTOK];
258 	register char *s;
259 	register int t, l;
260 	register ndptr p;
261 	register int  nlpar;
262 
263 	cycle {
264 		if ((t = gpbc()) == '_' || isalpha(t)) {
265 			putback(t);
266 			if ((p = inspect(s = token)) == nil) {
267 				if (sp < 0)
268 					while (*s)
269 						putc(*s++, active);
270 				else
271 					while (*s)
272 						chrsave(*s++);
273 			}
274 			else {
275 		/*
276 		 * real thing.. First build a call frame:
277 		 *
278 		 */
279 				pushf(fp);	/* previous call frm */
280 				pushf(p->type); /* type of the call  */
281 				pushf(0);	/* parenthesis level */
282 				fp = sp;	/* new frame pointer */
283 		/*
284 		 * now push the string arguments:
285 		 *
286 		 */
287 				pushs(p->defn);	      /* defn string */
288 				pushs(p->name);	      /* macro name  */
289 				pushs(ep);	      /* start next..*/
290 
291 				putback(l = gpbc());
292 				if (l != LPAREN)  {   /* add bracks  */
293 					putback(RPAREN);
294 					putback(LPAREN);
295 				}
296 			}
297 		}
298 		else if (t == EOF) {
299 			if (sp > -1)
300 				error("m4: unexpected end of input");
301 			if (--ilevel < 0)
302 				break;			/* all done thanks.. */
303 			(void) fclose(infile[ilevel+1]);
304 			continue;
305 		}
306 	/*
307 	 * non-alpha single-char token seen..
308 	 * [the order of else if .. stmts is
309 	 * important.]
310 	 *
311 	 */
312 		else if (t == lquote) { 		/* strip quotes */
313 			nlpar = 1;
314 			do {
315 				if ((l = gpbc()) == rquote)
316 					nlpar--;
317 				else if (l == lquote)
318 					nlpar++;
319 				else if (l == EOF)
320 					error("m4: missing right quote");
321 				if (nlpar > 0) {
322 					if (sp < 0)
323 						putc(l, active);
324 					else
325 						chrsave(l);
326 				}
327 			}
328 			while (nlpar != 0);
329 		}
330 
331 		else if (sp < 0) {		/* not in a macro at all */
332 			if (t == scommt) {	/* comment handling here */
333 				putc(t, active);
334 				while ((t = gpbc()) != ecommt)
335 					putc(t, active);
336 			}
337 			putc(t, active);	/* output directly..	 */
338 		}
339 
340 		else switch(t) {
341 
342 		case LPAREN:
343 			if (PARLEV > 0)
344 				chrsave(t);
345 			while (isspace(l = gpbc()))
346 				;		/* skip blank, tab, nl.. */
347 			putback(l);
348 			PARLEV++;
349 			break;
350 
351 		case RPAREN:
352 			if (--PARLEV > 0)
353 				chrsave(t);
354 			else {			/* end of argument list */
355 				chrsave(EOS);
356 
357 				if (sp == STACKMAX)
358 					error("m4: internal stack overflow");
359 
360 				if (CALTYP == MACRTYPE)
361 					expand(mstack+fp+1, sp-fp);
362 				else
363 					eval(mstack+fp+1, sp-fp, CALTYP);
364 
365 				ep = PREVEP;	/* flush strspace */
366 				sp = PREVSP;	/* previous sp..  */
367 				fp = PREVFP;	/* rewind stack...*/
368 			}
369 			break;
370 
371 		case COMMA:
372 			if (PARLEV == 1)	{
373 				chrsave(EOS);		/* new argument   */
374 				while (isspace(l = gpbc()))
375 					;
376 				putback(l);
377 				pushs(ep);
378 			}
379 			break;
380 		default:
381 			chrsave(t);			/* stack the char */
382 			break;
383 		}
384 	}
385 }
386 
387 
388 /*
389  * build an input token..
390  * consider only those starting with _ or A-Za-z. This is a
391  * combo with lookup to speed things up.
392  */
393 ndptr
394 inspect(tp)
395 register char *tp;
396 {
397 	register int h = 0;
398 	register char c;
399 	register char *name = tp;
400 	register char *etp = tp+MAXTOK;
401 	register ndptr p;
402 
403 	while (tp < etp && (isalnum(c = gpbc()) || c == '_'))
404 		h += (*tp++ = c);
405 	putback(c);
406 	if (tp == etp)
407 		error("m4: token too long");
408 	*tp = EOS;
409 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
410 		if (strcmp(name, p->name) == 0)
411 			break;
412 	return(p);
413 }
414 
415 #ifdef NONZEROPAGES
416 /*
417  * initm4 - initialize various tables. Useful only if your system
418  * does not know anything about demand-zero pages.
419  *
420  */
421 initm4()
422 {
423 	register int i;
424 
425 	for (i = 0; i < HASHSIZE; i++)
426 		hashtab[i] = nil;
427 	for (i = 0; i < MAXOUT; i++)
428 		outfile[i] = NULL;
429 }
430 #endif
431 
432 /*
433  * initkwds - initialise m4 keywords as fast as possible.
434  * This very similar to install, but without certain overheads,
435  * such as calling lookup. Malloc is not used for storing the
436  * keyword strings, since we simply use the static  pointers
437  * within keywrds block. We also assume that there is enough memory
438  * to at least install the keywords (i.e. malloc won't fail).
439  *
440  */
441 initkwds() {
442 	register int i;
443 	register int h;
444 	register ndptr p;
445 
446 	for (i = 0; i < MAXKEYS; i++) {
447 		h = hash(keywrds[i].knam);
448 		p = (ndptr) malloc(sizeof(struct ndblock));
449 		p->nxtptr = hashtab[h];
450 		hashtab[h] = p;
451 		p->name = keywrds[i].knam;
452 		p->defn = null;
453 		p->type = keywrds[i].ktyp | STATIC;
454 	}
455 }
456