xref: /original-bsd/usr.bin/m4/main.c (revision 28544557)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Ozan Yigit.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #ifndef lint
12 static char sccsid[] = "@(#)main.c	5.5 (Berkeley) 02/26/91";
13 #endif /* not lint */
14 
15 /*
16  * main.c
17  * Facility: m4 macro processor
18  * by: oz
19  */
20 
#include <ctype.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "mdef.h"
#include "pathnames.h"
28 
29 /*
30  * m4 - macro processor
31  *
32  * PD m4 is based on the macro tool distributed with the software
33  * tools (VOS) package, and described in the "SOFTWARE TOOLS" and
34  * "SOFTWARE TOOLS IN PASCAL" books. It has been expanded to include
35  * most of the command set of SysV m4, the standard UN*X macro processor.
36  *
37  * Since both PD m4 and UN*X m4 are based on SOFTWARE TOOLS macro,
38  * there may be certain implementation similarities between
39  * the two. The PD m4 was produced without ANY references to m4
40  * sources.
41  *
42  * References:
43  *
44  *	Software Tools distribution: macro
45  *
46  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
47  *	TOOLS IN PASCAL, Addison-Wesley, Mass. 1981
48  *
49  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
50  *	TOOLS, Addison-Wesley, Mass. 1976
51  *
52  *	Kernighan, Brian W. and Dennis M. Ritchie,
53  *	THE M4 MACRO PROCESSOR, Unix Programmer's Manual,
54  *	Seventh Edition, Vol. 2, Bell Telephone Labs, 1979
55  *
56  *	System V man page for M4
57  *
58  * Modification History:
59  *
60  * Jan 28 1986 Oz	Break the whole thing into little
61  *			pieces, for easier (?) maintenance.
62  *
63  * Dec 12 1985 Oz	Optimize the code, try to squeeze
64  *			few microseconds out..
65  *
66  * Dec 05 1985 Oz	Add getopt interface, define (-D),
67  *			undefine (-U) options.
68  *
69  * Oct 21 1985 Oz	Clean up various bugs, add comment handling.
70  *
71  * June 7 1985 Oz	Add some of SysV m4 stuff (m4wrap, pushdef,
72  *			popdef, decr, shift etc.).
73  *
74  * June 5 1985 Oz	Initial cut.
75  *
76  * Implementation Notes:
77  *
78  * [1]	PD m4 uses a different (and simpler) stack mechanism than the one
79  *	described in Software Tools and Software Tools in Pascal books.
80  *	The triple stack nonsense is replaced with a single stack containing
81  *	the call frames and the arguments. Each frame is back-linked to a
82  * 	previous stack frame, which enables us to rewind the stack after
83  * 	each nested call is completed. Each argument is a character pointer
84  *	to the beginning of the argument string within the string space.
85  *	The only exceptions to this are (*) arg 0 and arg 1, which are
86  * 	the macro definition and macro name strings, stored dynamically
87  *	for the hash table.
88  *
89  *	    .					   .
90  *	|   .	|  <-- sp			|  .  |
91  *	+-------+				+-----+
92  *	| arg 3 ------------------------------->| str |
93  *	+-------+				|  .  |
94  *	| arg 2 --------------+ 		   .
95  *	+-------+	      |
96  *	    *		      |			|     |
97  *	+-------+	      | 		+-----+
98  *	| plev	|  <-- fp     +---------------->| str |
99  *	+-------+				|  .  |
100  *	| type	|				   .
101  *	+-------+
102  *	| prcf	-----------+		plev: paren level
103  *	+-------+  	   |		type: call type
104  *	|   .	| 	   |		prcf: prev. call frame
105  *	    .	   	   |
106  *	+-------+	   |
107  *	|	<----------+
108  *	+-------+
109  *
110  * [2]	We have three types of null values:
111  *
112  *		nil  - nodeblock pointer type 0
113  *		null - null string ("")
114  *		NULL - Stdio-defined NULL
115  *
116  */
117 
118 ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
119 char buf[BUFSIZE];		/* push-back buffer	       */
120 char *bp = buf; 		/* first available character   */
121 char *endpbb = buf+BUFSIZE;	/* end of push-back buffer     */
122 stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
123 char strspace[STRSPMAX+1];	/* string space for evaluation */
124 char *ep = strspace;		/* first free char in strspace */
125 char *endest= strspace+STRSPMAX;/* end of string space	       */
126 int sp; 			/* current m4  stack pointer   */
127 int fp; 			/* m4 call frame pointer       */
128 FILE *infile[MAXINP];		/* input file stack (0=stdin)  */
129 FILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
130 FILE *active;			/* active output file pointer  */
131 char *m4temp;			/* filename for diversions     */
132 int ilevel = 0; 		/* input file stack pointer    */
133 int oindex = 0; 		/* diversion index..	       */
134 char *null = "";                /* as it says.. just a null..  */
135 char *m4wraps = "";             /* m4wrap string default..     */
136 char lquote = LQUOTE;		/* left quote character  (`)   */
137 char rquote = RQUOTE;		/* right quote character (')   */
138 char scommt = SCOMMT;		/* start character for comment */
139 char ecommt = ECOMMT;		/* end character for comment   */
140 struct keyblk keywrds[] = {	/* m4 keywords to be installed */
141 	"include",      INCLTYPE,
142 	"sinclude",     SINCTYPE,
143 	"define",       DEFITYPE,
144 	"defn",         DEFNTYPE,
145 	"divert",       DIVRTYPE,
146 	"expr",         EXPRTYPE,
147 	"eval",         EXPRTYPE,
148 	"substr",       SUBSTYPE,
149 	"ifelse",       IFELTYPE,
150 	"ifdef",        IFDFTYPE,
151 	"len",          LENGTYPE,
152 	"incr",         INCRTYPE,
153 	"decr",         DECRTYPE,
154 	"dnl",          DNLNTYPE,
155 	"changequote",  CHNQTYPE,
156 	"changecom",    CHNCTYPE,
157 	"index",        INDXTYPE,
158 #ifdef EXTENDED
159 	"paste",        PASTTYPE,
160 	"spaste",       SPASTYPE,
161 #endif
162 	"popdef",       POPDTYPE,
163 	"pushdef",      PUSDTYPE,
164 	"dumpdef",      DUMPTYPE,
165 	"shift",        SHIFTYPE,
166 	"translit",     TRNLTYPE,
167 	"undefine",     UNDFTYPE,
168 	"undivert",     UNDVTYPE,
169 	"divnum",       DIVNTYPE,
170 	"maketemp",     MKTMTYPE,
171 	"errprint",     ERRPTYPE,
172 	"m4wrap",       M4WRTYPE,
173 	"m4exit",       EXITTYPE,
174 	"syscmd",       SYSCTYPE,
175 	"sysval",       SYSVTYPE,
176 	"unix",         MACRTYPE,
177 };
178 
179 #define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
180 
181 extern ndptr lookup();
182 extern ndptr addent();
183 extern void onintr();
184 
185 extern int optind;
186 extern char *optarg;
187 
188 main(argc,argv)
189 	int argc;
190 	char **argv;
191 {
192 	register int c;
193 	register int n;
194 	char *p;
195 
196 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
197 		signal(SIGINT, onintr);
198 #ifdef NONZEROPAGES
199 	initm4();
200 #endif
201 	initkwds();
202 
203 	while ((c = getopt(argc, argv, "tD:U:o:")) != EOF)
204 		switch(c) {
205 
206 		case 'D':               /* define something..*/
207 			for (p = optarg; *p; p++)
208 				if (*p == '=')
209 					break;
210 			if (*p)
211 				*p++ = EOS;
212 			dodefine(optarg, p);
213 			break;
214 		case 'U':               /* undefine...       */
215 			remhash(optarg, TOP);
216 			break;
217 		case 'o':		/* specific output   */
218 		case '?':
219 		default:
220 			usage();
221 		}
222 
223 	infile[0] = stdin;		/* default input (naturally) */
224 	active = stdout;		/* default active output     */
225 	m4temp = mktemp(DIVNAM);	/* filename for diversions   */
226 
227 	sp = -1;			/* stack pointer initialized */
228 	fp = 0; 			/* frame pointer initialized */
229 
230 	macro();			/* get some work done here   */
231 
232 	if (*m4wraps) { 		/* anything for rundown ??   */
233 		ilevel = 0;		/* in case m4wrap includes.. */
234 		putback(EOF);		/* eof is a must !!	     */
235 		pbstr(m4wraps); 	/* user-defined wrapup act   */
236 		macro();		/* last will and testament   */
237 	}
238 
239 	if (active != stdout)
240 		active = stdout;	/* reset output just in case */
241 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
242 		if (outfile[n] != NULL)
243 			getdiv(n);
244 					/* remove bitbucket if used  */
245 	if (outfile[0] != NULL) {
246 		(void) fclose(outfile[0]);
247 		m4temp[UNIQUE] = '0';
248 		(void) unlink(m4temp);
249 	}
250 
251 	exit(0);
252 }
253 
254 ndptr inspect();	/* forward ... */
255 
256 /*
257  * macro - the work horse..
258  *
259  */
260 macro() {
261 	char token[MAXTOK];
262 	register char *s;
263 	register int t, l;
264 	register ndptr p;
265 	register int  nlpar;
266 
267 	cycle {
268 		if ((t = gpbc()) == '_' || isalpha(t)) {
269 			putback(t);
270 			if ((p = inspect(s = token)) == nil) {
271 				if (sp < 0)
272 					while (*s)
273 						putc(*s++, active);
274 				else
275 					while (*s)
276 						chrsave(*s++);
277 			}
278 			else {
279 		/*
280 		 * real thing.. First build a call frame:
281 		 *
282 		 */
283 				pushf(fp);	/* previous call frm */
284 				pushf(p->type); /* type of the call  */
285 				pushf(0);	/* parenthesis level */
286 				fp = sp;	/* new frame pointer */
287 		/*
288 		 * now push the string arguments:
289 		 *
290 		 */
291 				pushs(p->defn);	      /* defn string */
292 				pushs(p->name);	      /* macro name  */
293 				pushs(ep);	      /* start next..*/
294 
295 				putback(l = gpbc());
296 				if (l != LPAREN)  {   /* add bracks  */
297 					putback(RPAREN);
298 					putback(LPAREN);
299 				}
300 			}
301 		}
302 		else if (t == EOF) {
303 			if (sp > -1)
304 				error("m4: unexpected end of input");
305 			if (--ilevel < 0)
306 				break;			/* all done thanks.. */
307 			(void) fclose(infile[ilevel+1]);
308 			continue;
309 		}
310 	/*
311 	 * non-alpha single-char token seen..
312 	 * [the order of else if .. stmts is
313 	 * important.]
314 	 *
315 	 */
316 		else if (t == lquote) { 		/* strip quotes */
317 			nlpar = 1;
318 			do {
319 				if ((l = gpbc()) == rquote)
320 					nlpar--;
321 				else if (l == lquote)
322 					nlpar++;
323 				else if (l == EOF)
324 					error("m4: missing right quote");
325 				if (nlpar > 0) {
326 					if (sp < 0)
327 						putc(l, active);
328 					else
329 						chrsave(l);
330 				}
331 			}
332 			while (nlpar != 0);
333 		}
334 
335 		else if (sp < 0) {		/* not in a macro at all */
336 			if (t == scommt) {	/* comment handling here */
337 				putc(t, active);
338 				while ((t = gpbc()) != ecommt)
339 					putc(t, active);
340 			}
341 			putc(t, active);	/* output directly..	 */
342 		}
343 
344 		else switch(t) {
345 
346 		case LPAREN:
347 			if (PARLEV > 0)
348 				chrsave(t);
349 			while (isspace(l = gpbc()))
350 				;		/* skip blank, tab, nl.. */
351 			putback(l);
352 			PARLEV++;
353 			break;
354 
355 		case RPAREN:
356 			if (--PARLEV > 0)
357 				chrsave(t);
358 			else {			/* end of argument list */
359 				chrsave(EOS);
360 
361 				if (sp == STACKMAX)
362 					error("m4: internal stack overflow");
363 
364 				if (CALTYP == MACRTYPE)
365 					expand(mstack+fp+1, sp-fp);
366 				else
367 					eval(mstack+fp+1, sp-fp, CALTYP);
368 
369 				ep = PREVEP;	/* flush strspace */
370 				sp = PREVSP;	/* previous sp..  */
371 				fp = PREVFP;	/* rewind stack...*/
372 			}
373 			break;
374 
375 		case COMMA:
376 			if (PARLEV == 1)	{
377 				chrsave(EOS);		/* new argument   */
378 				while (isspace(l = gpbc()))
379 					;
380 				putback(l);
381 				pushs(ep);
382 			}
383 			break;
384 		default:
385 			chrsave(t);			/* stack the char */
386 			break;
387 		}
388 	}
389 }
390 
391 
392 /*
393  * build an input token..
394  * consider only those starting with _ or A-Za-z. This is a
395  * combo with lookup to speed things up.
396  */
397 ndptr
398 inspect(tp)
399 register char *tp;
400 {
401 	register int h = 0;
402 	register char c;
403 	register char *name = tp;
404 	register char *etp = tp+MAXTOK;
405 	register ndptr p;
406 
407 	while (tp < etp && (isalnum(c = gpbc()) || c == '_'))
408 		h += (*tp++ = c);
409 	putback(c);
410 	if (tp == etp)
411 		error("m4: token too long");
412 	*tp = EOS;
413 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
414 		if (strcmp(name, p->name) == 0)
415 			break;
416 	return(p);
417 }
418 
#ifdef NONZEROPAGES
/*
 * initm4 - clear the macro hash table and the diversion file table.
 * Compiled in only when NONZEROPAGES is defined, i.e. for systems that
 * do not know anything about demand-zero pages.
 */
initm4()
{
	register ndptr *bucket = hashtab;
	register FILE **divp = outfile;

	/* every hash chain starts out empty */
	while (bucket < &hashtab[HASHSIZE])
		*bucket++ = nil;
	/* no diversion has a file yet */
	while (divp < &outfile[MAXOUT])
		*divp++ = NULL;
}
#endif
435 
436 /*
437  * initkwds - initialise m4 keywords as fast as possible.
438  * This very similar to install, but without certain overheads,
439  * such as calling lookup. Malloc is not used for storing the
440  * keyword strings, since we simply use the static  pointers
441  * within keywrds block. We also assume that there is enough memory
442  * to at least install the keywords (i.e. malloc won't fail).
443  *
444  */
445 initkwds() {
446 	register int i;
447 	register int h;
448 	register ndptr p;
449 
450 	for (i = 0; i < MAXKEYS; i++) {
451 		h = hash(keywrds[i].knam);
452 		p = (ndptr) malloc(sizeof(struct ndblock));
453 		p->nxtptr = hashtab[h];
454 		hashtab[h] = p;
455 		p->name = keywrds[i].knam;
456 		p->defn = null;
457 		p->type = keywrds[i].ktyp | STATIC;
458 	}
459 }
460