xref: /386bsd/usr/src/usr.bin/m4/main.c (revision a2142627)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Ozan Yigit.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #ifndef lint
38 static char sccsid[] = "@(#)main.c	5.6 (Berkeley) 3/6/91";
39 #endif /* not lint */
40 
41 /*
42  * main.c
43  * Facility: m4 macro processor
44  * by: oz
45  */
46 
47 #include <signal.h>
48 #include <unistd.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <string.h>
52 #include "mdef.h"
53 #include "pathnames.h"
54 
55 /*
56  * m4 - macro processor
57  *
58  * PD m4 is based on the macro tool distributed with the software
59  * tools (VOS) package, and described in the "SOFTWARE TOOLS" and
60  * "SOFTWARE TOOLS IN PASCAL" books. It has been expanded to include
61  * most of the command set of SysV m4, the standard UN*X macro processor.
62  *
63  * Since both PD m4 and UN*X m4 are based on SOFTWARE TOOLS macro,
64  * there may be certain implementation similarities between
65  * the two. The PD m4 was produced without ANY references to m4
66  * sources.
67  *
68  * References:
69  *
70  *	Software Tools distribution: macro
71  *
72  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
73  *	TOOLS IN PASCAL, Addison-Wesley, Mass. 1981
74  *
75  *	Kernighan, Brian W. and P. J. Plauger, SOFTWARE
76  *	TOOLS, Addison-Wesley, Mass. 1976
77  *
78  *	Kernighan, Brian W. and Dennis M. Ritchie,
79  *	THE M4 MACRO PROCESSOR, Unix Programmer's Manual,
80  *	Seventh Edition, Vol. 2, Bell Telephone Labs, 1979
81  *
82  *	System V man page for M4
83  *
84  * Modification History:
85  *
86  * Jan 28 1986 Oz	Break the whole thing into little
87  *			pieces, for easier (?) maintenance.
88  *
89  * Dec 12 1985 Oz	Optimize the code, try to squeeze
90  *			few microseconds out..
91  *
92  * Dec 05 1985 Oz	Add getopt interface, define (-D),
93  *			undefine (-U) options.
94  *
95  * Oct 21 1985 Oz	Clean up various bugs, add comment handling.
96  *
97  * June 7 1985 Oz	Add some of SysV m4 stuff (m4wrap, pushdef,
98  *			popdef, decr, shift etc.).
99  *
100  * June 5 1985 Oz	Initial cut.
101  *
102  * Implementation Notes:
103  *
104  * [1]	PD m4 uses a different (and simpler) stack mechanism than the one
105  *	described in Software Tools and Software Tools in Pascal books.
106  *	The triple stack nonsense is replaced with a single stack containing
107  *	the call frames and the arguments. Each frame is back-linked to a
108  * 	previous stack frame, which enables us to rewind the stack after
109  * 	each nested call is completed. Each argument is a character pointer
110  *	to the beginning of the argument string within the string space.
111  *	The only exceptions to this are (*) arg 0 and arg 1, which are
112  * 	the macro definition and macro name strings, stored dynamically
113  *	for the hash table.
114  *
115  *	    .					   .
116  *	|   .	|  <-- sp			|  .  |
117  *	+-------+				+-----+
118  *	| arg 3 ------------------------------->| str |
119  *	+-------+				|  .  |
120  *	| arg 2 --------------+ 		   .
121  *	+-------+	      |
122  *	    *		      |			|     |
123  *	+-------+	      | 		+-----+
124  *	| plev	|  <-- fp     +---------------->| str |
125  *	+-------+				|  .  |
126  *	| type	|				   .
127  *	+-------+
128  *	| prcf	-----------+		plev: paren level
129  *	+-------+  	   |		type: call type
130  *	|   .	| 	   |		prcf: prev. call frame
131  *	    .	   	   |
132  *	+-------+	   |
133  *	|	<----------+
134  *	+-------+
135  *
136  * [2]	We have three types of null values:
137  *
138  *		nil  - nodeblock pointer type 0
139  *		null - null string ("")
140  *		NULL - Stdio-defined NULL
141  *
142  */
143 
144 ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
145 char buf[BUFSIZE];		/* push-back buffer	       */
146 char *bp = buf; 		/* first available character   */
147 char *endpbb = buf+BUFSIZE;	/* end of push-back buffer     */
148 stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
149 char strspace[STRSPMAX+1];	/* string space for evaluation */
150 char *ep = strspace;		/* first free char in strspace */
151 char *endest= strspace+STRSPMAX;/* end of string space	       */
152 int sp; 			/* current m4  stack pointer   */
153 int fp; 			/* m4 call frame pointer       */
154 FILE *infile[MAXINP];		/* input file stack (0=stdin)  */
155 FILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
156 FILE *active;			/* active output file pointer  */
157 char *m4temp;			/* filename for diversions     */
158 int ilevel = 0; 		/* input file stack pointer    */
159 int oindex = 0; 		/* diversion index..	       */
160 char *null = "";                /* as it says.. just a null..  */
161 char *m4wraps = "";             /* m4wrap string default..     */
162 char lquote = LQUOTE;		/* left quote character  (`)   */
163 char rquote = RQUOTE;		/* right quote character (')   */
164 char scommt = SCOMMT;		/* start character for comment */
165 char ecommt = ECOMMT;		/* end character for comment   */
166 struct keyblk keywrds[] = {	/* m4 keywords to be installed */
167 	"include",      INCLTYPE,
168 	"sinclude",     SINCTYPE,
169 	"define",       DEFITYPE,
170 	"defn",         DEFNTYPE,
171 	"divert",       DIVRTYPE,
172 	"expr",         EXPRTYPE,
173 	"eval",         EXPRTYPE,
174 	"substr",       SUBSTYPE,
175 	"ifelse",       IFELTYPE,
176 	"ifdef",        IFDFTYPE,
177 	"len",          LENGTYPE,
178 	"incr",         INCRTYPE,
179 	"decr",         DECRTYPE,
180 	"dnl",          DNLNTYPE,
181 	"changequote",  CHNQTYPE,
182 	"changecom",    CHNCTYPE,
183 	"index",        INDXTYPE,
184 #ifdef EXTENDED
185 	"paste",        PASTTYPE,
186 	"spaste",       SPASTYPE,
187 #endif
188 	"popdef",       POPDTYPE,
189 	"pushdef",      PUSDTYPE,
190 	"dumpdef",      DUMPTYPE,
191 	"shift",        SHIFTYPE,
192 	"translit",     TRNLTYPE,
193 	"undefine",     UNDFTYPE,
194 	"undivert",     UNDVTYPE,
195 	"divnum",       DIVNTYPE,
196 	"maketemp",     MKTMTYPE,
197 	"errprint",     ERRPTYPE,
198 	"m4wrap",       M4WRTYPE,
199 	"m4exit",       EXITTYPE,
200 	"syscmd",       SYSCTYPE,
201 	"sysval",       SYSVTYPE,
202 	"unix",         MACRTYPE,
203 };
204 
205 #define MAXKEYS	(sizeof(keywrds)/sizeof(struct keyblk))
206 
207 extern ndptr lookup();
208 extern ndptr addent();
209 extern void onintr();
210 
211 extern int optind;
212 extern char *optarg;
213 
main(argc,argv)214 main(argc,argv)
215 	int argc;
216 	char **argv;
217 {
218 	register int c;
219 	register int n;
220 	char *p;
221 
222 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
223 		signal(SIGINT, onintr);
224 #ifdef NONZEROPAGES
225 	initm4();
226 #endif
227 	initkwds();
228 
229 	while ((c = getopt(argc, argv, "tD:U:o:")) != EOF)
230 		switch(c) {
231 
232 		case 'D':               /* define something..*/
233 			for (p = optarg; *p; p++)
234 				if (*p == '=')
235 					break;
236 			if (*p)
237 				*p++ = EOS;
238 			dodefine(optarg, p);
239 			break;
240 		case 'U':               /* undefine...       */
241 			remhash(optarg, TOP);
242 			break;
243 		case 'o':		/* specific output   */
244 		case '?':
245 		default:
246 			usage();
247 		}
248 
249 	infile[0] = stdin;		/* default input (naturally) */
250 	active = stdout;		/* default active output     */
251 	m4temp = mktemp(strdup(DIVNAM));/* filename for diversions   */
252 
253 	sp = -1;			/* stack pointer initialized */
254 	fp = 0; 			/* frame pointer initialized */
255 
256 	macro();			/* get some work done here   */
257 
258 	if (*m4wraps) { 		/* anything for rundown ??   */
259 		ilevel = 0;		/* in case m4wrap includes.. */
260 		putback(EOF);		/* eof is a must !!	     */
261 		pbstr(m4wraps); 	/* user-defined wrapup act   */
262 		macro();		/* last will and testament   */
263 	}
264 
265 	if (active != stdout)
266 		active = stdout;	/* reset output just in case */
267 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
268 		if (outfile[n] != NULL)
269 			getdiv(n);
270 					/* remove bitbucket if used  */
271 	if (outfile[0] != NULL) {
272 		(void) fclose(outfile[0]);
273 		m4temp[UNIQUE] = '0';
274 		(void) unlink(m4temp);
275 	}
276 
277 	exit(0);
278 }
279 
280 ndptr inspect();	/* forward ... */
281 
282 /*
283  * macro - the work horse..
284  *
285  */
macro()286 macro() {
287 	char token[MAXTOK];
288 	register char *s;
289 	register int t, l;
290 	register ndptr p;
291 	register int  nlpar;
292 
293 	cycle {
294 		if ((t = gpbc()) == '_' || isalpha(t)) {
295 			putback(t);
296 			if ((p = inspect(s = token)) == nil) {
297 				if (sp < 0)
298 					while (*s)
299 						putc(*s++, active);
300 				else
301 					while (*s)
302 						chrsave(*s++);
303 			}
304 			else {
305 		/*
306 		 * real thing.. First build a call frame:
307 		 *
308 		 */
309 				pushf(fp);	/* previous call frm */
310 				pushf(p->type); /* type of the call  */
311 				pushf(0);	/* parenthesis level */
312 				fp = sp;	/* new frame pointer */
313 		/*
314 		 * now push the string arguments:
315 		 *
316 		 */
317 				pushs(p->defn);	      /* defn string */
318 				pushs(p->name);	      /* macro name  */
319 				pushs(ep);	      /* start next..*/
320 
321 				putback(l = gpbc());
322 				if (l != LPAREN)  {   /* add bracks  */
323 					putback(RPAREN);
324 					putback(LPAREN);
325 				}
326 			}
327 		}
328 		else if (t == EOF) {
329 			if (sp > -1)
330 				error("m4: unexpected end of input");
331 			if (--ilevel < 0)
332 				break;			/* all done thanks.. */
333 			(void) fclose(infile[ilevel+1]);
334 			continue;
335 		}
336 	/*
337 	 * non-alpha single-char token seen..
338 	 * [the order of else if .. stmts is
339 	 * important.]
340 	 *
341 	 */
342 		else if (t == lquote) { 		/* strip quotes */
343 			nlpar = 1;
344 			do {
345 				if ((l = gpbc()) == rquote)
346 					nlpar--;
347 				else if (l == lquote)
348 					nlpar++;
349 				else if (l == EOF)
350 					error("m4: missing right quote");
351 				if (nlpar > 0) {
352 					if (sp < 0)
353 						putc(l, active);
354 					else
355 						chrsave(l);
356 				}
357 			}
358 			while (nlpar != 0);
359 		}
360 
361 		else if (sp < 0) {		/* not in a macro at all */
362 			if (t == scommt) {	/* comment handling here */
363 				putc(t, active);
364 				while ((t = gpbc()) != ecommt)
365 					putc(t, active);
366 			}
367 			putc(t, active);	/* output directly..	 */
368 		}
369 
370 		else switch(t) {
371 
372 		case LPAREN:
373 			if (PARLEV > 0)
374 				chrsave(t);
375 			while (isspace(l = gpbc()))
376 				;		/* skip blank, tab, nl.. */
377 			putback(l);
378 			PARLEV++;
379 			break;
380 
381 		case RPAREN:
382 			if (--PARLEV > 0)
383 				chrsave(t);
384 			else {			/* end of argument list */
385 				chrsave(EOS);
386 
387 				if (sp == STACKMAX)
388 					error("m4: internal stack overflow");
389 
390 				if (CALTYP == MACRTYPE)
391 					expand(mstack+fp+1, sp-fp);
392 				else
393 					eval(mstack+fp+1, sp-fp, CALTYP);
394 
395 				ep = PREVEP;	/* flush strspace */
396 				sp = PREVSP;	/* previous sp..  */
397 				fp = PREVFP;	/* rewind stack...*/
398 			}
399 			break;
400 
401 		case COMMA:
402 			if (PARLEV == 1)	{
403 				chrsave(EOS);		/* new argument   */
404 				while (isspace(l = gpbc()))
405 					;
406 				putback(l);
407 				pushs(ep);
408 			}
409 			break;
410 		default:
411 			chrsave(t);			/* stack the char */
412 			break;
413 		}
414 	}
415 }
416 
417 
418 /*
419  * build an input token..
420  * consider only those starting with _ or A-Za-z. This is a
421  * combo with lookup to speed things up.
422  */
423 ndptr
inspect(tp)424 inspect(tp)
425 register char *tp;
426 {
427 	register int h = 0;
428 	register char c;
429 	register char *name = tp;
430 	register char *etp = tp+MAXTOK;
431 	register ndptr p;
432 
433 	while (tp < etp && (isalnum(c = gpbc()) || c == '_'))
434 		h += (*tp++ = c);
435 	putback(c);
436 	if (tp == etp)
437 		error("m4: token too long");
438 	*tp = EOS;
439 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
440 		if (strcmp(name, p->name) == 0)
441 			break;
442 	return(p);
443 }
444 
#ifdef NONZEROPAGES
/*
 * initm4 - reset the macro hash table and the diversion file table
 * by hand: every hash bucket becomes nil and every diversion slot
 * becomes NULL.  Useful only if your system does not know anything
 * about demand-zero pages.
 */
initm4()
{
	register ndptr *bucket;
	register FILE **slot;

	for (bucket = hashtab; bucket < hashtab + HASHSIZE; bucket++)
		*bucket = nil;
	for (slot = outfile; slot < outfile + MAXOUT; slot++)
		*slot = NULL;
}
#endif
461 
462 /*
463  * initkwds - initialise m4 keywords as fast as possible.
464  * This very similar to install, but without certain overheads,
465  * such as calling lookup. Malloc is not used for storing the
466  * keyword strings, since we simply use the static  pointers
467  * within keywrds block. We also assume that there is enough memory
468  * to at least install the keywords (i.e. malloc won't fail).
469  *
470  */
initkwds()471 initkwds() {
472 	register int i;
473 	register int h;
474 	register ndptr p;
475 
476 	for (i = 0; i < MAXKEYS; i++) {
477 		h = hash(keywrds[i].knam);
478 		p = (ndptr) malloc(sizeof(struct ndblock));
479 		p->nxtptr = hashtab[h];
480 		hashtab[h] = p;
481 		p->name = keywrds[i].knam;
482 		p->defn = null;
483 		p->type = keywrds[i].ktyp | STATIC;
484 	}
485 }
486