xref: /openbsd/usr.bin/m4/main.c (revision f2dfb0a4)
1 /*	$OpenBSD: main.c,v 1.10 1998/06/02 20:46:40 deraadt Exp $	*/
2 /*	$NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $	*/
3 
4 /*-
5  * Copyright (c) 1989, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * This code is derived from software contributed to Berkeley by
9  * Ozan Yigit at York University.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the University of
22  *	California, Berkeley and its contributors.
23  * 4. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  */
39 
/*
 * Identification strings embedded in the object file.  They are never
 * written to, so they are const-qualified.  Everything is compiled out
 * when `lint' is defined.
 */
#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1989, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
static const char sccsid[] = "@(#)main.c	8.1 (Berkeley) 6/6/93";
#else
static const char rcsid[] = "$OpenBSD: main.c,v 1.10 1998/06/02 20:46:40 deraadt Exp $";
#endif
#endif /* not lint */
53 
54 /*
55  * main.c
56  * Facility: m4 macro processor
57  * by: oz
58  */
59 
60 #include <sys/types.h>
61 #include <signal.h>
62 #include <errno.h>
63 #include <unistd.h>
64 #include <stdio.h>
65 #include <ctype.h>
66 #include <string.h>
67 #include "mdef.h"
68 #include "stdd.h"
69 #include "extern.h"
70 #include "pathnames.h"
71 
72 ndptr hashtab[HASHSIZE];	/* hash table for macros etc.  */
73 pbent buf[BUFSIZE];		/* push-back buffer	       */
74 pbent *bufbase = buf;		/* the base for current ilevel */
75 pbent *bbase[MAXINP];		/* the base for each ilevel    */
76 pbent *bp = buf; 		/* first available character   */
77 pbent *endpbb = buf+BUFSIZE;	/* end of push-back buffer     */
78 stae mstack[STACKMAX+1]; 	/* stack of m4 machine         */
79 char strspace[STRSPMAX+1];	/* string space for evaluation */
80 char *ep = strspace;		/* first free char in strspace */
81 char *endest= strspace+STRSPMAX;/* end of string space	       */
82 int sp; 			/* current m4  stack pointer   */
83 int fp; 			/* m4 call frame pointer       */
84 FILE *infile[MAXINP];		/* input file stack (0=stdin)  */
85 FILE *outfile[MAXOUT];		/* diversion array(0=bitbucket)*/
86 FILE *active;			/* active output file pointer  */
87 char *m4temp;			/* filename for diversions     */
88 int ilevel = 0; 		/* input file stack pointer    */
89 int oindex = 0; 		/* diversion index..	       */
90 char *null = "";                /* as it says.. just a null..  */
91 char *m4wraps = "";             /* m4wrap string default..     */
92 char *progname;			/* name of this program        */
93 char lquote[MAXCCHARS+1] = {LQUOTE};	/* left quote character  (`)   */
94 char rquote[MAXCCHARS+1] = {RQUOTE};	/* right quote character (')   */
95 char scommt[MAXCCHARS+1] = {SCOMMT};	/* start character for comment */
96 char ecommt[MAXCCHARS+1] = {ECOMMT};	/* end character for comment   */
97 
98 struct keyblk keywrds[] = {	/* m4 keywords to be installed */
99 	{ "include",      INCLTYPE },
100 	{ "sinclude",     SINCTYPE },
101 	{ "define",       DEFITYPE },
102 	{ "defn",         DEFNTYPE },
103 	{ "divert",       DIVRTYPE },
104 	{ "expr",         EXPRTYPE },
105 	{ "eval",         EXPRTYPE },
106 	{ "substr",       SUBSTYPE },
107 	{ "ifelse",       IFELTYPE },
108 	{ "ifdef",        IFDFTYPE },
109 	{ "len",          LENGTYPE },
110 	{ "incr",         INCRTYPE },
111 	{ "decr",         DECRTYPE },
112 	{ "dnl",          DNLNTYPE },
113 	{ "changequote",  CHNQTYPE },
114 	{ "changecom",    CHNCTYPE },
115 	{ "index",        INDXTYPE },
116 #ifdef EXTENDED
117 	{ "paste",        PASTTYPE },
118 	{ "spaste",       SPASTYPE },
119 #endif
120 	{ "popdef",       POPDTYPE },
121 	{ "pushdef",      PUSDTYPE },
122 	{ "dumpdef",      DUMPTYPE },
123 	{ "shift",        SHIFTYPE },
124 	{ "translit",     TRNLTYPE },
125 	{ "undefine",     UNDFTYPE },
126 	{ "undivert",     UNDVTYPE },
127 	{ "divnum",       DIVNTYPE },
128 	{ "maketemp",     MKTMTYPE },
129 	{ "errprint",     ERRPTYPE },
130 	{ "m4wrap",       M4WRTYPE },
131 	{ "m4exit",       EXITTYPE },
132 	{ "syscmd",       SYSCTYPE },
133 	{ "sysval",       SYSVTYPE },
134 
135 #if defined(unix) || defined(__NetBSD__) || defined(__OpenBSD__)
136 	{ "unix",         MACRTYPE },
137 #else
138 #ifdef vms
139 	{ "vms",          MACRTYPE },
140 #endif
141 #endif
142 };
143 
/* number of entries in the keywrds[] table */
#define MAXKEYS	(sizeof(keywrds) / sizeof(keywrds[0]))

/* getopt(3) interface used by main() */
extern char *optarg;
extern int optind;
extern int getopt();

/* functions defined further down in this file */
void initkwds();
void macro();
152 
153 int
154 main(argc,argv)
155 	int argc;
156 	char *argv[];
157 {
158 	register int c;
159 	register int n;
160 	char *p;
161 	register FILE *ifp;
162 
163 	progname = basename(argv[0]);
164 
165 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
166 		signal(SIGINT, onintr);
167 
168 	initkwds();
169 
170 	while ((c = getopt(argc, argv, "tD:U:o:")) != -1)
171 		switch(c) {
172 
173 		case 'D':               /* define something..*/
174 			for (p = optarg; *p; p++)
175 				if (*p == '=')
176 					break;
177 			if (*p)
178 				*p++ = EOS;
179 			dodefine(optarg, p);
180 			break;
181 		case 'U':               /* undefine...       */
182 			remhash(optarg, TOP);
183 			break;
184 		case 'o':		/* specific output   */
185 		case '?':
186 			usage();
187 		}
188 
189         argc -= optind;
190         argv += optind;
191 
192 	active = stdout;		/* default active output     */
193 					/* filename for diversions   */
194 	m4temp = mktemp(xstrdup(_PATH_DIVNAME));
195 
196 	bbase[0] = bufbase;
197         if (!argc) {
198  		sp = -1;		/* stack pointer initialized */
199 		fp = 0; 		/* frame pointer initialized */
200 		infile[0] = stdin;	/* default input (naturally) */
201 		macro();
202 	} else
203 		for (; argc--; ++argv) {
204 			p = *argv;
205 			if (p[0] == '-' && p[1] == '\0')
206 				ifp = stdin;
207 			else if ((ifp = fopen(p, "r")) == NULL)
208 				oops("%s: %s", p, strerror(errno));
209 			sp = -1;
210 			fp = 0;
211 			infile[0] = ifp;
212 			macro();
213 			if (ifp != stdin)
214 				(void)fclose(ifp);
215 		}
216 
217 	if (*m4wraps) { 		/* anything for rundown ??   */
218 		ilevel = 0;		/* in case m4wrap includes.. */
219 		bufbase = bp = buf;	/* use the entire buffer   */
220 		putback(EOF);		/* eof is a must !!	     */
221 		pbstr(m4wraps); 	/* user-defined wrapup act   */
222 		macro();		/* last will and testament   */
223 	}
224 
225 	if (active != stdout)
226 		active = stdout;	/* reset output just in case */
227 	for (n = 1; n < MAXOUT; n++)	/* default wrap-up: undivert */
228 		if (outfile[n] != NULL)
229 			getdiv(n);
230 					/* remove bitbucket if used  */
231 	if (outfile[0] != NULL) {
232 		(void) fclose(outfile[0]);
233 		m4temp[UNIQUE] = '0';
234 #ifdef vms
235 		(void) remove(m4temp);
236 #else
237 		(void) unlink(m4temp);
238 #endif
239 	}
240 
241 	return 0;
242 }
243 
244 ndptr inspect();
245 
246 /*
247  * Look ahead (at most MAXCCHARS characters) for `token'.
248  * (on input `t == token[0]')
249  * Used for comment and quoting delimiters.
250  * Returns 1 if `token' present; copied to output.
251  *         0 if `token' not found; all characters pushed back
252  */
253 int
254 do_look_ahead(t, token)
255 	int	t;
256 	char	*token;
257 {
258 	int i;
259 
260 	if (t != token[0])
261 		oops("internal error", "");
262 
263 	for (i = 1; *++token; i++) {
264 		t = gpbc();
265 		if (t == EOF || t != *token) {
266 			if (t != EOF)
267 				putback(t);
268 			while (--i)
269 				putback(*--token);
270 			return 0;
271 		}
272 	}
273 	return 1;
274 }
275 
276 #define LOOK_AHEAD(t, token) ((t)==(token)[0] && do_look_ahead(t,token))
277 
278 /*
279  * macro - the work horse..
280  */
281 void
282 macro() {
283 	char token[MAXTOK], chars[2];
284 	register char *s;
285 	register int t, l;
286 	register ndptr p;
287 	register int  nlpar;
288 
289 	cycle {
290 		t = gpbc();
291 		if (t == '_' || isalpha(t)) {
292 			putback(t);
293 			s = token;
294 			if ((p = inspect(s)) == nil) {
295 				if (sp < 0)
296 					while (*s)
297 						putc(*s++, active);
298 				else
299 					while (*s)
300 						chrsave(*s++);
301 			}
302 			else {
303 		/*
304 		 * real thing.. First build a call frame:
305 		 */
306 				pushf(fp);	/* previous call frm */
307 				pushf(p->type); /* type of the call  */
308 				pushf(0);	/* parenthesis level */
309 				fp = sp;	/* new frame pointer */
310 		/*
311 		 * now push the string arguments:
312 		 */
313 				pushs(p->defn);	      /* defn string */
314 				pushs(p->name);	      /* macro name  */
315 				pushs(ep);	      /* start next..*/
316 
317 				putback(l = gpbc());
318 				if (l != LPAREN)  {   /* add bracks  */
319 					putback(RPAREN);
320 					putback(LPAREN);
321 				}
322 			}
323 		}
324 		else if (t == EOF) {
325 			if (sp > -1)
326 				oops("unexpected end of input", "");
327 			if (ilevel <= 0)
328 				break;			/* all done thanks.. */
329 			--ilevel;
330 			(void) fclose(infile[ilevel+1]);
331 			bufbase = bbase[ilevel];
332 			continue;
333 		}
334 	/*
335 	 * non-alpha token possibly seen..
336 	 * [the order of else if .. stmts is important.]
337 	 */
338 		else if (LOOK_AHEAD(t,lquote)) {	/* strip quotes */
339 			nlpar = 1;
340 			do {
341 
342 				l = gpbc();
343 				if (LOOK_AHEAD(l,rquote)) {
344 					nlpar--;
345 					s = rquote;
346 				} else if (LOOK_AHEAD(l,lquote)) {
347 					nlpar++;
348 					s = lquote;
349 				} else if (l == EOF)
350 					oops("missing right quote", "");
351 				else {
352 					chars[0] = l;
353 					chars[1] = '\0';
354 					s = chars;
355 				}
356 				if (nlpar > 0) {
357 					if (sp < 0)
358 						while (*s)
359 							putc(*s++, active);
360 					else
361 						while (*s)
362 							chrsave(*s++);
363 				}
364 			}
365 			while (nlpar != 0);
366 		}
367 
368 		else if (sp < 0 && LOOK_AHEAD(t, scommt)) {
369 			int i;
370 			for (i = 0; i < MAXCCHARS && scommt[i]; i++)
371 				putc(scommt[i], active);
372 
373 			for(;;) {
374 				t = gpbc();
375 				if (LOOK_AHEAD(t, ecommt)) {
376 					for (i = 0; i < MAXCCHARS && ecommt[i];
377 					     i++)
378 						putc(ecommt[i], active);
379 					break;
380 				}
381 				if (t == EOF)
382 					break;
383 				putc(t, active);
384 			}
385 		}
386 
387 		else if (sp < 0) {		/* not in a macro at all */
388 			putc(t, active);	/* output directly..	 */
389 		}
390 
391 		else switch(t) {
392 
393 		case LPAREN:
394 			if (PARLEV > 0)
395 				chrsave(t);
396 			while (isspace(l = gpbc()))
397 				;		/* skip blank, tab, nl.. */
398 			putback(l);
399 			PARLEV++;
400 			break;
401 
402 		case RPAREN:
403 			if (--PARLEV > 0)
404 				chrsave(t);
405 			else {			/* end of argument list */
406 				chrsave(EOS);
407 
408 				if (sp == STACKMAX)
409 					oops("internal stack overflow", "");
410 
411 				if (CALTYP == MACRTYPE)
412 					expand((char **) mstack+fp+1, sp-fp);
413 				else
414 					eval((char **) mstack+fp+1, sp-fp, CALTYP);
415 
416 				ep = PREVEP;	/* flush strspace */
417 				sp = PREVSP;	/* previous sp..  */
418 				fp = PREVFP;	/* rewind stack...*/
419 			}
420 			break;
421 
422 		case COMMA:
423 			if (PARLEV == 1) {
424 				chrsave(EOS);		/* new argument   */
425 				while (isspace(l = gpbc()))
426 					;
427 				putback(l);
428 				pushs(ep);
429 			} else
430 				chrsave(t);
431 			break;
432 
433 		default:
434 			chrsave(t);			/* stack the char */
435 			break;
436 		}
437 	}
438 }
439 
440 /*
441  * build an input token..
442  * consider only those starting with _ or A-Za-z. This is a
443  * combo with lookup to speed things up.
444  */
445 ndptr
446 inspect(tp)
447 register char *tp;
448 {
449 	register char c;
450 	register char *name = tp;
451 	register char *etp = tp+MAXTOK;
452 	register ndptr p;
453 	register unsigned long h = 0;
454 
455 	while ((isalnum(c = gpbc()) || c == '_') && tp < etp)
456 		h = (h << 5) + h + (*tp++ = c);
457 	putback(c);
458 	if (tp == etp)
459 		oops("token too long", "");
460 
461 	*tp = EOS;
462 
463 	for (p = hashtab[h%HASHSIZE]; p != nil; p = p->nxtptr)
464 		if (STREQ(name, p->name))
465 			break;
466 	return p;
467 }
468 
469 /*
470  * initkwds - initialise m4 keywords as fast as possible.
471  * This very similar to install, but without certain overheads,
472  * such as calling lookup. Malloc is not used for storing the
473  * keyword strings, since we simply use the static  pointers
474  * within keywrds block.
475  */
476 void
477 initkwds() {
478 	register int i;
479 	register int h;
480 	register ndptr p;
481 
482 	for (i = 0; i < MAXKEYS; i++) {
483 		h = hash(keywrds[i].knam);
484 		p = (ndptr) xalloc(sizeof(struct ndblock));
485 		p->nxtptr = hashtab[h];
486 		hashtab[h] = p;
487 		p->name = keywrds[i].knam;
488 		p->defn = null;
489 		p->type = keywrds[i].ktyp | STATIC;
490 	}
491 }
492