1 /*  Part of SWI-Prolog
2 
3     Author:        Jan Wielemaker
4     E-mail:        J.Wielemaker@vu.nl
5     WWW:           http://www.swi-prolog.org
6     Copyright (c)  1997-2020, University of Amsterdam
7 			      CWI, Amsterdam
8     All rights reserved.
9 
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions
12     are met:
13 
14     1. Redistributions of source code must retain the above copyright
15        notice, this list of conditions and the following disclaimer.
16 
17     2. Redistributions in binary form must reproduce the above copyright
18        notice, this list of conditions and the following disclaimer in
19        the documentation and/or other materials provided with the
20        distribution.
21 
22     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26     COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33     POSSIBILITY OF SUCH DAMAGE.
34 */
35 
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <stdarg.h>
39 #include <assert.h>
40 #include <errno.h>
41 #include <signal.h>
42 #ifdef HAVE_MALLOC_H
43 #include <malloc.h>
44 #endif
45 #include <string.h>
46 
static int debuglevel = 0;		/* threshold tested by DEBUG() */
static int emit_space = 0;		/* not referenced in this part of the file */

/* Execute `g` when `n` does not exceed debuglevel.  Note that this expands
   to a bare `if` statement, so using it right before an `else` would
   capture that `else`. */
#define DEBUG(n, g)	if ( n <= debuglevel ) { g; }

/* Sizes of the fixed-size buffers used while reading TeX input */
#define MAXFUNC		100		/* max bounded function name length */
#define MAXCMD		256		/* max length of a \foobar */
#define MAXARG		4096		/* max {..} argument length */
#define MAXWORD		1024		/* max length of a word (no spaces) */
#define MAXVERB		1024		/* max length of \verb|string| */
#define MAXMATH		4096		/* max length of $...$ */
#define MAXVERBATIM	10240		/* max size of verbatim */
#define MAXOUTPUT	11000		/* output() max */
#define MAXCMDARGS	32		/* max # arguments of a command/environment */
#define MAXENVNESTING	256		/* max depth of environment */
62 
/* Error reporting.  Both functions are defined outside this part of the
   file; `eno` is one of the ERR_* codes below.
   NOTE(review): the callers in this file continue as if error() does not
   return while warn() does -- confirm against the definitions. */
static void error(int eno, const char *file, int line); /* handle errors */
static void warn(int eno, const char *file, int line); /* report warnings */

#define ERR_UNEXPECTED_EOF	1	/* unexpected end-of-file */
#define ERR_RUNAWAY_ARGUMENT	2	/* runaway argument */
#define ERR_VERBATIM_TOO_LONG	3	/* verbatim env too large */
#define ERR_NOCMD_SPECS		4	/* cannot find command-specs */
#define ERR_BAD_COMMAND_SPEC	5	/* syntax error in command specs */
#define ERR_ENV_NESTING		6	/* environment-stack overflow */
#define ERR_UNDEF_FUNCTION	7	/* undefined function */
#define ERR_BAD_ARG_SPEC	8	/* syntax error in argument spec */
#define ERR_BAD_ENV_SPEC	9	/* syntax error in environment spec */
#define ERR_RUNAWAY_MATH       10	/* notclosed math env */
#define ERR_BAD_MATH_ENV_CLOSURE 11	/* $$ closed by $ */
#define ERR_ENV_UNDERFLOW      12	/* environment stack-underflow */
#define ERR_CMD_TOO_LONG       13	/* command name exceeds buffer */
#define ERR_BAD_DIM	       14	/* malformed dimension */

/* Message text for each ERR_* code: the array index is the error code, so
   the order of the entries must follow the numbering above. */
const char *tex_error_strings[] =
{ "No error",
  "Unexpected end of file",
  "Runaway argument",
  "Verbatim environment too long",
  "Cannot find command specification file",
  "Syntax error",
  "Environment-stack overflow",
  "Undefined function",
  "Syntax error in argument definition",
  "Syntax error in environment definition",
  "$: Runaway argument",
  "$$ closed by single $",
  "Too many '}' or \\end{}",
  "Command name too long",
  "Bad dimension",
  NULL					/* allow for easy enumeration */
};
99 
/* Boolean constants, unless already supplied by an included header */
#ifndef FALSE
#define FALSE 0
#define TRUE  1
#endif

#define EOS '\0'			/* end-of-string */
/* True when the two NUL-terminated strings have identical contents */
#define streq(s1, s2) (strcmp((s1), (s2)) == 0)
107 
108 		 /*******************************
109 		 *	     COMMANDS		*
110 		 *******************************/
111 
/* Flag passed to the get*Argument() readers: do not skip leading blanks */
#define F_NOSKIPBLANK	0x01		/* Argument handling */

/* Bits for cmd_arg.flags, set by parseArgSpec() */
#define CA_OPTIONAL	0x01		/* [optional arg] */
#define CA_TEXT		0x02		/* Argument contains output text */
#define CA_DIM		0x04		/* Argument is a dimension */

/* Bit used in both descriptor flags (a `*` variant exists) and instance
   flags (the `*` variant was actually used) */
#define CMD_MODIFY	0x01		/* command allows for modifier */

/* Special value for cmd_descr.pre_lines */
#define PRE_COMMENT	-1		/* put %\n before command */

typedef struct _command *Command;
typedef struct _environment *Environment;
typedef struct _token *Token;
typedef struct _input *Input;
typedef struct _output *Output;

/* CallBack receives each token that is produced together with the
   caller-supplied context pointer.  CmdFunc/EnvFunc are the handlers that
   can be attached to a command or environment via `=name` in a spec. */
typedef int (*CallBack)(Token token, void *context);
typedef void (*CmdFunc)(Command cmd, Input fd, CallBack func, void *ctx);
typedef void (*EnvFunc)(Environment cmd, Input fd, CallBack func, void *ctx);
/* Untyped handler pointer; holds either a CmdFunc or an EnvFunc */
typedef void *AnyFunc;
static AnyFunc lookupFunction(const char *name);
133 
/* Specification of a single argument of a command or environment */
typedef struct
{ int flags;				/* command arguments flags (CA_*) */
} cmd_arg, *CmdArg;
137 
/* Definition of a TeX command, kept in cmd_table and filled from a line of
   the command-specification file (see parseCommandSpec()) */
typedef struct _cmd_descr
{ const char*name;			/* name of the command */
  int	     flags;			/* command-flags */
  CmdArg     arguments;			/* argument specifiers */
  char	     arg_count;			/* # arguments */
  char	     pre_lines;			/* # newlines needed before (or PRE_COMMENT) */
  char	     post_lines;		/* # newlines needed after */
  CmdFunc    function;			/* associated function */
  const char*fname;			/* function-name */
  struct _cmd_descr *next;		/* next in hash-table */
} cmd_descr, *CmdDescr;
149 
/* One occurrence of a command in the input: its definition plus the
   argument texts that were read for it */
typedef struct _command
{ CmdDescr   command;			/* the commands */
  int	     flags;			/* general flags */
  char **    arguments;			/* the actual arguments */
} command;
155 
/* Definition of a TeX environment, kept in env_table and filled from a
   `{name}` line of the command-specification file (see parseEnvSpec()) */
typedef struct _env_descr
{ const char *name;			/* name of the environment */
  int	      flags;			/* environment flags */
  CmdArg      arguments;		/* argument-list */
  char	      arg_count;		/* # arguments */
  EnvFunc     function;			/* associated function */
  const char *fname;			/* function-name */
  struct _env_descr *next;		/* next in hash-table */
} env_descr, *EnvDescr;
165 
/* One \begin{...} occurrence: the environment definition plus the
   argument texts that were read for it */
typedef struct _environment
{ EnvDescr    environment;		/* definition of the environment */
  int	      flags;			/* CMD_MODIFY if opened as name* */
  char **     arguments;		/* the actual arguments */
} environment;
171 
#define INPUT_FILE	0		/* reading from a file */
#define INPUT_STRING	1		/* reading from a string */

/* A source of characters.  Inputs form a stack through `parent`; the top
   of the stack is `curin`. */
typedef struct _input
{ int		type;			/* INPUT_FILE or INPUT_STRING */
  int		lineno;			/* current line, starting at 1 */
  const char *  name;			/* name (for feedback) */
  Input		parent;			/* Parent input */
  union
  { FILE       *fd;			/* INPUT_FILE: the open stream */
    const char *string;			/* INPUT_STRING: next char to read */
  } stream;
} input;

static Input	curin;			/* current input (file) */

/* Handler for \prolog; defined outside this part of the file */
static int cmd_prolog(Command g, Input fd, CallBack func, void *ctx);
189 
190 		 /*******************************
191 		 *	       TOKENS		*
192 		 *******************************/
193 
194 
/* Values for token.type */
#define TOK_CMD		 0		/* \cmd */
#define TOK_BEGIN_GROUP	 1		/* { */
#define TOK_END_GROUP	 2		/* } */
#define TOK_BEGIN_ENV	 3		/* \begin{cmd} */
#define TOK_END_ENV	 4		/* \end{cmd} */
#define TOK_VERB	 5		/* \verb|foo| */
#define TOK_VERBATIM	 6		/* verbatim environment */
#define TOK_PRE		 7		/* pre environment */
#define TOK_MATH	 8		/* $...$ */
#define TOK_MATH_ENV	 9		/* $$...$$ */
#define TOK_PAR		10		/* implicit paragraph (blank line) */
#define TOK_WORD        11		/* general word */
#define TOK_NOSPACEWORD 12		/* word without reintroducing spaces */
#define TOK_SPACE       13		/* blank space */
#define TOK_LINE        14		/* single line */
#define TOK_EOF	        15		/* end-of-file */

/* Unit handed to the CallBack.  Which member of `value` is valid depends
   on `type`: the code in this file sets `cmd` for TOK_CMD, `env` for
   TOK_BEGIN_ENV and `string` for TOK_END_ENV, TOK_VERB and TOK_VERBATIM. */
typedef struct _token
{ int	type;				/* type identifier */
  int	prelines;			/* HTML stuff */
  int	postlines;			/* HTML stuff */
  char *context;			/* additional context info */
  union
  { char *string;			/* related text */
    Command cmd;			/* related TeX command */
    Environment env;			/* related TeX environment */
  } value;
} token;

static const char *texarg;		/* argument for runaway message */
225 
226 		 /*******************************
227 		 *   CHARACTER CLASSIFICATION	*
228 		 *******************************/
229 
/* Character classes stored in char_type[].  The numeric order matters:
   wbreak() tests `<= CM` and isalnum() tests `>= LC`. */
#define EF 0				/* end-of-file */
#define SP 1				/* space */
#define SC 2				/* start line comment (%) */
#define BG 3				/* begin group ({) */
#define EG 4				/* end group (}) */
#define MM 5				/* math-mode delimiter ($) */
#define TD 6				/* Table delimiter (&) */
#define NB 7				/* Non-breaking space (~) */
#define CM 8				/* command (\) */

#define PU 9				/* punctuation */
#define DI 10				/* digit */

#define BQ 11				/* back quote (`) */
#define SQ 12				/* single quote (') */

#define LC 13				/* lower-case letter */
#define UC 14				/* uppercase letter */

/* Class of character `c`.  The +1 makes room for EOF in slot 0 of the
   table (assuming EOF is -1).
   NOTE(review): when `c` is a plain `char` holding a negative value other
   than -1 the index falls before the table -- confirm that callers only
   pass values in the range -1..255. */
#define CharType(c) (char_type[(c)+1])

/* Table-driven predicates.  These use the names of the <ctype.h>
   classifiers but not their meaning: isalnum() is true for the letter
   classes LC and UC only and false for digits. */
#define isspace(c)	(CharType(c) == SP)
#define isdigit(c)	(CharType(c) == DI)
#define isalnum(c)	(CharType(c) >= LC)
#define wbreak(c)	(CharType(c) <= CM) /* breaks a word */
#define isbegingroup(c) (CharType(c) == BG)
#define iscommand(c)    (CharType(c) == CM)
257 
/* Class of each input character, indexed by character code + 1 (see
   CharType()).  Slot 0 is for EOF.  NUL is classified as end-of-file, all
   other control characters and DEL as space, `_` as an uppercase letter
   and every code in 128..255 as a lowercase letter. */
static char char_type[] = {
/* EOF */
   EF,
/* ^@  ^A  ^B  ^C  ^D  ^E  ^F  ^G  ^H  ^I  ^J  ^K  ^L  ^M  ^N  ^O    0-15 */
   EF, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP,
/* ^P  ^Q  ^R  ^S  ^T  ^U  ^V  ^W  ^X  ^Y  ^Z  ^[  ^\  ^]  ^^  ^_   16-31 */
   SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP,
/* sp   !   "   #   $   %   &   '   (   )   *   +   ,   -   .   /   32-47 */
   SP, PU, PU, PU, MM, SC, TD, SQ, PU, PU, PU, PU, PU, PU, PU, PU,
/*  0   1   2   3   4   5   6   7   8   9   :   ;   <   =   >   ?   48-63 */
   DI, DI, DI, DI, DI, DI, DI, DI, DI, DI, PU, PU, PU, PU, PU, PU,
/*  @   A   B   C   D   E   F   G   H   I   J   K   L   M   N   O   64-79 */
   PU, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
/*  P   Q   R   S   T   U   V   W   X   Y   Z   [   \   ]   ^   _   80-95 */
   UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, PU, CM, PU, PU, UC,
/*  `   a   b   c   d   e   f   g   h   i   j   k   l   m   n   o   96-111 */
   BQ, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
/*  p   q   r   s   t   u   v   w   x   y   z   {   |   }   ~  ^?   112-127 */
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, BG, PU, EG, NB, SP,
			  /* 128-255 */
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
   LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC
};
287 
288 
289 		 /*******************************
290 		 *	      INPUT		*
291 		 *******************************/
292 
293 static Input
openInputFile(const char * name)294 openInputFile(const char *name)
295 { FILE *fd;
296 
297   if ( (fd = fopen(name, "r")) )
298   { Input i = malloc(sizeof(input));
299 
300     if ( i )
301     { i->type      = INPUT_FILE;
302       i->stream.fd = fd;
303       i->lineno    = 1;
304 
305       i->name   = name;
306       i->parent = curin;
307       curin = i;
308 
309       return i;
310     }
311   }
312 
313   return NULL;
314 }
315 
316 
317 static Input
openInputString(const char * str)318 openInputString(const char *str)
319 { Input i = malloc(sizeof(input));
320 
321   if ( i )
322   { i->type	     = INPUT_STRING;
323     i->stream.string = str;
324     i->lineno    = 1;
325 
326     i->name = str;
327     i->parent = curin;
328     curin = i;
329 
330     return i;
331   }
332 
333   return NULL;
334 }
335 
336 
337 static void
closeInput(Input i)338 closeInput(Input i)
339 { if ( i->type == INPUT_FILE )
340     fclose(i->stream.fd);
341 
342   curin = i->parent;
343 
344   free(i);
345 }
346 
347 
348 static const char *
texfile()349 texfile()
350 { if ( curin )
351   { Input i = curin;
352 
353     while(i && i->type == INPUT_STRING)
354       i = i->parent;
355 
356     if ( i && i->type == INPUT_FILE )
357       return i->name;
358 
359     return curin->name;
360   }
361 
362   return "no input";
363 }
364 
365 
366 static int
texline()367 texline()
368 { if ( curin )
369   { Input i = curin;
370     int offset = 0;
371 
372     while(i && i->type == INPUT_STRING)
373     { offset += i->lineno - 1;
374       i = i->parent;
375     }
376 
377     if ( i && i->type == INPUT_FILE )
378       return i->lineno + offset;
379 
380     return curin->lineno;
381   }
382 
383   return -1;
384 }
385 
386 
387 static int
mygetc(Input fd)388 mygetc(Input fd)
389 { int c;
390 
391   switch(fd->type)
392   { case INPUT_FILE:
393       c = getc(fd->stream.fd);
394       break;
395     case INPUT_STRING:
396     default:
397       c = *fd->stream.string++;
398       if ( c == '\0' )
399 	c = EOF;
400       break;
401   }
402 
403   if ( c == '\n' )
404     fd->lineno++;
405 
406   return c;
407 }
408 
409 static void
myungetc(int c,Input fd)410 myungetc(int c, Input fd)
411 { switch(fd->type)
412   { case INPUT_FILE:
413       ungetc(c, fd->stream.fd);
414       break;
415     case INPUT_STRING:
416     default:
417       fd->stream.string--;
418       break;
419   }
420 
421   if ( c == '\n' )
422     fd->lineno--;
423 }
424 
425 
426 static char *
myfgets(char * buf,int size,Input fd)427 myfgets(char *buf, int size, Input fd)
428 { char *s = buf;
429 
430   for(;;)
431   { int	c = mygetc(fd);
432 
433     if ( c == EOF )
434     { if ( s == buf )
435 	return NULL;
436 
437       *s = EOS;
438       return buf;
439     } else if ( c == '\n' )
440     { *s++ = c;
441       *s = EOS;
442       return buf;
443     }
444 
445     *s++ = c;
446     if ( s >= &buf[size-1] )
447     { *s = EOS;
448       return buf;
449     }
450   }
451 }
452 
453 
/* From here on getc(), ungetc() and fgets() operate on an Input instead of
   a FILE*.  The functions above are defined before this point and thus
   still use the <stdio.h> versions. */
#undef getc
#undef ungetc
#undef fgets
#define getc(fd) mygetc(fd)
#define ungetc(c, fd) myungetc(c, fd)
#define fgets(buf, size, fd) myfgets(buf, size, fd)
460 
461 		 /*******************************
462 		 *	      STRINGS		*
463 		 *******************************/
464 
465 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Hash the NUL-terminated string `t` onto 0..buckets-1.  Each character is
taken relative to 'a' and XOR-ed into the hash at a shift that itself
depends on the preceding characters; finally the high half of the hash is
folded into the low half.  `buckets` must not be 0.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
stringHashValue(const char *t, int buckets)
{ unsigned int hash = 0;
  unsigned int rot  = 5;
  const char *p;

  for(p = t; *p; p++)
  { unsigned int code = *p;

    code -= 'a';
    hash ^= code << (rot & 0xf);
    rot  ^= code;
  }

  hash ^= hash >> 16;

  return hash % buckets;
}
481 
482 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Return a copy of the NUL-terminated string `s` in memory obtained from
malloc().  The copy is owned by the caller.

The result is never NULL: if no memory is available a message is printed
and the process exits.  The result of malloc() used to be passed to
strcpy() unchecked, so callers cannot depend on a NULL return.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

const char *
save_string(const char *s)
{ size_t len = strlen(s)+1;		/* including the terminator */
  char *cp = malloc(len);

  if ( !cp )
  { fprintf(stderr, "save_string(): not enough memory\n");
    exit(1);
  }

  memcpy(cp, s, len);

  return (const char *)cp;
}
491 
492 
493 		 /*******************************
494 		 *	 COMMAND REGISTRY	*
495 		 *******************************/
496 
#define CMD_HASH_SIZE 256		/* # buckets in cmd_table */

/* Hash table of all known commands; buckets are chained via cmd_descr.next
   and selected with stringHashValue() on the command name */
static CmdDescr cmd_table[CMD_HASH_SIZE];
500 
501 static CmdDescr
lookupCommand(const char * name)502 lookupCommand(const char *name)
503 { int v = stringHashValue(name, CMD_HASH_SIZE);
504   CmdDescr c;
505 
506   for(c = cmd_table[v]; c; c = c->next)
507   { if ( streq(c->name, name) )
508       return c;
509   }
510 
511   if ( isspace(*name) && name[1] == EOS )
512     return lookupCommand(" ");
513 
514   return NULL;
515 }
516 
517 
518 static CmdDescr
newCommand(const char * name)519 newCommand(const char *name)
520 { int v = stringHashValue(name, CMD_HASH_SIZE);
521   CmdDescr c;
522 
523   for(c = cmd_table[v]; c; c = c->next)
524   { if ( streq(c->name, name) )
525     { c->flags = 0;
526       c->arg_count = 0;
527       if ( c->arguments )
528       { free(c->arguments);
529 	c->arguments = NULL;
530       }
531       return c;
532     }
533   }
534 
535   c = malloc(sizeof(*c));
536   c->name       = save_string(name);
537   c->flags      = 0;
538   c->arg_count  = 0;
539   c->arguments  = NULL;
540   c->pre_lines  = 0;
541   c->post_lines = 0;
542   c->function   = NULL;
543   c->fname      = NULL;
544   c->next       = cmd_table[v];
545   cmd_table[v]  = c;
546 
547   return c;
548 }
549 
#define ENV_HASH_SIZE 256		/* # buckets in env_table */

/* Hash table of all known environments; buckets are chained via
   env_descr.next and selected with stringHashValue() on the name */
static EnvDescr env_table[ENV_HASH_SIZE];
553 
554 static EnvDescr
lookupEnvironment(const char * name)555 lookupEnvironment(const char *name)
556 { int v = stringHashValue(name, ENV_HASH_SIZE);
557   EnvDescr e;
558 
559   for(e = env_table[v]; e; e = e->next)
560   { if ( streq(e->name, name) )
561       return e;
562   }
563 
564   return NULL;
565 }
566 
567 
568 static EnvDescr
newEnvironment(const char * name)569 newEnvironment(const char *name)
570 { int v = stringHashValue(name, ENV_HASH_SIZE);
571   EnvDescr e;
572 
573   for(e = env_table[v]; e; e = e->next)
574   { if ( streq(e->name, name) )
575     { e->flags = 0;
576       e->arg_count = 0;
577       if ( e->arguments )
578       { free(e->arguments);
579 	e->arguments = NULL;
580       }
581       return e;
582     }
583   }
584 
585   e = malloc(sizeof(*e));
586   e->name       = save_string(name);
587   e->flags      = 0;
588   e->arg_count  = 0;
589   e->arguments  = NULL;
590   e->function   = NULL;
591   e->fname      = NULL;
592   e->next       = env_table[v];
593   env_table[v]  = e;
594 
595   return e;
596 }
597 
/* Advance the character pointer `s` past blank space.  `s` is evaluated
   several times and the expansion is a bare `while` statement, so use it
   as a statement of its own with a plain variable as argument. */
#define skipBanks(s)	while(isspace(*s)) s++
599 
600 static int
parseArgSpec(const char * fname,int lineno,char ** line,CmdArg args)601 parseArgSpec(const char *fname, int lineno, char **line, CmdArg args)
602 { int nargs = 0;
603   char *s = *line;
604 
605   skipBanks(s);
606 
607   for(;;s++)
608   { switch(*s)
609     { case '[':
610 	args[nargs].flags = 0;
611         if ( s[2] != ']' )
612 	{ warn(ERR_BAD_ARG_SPEC, fname, lineno);
613 	  return -1;
614 	}
615 	args[nargs].flags |= CA_OPTIONAL;
616 	goto arg_cont;
617       case '{':
618 	args[nargs].flags = 0;
619         if ( s[2] != '}' )
620 	{ warn(ERR_BAD_ARG_SPEC, fname, lineno);
621 	  return -1;
622 	}
623       arg_cont:
624 	switch(s[1])
625 	{ case '+':
626 	    args[nargs].flags |= CA_TEXT;
627 	    break;
628 	  case 'd':
629 	    args[nargs].flags |= CA_DIM;
630 	    break;
631 	  case '-':
632 	    break;
633 	  default:
634 	    warn(ERR_BAD_ARG_SPEC, fname, lineno);
635 	    return -1;
636 	}
637         nargs++;
638         s += 2;
639         break;
640       default:
641 	skipBanks(s);
642         *line = s;
643         return nargs;
644     }
645   }
646 }
647 
648 
649 static AnyFunc
parseFuncSpec(char ** line,const char ** fname)650 parseFuncSpec(char **line, const char **fname)
651 { char *s = *line;
652   char b[MAXFUNC];
653   char *q = b;
654 
655   skipBanks(s);
656   if ( *s != '=' )
657     return NULL;
658   s++;
659   while(isalnum(*s))
660     *q++ = *s++;
661   *q = EOS;
662   skipBanks(s);
663 
664   *line = s;
665   *fname = (const char *)save_string(b);
666 
667   return lookupFunction(b);
668 }
669 
670 
671 static int
parseEnvSpec(const char * fname,int line,char * s)672 parseEnvSpec(const char *fname, int line, char *s)
673 { char *f = ++s;
674   char tmp;
675   EnvDescr e;
676   cmd_arg args[MAXCMDARGS];		/* argument-list */
677 
678   while(isalnum(*s))
679     s++;
680   tmp = *s;
681   *s = EOS;
682   e = newEnvironment(f);
683   *s = tmp;
684 
685   if ( *s == '*' )			/* \begin{figure*} */
686   { e->flags |= CMD_MODIFY;
687     s++;
688   }
689 
690   skipBanks(s);
691   if ( *s != '}' )			/* check for } */
692   { warn(ERR_BAD_ENV_SPEC, fname, line);
693     return FALSE;
694   } else
695     s++;
696 					/* parse arguments */
697   e->arg_count = parseArgSpec(fname, line, &s, args);
698   if ( e->arg_count < 0 )
699     return FALSE;
700   e->arguments = malloc(e->arg_count*sizeof(cmd_arg));
701   memcpy(e->arguments, args, e->arg_count*sizeof(cmd_arg));
702 
703   if ( *s == '=' )			/* =function */
704   { e->function = parseFuncSpec(&s, &e->fname);
705     skipBanks(s);
706   }
707 
708   if ( *s != EOS && *s != '%' )
709   { warn(ERR_BAD_COMMAND_SPEC, fname, line);
710     return FALSE;
711   }
712 
713   return TRUE;
714 }
715 
716 
717 static int
parseCommandSpec(const char * fname,int lineno,char * line)718 parseCommandSpec(const char *fname, int lineno, char *line)
719 { char *s = line;
720 
721   while(isspace(*s))
722     s++;
723 
724   if ( *s == '%' || *s == EOS )
725     return TRUE;			/* comment-line */
726 
727   if ( *s == '{' )			/* environment */
728   { return parseEnvSpec(fname, lineno, s);
729   } else if ( *s == '\\' )		/* normal command */
730   { char *f = ++s;
731     char tmp;
732     cmd_arg args[MAXCMDARGS];		/* argument-list */
733     CmdDescr c;
734 
735     if ( isalnum(*s) )
736     { while(isalnum(*s))
737       { s++;
738       }
739     } else
740       s++;
741     tmp = *s;
742     *s = EOS;
743     c = newCommand(f);
744     *s = tmp;
745 
746     skipBanks(s);			/* spaces after command */
747     if ( *s == '*' )			/* modified */
748     { c->flags |= CMD_MODIFY;
749       s++;
750     }
751 
752     c->arg_count = parseArgSpec(fname, lineno, &s, args);
753     if ( c->arg_count < 0 )
754       return FALSE;
755     c->arguments = malloc(c->arg_count*sizeof(cmd_arg));
756     memcpy(c->arguments, args, c->arg_count*sizeof(cmd_arg));
757 
758     if ( *s == '=' )		/* associate function */
759     { c->function = parseFuncSpec(&s, &c->fname);
760     }
761 
762     if ( isdigit(*s) )		/* pre-lines */
763     { c->pre_lines = *s - '0';
764       s++;
765       skipBanks(s);
766     } else if ( *s == '%' )
767     { c->pre_lines = PRE_COMMENT;	/* %\n */
768       s++;
769       skipBanks(s);
770     }
771 
772     if ( isdigit(*s) )		/* post-lines */
773     { c->post_lines = *s - '0';
774       s++;
775       skipBanks(s);
776     }
777 
778     if ( *s != EOS && *s != '%' )
779     { warn(ERR_BAD_COMMAND_SPEC, fname, lineno);
780       return FALSE;
781     }
782 
783     return TRUE;
784   }
785 
786   warn(ERR_BAD_COMMAND_SPEC, fname, lineno);
787   return FALSE;
788 }
789 
790 
791 static int
parseCmdSpecs(const char * fname)792 parseCmdSpecs(const char *fname)
793 { char line[MAXCMD];
794   int l = 0;
795   Input fd;
796 
797   if ( (fd = openInputFile(fname)) == NULL )
798   { warn(ERR_NOCMD_SPECS, fname, 0);
799     return FALSE;
800   }
801 
802   while(fgets(line, sizeof(line), fd))
803     parseCommandSpec(fname, ++l, line);
804 
805   closeInput(fd);
806 
807   return TRUE;
808 }
809 
810 
811 		 /*******************************
812 		 *	   PARSING STUFF	*
813 		 *******************************/
814 
815 static void
getCommand(Input fd,char * buf,int size)816 getCommand(Input fd, char *buf, int size)
817 { int c;
818 
819   size--;				/* room for EOS */
820 
821   c = getc(fd);
822   if ( isalnum(c) )			/* \blabla */
823   { do
824     { if ( --size <= 0 )
825 	error(ERR_CMD_TOO_LONG, texfile(), texline());
826       *buf++ = c;
827       c = getc(fd);
828     } while(isalnum(c));
829     ungetc(c, fd);
830   } else				/* \" */
831   { *buf++ = c;
832   }
833 
834   *buf = EOS;
835 }
836 
837 
838 static void
getArgument(Input fd,int flags,char * buf,int size)839 getArgument(Input fd, int flags, char *buf, int size)
840 { int c = getc(fd);
841   int sz = size;
842 
843   if ( !(flags & F_NOSKIPBLANK) )
844   { while(isspace(c))
845       c = getc(fd);
846   }
847 
848   if ( isbegingroup(c) )		/* { */
849   { int nesting = 1; char *s = buf;
850 
851     for(;;)
852     { c = getc(fd);
853 
854       switch(CharType(c))
855       { case CM:
856 	  *s++ = c;
857 	  continue;
858 	case BG:
859 	  nesting++;
860 	  break;
861 	case EG:
862 	  nesting--;
863 	  break;
864 	case SP:
865 	  while(isspace(c))
866 	    c = getc(fd);
867 	  ungetc(c, fd);
868 	  c = ' ';
869 	  break;
870 	case EF:
871 	  error(ERR_UNEXPECTED_EOF, texfile(), texline());
872       }
873 
874       if ( nesting > 0 )
875       { if ( --sz < 0 )
876 	{ buf[size-1] = EOS;
877 	  texarg = buf;
878 	  error(ERR_RUNAWAY_ARGUMENT, texfile(), texline());
879 	}
880 	*s++ = c;
881       } else
882 	break;
883     }
884 
885     *s++ = EOS;
886   } else if ( iscommand(c) )		/* \ */
887   { *buf++ = c;
888     size--;
889     getCommand(fd, buf, size);
890   } else
891   { *buf++ = c;
892     *buf = EOS;
893   }
894 }
895 
896 
897 static int
getOptionalArgument(Input fd,int flags,char * buf,int size)898 getOptionalArgument(Input fd, int flags, char *buf, int size)
899 { int c = getc(fd);
900   int sz = size;
901 
902   if ( c == '[' )
903   { int nesting = 1; char *s = buf;
904 
905     for(;;)
906     { c = getc(fd);
907 
908       switch(CharType(c))
909       { case CM:
910 	  *s++ = c;
911 	  continue;
912 	case BG:
913 	  nesting++;
914 	  break;
915 	case EG:
916 	  nesting--;
917 	  break;
918 	case EF:
919 	  error(ERR_UNEXPECTED_EOF, texfile(), texline());
920 	default:
921 	  switch(c)
922 	  { case '[':
923 	      nesting++;
924 	      break;
925 	    case ']':
926 	      nesting--;
927 	      break;
928 	  }
929       }
930 
931       if ( c != ']' || nesting > 0 )
932       { if ( --sz < 0 )
933 	{ buf[size-1] = EOS;
934 	  texarg = buf;
935 	  error(ERR_RUNAWAY_ARGUMENT, texfile(), texline());
936 	}
937 	*s++ = c;
938       } else
939 	break;
940     }
941 
942     *s++ = EOS;
943     return TRUE;
944   } else
945     ungetc(c, fd);
946 
947   return FALSE;
948 }
949 
950 
951 static void
getDimension(Input fd,int flags,char * buf,int size)952 getDimension(Input fd, int flags, char *buf, int size)
953 { int c = getc(fd);
954 
955   if ( !(flags & F_NOSKIPBLANK) )
956   { while(isspace(c))
957       c = getc(fd);
958   }
959 
960   if ( isbegingroup(c) )
961   { ungetc(c, fd);
962 
963     getArgument(fd, flags, buf, size);
964   } else if ( iscommand(c) )
965   { buf[0] = c;
966     getCommand(fd, &buf[1], size-1);
967   } else if ( isdigit(c) )
968   { char *s = buf;
969     do
970     { *s++ = c;
971       c = getc(fd);
972     } while(isdigit(c) || c == '.' );
973     if ( isalnum(c) )
974     { *s++ = c;
975       c = getc(fd);
976     }
977     if ( isalnum(c) )
978     { *s++ = c;
979       *s = EOS;
980       return;
981     }
982 
983     error(ERR_BAD_DIM, texfile(), texline());
984   }
985 }
986 
987 
988 static void
parseCommand(Input fd,const char * name,CallBack func,void * ctx)989 parseCommand(Input fd, const char *name, CallBack func, void *ctx)
990 { CmdDescr cmd = lookupCommand(&name[1]); /* skip \ */
991   command g;
992   token t;
993   int n, c;
994   int flags = 0;
995 
996   if ( !cmd )
997   { fprintf(stderr, "[WARNING: Unknown command: %s]\n", name);
998     cmd = newCommand(&name[1]);
999   }
1000 
1001   g.command = cmd;
1002   g.flags   = 0;
1003 
1004   if ( cmd->name[1] == EOS && !isalnum(cmd->name[0]) )
1005     flags |= F_NOSKIPBLANK;
1006 
1007   c = getc(fd);
1008   if ( cmd->arg_count > 0 )
1009     g.arguments = alloca(sizeof(char *) * cmd->arg_count);
1010   else
1011     g.arguments = NULL;
1012 
1013   if ( !(flags & F_NOSKIPBLANK) )
1014   { while(isspace(c))
1015       c = getc(fd);
1016   }
1017 
1018   if ( cmd->flags & CMD_MODIFY && c == '*' ) /* \section* (modified) */
1019   { g.flags |= CMD_MODIFY;
1020     c = getc(fd);
1021     if ( !(flags & F_NOSKIPBLANK) )
1022     { while(isspace(c))
1023 	c = getc(fd);
1024     }
1025   }
1026   ungetc(c, fd);
1027 
1028   for(n=0; n<cmd->arg_count; n++)	/* process the arguments */
1029   { char abuf[MAXARG];
1030 
1031     if ( cmd->arguments[n].flags & CA_OPTIONAL )
1032     { if ( getOptionalArgument(fd, flags, abuf, sizeof(abuf)) )
1033       { g.arguments[n] = alloca(strlen(abuf)+1);
1034 	strcpy(g.arguments[n], abuf);
1035       } else
1036 	g.arguments[n] = NULL;
1037     } else if ( cmd->arguments[n].flags & CA_DIM )
1038     { getDimension(fd, flags, abuf, sizeof(abuf));
1039       g.arguments[n] = alloca(strlen(abuf)+1);
1040       strcpy(g.arguments[n], abuf);
1041     } else
1042     { getArgument(fd, flags, abuf, sizeof(abuf));
1043       g.arguments[n] = alloca(strlen(abuf)+1);
1044       strcpy(g.arguments[n], abuf);
1045     }
1046   }
1047 
1048   if ( cmd->function )
1049   { (*cmd->function)(&g, fd, func, ctx);
1050   } else
1051   { t.type = TOK_CMD;
1052     t.value.cmd = &g;
1053     (*func)(&t, ctx);
1054   }
1055 }
1056 
1057 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1058 Handle verbatim environment
1059 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1060 
1061 static void
env_verbatim(Environment e,Input fd,CallBack func,void * ctx)1062 env_verbatim(Environment e, Input fd, CallBack func, void *ctx)
1063 { char end[MAXCMD];
1064   char buf[MAXVERBATIM];
1065   int left = MAXVERBATIM-1;
1066   char *s = buf;
1067   char *ms;
1068   int el;
1069   token t;
1070 
1071   sprintf(end, "\\end{%s}", e->environment->name);
1072   el = strlen(end);
1073   ms = buf+el;
1074 
1075   for(;;)
1076   { if ( --left == 0 )
1077       error(ERR_VERBATIM_TOO_LONG, texfile(), texline());
1078     *s++ = getc(fd);
1079 
1080     if ( s >= ms && s[-el] == '\\' && strncmp(&s[-el], end, el) == 0 )
1081     { s[-el] = EOS;
1082       t.type = TOK_VERBATIM;
1083       t.context = (char *)e->environment->name;
1084       t.value.string = buf;
1085       (*func)(&t, ctx);
1086       return;
1087     }
1088   }
1089 }
1090 
1091 
1092 static void
env_normal(Environment e,Input fd,CallBack func,void * ctx)1093 env_normal(Environment e, Input fd, CallBack func, void *ctx)
1094 { token t;
1095 
1096   t.type = TOK_BEGIN_ENV;
1097   t.value.env = e;
1098   (*func)(&t, ctx);
1099 }
1100 
1101 
1102 static void
cmd_normal(Command g,Input fd,CallBack func,void * ctx)1103 cmd_normal(Command g, Input fd, CallBack func, void *ctx)
1104 { token t;
1105 
1106   t.type = TOK_CMD;
1107   t.value.cmd = g;
1108   (*func)(&t, ctx);
1109 }
1110 
1111 
1112 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1113 handle \begin command
1114 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1115 
1116 static void
cmd_begin(Command g,Input fd,CallBack func,void * ctx)1117 cmd_begin(Command g, Input fd, CallBack func, void *ctx)
1118 { char ename[MAXCMD];
1119   int enl, n;
1120   char *args[MAXCMDARGS];
1121   environment e;
1122   EnvDescr env;
1123   token t;
1124 
1125   e.flags = 0;
1126   e.arguments = args;
1127   getArgument(fd, 0, ename, sizeof(ename));
1128   enl = strlen(ename);
1129 
1130   if ( ename[enl-1] == '*' )		/* check for modified env */
1131   { ename[enl-1] = EOS;
1132     e.flags |= CMD_MODIFY;
1133   }
1134 
1135 					/* find the environment */
1136   if ( !(env = lookupEnvironment(ename)) )
1137   { fprintf(stderr, "WARNING: undefined environment: %s\n", ename);
1138     env = newEnvironment(ename);
1139   }
1140   e.environment = env;
1141 
1142   for(n=0; n<env->arg_count; n++)	/* process the arguments */
1143   { char abuf[MAXARG];
1144 
1145     if ( env->arguments[n].flags & CA_OPTIONAL )
1146     { if ( getOptionalArgument(fd, 0, abuf, sizeof(abuf)) )
1147       { e.arguments[n] = alloca(strlen(abuf)+1);
1148         strcpy(e.arguments[n], abuf);
1149       } else
1150 	e.arguments[n] = NULL;
1151     } else
1152     { getArgument(fd, 0, abuf, sizeof(abuf));
1153       e.arguments[n] = alloca(strlen(abuf)+1);
1154       strcpy(e.arguments[n], abuf);
1155     }
1156   }
1157 
1158   if ( env->function )
1159   { (*env->function)(&e, fd, func, ctx);
1160   } else
1161   { t.type = TOK_BEGIN_ENV;
1162     t.value.env = &e;
1163     (*func)(&t, ctx);
1164   }
1165 }
1166 
1167 
1168 static void
cmd_end(Command g,Input fd,CallBack func,void * ctx)1169 cmd_end(Command g, Input fd, CallBack func, void *ctx)
1170 { token t;
1171 
1172   t.type = TOK_END_ENV;
1173   t.value.string = g->arguments[0];	/* name of the environment */
1174   (*func)(&t, ctx);
1175 }
1176 
1177 
1178 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1179 handle \verb command
1180 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1181 
1182 void
cmd_verb(Command g,Input fd,CallBack func,void * ctx)1183 cmd_verb(Command g, Input fd, CallBack func, void *ctx)
1184 { char buf[MAXVERB];
1185   char *s = buf;
1186   int delim = getc(fd);
1187   int c = getc(fd);
1188   token t;
1189   char ds[2];
1190 
1191   ds[0] = delim;
1192   ds[1] = EOS;
1193   while(c != delim && c != EOF)
1194   { *s++ = c;
1195     c = getc(fd);
1196   }
1197   if ( c == EOF )
1198     error(ERR_UNEXPECTED_EOF, texfile(), texline());
1199   *s++ = EOS;
1200 
1201   t.type = TOK_VERB;
1202   t.context = ds;
1203   t.value.string  = buf;
1204   (*func)(&t, ctx);
1205 }
1206 
/* Build one entry of active_list: a name and the function bound to it */
#define ACTIVE(n, f) { n, f }

typedef struct
{ char	  *name;			/* name searched by lookupFunction() */
  AnyFunc  function;			/* associated function */
} active, *Active;

/* Table mapping handler names to the C functions implementing them.
   lookupFunction() scans it linearly; the entry with a NULL name
   terminates the table and must stay last.
*/
static active active_list[] =
{ ACTIVE("verb",	cmd_verb),
  ACTIVE("begin",	cmd_begin),
  ACTIVE("end",		cmd_end),
  ACTIVE("item",	cmd_normal),
  ACTIVE("prolog",	cmd_prolog),
  ACTIVE("verbatim",	env_verbatim),
  ACTIVE("list",	env_normal),
  ACTIVE("float",	env_normal),
  ACTIVE(NULL,		NULL)		/* end-of-table marker */
};
1225 
1226 static AnyFunc
lookupFunction(const char * name)1227 lookupFunction(const char *name)
1228 { Active a = active_list;
1229 
1230   for( ; a->name; a++ )
1231   { if ( streq(name, a->name) )
1232       return a->function;
1233   }
1234 
1235   error(ERR_UNDEF_FUNCTION, texfile(), texline());
1236   return NULL;
1237 }
1238 
1239 
1240 static void
parseMath(Input fd,CallBack func,void * ctx)1241 parseMath(Input fd, CallBack func, void *ctx)
1242 { int c = getc(fd);
1243   char buf[MAXMATH];
1244   char *s = buf;
1245   token t;
1246   int nesting = 0;
1247   int left = MAXMATH-1;
1248 
1249   if ( CharType(c) == MM )		/* $$ */
1250   { t.type = TOK_MATH_ENV;
1251   } else
1252   { t.type = TOK_MATH;
1253     ungetc(c, fd);
1254   }
1255 
1256   for(;;)
1257   { c = getc(fd);
1258 
1259     switch(CharType(c))
1260     { case BG:
1261 	nesting++;
1262 	break;
1263       case EG:
1264 	nesting--;
1265 	break;
1266       case SP:
1267 	while(isspace(c))
1268 	  c = getc(fd);
1269 	ungetc(c, fd);
1270 	c = ' ';
1271 	break;
1272       case EF:
1273 	error(ERR_UNEXPECTED_EOF, texfile(), texline());
1274     }
1275 
1276     if ( c != '$' || nesting > 0 )
1277     { if ( --left < 0 )
1278       { texarg = buf;
1279 	error(ERR_RUNAWAY_MATH, texfile(), texline());
1280       }
1281       *s++ = c;
1282     } else
1283       break;
1284   }
1285 
1286   *s = EOS;
1287   if ( t.type == TOK_MATH_ENV )
1288   { c = getc(fd);
1289     if ( c != '$' )
1290       error(ERR_BAD_MATH_ENV_CLOSURE, texfile(), texline());
1291   }
1292 
1293   t.value.string = buf;
1294   (*func)(&t, ctx);
1295 }
1296 
1297 
1298 static void
parseTeX(Input fd,CallBack func,void * ctx)1299 parseTeX(Input fd, CallBack func, void *ctx)
1300 { token t;
1301 
1302   int c = getc(fd);
1303 
1304   for(;;)
1305   { switch(CharType(c))
1306     { case SP:				/* blank space */
1307       { int lines = 0;
1308 
1309 	do
1310 	{ if ( c == '\n' )
1311 	    lines++;
1312 	  c = getc(fd);
1313 	} while(isspace(c));
1314 
1315 	if ( lines >= 2 )
1316 	{ t.type = TOK_PAR;
1317 	  t.value.string = NULL;
1318 
1319 	  (*func)(&t, ctx);
1320 	} else if ( lines == 1 )
1321 	{ t.type = TOK_LINE;
1322 	  t.value.string = NULL;
1323 
1324 	  (*func)(&t, ctx);
1325 	} else
1326 	{ t.type = TOK_SPACE;
1327 	  t.value.string = NULL;
1328 
1329 	  (*func)(&t, ctx);
1330 	}
1331 
1332 	break;
1333       }
1334       case BG:				/* { */
1335       { char buf[2];
1336 
1337 	buf[0] = c;
1338 	buf[1] = EOS;
1339 
1340 	t.type = TOK_BEGIN_GROUP;
1341 	t.value.string = buf;
1342 	(*func)(&t, ctx);
1343 	c = getc(fd);
1344 
1345 	break;
1346       }
1347       case EG:				/* } */
1348       { char buf[2];
1349 
1350 	buf[0] = c;
1351 	buf[1] = EOS;
1352 
1353 	t.type = TOK_END_GROUP;
1354 	t.value.string = buf;
1355 	(*func)(&t, ctx); c = getc(fd);
1356 
1357 	break;
1358       }
1359       case CM:				/* \command */
1360       { char buf[MAXCMD];
1361 
1362 	buf[0] = c;
1363 	getCommand(fd, &buf[1], MAXCMD-1);
1364 
1365 	parseCommand(fd, buf, func, ctx);
1366 	c = getc(fd);
1367 
1368 	break;
1369       }
1370       case MM:
1371       { parseMath(fd, func, ctx);
1372 	c = getc(fd);
1373 
1374         break;
1375       }
1376       case SC:				/* % comment */
1377       { do
1378 	{ c = getc(fd);
1379 	} while( c != EOF && c != '\n' );
1380 
1381 	while(isspace(c))
1382 	  c = getc(fd);
1383 
1384 	break;
1385       }
1386       case EF:				/* end-of-file */
1387       { t.type = TOK_EOF;
1388 	t.value.string = NULL;
1389 	(*func)(&t, ctx);
1390 
1391 	return;
1392       }
1393       case TD:				/* & */
1394       case NB:				/* ~ */
1395       case BQ:				/* ` */
1396       case SQ:				/* ' */
1397       { char buf[2];
1398 
1399 	buf[0] = c;
1400 	buf[1] = EOS;
1401 	t.type = TOK_WORD;
1402 	t.value.string = buf;
1403 	(*func)(&t, ctx);
1404 	c = getc(fd);
1405 
1406 	break;
1407       }
1408       default:				/* default: begin a word */
1409       { char buf[MAXWORD];
1410 	char *s = buf;
1411 
1412 	do
1413 	{ *s++ = c;
1414 	  assert(s < buf+MAXWORD);
1415 	  c = getc(fd);
1416 	} while(!wbreak(c));
1417 	*s = EOS;
1418 	t.type = TOK_WORD;
1419 	if ( streq(buf, "[]") )
1420 	  t.value.string = "\\[]";
1421 	else
1422 	  t.value.string = buf;
1423 	(*func)(&t, ctx);
1424 
1425 	break;
1426       }
1427     }
1428   }
1429 }
1430 
1431 
1432 		 /*******************************
1433 	         *           MAIN LOOP          *
1434 		 *******************************/
1435 
1436 static int
parseTeXFile(const char * file,CallBack func,void * ctx)1437 parseTeXFile(const char *file, CallBack func, void *ctx)
1438 { Input fd;
1439 
1440   if ( (fd = openInputFile(file)) == NULL )
1441   { fprintf(stderr, "Can't open %s: %s", file, strerror(errno));
1442 
1443     return FALSE;
1444   }
1445   parseTeX(fd, func, ctx);
1446 
1447   closeInput(fd);
1448 
1449   return TRUE;
1450 }
1451 
1452 
1453 
1454 		 /*******************************
1455 		 *	    HTML OUTPUT         *
1456 		 *******************************/
1457 
/* Values for ppcontext.verbatim, interpreted by output() */
#define VERB_NORMAL	0		/* collapse white space/newlines */
#define VERB_VERBATIM	1		/* copy characters unchanged */
#define VERB_PRE	2		/* as verbatim; escape < > and & */

/* State of the pretty printer (output(), nl(), outputBlank() and
   put_token()).
*/
typedef struct
{ int	envnesting;			/* nesting of begin/end */
  int   last_type;			/* type of previous token */
  int	line_pos;			/* position in line */
  int   newlines;			/* # consecutive newlines */
  int	spaces;				/* # consecutive spaces */
  int	verbatim;			/* verbatim output (VERB_*) */
  int   left_margin;			/* left margin for text */
  int   right_margin;			/* right margin for text */
  FILE *fd;				/* output descriptor */
} ppcontext, *PPContext;
1473 
1474 
1475 static void
output(PPContext pp,const char * fmt,...)1476 output(PPContext pp, const char *fmt, ...)
1477 { va_list args;
1478   char buf[MAXOUTPUT];
1479   char *s = buf;
1480 
1481   va_start(args, fmt);
1482   vsprintf(buf, fmt, args);
1483   va_end(args);
1484 
1485   if ( pp->verbatim )
1486   { for(;;s++)
1487     { int c;
1488 
1489       switch((c = *s))
1490       { case EOS:
1491 	  return;
1492 	case '\n':
1493 	  putc(c, pp->fd);
1494 	  pp->line_pos = 0;
1495 	  pp->spaces++;
1496 	  break;
1497 	case '\t':
1498 	  putc(c, pp->fd);
1499 	  pp->line_pos |= 0x7;
1500 	  pp->line_pos++;
1501 	  pp->spaces++;
1502 	  break;
1503 	case ' ':
1504 	  pp->spaces++;
1505 	  putc(c, pp->fd);
1506 	  pp->line_pos++;
1507 	  break;
1508 	default:
1509 	  pp->spaces = 0;
1510 	  if ( pp->verbatim == VERB_PRE	)
1511 	  { switch(c)
1512 	    { case '<':
1513 		fputs("&lt;", pp->fd);
1514 	        break;
1515 	      case '>':
1516 		fputs("&gt;", pp->fd);
1517 	        break;
1518 	      case '&':
1519 		fputs("&amp;", pp->fd);
1520 	        break;
1521 	      default:
1522 		putc(c, pp->fd);
1523 	    }
1524 	  } else
1525 	    putc(c, pp->fd);
1526 	  pp->line_pos++;
1527       }
1528     }
1529   } else
1530   { for(;;s++)
1531     { int c;
1532 
1533       switch((c = *s))
1534       { case EOS:
1535 	  return;
1536 	case '\n':
1537 	  if ( ++pp->newlines <= 2 )
1538 	    putc(c, pp->fd);
1539 	  pp->line_pos = 0;
1540 	  pp->spaces = 1;
1541 	  break;
1542 	case '\t':
1543 	  c = ' ';
1544 	case ' ':
1545 	  pp->newlines = 0;
1546 	  if ( ++pp->spaces <= 1 )
1547 	  { putc(c, pp->fd);
1548 	    pp->line_pos++;
1549 	  }
1550 	  break;
1551 	default:
1552 	  pp->newlines = 0;
1553 	  pp->spaces = 0;
1554 	  putc(c, pp->fd);
1555 	  pp->line_pos++;
1556       }
1557     }
1558   }
1559 }
1560 
1561 
1562 static void
nl(PPContext pp)1563 nl(PPContext pp)
1564 { int spaces = pp->left_margin % 8;
1565   int tabs   = pp->left_margin / 8;
1566   int n;
1567 
1568   output(pp, "\n");
1569   for(n=0; n<tabs; n++)
1570     output(pp, "\t");
1571   for(n=0; n<spaces; n++)
1572     output(pp, " ");
1573 }
1574 
1575 
1576 void
outputBlank(PPContext pp)1577 outputBlank(PPContext pp)
1578 { switch(pp->last_type)
1579   { case TOK_SPACE:
1580       if ( pp->newlines == 0 )
1581 	output(pp, " ");
1582       break;
1583     case TOK_LINE:
1584       if ( pp->newlines < 1 )
1585 	output(pp, "\n");
1586       break;
1587   }
1588 }
1589 
1590 
/* Short names for the token types, printed by put_token() as "[name]"
   when debugging is enabled.  The table is indexed by the token type, so
   the order presumably has to match the numeric values of the TOK_*
   constants -- TODO confirm against their definition.
*/
static char *tok_names[] =
{ "CMD", "BG", "EG", "BE", "EE", "VERB",
  "VERBATIM", "$", "$$", "PAR", "W", "S", "L", "EOF"
};
1595 
1596 void
put_token(Token t,void * ctx)1597 put_token(Token t, void *ctx)
1598 { PPContext pp = ctx;
1599   static CmdDescr CMD_BEGIN, CMD_END;
1600 
1601   if ( !CMD_BEGIN )
1602   { CMD_BEGIN = lookupCommand("begin");
1603     CMD_END   = lookupCommand("end");
1604   }
1605 
1606   DEBUG(1, output(pp, "[%s]", tok_names[t->type]));
1607 
1608   switch(t->type)
1609   { case TOK_CMD:
1610     { Command g = t->value.cmd;
1611       int n;
1612       int args_printed = 0;
1613 
1614       outputBlank(pp);
1615       if ( g->command->pre_lines == PRE_COMMENT )
1616       { output(pp, "%\n");
1617       } else
1618       { while(pp->newlines < g->command->pre_lines)
1619 	  output(pp, "\n");
1620       }
1621       output(pp, "\\%s", g->command->name);
1622       if ( g->flags & CMD_MODIFY )
1623 	output(pp, "*");
1624       for(n=0; n<g->command->arg_count; n++)
1625       { if ( g->command->arguments[n].flags & CA_OPTIONAL )
1626 	{ if ( g->arguments[n] )
1627 	  { output(pp, "[%s]", g->arguments[n]);
1628 	    args_printed++;
1629 	  }
1630 	} else
1631 	{ output(pp, "{%s}", g->arguments[n]);
1632 	  args_printed++;
1633 	}
1634       }
1635       if ( !args_printed )
1636       { if ( isalnum(g->command->name[strlen(g->command->name)-1]) )
1637 	  output(pp, " ");
1638       }
1639       while(pp->newlines < g->command->post_lines)
1640 	output(pp, "\n");
1641 
1642       break;
1643     }
1644     case TOK_BEGIN_ENV:
1645     { Environment e = t->value.env;
1646       int n;
1647 
1648       outputBlank(pp);
1649       while(pp->newlines < CMD_BEGIN->pre_lines)
1650 	output(pp, "\n");
1651       output(pp, "\\begin{%s", e->environment->name);
1652       if ( e->flags & CMD_MODIFY )
1653 	output(pp, "*");
1654       output(pp, "}");
1655       for(n=0; n<e->environment->arg_count; n++)
1656       { if ( e->environment->arguments[n].flags & CA_OPTIONAL )
1657 	{ if ( e->arguments[n] )
1658 	    output(pp, "[%s]", e->arguments[n]);
1659 	} else
1660 	{ output(pp, "{%s}", e->arguments[n]);
1661 	}
1662       }
1663       while(pp->newlines < CMD_BEGIN->post_lines)
1664 	output(pp, "\n");
1665 
1666       break;
1667     }
1668     case TOK_END_ENV:
1669     { outputBlank(pp);
1670       while(pp->newlines < CMD_END->pre_lines)
1671 	output(pp, "\n");
1672       output(pp, "\\begin{%s}", t->value.string);
1673       while(pp->newlines < CMD_END->post_lines)
1674 	output(pp, "\n");
1675 
1676       break;
1677     }
1678     case TOK_BEGIN_GROUP:
1679       outputBlank(pp);
1680       output(pp, "{");
1681       break;
1682     case TOK_END_GROUP:
1683       outputBlank(pp);
1684       output(pp, "}");
1685       break;
1686     case TOK_MATH:
1687       outputBlank(pp);
1688       output(pp, "$%s$", t->value.string);
1689       break;
1690     case TOK_MATH_ENV:
1691       outputBlank(pp);
1692       output(pp, "$$%s$$", t->value.string);
1693       break;
1694     case TOK_VERB:
1695       outputBlank(pp);
1696       pp->verbatim = VERB_VERBATIM;
1697       output(pp, "\\verb%s%s%s", t->context, t->value.string, t->context);
1698       pp->verbatim = VERB_NORMAL;
1699       break;
1700     case TOK_VERBATIM:
1701 
1702       while( pp->newlines < CMD_BEGIN->pre_lines )
1703 	output(pp, "\n");
1704       output(pp, "\\begin{%s}", t->context);
1705       pp->verbatim = VERB_VERBATIM;
1706       output(pp, "%s", t->value.string);
1707       pp->verbatim = VERB_NORMAL;
1708       output(pp, "\\end{%s}", t->context);
1709       while( pp->newlines < CMD_BEGIN->post_lines )
1710 	output(pp, "\n");
1711       break;
1712     case TOK_PAR:
1713       output(pp, "\n\n");
1714       break;
1715     case TOK_WORD:
1716     { int pendingblank;
1717 
1718       if ( pp->last_type == TOK_LINE )
1719 	pp->last_type = TOK_SPACE;
1720       pendingblank = (pp->last_type == TOK_SPACE);
1721       outputBlank(pp);			/* as space! */
1722       if ( pendingblank &&		/* no blanks in input: concatenate! */
1723 	   (int)strlen(t->value.string) + pp->line_pos > pp->right_margin )
1724 	nl(pp);
1725       output(pp, "%s", t->value.string);
1726       break;
1727     }
1728     case TOK_LINE:
1729     case TOK_SPACE:
1730       break;
1731     case TOK_EOF:
1732       output(pp, "\n");
1733       break;
1734     default:
1735       assert(0);
1736   }
1737 
1738   pp->last_type = t->type;
1739 }
1740 
1741 #ifdef TEST
1742 
1743 static void
error(int eno,const char * file,int line)1744 error(int eno, const char *file, int line)
1745 { fprintf(stderr, "ERROR: %s:%d: %s\n", file, line, tex_error_strings[eno]);
1746 
1747   exit(1);
1748 }
1749 
1750 
1751 int
main(int argc,char ** argv)1752 main(int argc, char **argv)
1753 { parseCmdSpecs("cmd.spec");
1754 
1755   if ( argc == 2 )
1756   { ppcontext pp;
1757 
1758     pp.envnesting   = 0;
1759     pp.last_type    = TOK_EOF;
1760     pp.line_pos     = 0;
1761     pp.newlines     = 0;
1762     pp.spaces       = 0;
1763     pp.verbatim     = FALSE;
1764     pp.left_margin  = 0;
1765     pp.right_margin = 72;
1766 
1767     parseTeXFile(argv[1], put_token, &pp);
1768   }
1769 
1770   exit(0);
1771 }
1772 
1773 #endif /*TEST*/
1774 
1775 #ifdef __SWI_PROLOG__
1776 
1777 		 /*******************************
1778 		 *	 PROLOG CONNECTION	*
1779 		 *******************************/
1780 
1781 #include <SWI-Prolog.h>
1782 
1783 static int build_list(Token t, void *context);
1784 
/* State used by build_list() while turning the token stream into a
   Prolog list.  `list` is the (open) tail being extended; on entering a
   group or environment the current tail is pushed on `stack` and popped
   again by popStack() when it is closed.
*/
typedef struct
{ term_t list;				/* list we are working on */
  term_t head;				/* head (tmp term ref) */
  int    envnesting;			/* depth of stack */
  int	 prev_type0;			/* type of previous token */
  int	 prev_type1;			/* type of token before that */
  term_t stack[MAXENVNESTING];		/* Pushed environment */
} pl_context, *PlContext;
1793 
1794 
/* Functor and atom handles used to build and recognise token terms.
   They are filled in by initPrologConstants().
*/
static functor_t FUNCTOR_verb1;		/* verb/1 */
static functor_t FUNCTOR_verb2;		/* verb/2 */
static functor_t FUNCTOR_verbatim2;	/* verbatim/2 */
static functor_t FUNCTOR_verbatim1;	/* verbatim/1 */
static functor_t FUNCTOR_pre1;		/* pre/1 */
static functor_t FUNCTOR_dot2;		/* ./2 */
static functor_t FUNCTOR_brace1;	/* {}/1 */
static functor_t FUNCTOR_cmd1;		/* \/1 */
static functor_t FUNCTOR_cmd2;		/* \/2 */
static functor_t FUNCTOR_cmd3;		/* \/3 */
static functor_t FUNCTOR_env2;		/* env/2 */
static functor_t FUNCTOR_env3;		/* env/3 */
static functor_t FUNCTOR_env4;		/* env/4 */
static functor_t FUNCTOR_math1;		/* $/1 */
static functor_t FUNCTOR_mathenv1;	/* $$/1 */
static functor_t FUNCTOR_html1;		/* html/1 */
static functor_t FUNCTOR_html3;		/* html/3 */
static functor_t FUNCTOR_nospace1;	/* nospace/1 */
static atom_t	 ATOM_begin_group;	/* '\{' */
static atom_t	 ATOM_end_group;	/* '\}' */
static atom_t	 ATOM_nbsp;		/* '~' */
static atom_t	 ATOM_par;		/* 'par' */
static atom_t	 ATOM_star;		/* * */
static atom_t	 ATOM_minus;		/* - */
static atom_t	 ATOM_space;		/* ' ' */
static atom_t	 ATOM_nl;		/* '\n' */
#ifndef ATOM_nil			/* only if not already a macro */
static atom_t	 ATOM_nil;		/* [] */
#endif
static atom_t	 ATOM_true;		/* true */
static atom_t	 ATOM_false;		/* false */

#ifndef ATOM_dot			/* fallback: create '.' on use */
#define ATOM_dot PL_new_atom(".")
#endif
1830 
1831 static void
initPrologConstants()1832 initPrologConstants()
1833 { FUNCTOR_verb2     = PL_new_functor(PL_new_atom("verb"), 2);
1834   FUNCTOR_verb1     = PL_new_functor(PL_new_atom("verb"), 1);
1835   FUNCTOR_verbatim2 = PL_new_functor(PL_new_atom("verbatim"), 2);
1836   FUNCTOR_verbatim1 = PL_new_functor(PL_new_atom("verbatim"), 1);
1837   FUNCTOR_pre1	    = PL_new_functor(PL_new_atom("pre"), 1);
1838   FUNCTOR_dot2	    = PL_new_functor(ATOM_dot, 2);
1839   FUNCTOR_brace1    = PL_new_functor(PL_new_atom("{}"), 1);
1840   FUNCTOR_cmd1      = PL_new_functor(PL_new_atom("\\"), 1);
1841   FUNCTOR_cmd2      = PL_new_functor(PL_new_atom("\\"), 2);
1842   FUNCTOR_cmd3      = PL_new_functor(PL_new_atom("\\"), 3);
1843   FUNCTOR_env2      = PL_new_functor(PL_new_atom("env"), 2);
1844   FUNCTOR_env3      = PL_new_functor(PL_new_atom("env"), 3);
1845   FUNCTOR_env4      = PL_new_functor(PL_new_atom("env"), 4);
1846   FUNCTOR_math1	    = PL_new_functor(PL_new_atom("$"), 1);
1847   FUNCTOR_mathenv1  = PL_new_functor(PL_new_atom("$$"), 1);
1848   FUNCTOR_html1     = PL_new_functor(PL_new_atom("html"), 1);
1849   FUNCTOR_html3     = PL_new_functor(PL_new_atom("html"), 3);
1850   FUNCTOR_nospace1  = PL_new_functor(PL_new_atom("nospace"), 1);
1851 
1852   ATOM_begin_group  = PL_new_atom("\\{");
1853   ATOM_end_group    = PL_new_atom("\\}");
1854   ATOM_par	    = PL_new_atom("par");
1855   ATOM_nbsp	    = PL_new_atom("~");
1856   ATOM_star	    = PL_new_atom("*");
1857   ATOM_minus	    = PL_new_atom("-");
1858   ATOM_space	    = PL_new_atom(" ");
1859   ATOM_nl	    = PL_new_atom("\n");
1860 #ifndef ATOM_nil
1861   ATOM_nil	    = PL_new_atom("[]");
1862 #endif
1863   ATOM_true	    = PL_new_atom("true");
1864   ATOM_false	    = PL_new_atom("false");
1865 }
1866 
/* Pretty-print context used by pl_put_tex_token() and by the debug
   output of build_list() */
static ppcontext ppctx;
1868 
1869 static foreign_t
pl_put_tex_token(term_t term)1870 pl_put_tex_token(term_t term)
1871 { token t;
1872   atom_t atom;
1873   functor_t f;
1874   static int last_is_word = FALSE;
1875 
1876   t.type = -1;
1877 
1878   if ( PL_get_atom(term, &atom) )
1879   { if ( atom == ATOM_begin_group )
1880     { t.type = TOK_BEGIN_GROUP;
1881       t.value.string = "{";
1882     } else if ( atom == ATOM_end_group )
1883     { t.type = TOK_END_GROUP;
1884       t.value.string = "}";
1885     } else if ( atom == ATOM_space )
1886     { t.type = TOK_SPACE;
1887       t.value.string = " ";
1888     } else if ( atom == ATOM_nl )
1889     { t.type = TOK_LINE;
1890       t.value.string = "\n";
1891     } else
1892     { if ( last_is_word )		/* regenerate the space tokens */
1893       { t.type = TOK_SPACE;
1894 	t.value.string = " ";
1895 
1896 	put_token(&t, &ppctx);
1897       } else
1898 	last_is_word = TRUE;
1899       t.type = TOK_WORD;
1900       t.value.string = (char *)PL_atom_chars(atom);
1901     }
1902   } else if ( PL_get_functor(term, &f) )
1903   { term_t arg = PL_new_term_ref();
1904     char *s;
1905 
1906     if ( f == FUNCTOR_verb2 || f == FUNCTOR_verbatim2 )
1907     { if ( PL_get_arg(1, term, arg) && PL_get_chars(arg, &s, CVT_ATOMIC) )
1908       { t.context = s;
1909 
1910 	if ( PL_get_arg(2, term, arg) && PL_get_chars(arg, &s, CVT_ATOMIC) )
1911 	{ t.value.string = s;
1912 	  t.type = (f == FUNCTOR_verb2 ? TOK_VERB : TOK_VERBATIM);
1913 	}
1914       }
1915     } else if ( f == FUNCTOR_cmd1 )
1916     { char *cname;
1917 
1918       if ( PL_get_arg(1, term, arg) && PL_get_chars(arg, &cname, CVT_ATOMIC) )
1919       { command g;
1920 
1921 	t.type = TOK_CMD;
1922 	t.value.cmd = &g;
1923 	g.flags = 0;
1924 	g.arguments = 0;
1925 
1926 	g.command = lookupCommand(cname);
1927 	if ( !g.command )
1928 	{ fprintf(stderr, "[WARNING: Undefined command: %s]\n", cname);
1929 	  g.command = newCommand(cname);
1930 	}
1931       }
1932     } else if ( f == FUNCTOR_cmd2 || f == FUNCTOR_cmd3 ||
1933 		f == FUNCTOR_env2 || f == FUNCTOR_env3 )
1934     { char *cname;
1935       command g;
1936       environment e;
1937       int isenv = (f == FUNCTOR_env2 || f == FUNCTOR_env3);
1938       int ismod = (f == FUNCTOR_cmd3 || f == FUNCTOR_env3);
1939       term_t alist = PL_new_term_ref();
1940 
1941       if ( isenv )
1942       { t.type = TOK_BEGIN_ENV;
1943 	t.value.env = &e;
1944 	e.flags = 0;
1945       } else
1946       { t.type = TOK_CMD;
1947 	t.value.cmd = &g;
1948 	g.flags = 0;
1949       }
1950 
1951       if ( ismod )
1952       { _PL_get_arg(2, term, arg);
1953 	if ( PL_get_atom(arg, &atom) && atom == ATOM_star )
1954 	{ if ( isenv )
1955 	    e.flags |= CMD_MODIFY;
1956 	  else
1957 	    g.flags |= CMD_MODIFY;
1958 	}
1959 	_PL_get_arg(3, term, alist);
1960       } else
1961 	_PL_get_arg(2, term, alist);
1962 
1963       _PL_get_arg(1, term, arg);
1964       if ( PL_get_atom_chars(arg, &cname) )
1965       { int n;
1966 	term_t a2 = PL_new_term_ref();
1967 	int argn;
1968 	char **args;
1969 
1970 	if ( isenv )
1971 	{ e.environment = lookupEnvironment(cname);
1972 	  if ( !e.environment )
1973 	  { fprintf(stderr, "[WARNING: Undefined environment: %s]\n", cname);
1974 	    e.environment = newEnvironment(cname);
1975 	  }
1976 	  argn = e.environment->arg_count;
1977 	  args = e.arguments = alloca(sizeof(cmd_arg)*argn);
1978 	} else
1979 	{ g.command = lookupCommand(cname);
1980 	  if ( !g.command )
1981 	  { fprintf(stderr, "[WARNING: Undefined command: %s]\n", cname);
1982 	    g.command = newCommand(cname);
1983 	  }
1984 	  argn = g.command->arg_count;
1985 	  args = g.arguments = alloca(sizeof(cmd_arg)*argn);
1986 	}
1987 
1988 	for(n=0; n<argn; n++)
1989 	{ if ( PL_get_list(alist, arg, alist) &&
1990 	       PL_get_arg(1, arg, a2) &&
1991 	       PL_get_chars(a2, &s, CVT_ATOMIC) )
1992 	    args[n] = s;
1993 	  else
1994 	    args[n] = NULL;
1995 	}
1996       }
1997     } else if ( f == FUNCTOR_math1 || f == FUNCTOR_mathenv1 )
1998     { term_t arg = PL_new_term_ref();
1999       char *s;
2000 
2001       if ( PL_get_arg(1, term, arg) &&
2002 	   PL_get_chars(arg, &s, CVT_ATOMIC) )
2003       { t.type = (f == FUNCTOR_math1 ? TOK_MATH : TOK_MATH_ENV);
2004 	t.value.string = s;
2005       }
2006     }
2007   }
2008 
2009   if ( t.type != TOK_WORD )
2010     last_is_word = FALSE;
2011 
2012   if ( t.type >= 0 )
2013   { put_token(&t, &ppctx);
2014     PL_succeed;
2015   }
2016 
2017   return PL_warning("put_tex_token/1: instantiation error");
2018 }
2019 
2020 
2021 static void
tex2pl_from_string(const char * str,term_t tokens)2022 tex2pl_from_string(const char *str, term_t tokens)
2023 { pl_context ctx;
2024   Input fd;
2025 
2026   ctx.list       = PL_copy_term_ref(tokens);
2027   ctx.head       = PL_new_term_ref();
2028   ctx.envnesting = 0;
2029   ctx.prev_type0 = TOK_EOF;
2030   ctx.prev_type1 = TOK_EOF;
2031 
2032   fd = openInputString(str);
2033   parseTeX(fd, build_list, &ctx);
2034   closeInput(fd);
2035 }
2036 
2037 
2038 static foreign_t
pl_tex_atom_to_tokens(term_t txt,term_t tokens)2039 pl_tex_atom_to_tokens(term_t txt, term_t tokens)
2040 { char *s;
2041 
2042   if ( PL_get_chars(txt, &s, CVT_ALL) )
2043   { tex2pl_from_string(s, tokens);
2044 
2045     PL_succeed;
2046   }
2047 
2048   PL_fail;
2049 }
2050 
2051 
2052 
2053 static int
build_arguments(term_t alist,int nargs,CmdArg argspec,char ** args)2054 build_arguments(term_t alist, int nargs, CmdArg argspec, char **args)
2055 { int ga = 0;				/* goal argument */
2056   term_t tmp = PL_new_term_ref();
2057 
2058   for( ; ga < nargs; ga++ )
2059   { int rc;
2060 
2061     if ( !PL_unify_list(alist, tmp, alist) )
2062       return FALSE;
2063 
2064     if ( argspec[ga].flags & CA_OPTIONAL )
2065     { if ( args[ga] == NULL )
2066       { rc = PL_unify_atom(tmp, ATOM_nil);
2067       } else
2068       { if ( argspec[ga].flags & CA_TEXT )
2069 	{ term_t arg = PL_new_term_ref();
2070 
2071 	  tex2pl_from_string(args[ga], arg);
2072 	  rc = PL_unify_term(tmp,		/* [text] */
2073 			     PL_FUNCTOR, FUNCTOR_dot2,
2074 			     PL_TERM,    arg,
2075 			     PL_ATOM,    ATOM_nil);
2076 	} else
2077 	{ rc = PL_unify_term(tmp,		/* [text] */
2078 			     PL_FUNCTOR, FUNCTOR_dot2,
2079 			     PL_CHARS,   args[ga],
2080 			     PL_ATOM,    ATOM_nil);
2081 	}
2082       }
2083     } else
2084     { if ( argspec[ga].flags & CA_TEXT )
2085       { term_t arg = PL_new_term_ref();
2086 
2087 	tex2pl_from_string(args[ga], arg);
2088 	rc = PL_unify_term(tmp,		/* {text} */
2089 			   PL_FUNCTOR, FUNCTOR_brace1,
2090 			   PL_TERM,   arg);
2091 
2092       } else
2093       { rc = PL_unify_term(tmp,		/* {text} */
2094 			   PL_FUNCTOR, FUNCTOR_brace1,
2095 			   PL_CHARS,   args[ga]);
2096       }
2097     }
2098 
2099     if ( !rc )
2100       return rc;
2101   }
2102 
2103   return PL_unify_nil(alist);
2104 }
2105 
2106 
2107 static void
popStack(PlContext ctx)2108 popStack(PlContext ctx)
2109 { if ( ctx->envnesting > 0 )
2110     ctx->list = ctx->stack[--ctx->envnesting];
2111   else
2112     error(ERR_ENV_UNDERFLOW, texfile(), texline());
2113 }
2114 
2115 
2116 static int
build_list(Token t,void * context)2117 build_list(Token t, void *context)
2118 { PlContext ctx = context;
2119 
2120   DEBUG(1, put_token(t, &ppctx));
2121 
2122   switch(t->type)
2123   { case TOK_EOF:
2124       return PL_unify_nil(ctx->list);
2125     case TOK_SPACE:
2126     case TOK_LINE:
2127       if ( !emit_space )
2128 	goto out;
2129   }
2130 
2131   if ( !emit_space )
2132   { if ( (ctx->prev_type0 == TOK_SPACE || ctx->prev_type0 == TOK_LINE) &&
2133 	 (ctx->prev_type1 != TOK_WORD || t->type != TOK_WORD) )
2134     { atom_t a = (ctx->prev_type0 == TOK_SPACE ? ATOM_space : ATOM_nl);
2135 
2136       if ( !PL_unify_list(ctx->list, ctx->head, ctx->list) ||
2137 	   !PL_unify_atom(ctx->head, a) )
2138 	return FALSE;
2139     }
2140   }
2141 
2142   switch(t->type)
2143   { case TOK_END_GROUP:
2144     case TOK_END_ENV:
2145       if ( !PL_unify_nil(ctx->list) )
2146 	return FALSE;
2147       popStack(ctx);
2148       goto out;
2149   }
2150 
2151   if ( !PL_unify_list(ctx->list, ctx->head, ctx->list) )
2152     return FALSE;
2153 
2154   switch(t->type)
2155   { case TOK_BEGIN_ENV:
2156     { Environment e   = t->value.env;
2157       atom_t modified = (e->flags & CMD_MODIFY ? ATOM_star : ATOM_minus);
2158       term_t clist    = PL_new_term_ref();
2159       term_t alist    = PL_new_term_ref();
2160       int rc;
2161 
2162       if ( e->environment->flags & CMD_MODIFY )
2163       { rc = PL_unify_term(ctx->head,
2164 			   PL_FUNCTOR, FUNCTOR_env4,
2165 			   PL_CHARS,   e->environment->name,
2166 			   PL_ATOM,	  modified,
2167 			   PL_TERM,	  alist,
2168 			   PL_TERM,	  clist);
2169       } else
2170       { rc = PL_unify_term(ctx->head,
2171 			   PL_FUNCTOR, FUNCTOR_env3,
2172 			   PL_CHARS,   e->environment->name,
2173 			   PL_TERM,	  alist,
2174 			   PL_TERM,	  clist);
2175       }
2176 
2177       if ( rc )
2178 	rc = build_arguments(alist,	/* environment arguments */
2179 			     e->environment->arg_count,
2180 			     e->environment->arguments,
2181 			     e->arguments);
2182 
2183       if ( !rc )
2184 	return FALSE;
2185 
2186       PL_reset_term_refs(alist);
2187 					/* contents of the environment */
2188       if ( ctx->envnesting >= MAXENVNESTING )
2189 	error(ERR_ENV_NESTING, texfile(), texline());
2190       ctx->stack[ctx->envnesting++] = ctx->list;
2191       ctx->list = clist;		/* no need to copy */
2192 
2193       break;
2194     }
2195     case TOK_CMD:
2196     { Command g       = t->value.cmd;
2197       term_t alist    = PL_new_term_ref();
2198       term_t modified = (g->flags & CMD_MODIFY ? ATOM_star : ATOM_minus);
2199       int rc;
2200 
2201       if ( g->command->flags & CMD_MODIFY )
2202       { rc = PL_unify_term(ctx->head,
2203 			   PL_FUNCTOR, FUNCTOR_cmd3,
2204 			   PL_CHARS,   g->command->name,
2205 			   PL_ATOM,	  modified,
2206 			   PL_TERM,	  alist);
2207       } else
2208       { if ( g->command->arg_count == 0 )
2209 	{ rc = PL_unify_term(ctx->head,
2210 			     PL_FUNCTOR, FUNCTOR_cmd1,
2211 			     PL_CHARS,   g->command->name);
2212 	  PL_reset_term_refs(alist);
2213 
2214 	  break;
2215 	} else
2216 	{ rc = PL_unify_term(ctx->head,
2217 			     PL_FUNCTOR, FUNCTOR_cmd2,
2218 			     PL_CHARS,   g->command->name,
2219 			     PL_TERM,    alist);
2220 	}
2221       }
2222 
2223       if ( rc )
2224 	rc = build_arguments(alist,
2225 			     g->command->arg_count,
2226 			     g->command->arguments,
2227 			     g->arguments);
2228 
2229       PL_reset_term_refs(alist);
2230       break;
2231     }
2232     case TOK_BEGIN_GROUP:
2233       if ( ctx->envnesting >= MAXENVNESTING )
2234 	error(ERR_ENV_NESTING, texfile(), texline());
2235       ctx->stack[ctx->envnesting++] = ctx->list;
2236       ctx->list = PL_copy_term_ref(ctx->head);
2237       break;
2238     case TOK_MATH:
2239       if ( !PL_unify_term(ctx->head,
2240 			  PL_FUNCTOR, FUNCTOR_math1,
2241 			  PL_STRING,  t->value.string) )
2242 	return FALSE;
2243       break;
2244     case TOK_MATH_ENV:
2245       if ( !PL_unify_term(ctx->head,
2246 			  PL_FUNCTOR, FUNCTOR_mathenv1,
2247 			  PL_STRING,  t->value.string) )
2248 	return FALSE;
2249       break;
2250     case TOK_VERB:
2251       if ( !PL_unify_term(ctx->head,
2252 			  PL_FUNCTOR, FUNCTOR_verb2,
2253 			  PL_CHARS,   t->context,
2254 			  PL_STRING,  t->value.string) )
2255 	return FALSE;
2256       break;
2257     case TOK_VERBATIM:
2258       if ( !PL_unify_term(ctx->head,
2259 			  PL_FUNCTOR, FUNCTOR_verbatim2,
2260 			  PL_CHARS,   t->context,
2261 			  PL_STRING,  t->value.string) )
2262 	return FALSE;
2263       break;
2264     case TOK_PAR:
2265       if ( !PL_unify_term(ctx->head,
2266 			  PL_FUNCTOR, FUNCTOR_cmd1,
2267 			  PL_ATOM,	ATOM_par) )
2268 	return FALSE;
2269       break;
2270     case TOK_WORD:
2271       if ( !PL_unify_atom_chars(ctx->head, t->value.string) )
2272 	return FALSE;
2273       break;
2274     case TOK_SPACE:
2275     case TOK_LINE:
2276       if ( !PL_unify_atom(ctx->head, ATOM_space) )
2277 	return FALSE;
2278       break;
2279   }
2280 
2281 out:
2282   ctx->prev_type1 = ctx->prev_type0;
2283   ctx->prev_type0 = t->type;
2284 
2285   return TRUE;
2286 }
2287 
2288 
2289 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2290 Calls tex:prolog_function(cmd([Star], [Args]))
2291 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2292 
2293 static int
cmd_prolog(Command g,Input fd,CallBack func,void * ctx)2294 cmd_prolog(Command g, Input fd, CallBack func, void *ctx)
2295 { fid_t  f        = PL_open_foreign_frame();
2296   term_t t0       = PL_new_term_ref();
2297   term_t alist    = PL_new_term_ref();
2298   term_t modified = (g->flags & CMD_MODIFY ? ATOM_star : ATOM_minus);
2299   predicate_t p   = PL_predicate("prolog_function", 1, "tex");
2300   int rc;
2301 
2302   if ( g->command->flags & CMD_MODIFY )
2303   { rc = PL_unify_term(t0,
2304 		       PL_FUNCTOR, FUNCTOR_cmd3,
2305 		       PL_CHARS,   g->command->name,
2306 		       PL_ATOM,    modified,
2307 		       PL_TERM,    alist);
2308   } else
2309   { if ( g->command->arg_count == 0 )
2310     { rc = PL_unify_term(t0,
2311 			 PL_FUNCTOR, FUNCTOR_cmd1,
2312 			 PL_CHARS,   g->command->name);
2313     } else
2314     { rc = PL_unify_term(t0,
2315 			 PL_FUNCTOR, FUNCTOR_cmd2,
2316 			 PL_CHARS,   g->command->name,
2317 			 PL_TERM,    alist);
2318     }
2319   }
2320 
2321   if ( rc )
2322     rc = build_arguments(alist,
2323 			 g->command->arg_count,
2324 			 g->command->arguments,
2325 			 g->arguments);
2326 
2327   if ( !rc )
2328     return FALSE;
2329 
2330   PL_call_predicate(NULL, TRUE, p, t0);
2331   PL_discard_foreign_frame(f);
2332 
2333   cmd_normal(g, fd, func, ctx);
2334 
2335   return TRUE;
2336 }
2337 
2338 
2339 foreign_t
pl_tex_emit_spaces(term_t old,term_t new)2340 pl_tex_emit_spaces(term_t old, term_t new)
2341 { if ( PL_unify_atom(old, emit_space ? ATOM_true : ATOM_false) )
2342   { atom_t a;
2343 
2344     if ( PL_get_atom(new, &a) )
2345     { if ( a == ATOM_true )
2346       { emit_space = 1;
2347 	return TRUE;
2348       } else
2349       { emit_space = 0;
2350 	return TRUE;
2351       }
2352     }
2353   }
2354 
2355   return FALSE;
2356 }
2357 
2358 
2359 foreign_t
pl_tex_tokens(term_t file,term_t tokens)2360 pl_tex_tokens(term_t file, term_t tokens)
2361 { char *fname;
2362 
2363   if ( PL_get_chars(file, &fname, CVT_ALL) )
2364   { pl_context ctx;
2365 
2366     ctx.list       = tokens;
2367     ctx.head       = PL_new_term_ref();
2368     ctx.envnesting = 0;
2369     ctx.prev_type0 = TOK_EOF;
2370     ctx.prev_type1 = TOK_EOF;
2371 
2372     parseTeXFile(fname, build_list, &ctx);
2373     PL_succeed;
2374   }
2375 
2376   PL_fail;
2377 }
2378 
2379 
2380 foreign_t
pl_tex_command_property(term_t name,term_t pre,term_t post)2381 pl_tex_command_property(term_t name, term_t pre, term_t post)
2382 { char *cname;
2383 
2384   if ( PL_get_atom_chars(name, &cname) )
2385   { CmdDescr cmd = lookupCommand(cname);
2386 
2387     if ( cmd &&
2388 	 PL_unify_integer(pre, cmd->pre_lines) &&
2389          PL_unify_integer(post, cmd->post_lines) )
2390       PL_succeed;
2391   }
2392 
2393   PL_fail;
2394 }
2395 
2396 
2397 foreign_t
pl_tex_debug(term_t old,term_t new)2398 pl_tex_debug(term_t old, term_t new)
2399 { if ( PL_unify_integer(old, debuglevel) &&
2400        PL_get_integer(new, &debuglevel) )
2401     PL_succeed;
2402 
2403   PL_fail;
2404 }
2405 
2406 
2407 foreign_t
pl_tex_tell(term_t file)2408 pl_tex_tell(term_t file)
2409 { char *name;
2410 
2411   if ( PL_get_chars(file, &name, CVT_ALL) )
2412   { FILE *fd = (streq(name, "-") ? stdout : fopen(name, "w"));
2413 
2414     if ( fd )
2415     { ppctx.envnesting   = 0;		/* separate predicate? */
2416       ppctx.last_type    = TOK_EOF;
2417       ppctx.line_pos     = 0;
2418       ppctx.newlines     = 0;
2419       ppctx.spaces       = 0;
2420       ppctx.verbatim     = FALSE;
2421       ppctx.left_margin  = 0;
2422       ppctx.right_margin = 72;
2423       ppctx.fd	         = fd;
2424 
2425       PL_succeed;
2426     }
2427   }
2428 
2429   PL_fail;
2430 }
2431 
2432 
2433 foreign_t
pl_tex_told()2434 pl_tex_told()
2435 { fflush(ppctx.fd);
2436   if ( ppctx.fd != stdout )
2437     fclose(ppctx.fd);
2438   ppctx.fd = stdout;
2439 
2440   PL_succeed;
2441 }
2442 
2443 
2444 foreign_t
pl_tex_read_commands(term_t file)2445 pl_tex_read_commands(term_t file)
2446 { char *name;
2447 
2448   if ( PL_get_chars(file, &name, CVT_ALL) &&
2449        parseCmdSpecs(name) )
2450     PL_succeed;
2451 
2452   PL_fail;
2453 }
2454 
2455 
2456 foreign_t
pl_tex_declare(term_t spec)2457 pl_tex_declare(term_t spec)
2458 { char *s;
2459 
2460   if ( PL_get_chars(spec, &s, CVT_ALL) &&
2461        parseCommandSpec("tex_declare/1", 0, s) )
2462     PL_succeed;
2463 
2464   PL_fail;
2465 }
2466 
2467 
2468 foreign_t
pl_tex_environment_function(term_t env,term_t func)2469 pl_tex_environment_function(term_t env, term_t func)
2470 { char *s;
2471   EnvDescr e;
2472 
2473   if ( PL_get_atom_chars(env, &s) &&
2474        (e = lookupEnvironment(s)) &&
2475        e->fname )
2476     return PL_unify_atom_chars(func, e->fname);
2477 
2478   PL_fail;
2479 }
2480 
2481 
2482 foreign_t
pl_tex_command_function(term_t cmd,term_t func)2483 pl_tex_command_function(term_t cmd, term_t func)
2484 { char *s;
2485   CmdDescr c;
2486 
2487   if ( PL_get_atom_chars(cmd, &s) &&
2488        (c = lookupCommand(s)) &&
2489        c->fname )
2490     return PL_unify_atom_chars(func, c->fname);
2491 
2492   PL_fail;
2493 }
2494 
2495 
2496 		 /*******************************
2497 		 *          HTML OUTPUT		*
2498 		 *******************************/
2499 
2500 static void
output_n(PPContext pp,const char * s,int l)2501 output_n(PPContext pp, const char *s, int l)
2502 { if ( l > 0 )
2503   { char buf[l+1];
2504 
2505     memcpy(buf, s, l);
2506     buf[l] = EOS;
2507     output(pp, "%s", buf);
2508   }
2509 }
2510 
2511 
2512 static void
output_html(PPContext pp,const char * s)2513 output_html(PPContext pp, const char *s)
2514 { int c;
2515   const char *from = s;
2516 
2517   for(; (c=*s); s++)
2518   { switch(c)
2519     { case '<':
2520 	output_n(pp, from, s-from);
2521         from = s+1;
2522         output(pp, "%s", "&lt;");
2523 	break;
2524       case '>':
2525 	output_n(pp, from, s-from);
2526         from = s+1;
2527         output(pp, "%s", "&gt;");
2528 	break;
2529       case '&':
2530 	output_n(pp, from, s-from);
2531         from = s+1;
2532         output(pp, "%s", "&amp;");
2533 	break;
2534     }
2535   }
2536 
2537   output_n(pp, from, s-from);
2538 }
2539 
2540 
2541 static void
put_html_token(Token t,void * ctx)2542 put_html_token(Token t, void *ctx)
2543 { PPContext pp = ctx;
2544 
2545   switch(t->type)
2546   { case TOK_CMD:
2547     { outputBlank(pp);
2548       while(pp->newlines < t->prelines)
2549 	output(pp, "\n");
2550       output(pp, "%s", t->value.string);
2551       while(pp->newlines < t->postlines)
2552 	output(pp, "\n");
2553 
2554       break;
2555     }
2556     case TOK_VERBATIM:
2557     { pp->verbatim = VERB_VERBATIM;
2558       output(pp, "%s", t->value.string);
2559       pp->verbatim = VERB_NORMAL;
2560       break;
2561     }
2562     case TOK_PRE:
2563     { pp->verbatim = VERB_PRE;
2564       output(pp, "%s", t->value.string);
2565       pp->verbatim = VERB_NORMAL;
2566       break;
2567     }
2568     case TOK_VERB:
2569     { outputBlank(pp);
2570       pp->verbatim = VERB_VERBATIM;
2571       output(pp, "%s", t->value.string);
2572       pp->verbatim = VERB_NORMAL;
2573 
2574       break;
2575     }
2576     case TOK_SPACE:
2577       break;
2578     case TOK_LINE:
2579       break;
2580     case TOK_NOSPACEWORD:
2581       outputBlank(pp);
2582       output_html(pp, t->value.string);
2583       break;
2584     case TOK_WORD:
2585     { int pendingblank;
2586 
2587       if ( pp->last_type == TOK_LINE )
2588 	pp->last_type = TOK_SPACE;
2589       pendingblank = (pp->last_type == TOK_SPACE);
2590       outputBlank(pp);			/* as space! */
2591       if ( pendingblank &&		/* no blanks in input: concatenate! */
2592 	   (int)strlen(t->value.string) + pp->line_pos > pp->right_margin )
2593 	nl(pp);
2594       output_html(pp, t->value.string);
2595       break;
2596     }
2597     case TOK_EOF:
2598       output(pp, "\n");
2599       break;
2600     default:
2601       assert(0);
2602   }
2603 
2604   pp->last_type = t->type;
2605 }
2606 
2607 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2608 Control output of HTML data format:
2609 
2610   html(Text, [Pre, Post])	Output a command
2611   verbatim(Text)		Output verbatim text
2612   verb(Text)			Output short text
2613   Atom				Output plain text
2614 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2615 
2616 static foreign_t
pl_put_html_token(term_t term)2617 pl_put_html_token(term_t term)
2618 { token t;
2619   char *s;
2620   atom_t atom;
2621   static int last_is_word = FALSE;
2622 
2623   t.type = -1;
2624 
2625   if ( PL_is_functor(term, FUNCTOR_html3) )
2626   { term_t a = PL_new_term_ref();
2627 
2628     if ( PL_get_arg(1, term, a) &&
2629 	 PL_get_chars(a, &s, CVT_ATOMIC) &&
2630 	 PL_get_arg(2, term, a) &&
2631 	 PL_get_integer(a, &t.prelines) &&
2632 	 PL_get_arg(3, term, a) &&
2633 	 PL_get_integer(a, &t.postlines) )
2634     { t.type = TOK_CMD;
2635       t.value.string = s;
2636     }
2637   } else if ( PL_is_functor(term, FUNCTOR_html1) )
2638   { term_t a = PL_new_term_ref();
2639 
2640     if ( PL_get_arg(1, term, a) &&
2641 	 PL_get_chars(a, &s, CVT_ATOMIC) )
2642     { t.type = TOK_CMD;
2643       t.value.string = s;
2644       t.prelines = t.postlines = 0;
2645     }
2646   } else if ( PL_is_functor(term, FUNCTOR_verbatim1) )
2647   { term_t a = PL_new_term_ref();
2648 
2649     if ( PL_get_arg(1, term, a) &&
2650 	 PL_get_chars(a, &s, CVT_ATOMIC) )
2651     { t.type = TOK_VERBATIM;
2652       t.value.string = s;
2653     }
2654   } else if ( PL_is_functor(term, FUNCTOR_verb1) )
2655   { term_t a = PL_new_term_ref();
2656 
2657     if ( PL_get_arg(1, term, a) &&
2658 	 PL_get_chars(a, &s, CVT_ATOMIC) )
2659     { t.type = TOK_VERB;
2660       t.value.string = s;
2661     }
2662   } else if ( PL_is_functor(term, FUNCTOR_pre1) )
2663   { term_t a = PL_new_term_ref();
2664 
2665     if ( PL_get_arg(1, term, a) &&
2666 	 PL_get_chars(a, &s, CVT_ATOMIC) )
2667     { t.type = TOK_PRE;
2668       t.value.string = s;
2669     }
2670   } else if ( PL_is_functor(term, FUNCTOR_nospace1) )
2671   { term_t a = PL_new_term_ref();
2672 
2673     if ( PL_get_arg(1, term, a) &&
2674 	 PL_get_chars(a, &s, CVT_ATOMIC) )
2675     { t.type = TOK_NOSPACEWORD;
2676       t.value.string = s;
2677     }
2678   } else if ( PL_get_atom(term, &atom) )
2679   { if ( atom == ATOM_space )
2680     { t.type = TOK_SPACE;
2681       t.value.string = " ";
2682     } else if ( atom == ATOM_nl )
2683     { t.type = TOK_LINE;
2684       t.value.string = "\n";
2685     } else
2686     { if ( last_is_word )		/* regenerate the space tokens */
2687       { t.type = TOK_SPACE;
2688 	t.value.string = " ";
2689 
2690 	put_html_token(&t, &ppctx);
2691       } else
2692 	last_is_word = TRUE;
2693 
2694       t.type = TOK_WORD;
2695       t.value.string = (char *)PL_atom_chars(atom);
2696     }
2697   } else if ( PL_get_chars(term, &s, CVT_ALL) )
2698   { if ( last_is_word )		/* regenerate the space tokens */
2699     { t.type = TOK_SPACE;
2700       t.value.string = " ";
2701 
2702       put_html_token(&t, &ppctx);
2703     } else
2704       last_is_word = TRUE;
2705 
2706     t.type = TOK_WORD;
2707     t.value.string = s;
2708   }
2709 
2710   if ( t.type != TOK_WORD )
2711     last_is_word = FALSE;
2712 
2713   if ( t.type >= 0 )
2714   { put_html_token(&t, &ppctx);
2715     PL_succeed;
2716   }
2717 
2718   return PL_warning("put_html_token/1: instantiation error");
2719 }
2720 
2721 		 /*******************************
2722 		 *	      ERRORS		*
2723 		 *******************************/
2724 
2725 static void
error(int eno,const char * file,int line)2726 error(int eno, const char *file, int line)
2727 { fprintf(stderr,
2728 	  "[TeX tokeniser: %s:%d: %s]\n",
2729 	  file, line, tex_error_strings[eno]);
2730   switch(eno)
2731   { case ERR_RUNAWAY_ARGUMENT:
2732     case ERR_RUNAWAY_MATH:
2733     { char argstart[50];
2734       strncpy(argstart, texarg, 50);
2735       argstart[49] = EOS;
2736       fprintf(stderr, "Start: \"%s\"\n", argstart);
2737       break;
2738     }
2739   }
2740 
2741   exit(1);
2742 }
2743 
2744 static void
warn(int eno,const char * file,int line)2745 warn(int eno, const char *file, int line)
2746 { fprintf(stderr,
2747 	  "WARNING: %s:%d: %s\n",
2748 	  file, line, tex_error_strings[eno]);
2749 }
2750 
2751 
2752 extern void install_ps(void);
2753 
2754 install_t
install()2755 install()
2756 { initPrologConstants();
2757 
2758   PL_register_foreign("tex_tokens",           2, pl_tex_tokens,           0);
2759   PL_register_foreign("tex_command_property", 3, pl_tex_command_property, 0);
2760   PL_register_foreign("put_tex_token",        1, pl_put_tex_token,        0);
2761   PL_register_foreign("put_html_token",       1, pl_put_html_token,       0);
2762   PL_register_foreign("tex_debug",            2, pl_tex_debug,            0);
2763   PL_register_foreign("tex_tell",             1, pl_tex_tell,		  0);
2764   PL_register_foreign("tex_told",             0, pl_tex_told,             0);
2765   PL_register_foreign("tex_read_commands",    1, pl_tex_read_commands,	  0);
2766   PL_register_foreign("tex_declare",	      1, pl_tex_declare,	  0);
2767   PL_register_foreign("tex_environment_function",
2768 					      2, pl_tex_environment_function,
2769 									  0);
2770   PL_register_foreign("tex_command_function", 2, pl_tex_command_function, 0);
2771   PL_register_foreign("tex_atom_to_tokens",   2, pl_tex_atom_to_tokens,   0);
2772   PL_register_foreign("tex_emit_spaces",      2, pl_tex_emit_spaces,      0);
2773 
2774   ppctx.envnesting   = 0;		/* separate predicate? */
2775   ppctx.last_type    = TOK_EOF;
2776   ppctx.line_pos     = 0;
2777   ppctx.newlines     = 0;
2778   ppctx.spaces       = 0;
2779   ppctx.verbatim     = FALSE;
2780   ppctx.left_margin  = 0;
2781   ppctx.right_margin = 72;
2782   ppctx.fd	     = stdout;
2783 
2784   install_ps();
2785 }
2786 
2787 #endif /*__SWI_PROLOG__*/
2788