1 /* Part of SWI-Prolog
2
3 Author: Jan Wielemaker
4 E-mail: J.Wielemaker@vu.nl
5 WWW: http://www.swi-prolog.org
6 Copyright (c) 1997-2020, University of Amsterdam
7 CWI, Amsterdam
8 All rights reserved.
9
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions
12 are met:
13
14 1. Redistributions of source code must retain the above copyright
15 notice, this list of conditions and the following disclaimer.
16
17 2. Redistributions in binary form must reproduce the above copyright
18 notice, this list of conditions and the following disclaimer in
19 the documentation and/or other materials provided with the
20 distribution.
21
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <stdarg.h>
39 #include <assert.h>
40 #include <errno.h>
41 #include <signal.h>
42 #ifdef HAVE_MALLOC_H
43 #include <malloc.h>
44 #endif
45 #include <string.h>
46
47 static int debuglevel = 0;
48 static int emit_space = 0;
49
50 #define DEBUG(n, g) if ( n <= debuglevel ) { g; }
51
52 #define MAXFUNC 100 /* max bounded function name length */
53 #define MAXCMD 256 /* max length of a \foobar */
54 #define MAXARG 4096 /* max {..} argument length */
55 #define MAXWORD 1024 /* max length of a word (no spaces) */
56 #define MAXVERB 1024 /* max length of \verb|string| */
57 #define MAXMATH 4096 /* max length of $...$ */
58 #define MAXVERBATIM 10240 /* max size of verbatim */
59 #define MAXOUTPUT 11000 /* output() max */
60 #define MAXCMDARGS 32
61 #define MAXENVNESTING 256 /* max depth of environment */
62
63 static void error(int eno, const char *file, int line); /* handle errors */
64 static void warn(int eno, const char *file, int line); /* handle errors */
65
66 #define ERR_UNEXPECTED_EOF 1 /* unexpected end-of-file */
67 #define ERR_RUNAWAY_ARGUMENT 2 /* runaway argument */
68 #define ERR_VERBATIM_TOO_LONG 3 /* verbatim env too large */
69 #define ERR_NOCMD_SPECS 4 /* cannot find command-specs */
70 #define ERR_BAD_COMMAND_SPEC 5 /* syntax error in command specs */
71 #define ERR_ENV_NESTING 6 /* environment-stack overflow */
72 #define ERR_UNDEF_FUNCTION 7 /* undefined function */
73 #define ERR_BAD_ARG_SPEC 8
74 #define ERR_BAD_ENV_SPEC 9
75 #define ERR_RUNAWAY_MATH 10 /* notclosed math env */
76 #define ERR_BAD_MATH_ENV_CLOSURE 11 /* $$ closed by $ */
77 #define ERR_ENV_UNDERFLOW 12 /* environment stack-underflow */
78 #define ERR_CMD_TOO_LONG 13
79 #define ERR_BAD_DIM 14
80
81 const char *tex_error_strings[] =
82 { "No error",
83 "Unexpected end of file",
84 "Runaway argument",
85 "Verbatim environment too long",
86 "Cannot find command specification file",
87 "Syntax error",
88 "Environment-stack overflow",
89 "Undefined function",
90 "Syntax error in argument definition",
91 "Syntax error in environment definition",
92 "$: Runaway argument",
93 "$$ closed by single $",
94 "Too many '}' or \\end{}",
95 "Command name too long",
96 "Bad dimension",
97 NULL /* allow for easy enumeration */
98 };
99
100 #ifndef FALSE
101 #define FALSE 0
102 #define TRUE 1
103 #endif
104
105 #define EOS '\0' /* end-of-string */
106 #define streq(s1, s2) (strcmp((s1), (s2)) == 0)
107
108 /*******************************
109 * COMMANDS *
110 *******************************/
111
112 #define F_NOSKIPBLANK 0x01 /* Argument handling */
113
114 #define CA_OPTIONAL 0x01 /* [optional arg] */
115 #define CA_TEXT 0x02 /* Argument contains output text */
116 #define CA_DIM 0x04 /* Argument is a dimension */
117
118 #define CMD_MODIFY 0x01 /* command allows for modifier */
119
120 #define PRE_COMMENT -1 /* put %\n before command */
121
122 typedef struct _command *Command;
123 typedef struct _environment *Environment;
124 typedef struct _token *Token;
125 typedef struct _input *Input;
126 typedef struct _output *Output;
127
128 typedef int (*CallBack)(Token token, void *context);
129 typedef void (*CmdFunc)(Command cmd, Input fd, CallBack func, void *ctx);
130 typedef void (*EnvFunc)(Environment cmd, Input fd, CallBack func, void *ctx);
131 typedef void *AnyFunc;
132 static AnyFunc lookupFunction(const char *name);
133
134 typedef struct
135 { int flags; /* command arguments flags */
136 } cmd_arg, *CmdArg;
137
138 typedef struct _cmd_descr
139 { const char*name; /* name of the command */
140 int flags; /* command-flags */
141 CmdArg arguments; /* argument specifiers */
142 char arg_count; /* # arguments */
143 char pre_lines; /* # newlines needed before */
144 char post_lines; /* # newlines needed after */
145 CmdFunc function; /* associated function */
146 const char*fname; /* function-name */
147 struct _cmd_descr *next; /* next in hash-table */
148 } cmd_descr, *CmdDescr;
149
150 typedef struct _command
151 { CmdDescr command; /* the commands */
152 int flags; /* general flags */
153 char ** arguments; /* the actual arguments */
154 } command;
155
156 typedef struct _env_descr
157 { const char *name; /* name of the environment */
158 int flags; /* environment flags */
159 CmdArg arguments; /* argument-list */
160 char arg_count; /* # arguments */
161 EnvFunc function; /* associated function */
162 const char *fname; /* function-name */
163 struct _env_descr *next;
164 } env_descr, *EnvDescr;
165
166 typedef struct _environment
167 { EnvDescr environment;
168 int flags;
169 char ** arguments;
170 } environment;
171
172 #define INPUT_FILE 0 /* reading from a file */
173 #define INPUT_STRING 1 /* reading from a string */
174
175 typedef struct _input
176 { int type;
177 int lineno;
178 const char * name; /* name (for feedback) */
179 Input parent; /* Parent input */
180 union
181 { FILE *fd;
182 const char *string;
183 } stream;
184 } input;
185
186 static Input curin; /* current input (file) */
187
188 static int cmd_prolog(Command g, Input fd, CallBack func, void *ctx);
189
190 /*******************************
191 * TOKENS *
192 *******************************/
193
194
195 #define TOK_CMD 0 /* \cmd */
196 #define TOK_BEGIN_GROUP 1 /* { */
197 #define TOK_END_GROUP 2 /* } */
198 #define TOK_BEGIN_ENV 3 /* \begin{cmd} */
199 #define TOK_END_ENV 4 /* \end{cmd} */
200 #define TOK_VERB 5 /* \verb|foo| */
201 #define TOK_VERBATIM 6 /* verbatim environment */
202 #define TOK_PRE 7 /* pre environment */
203 #define TOK_MATH 8 /* $...$ */
204 #define TOK_MATH_ENV 9 /* $$...$$ */
205 #define TOK_PAR 10 /* implicit paragraph (blank line) */
206 #define TOK_WORD 11 /* general word */
207 #define TOK_NOSPACEWORD 12 /* word without reintroducing spaces */
208 #define TOK_SPACE 13 /* blank space */
209 #define TOK_LINE 14 /* single line */
210 #define TOK_EOF 15 /* end-of-file */
211
212 typedef struct _token
213 { int type; /* type identifier */
214 int prelines; /* HTML stuff */
215 int postlines; /* HTML stuff */
216 char *context; /* additional context info */
217 union
218 { char *string; /* related text */
219 Command cmd; /* related TeX command */
220 Environment env; /* related TeX environment */
221 } value;
222 } token;
223
224 static const char *texarg; /* argument for runaway message */
225
226 /*******************************
227 * CHARACTER CLASSIFICATION *
228 *******************************/
229
230 #define EF 0 /* end-of-file */
231 #define SP 1 /* space */
232 #define SC 2 /* start line comment (%) */
233 #define BG 3 /* begin group ({) */
234 #define EG 4 /* end group (}) */
235 #define MM 5 /* math-mode delimiter ($) */
236 #define TD 6 /* Table delimiter (&) */
237 #define NB 7 /* Non-breaking space (~) */
238 #define CM 8 /* command (\) */
239
240 #define PU 9 /* punctuation */
241 #define DI 10 /* digit */
242
243 #define BQ 11 /* back quote (`) */
244 #define SQ 12 /* single quote (') */
245
246 #define LC 13 /* lower-case letter */
247 #define UC 14 /* uppercase letter */
248
249 #define CharType(c) (char_type[(c)+1])
250
251 #define isspace(c) (CharType(c) == SP)
252 #define isdigit(c) (CharType(c) == DI)
253 #define isalnum(c) (CharType(c) >= LC)
254 #define wbreak(c) (CharType(c) <= CM) /* breaks a word */
255 #define isbegingroup(c) (CharType(c) == BG)
256 #define iscommand(c) (CharType(c) == CM)
257
258 static char char_type[] = {
259 /* EOF */
260 EF,
261 /* ^@ ^A ^B ^C ^D ^E ^F ^G ^H ^I ^J ^K ^L ^M ^N ^O 0-15 */
262 EF, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP,
263 /* ^P ^Q ^R ^S ^T ^U ^V ^W ^X ^Y ^Z ^[ ^\ ^] ^^ ^_ 16-31 */
264 SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP, SP,
265 /* sp ! " # $ % & ' ( ) * + , - . / 32-47 */
266 SP, PU, PU, PU, MM, SC, TD, SQ, PU, PU, PU, PU, PU, PU, PU, PU,
267 /* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? 48-63 */
268 DI, DI, DI, DI, DI, DI, DI, DI, DI, DI, PU, PU, PU, PU, PU, PU,
269 /* @ A B C D E F G H I J K L M N O 64-79 */
270 PU, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC,
271 /* P Q R S T U V W X Y Z [ \ ] ^ _ 80-95 */
272 UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, UC, PU, CM, PU, PU, UC,
273 /* ` a b c d e f g h i j k l m n o 96-111 */
274 BQ, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
275 /* p q r s t u v w x y z { | } ~ ^? 112-127 */
276 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, BG, PU, EG, NB, SP,
277 /* 128-255 */
278 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
279 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
280 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
281 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
282 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
283 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
284 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC,
285 LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC, LC
286 };
287
288
289 /*******************************
290 * INPUT *
291 *******************************/
292
293 static Input
openInputFile(const char * name)294 openInputFile(const char *name)
295 { FILE *fd;
296
297 if ( (fd = fopen(name, "r")) )
298 { Input i = malloc(sizeof(input));
299
300 if ( i )
301 { i->type = INPUT_FILE;
302 i->stream.fd = fd;
303 i->lineno = 1;
304
305 i->name = name;
306 i->parent = curin;
307 curin = i;
308
309 return i;
310 }
311 }
312
313 return NULL;
314 }
315
316
317 static Input
openInputString(const char * str)318 openInputString(const char *str)
319 { Input i = malloc(sizeof(input));
320
321 if ( i )
322 { i->type = INPUT_STRING;
323 i->stream.string = str;
324 i->lineno = 1;
325
326 i->name = str;
327 i->parent = curin;
328 curin = i;
329
330 return i;
331 }
332
333 return NULL;
334 }
335
336
337 static void
closeInput(Input i)338 closeInput(Input i)
339 { if ( i->type == INPUT_FILE )
340 fclose(i->stream.fd);
341
342 curin = i->parent;
343
344 free(i);
345 }
346
347
348 static const char *
texfile()349 texfile()
350 { if ( curin )
351 { Input i = curin;
352
353 while(i && i->type == INPUT_STRING)
354 i = i->parent;
355
356 if ( i && i->type == INPUT_FILE )
357 return i->name;
358
359 return curin->name;
360 }
361
362 return "no input";
363 }
364
365
366 static int
texline()367 texline()
368 { if ( curin )
369 { Input i = curin;
370 int offset = 0;
371
372 while(i && i->type == INPUT_STRING)
373 { offset += i->lineno - 1;
374 i = i->parent;
375 }
376
377 if ( i && i->type == INPUT_FILE )
378 return i->lineno + offset;
379
380 return curin->lineno;
381 }
382
383 return -1;
384 }
385
386
387 static int
mygetc(Input fd)388 mygetc(Input fd)
389 { int c;
390
391 switch(fd->type)
392 { case INPUT_FILE:
393 c = getc(fd->stream.fd);
394 break;
395 case INPUT_STRING:
396 default:
397 c = *fd->stream.string++;
398 if ( c == '\0' )
399 c = EOF;
400 break;
401 }
402
403 if ( c == '\n' )
404 fd->lineno++;
405
406 return c;
407 }
408
409 static void
myungetc(int c,Input fd)410 myungetc(int c, Input fd)
411 { switch(fd->type)
412 { case INPUT_FILE:
413 ungetc(c, fd->stream.fd);
414 break;
415 case INPUT_STRING:
416 default:
417 fd->stream.string--;
418 break;
419 }
420
421 if ( c == '\n' )
422 fd->lineno--;
423 }
424
425
426 static char *
myfgets(char * buf,int size,Input fd)427 myfgets(char *buf, int size, Input fd)
428 { char *s = buf;
429
430 for(;;)
431 { int c = mygetc(fd);
432
433 if ( c == EOF )
434 { if ( s == buf )
435 return NULL;
436
437 *s = EOS;
438 return buf;
439 } else if ( c == '\n' )
440 { *s++ = c;
441 *s = EOS;
442 return buf;
443 }
444
445 *s++ = c;
446 if ( s >= &buf[size-1] )
447 { *s = EOS;
448 return buf;
449 }
450 }
451 }
452
453
454 #undef getc
455 #undef ungetc
456 #undef fgets
457 #define getc(fd) mygetc(fd)
458 #define ungetc(c, fd) myungetc(c, fd)
459 #define fgets(buf, size, fd) myfgets(buf, size, fd)
460
461 /*******************************
462 * STRINGS *
463 *******************************/
464
465
/* Hash the NUL-terminated string `t` to a bucket index in the range
   0 .. buckets-1.

   Each character, taken relative to 'a', is xor-ed into the hash after
   shifting it left by the low four bits of a running shift count; the
   shift count itself is xor-ed with the character.  The upper half of
   the hash is finally folded into the lower half.
*/
int
stringHashValue(const char *t, int buckets)
{ unsigned int hash = 0;
  unsigned int rot = 5;
  const char *p;

  for(p = t; *p != '\0'; p++)
  { unsigned int code = *p;

    code -= 'a';
    hash ^= code << (rot & 0xf);
    rot  ^= code;
  }

  hash ^= hash >> 16;

  return hash % buckets;
}
481
482
/* Return a freshly allocated copy of the NUL-terminated string `s`.

   The copy is owned by the caller.  Callers use the result without
   checking it, so running out of memory is reported on stderr and
   terminates the process instead of returning NULL.
*/
const char *
save_string(const char *s)
{ size_t len = strlen(s)+1;		/* include the terminator */
  char *cp = malloc(len);

  if ( !cp )
  { fprintf(stderr, "save_string(): out of memory\n");
    exit(1);
  }

  memcpy(cp, s, len);

  return (const char *)cp;
}
491
492
493 /*******************************
494 * COMMAND REGISTRY *
495 *******************************/
496
497 #define CMD_HASH_SIZE 256
498
499 static CmdDescr cmd_table[CMD_HASH_SIZE];
500
501 static CmdDescr
lookupCommand(const char * name)502 lookupCommand(const char *name)
503 { int v = stringHashValue(name, CMD_HASH_SIZE);
504 CmdDescr c;
505
506 for(c = cmd_table[v]; c; c = c->next)
507 { if ( streq(c->name, name) )
508 return c;
509 }
510
511 if ( isspace(*name) && name[1] == EOS )
512 return lookupCommand(" ");
513
514 return NULL;
515 }
516
517
518 static CmdDescr
newCommand(const char * name)519 newCommand(const char *name)
520 { int v = stringHashValue(name, CMD_HASH_SIZE);
521 CmdDescr c;
522
523 for(c = cmd_table[v]; c; c = c->next)
524 { if ( streq(c->name, name) )
525 { c->flags = 0;
526 c->arg_count = 0;
527 if ( c->arguments )
528 { free(c->arguments);
529 c->arguments = NULL;
530 }
531 return c;
532 }
533 }
534
535 c = malloc(sizeof(*c));
536 c->name = save_string(name);
537 c->flags = 0;
538 c->arg_count = 0;
539 c->arguments = NULL;
540 c->pre_lines = 0;
541 c->post_lines = 0;
542 c->function = NULL;
543 c->fname = NULL;
544 c->next = cmd_table[v];
545 cmd_table[v] = c;
546
547 return c;
548 }
549
550 #define ENV_HASH_SIZE 256
551
552 static EnvDescr env_table[ENV_HASH_SIZE];
553
554 static EnvDescr
lookupEnvironment(const char * name)555 lookupEnvironment(const char *name)
556 { int v = stringHashValue(name, ENV_HASH_SIZE);
557 EnvDescr e;
558
559 for(e = env_table[v]; e; e = e->next)
560 { if ( streq(e->name, name) )
561 return e;
562 }
563
564 return NULL;
565 }
566
567
568 static EnvDescr
newEnvironment(const char * name)569 newEnvironment(const char *name)
570 { int v = stringHashValue(name, ENV_HASH_SIZE);
571 EnvDescr e;
572
573 for(e = env_table[v]; e; e = e->next)
574 { if ( streq(e->name, name) )
575 { e->flags = 0;
576 e->arg_count = 0;
577 if ( e->arguments )
578 { free(e->arguments);
579 e->arguments = NULL;
580 }
581 return e;
582 }
583 }
584
585 e = malloc(sizeof(*e));
586 e->name = save_string(name);
587 e->flags = 0;
588 e->arg_count = 0;
589 e->arguments = NULL;
590 e->function = NULL;
591 e->fname = NULL;
592 e->next = env_table[v];
593 env_table[v] = e;
594
595 return e;
596 }
597
598 #define skipBanks(s) while(isspace(*s)) s++
599
600 static int
parseArgSpec(const char * fname,int lineno,char ** line,CmdArg args)601 parseArgSpec(const char *fname, int lineno, char **line, CmdArg args)
602 { int nargs = 0;
603 char *s = *line;
604
605 skipBanks(s);
606
607 for(;;s++)
608 { switch(*s)
609 { case '[':
610 args[nargs].flags = 0;
611 if ( s[2] != ']' )
612 { warn(ERR_BAD_ARG_SPEC, fname, lineno);
613 return -1;
614 }
615 args[nargs].flags |= CA_OPTIONAL;
616 goto arg_cont;
617 case '{':
618 args[nargs].flags = 0;
619 if ( s[2] != '}' )
620 { warn(ERR_BAD_ARG_SPEC, fname, lineno);
621 return -1;
622 }
623 arg_cont:
624 switch(s[1])
625 { case '+':
626 args[nargs].flags |= CA_TEXT;
627 break;
628 case 'd':
629 args[nargs].flags |= CA_DIM;
630 break;
631 case '-':
632 break;
633 default:
634 warn(ERR_BAD_ARG_SPEC, fname, lineno);
635 return -1;
636 }
637 nargs++;
638 s += 2;
639 break;
640 default:
641 skipBanks(s);
642 *line = s;
643 return nargs;
644 }
645 }
646 }
647
648
649 static AnyFunc
parseFuncSpec(char ** line,const char ** fname)650 parseFuncSpec(char **line, const char **fname)
651 { char *s = *line;
652 char b[MAXFUNC];
653 char *q = b;
654
655 skipBanks(s);
656 if ( *s != '=' )
657 return NULL;
658 s++;
659 while(isalnum(*s))
660 *q++ = *s++;
661 *q = EOS;
662 skipBanks(s);
663
664 *line = s;
665 *fname = (const char *)save_string(b);
666
667 return lookupFunction(b);
668 }
669
670
671 static int
parseEnvSpec(const char * fname,int line,char * s)672 parseEnvSpec(const char *fname, int line, char *s)
673 { char *f = ++s;
674 char tmp;
675 EnvDescr e;
676 cmd_arg args[MAXCMDARGS]; /* argument-list */
677
678 while(isalnum(*s))
679 s++;
680 tmp = *s;
681 *s = EOS;
682 e = newEnvironment(f);
683 *s = tmp;
684
685 if ( *s == '*' ) /* \begin{figure*} */
686 { e->flags |= CMD_MODIFY;
687 s++;
688 }
689
690 skipBanks(s);
691 if ( *s != '}' ) /* check for } */
692 { warn(ERR_BAD_ENV_SPEC, fname, line);
693 return FALSE;
694 } else
695 s++;
696 /* parse arguments */
697 e->arg_count = parseArgSpec(fname, line, &s, args);
698 if ( e->arg_count < 0 )
699 return FALSE;
700 e->arguments = malloc(e->arg_count*sizeof(cmd_arg));
701 memcpy(e->arguments, args, e->arg_count*sizeof(cmd_arg));
702
703 if ( *s == '=' ) /* =function */
704 { e->function = parseFuncSpec(&s, &e->fname);
705 skipBanks(s);
706 }
707
708 if ( *s != EOS && *s != '%' )
709 { warn(ERR_BAD_COMMAND_SPEC, fname, line);
710 return FALSE;
711 }
712
713 return TRUE;
714 }
715
716
717 static int
parseCommandSpec(const char * fname,int lineno,char * line)718 parseCommandSpec(const char *fname, int lineno, char *line)
719 { char *s = line;
720
721 while(isspace(*s))
722 s++;
723
724 if ( *s == '%' || *s == EOS )
725 return TRUE; /* comment-line */
726
727 if ( *s == '{' ) /* environment */
728 { return parseEnvSpec(fname, lineno, s);
729 } else if ( *s == '\\' ) /* normal command */
730 { char *f = ++s;
731 char tmp;
732 cmd_arg args[MAXCMDARGS]; /* argument-list */
733 CmdDescr c;
734
735 if ( isalnum(*s) )
736 { while(isalnum(*s))
737 { s++;
738 }
739 } else
740 s++;
741 tmp = *s;
742 *s = EOS;
743 c = newCommand(f);
744 *s = tmp;
745
746 skipBanks(s); /* spaces after command */
747 if ( *s == '*' ) /* modified */
748 { c->flags |= CMD_MODIFY;
749 s++;
750 }
751
752 c->arg_count = parseArgSpec(fname, lineno, &s, args);
753 if ( c->arg_count < 0 )
754 return FALSE;
755 c->arguments = malloc(c->arg_count*sizeof(cmd_arg));
756 memcpy(c->arguments, args, c->arg_count*sizeof(cmd_arg));
757
758 if ( *s == '=' ) /* associate function */
759 { c->function = parseFuncSpec(&s, &c->fname);
760 }
761
762 if ( isdigit(*s) ) /* pre-lines */
763 { c->pre_lines = *s - '0';
764 s++;
765 skipBanks(s);
766 } else if ( *s == '%' )
767 { c->pre_lines = PRE_COMMENT; /* %\n */
768 s++;
769 skipBanks(s);
770 }
771
772 if ( isdigit(*s) ) /* post-lines */
773 { c->post_lines = *s - '0';
774 s++;
775 skipBanks(s);
776 }
777
778 if ( *s != EOS && *s != '%' )
779 { warn(ERR_BAD_COMMAND_SPEC, fname, lineno);
780 return FALSE;
781 }
782
783 return TRUE;
784 }
785
786 warn(ERR_BAD_COMMAND_SPEC, fname, lineno);
787 return FALSE;
788 }
789
790
791 static int
parseCmdSpecs(const char * fname)792 parseCmdSpecs(const char *fname)
793 { char line[MAXCMD];
794 int l = 0;
795 Input fd;
796
797 if ( (fd = openInputFile(fname)) == NULL )
798 { warn(ERR_NOCMD_SPECS, fname, 0);
799 return FALSE;
800 }
801
802 while(fgets(line, sizeof(line), fd))
803 parseCommandSpec(fname, ++l, line);
804
805 closeInput(fd);
806
807 return TRUE;
808 }
809
810
811 /*******************************
812 * PARSING STUFF *
813 *******************************/
814
815 static void
getCommand(Input fd,char * buf,int size)816 getCommand(Input fd, char *buf, int size)
817 { int c;
818
819 size--; /* room for EOS */
820
821 c = getc(fd);
822 if ( isalnum(c) ) /* \blabla */
823 { do
824 { if ( --size <= 0 )
825 error(ERR_CMD_TOO_LONG, texfile(), texline());
826 *buf++ = c;
827 c = getc(fd);
828 } while(isalnum(c));
829 ungetc(c, fd);
830 } else /* \" */
831 { *buf++ = c;
832 }
833
834 *buf = EOS;
835 }
836
837
838 static void
getArgument(Input fd,int flags,char * buf,int size)839 getArgument(Input fd, int flags, char *buf, int size)
840 { int c = getc(fd);
841 int sz = size;
842
843 if ( !(flags & F_NOSKIPBLANK) )
844 { while(isspace(c))
845 c = getc(fd);
846 }
847
848 if ( isbegingroup(c) ) /* { */
849 { int nesting = 1; char *s = buf;
850
851 for(;;)
852 { c = getc(fd);
853
854 switch(CharType(c))
855 { case CM:
856 *s++ = c;
857 continue;
858 case BG:
859 nesting++;
860 break;
861 case EG:
862 nesting--;
863 break;
864 case SP:
865 while(isspace(c))
866 c = getc(fd);
867 ungetc(c, fd);
868 c = ' ';
869 break;
870 case EF:
871 error(ERR_UNEXPECTED_EOF, texfile(), texline());
872 }
873
874 if ( nesting > 0 )
875 { if ( --sz < 0 )
876 { buf[size-1] = EOS;
877 texarg = buf;
878 error(ERR_RUNAWAY_ARGUMENT, texfile(), texline());
879 }
880 *s++ = c;
881 } else
882 break;
883 }
884
885 *s++ = EOS;
886 } else if ( iscommand(c) ) /* \ */
887 { *buf++ = c;
888 size--;
889 getCommand(fd, buf, size);
890 } else
891 { *buf++ = c;
892 *buf = EOS;
893 }
894 }
895
896
897 static int
getOptionalArgument(Input fd,int flags,char * buf,int size)898 getOptionalArgument(Input fd, int flags, char *buf, int size)
899 { int c = getc(fd);
900 int sz = size;
901
902 if ( c == '[' )
903 { int nesting = 1; char *s = buf;
904
905 for(;;)
906 { c = getc(fd);
907
908 switch(CharType(c))
909 { case CM:
910 *s++ = c;
911 continue;
912 case BG:
913 nesting++;
914 break;
915 case EG:
916 nesting--;
917 break;
918 case EF:
919 error(ERR_UNEXPECTED_EOF, texfile(), texline());
920 default:
921 switch(c)
922 { case '[':
923 nesting++;
924 break;
925 case ']':
926 nesting--;
927 break;
928 }
929 }
930
931 if ( c != ']' || nesting > 0 )
932 { if ( --sz < 0 )
933 { buf[size-1] = EOS;
934 texarg = buf;
935 error(ERR_RUNAWAY_ARGUMENT, texfile(), texline());
936 }
937 *s++ = c;
938 } else
939 break;
940 }
941
942 *s++ = EOS;
943 return TRUE;
944 } else
945 ungetc(c, fd);
946
947 return FALSE;
948 }
949
950
951 static void
getDimension(Input fd,int flags,char * buf,int size)952 getDimension(Input fd, int flags, char *buf, int size)
953 { int c = getc(fd);
954
955 if ( !(flags & F_NOSKIPBLANK) )
956 { while(isspace(c))
957 c = getc(fd);
958 }
959
960 if ( isbegingroup(c) )
961 { ungetc(c, fd);
962
963 getArgument(fd, flags, buf, size);
964 } else if ( iscommand(c) )
965 { buf[0] = c;
966 getCommand(fd, &buf[1], size-1);
967 } else if ( isdigit(c) )
968 { char *s = buf;
969 do
970 { *s++ = c;
971 c = getc(fd);
972 } while(isdigit(c) || c == '.' );
973 if ( isalnum(c) )
974 { *s++ = c;
975 c = getc(fd);
976 }
977 if ( isalnum(c) )
978 { *s++ = c;
979 *s = EOS;
980 return;
981 }
982
983 error(ERR_BAD_DIM, texfile(), texline());
984 }
985 }
986
987
988 static void
parseCommand(Input fd,const char * name,CallBack func,void * ctx)989 parseCommand(Input fd, const char *name, CallBack func, void *ctx)
990 { CmdDescr cmd = lookupCommand(&name[1]); /* skip \ */
991 command g;
992 token t;
993 int n, c;
994 int flags = 0;
995
996 if ( !cmd )
997 { fprintf(stderr, "[WARNING: Unknown command: %s]\n", name);
998 cmd = newCommand(&name[1]);
999 }
1000
1001 g.command = cmd;
1002 g.flags = 0;
1003
1004 if ( cmd->name[1] == EOS && !isalnum(cmd->name[0]) )
1005 flags |= F_NOSKIPBLANK;
1006
1007 c = getc(fd);
1008 if ( cmd->arg_count > 0 )
1009 g.arguments = alloca(sizeof(char *) * cmd->arg_count);
1010 else
1011 g.arguments = NULL;
1012
1013 if ( !(flags & F_NOSKIPBLANK) )
1014 { while(isspace(c))
1015 c = getc(fd);
1016 }
1017
1018 if ( cmd->flags & CMD_MODIFY && c == '*' ) /* \section* (modified) */
1019 { g.flags |= CMD_MODIFY;
1020 c = getc(fd);
1021 if ( !(flags & F_NOSKIPBLANK) )
1022 { while(isspace(c))
1023 c = getc(fd);
1024 }
1025 }
1026 ungetc(c, fd);
1027
1028 for(n=0; n<cmd->arg_count; n++) /* process the arguments */
1029 { char abuf[MAXARG];
1030
1031 if ( cmd->arguments[n].flags & CA_OPTIONAL )
1032 { if ( getOptionalArgument(fd, flags, abuf, sizeof(abuf)) )
1033 { g.arguments[n] = alloca(strlen(abuf)+1);
1034 strcpy(g.arguments[n], abuf);
1035 } else
1036 g.arguments[n] = NULL;
1037 } else if ( cmd->arguments[n].flags & CA_DIM )
1038 { getDimension(fd, flags, abuf, sizeof(abuf));
1039 g.arguments[n] = alloca(strlen(abuf)+1);
1040 strcpy(g.arguments[n], abuf);
1041 } else
1042 { getArgument(fd, flags, abuf, sizeof(abuf));
1043 g.arguments[n] = alloca(strlen(abuf)+1);
1044 strcpy(g.arguments[n], abuf);
1045 }
1046 }
1047
1048 if ( cmd->function )
1049 { (*cmd->function)(&g, fd, func, ctx);
1050 } else
1051 { t.type = TOK_CMD;
1052 t.value.cmd = &g;
1053 (*func)(&t, ctx);
1054 }
1055 }
1056
1057 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1058 Handle verbatim environment
1059 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1060
1061 static void
env_verbatim(Environment e,Input fd,CallBack func,void * ctx)1062 env_verbatim(Environment e, Input fd, CallBack func, void *ctx)
1063 { char end[MAXCMD];
1064 char buf[MAXVERBATIM];
1065 int left = MAXVERBATIM-1;
1066 char *s = buf;
1067 char *ms;
1068 int el;
1069 token t;
1070
1071 sprintf(end, "\\end{%s}", e->environment->name);
1072 el = strlen(end);
1073 ms = buf+el;
1074
1075 for(;;)
1076 { if ( --left == 0 )
1077 error(ERR_VERBATIM_TOO_LONG, texfile(), texline());
1078 *s++ = getc(fd);
1079
1080 if ( s >= ms && s[-el] == '\\' && strncmp(&s[-el], end, el) == 0 )
1081 { s[-el] = EOS;
1082 t.type = TOK_VERBATIM;
1083 t.context = (char *)e->environment->name;
1084 t.value.string = buf;
1085 (*func)(&t, ctx);
1086 return;
1087 }
1088 }
1089 }
1090
1091
1092 static void
env_normal(Environment e,Input fd,CallBack func,void * ctx)1093 env_normal(Environment e, Input fd, CallBack func, void *ctx)
1094 { token t;
1095
1096 t.type = TOK_BEGIN_ENV;
1097 t.value.env = e;
1098 (*func)(&t, ctx);
1099 }
1100
1101
1102 static void
cmd_normal(Command g,Input fd,CallBack func,void * ctx)1103 cmd_normal(Command g, Input fd, CallBack func, void *ctx)
1104 { token t;
1105
1106 t.type = TOK_CMD;
1107 t.value.cmd = g;
1108 (*func)(&t, ctx);
1109 }
1110
1111
1112 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1113 handle \begin command
1114 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1115
1116 static void
cmd_begin(Command g,Input fd,CallBack func,void * ctx)1117 cmd_begin(Command g, Input fd, CallBack func, void *ctx)
1118 { char ename[MAXCMD];
1119 int enl, n;
1120 char *args[MAXCMDARGS];
1121 environment e;
1122 EnvDescr env;
1123 token t;
1124
1125 e.flags = 0;
1126 e.arguments = args;
1127 getArgument(fd, 0, ename, sizeof(ename));
1128 enl = strlen(ename);
1129
1130 if ( ename[enl-1] == '*' ) /* check for modified env */
1131 { ename[enl-1] = EOS;
1132 e.flags |= CMD_MODIFY;
1133 }
1134
1135 /* find the environment */
1136 if ( !(env = lookupEnvironment(ename)) )
1137 { fprintf(stderr, "WARNING: undefined environment: %s\n", ename);
1138 env = newEnvironment(ename);
1139 }
1140 e.environment = env;
1141
1142 for(n=0; n<env->arg_count; n++) /* process the arguments */
1143 { char abuf[MAXARG];
1144
1145 if ( env->arguments[n].flags & CA_OPTIONAL )
1146 { if ( getOptionalArgument(fd, 0, abuf, sizeof(abuf)) )
1147 { e.arguments[n] = alloca(strlen(abuf)+1);
1148 strcpy(e.arguments[n], abuf);
1149 } else
1150 e.arguments[n] = NULL;
1151 } else
1152 { getArgument(fd, 0, abuf, sizeof(abuf));
1153 e.arguments[n] = alloca(strlen(abuf)+1);
1154 strcpy(e.arguments[n], abuf);
1155 }
1156 }
1157
1158 if ( env->function )
1159 { (*env->function)(&e, fd, func, ctx);
1160 } else
1161 { t.type = TOK_BEGIN_ENV;
1162 t.value.env = &e;
1163 (*func)(&t, ctx);
1164 }
1165 }
1166
1167
1168 static void
cmd_end(Command g,Input fd,CallBack func,void * ctx)1169 cmd_end(Command g, Input fd, CallBack func, void *ctx)
1170 { token t;
1171
1172 t.type = TOK_END_ENV;
1173 t.value.string = g->arguments[0]; /* name of the environment */
1174 (*func)(&t, ctx);
1175 }
1176
1177
1178 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1179 handle \verb command
1180 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1181
1182 void
cmd_verb(Command g,Input fd,CallBack func,void * ctx)1183 cmd_verb(Command g, Input fd, CallBack func, void *ctx)
1184 { char buf[MAXVERB];
1185 char *s = buf;
1186 int delim = getc(fd);
1187 int c = getc(fd);
1188 token t;
1189 char ds[2];
1190
1191 ds[0] = delim;
1192 ds[1] = EOS;
1193 while(c != delim && c != EOF)
1194 { *s++ = c;
1195 c = getc(fd);
1196 }
1197 if ( c == EOF )
1198 error(ERR_UNEXPECTED_EOF, texfile(), texline());
1199 *s++ = EOS;
1200
1201 t.type = TOK_VERB;
1202 t.context = ds;
1203 t.value.string = buf;
1204 (*func)(&t, ctx);
1205 }
1206
1207 #define ACTIVE(n, f) { n, f }
1208
1209 typedef struct
1210 { char *name; /* name */
1211 AnyFunc function; /* associated function */
1212 } active, *Active;
1213
1214 static active active_list[] =
1215 { ACTIVE("verb", cmd_verb),
1216 ACTIVE("begin", cmd_begin),
1217 ACTIVE("end", cmd_end),
1218 ACTIVE("item", cmd_normal),
1219 ACTIVE("prolog", cmd_prolog),
1220 ACTIVE("verbatim", env_verbatim),
1221 ACTIVE("list", env_normal),
1222 ACTIVE("float", env_normal),
1223 ACTIVE(NULL, NULL)
1224 };
1225
1226 static AnyFunc
lookupFunction(const char * name)1227 lookupFunction(const char *name)
1228 { Active a = active_list;
1229
1230 for( ; a->name; a++ )
1231 { if ( streq(name, a->name) )
1232 return a->function;
1233 }
1234
1235 error(ERR_UNDEF_FUNCTION, texfile(), texline());
1236 return NULL;
1237 }
1238
1239
1240 static void
parseMath(Input fd,CallBack func,void * ctx)1241 parseMath(Input fd, CallBack func, void *ctx)
1242 { int c = getc(fd);
1243 char buf[MAXMATH];
1244 char *s = buf;
1245 token t;
1246 int nesting = 0;
1247 int left = MAXMATH-1;
1248
1249 if ( CharType(c) == MM ) /* $$ */
1250 { t.type = TOK_MATH_ENV;
1251 } else
1252 { t.type = TOK_MATH;
1253 ungetc(c, fd);
1254 }
1255
1256 for(;;)
1257 { c = getc(fd);
1258
1259 switch(CharType(c))
1260 { case BG:
1261 nesting++;
1262 break;
1263 case EG:
1264 nesting--;
1265 break;
1266 case SP:
1267 while(isspace(c))
1268 c = getc(fd);
1269 ungetc(c, fd);
1270 c = ' ';
1271 break;
1272 case EF:
1273 error(ERR_UNEXPECTED_EOF, texfile(), texline());
1274 }
1275
1276 if ( c != '$' || nesting > 0 )
1277 { if ( --left < 0 )
1278 { texarg = buf;
1279 error(ERR_RUNAWAY_MATH, texfile(), texline());
1280 }
1281 *s++ = c;
1282 } else
1283 break;
1284 }
1285
1286 *s = EOS;
1287 if ( t.type == TOK_MATH_ENV )
1288 { c = getc(fd);
1289 if ( c != '$' )
1290 error(ERR_BAD_MATH_ENV_CLOSURE, texfile(), texline());
1291 }
1292
1293 t.value.string = buf;
1294 (*func)(&t, ctx);
1295 }
1296
1297
1298 static void
parseTeX(Input fd,CallBack func,void * ctx)1299 parseTeX(Input fd, CallBack func, void *ctx)
1300 { token t;
1301
1302 int c = getc(fd);
1303
1304 for(;;)
1305 { switch(CharType(c))
1306 { case SP: /* blank space */
1307 { int lines = 0;
1308
1309 do
1310 { if ( c == '\n' )
1311 lines++;
1312 c = getc(fd);
1313 } while(isspace(c));
1314
1315 if ( lines >= 2 )
1316 { t.type = TOK_PAR;
1317 t.value.string = NULL;
1318
1319 (*func)(&t, ctx);
1320 } else if ( lines == 1 )
1321 { t.type = TOK_LINE;
1322 t.value.string = NULL;
1323
1324 (*func)(&t, ctx);
1325 } else
1326 { t.type = TOK_SPACE;
1327 t.value.string = NULL;
1328
1329 (*func)(&t, ctx);
1330 }
1331
1332 break;
1333 }
1334 case BG: /* { */
1335 { char buf[2];
1336
1337 buf[0] = c;
1338 buf[1] = EOS;
1339
1340 t.type = TOK_BEGIN_GROUP;
1341 t.value.string = buf;
1342 (*func)(&t, ctx);
1343 c = getc(fd);
1344
1345 break;
1346 }
1347 case EG: /* } */
1348 { char buf[2];
1349
1350 buf[0] = c;
1351 buf[1] = EOS;
1352
1353 t.type = TOK_END_GROUP;
1354 t.value.string = buf;
1355 (*func)(&t, ctx); c = getc(fd);
1356
1357 break;
1358 }
1359 case CM: /* \command */
1360 { char buf[MAXCMD];
1361
1362 buf[0] = c;
1363 getCommand(fd, &buf[1], MAXCMD-1);
1364
1365 parseCommand(fd, buf, func, ctx);
1366 c = getc(fd);
1367
1368 break;
1369 }
1370 case MM:
1371 { parseMath(fd, func, ctx);
1372 c = getc(fd);
1373
1374 break;
1375 }
1376 case SC: /* % comment */
1377 { do
1378 { c = getc(fd);
1379 } while( c != EOF && c != '\n' );
1380
1381 while(isspace(c))
1382 c = getc(fd);
1383
1384 break;
1385 }
1386 case EF: /* end-of-file */
1387 { t.type = TOK_EOF;
1388 t.value.string = NULL;
1389 (*func)(&t, ctx);
1390
1391 return;
1392 }
1393 case TD: /* & */
1394 case NB: /* ~ */
1395 case BQ: /* ` */
1396 case SQ: /* ' */
1397 { char buf[2];
1398
1399 buf[0] = c;
1400 buf[1] = EOS;
1401 t.type = TOK_WORD;
1402 t.value.string = buf;
1403 (*func)(&t, ctx);
1404 c = getc(fd);
1405
1406 break;
1407 }
1408 default: /* default: begin a word */
1409 { char buf[MAXWORD];
1410 char *s = buf;
1411
1412 do
1413 { *s++ = c;
1414 assert(s < buf+MAXWORD);
1415 c = getc(fd);
1416 } while(!wbreak(c));
1417 *s = EOS;
1418 t.type = TOK_WORD;
1419 if ( streq(buf, "[]") )
1420 t.value.string = "\\[]";
1421 else
1422 t.value.string = buf;
1423 (*func)(&t, ctx);
1424
1425 break;
1426 }
1427 }
1428 }
1429 }
1430
1431
1432 /*******************************
1433 * MAIN LOOP *
1434 *******************************/
1435
1436 static int
parseTeXFile(const char * file,CallBack func,void * ctx)1437 parseTeXFile(const char *file, CallBack func, void *ctx)
1438 { Input fd;
1439
1440 if ( (fd = openInputFile(file)) == NULL )
1441 { fprintf(stderr, "Can't open %s: %s", file, strerror(errno));
1442
1443 return FALSE;
1444 }
1445 parseTeX(fd, func, ctx);
1446
1447 closeInput(fd);
1448
1449 return TRUE;
1450 }
1451
1452
1453
1454 /*******************************
1455 * HTML OUTPUT *
1456 *******************************/
1457
/* Values for ppcontext.verbatim, selecting how output() treats text */
#define VERB_NORMAL	0		/* collapse blanks and newlines */
#define VERB_VERBATIM	1		/* copy characters unchanged */
#define VERB_PRE	2		/* as verbatim; special-cases <, > and & */

/* State of the pretty-printer; updated by output() while it writes */
typedef struct
{ int	envnesting;			/* nesting of begin/end */
  int	last_type;			/* type of previous token */
  int	line_pos;			/* position in line */
  int	newlines;			/* # consecutive newlines */
  int	spaces;				/* # consecutive spaces */
  int	verbatim;			/* verbatim output */
  int	left_margin;			/* left margin for text */
  int	right_margin;			/* right margin for text */
  FILE *fd;				/* output descriptor */
} ppcontext, *PPContext;
1473
1474
1475 static void
output(PPContext pp,const char * fmt,...)1476 output(PPContext pp, const char *fmt, ...)
1477 { va_list args;
1478 char buf[MAXOUTPUT];
1479 char *s = buf;
1480
1481 va_start(args, fmt);
1482 vsprintf(buf, fmt, args);
1483 va_end(args);
1484
1485 if ( pp->verbatim )
1486 { for(;;s++)
1487 { int c;
1488
1489 switch((c = *s))
1490 { case EOS:
1491 return;
1492 case '\n':
1493 putc(c, pp->fd);
1494 pp->line_pos = 0;
1495 pp->spaces++;
1496 break;
1497 case '\t':
1498 putc(c, pp->fd);
1499 pp->line_pos |= 0x7;
1500 pp->line_pos++;
1501 pp->spaces++;
1502 break;
1503 case ' ':
1504 pp->spaces++;
1505 putc(c, pp->fd);
1506 pp->line_pos++;
1507 break;
1508 default:
1509 pp->spaces = 0;
1510 if ( pp->verbatim == VERB_PRE )
1511 { switch(c)
1512 { case '<':
1513 fputs("<", pp->fd);
1514 break;
1515 case '>':
1516 fputs(">", pp->fd);
1517 break;
1518 case '&':
1519 fputs("&", pp->fd);
1520 break;
1521 default:
1522 putc(c, pp->fd);
1523 }
1524 } else
1525 putc(c, pp->fd);
1526 pp->line_pos++;
1527 }
1528 }
1529 } else
1530 { for(;;s++)
1531 { int c;
1532
1533 switch((c = *s))
1534 { case EOS:
1535 return;
1536 case '\n':
1537 if ( ++pp->newlines <= 2 )
1538 putc(c, pp->fd);
1539 pp->line_pos = 0;
1540 pp->spaces = 1;
1541 break;
1542 case '\t':
1543 c = ' ';
1544 case ' ':
1545 pp->newlines = 0;
1546 if ( ++pp->spaces <= 1 )
1547 { putc(c, pp->fd);
1548 pp->line_pos++;
1549 }
1550 break;
1551 default:
1552 pp->newlines = 0;
1553 pp->spaces = 0;
1554 putc(c, pp->fd);
1555 pp->line_pos++;
1556 }
1557 }
1558 }
1559 }
1560
1561
1562 static void
nl(PPContext pp)1563 nl(PPContext pp)
1564 { int spaces = pp->left_margin % 8;
1565 int tabs = pp->left_margin / 8;
1566 int n;
1567
1568 output(pp, "\n");
1569 for(n=0; n<tabs; n++)
1570 output(pp, "\t");
1571 for(n=0; n<spaces; n++)
1572 output(pp, " ");
1573 }
1574
1575
1576 void
outputBlank(PPContext pp)1577 outputBlank(PPContext pp)
1578 { switch(pp->last_type)
1579 { case TOK_SPACE:
1580 if ( pp->newlines == 0 )
1581 output(pp, " ");
1582 break;
1583 case TOK_LINE:
1584 if ( pp->newlines < 1 )
1585 output(pp, "\n");
1586 break;
1587 }
1588 }
1589
1590
/* Short names for tokens, indexed by token type.  put_token() prints
   them as "[name]" when the debug level is 1 or higher.
*/
static char *tok_names[] =
{ "CMD", "BG", "EG", "BE", "EE", "VERB",
  "VERBATIM", "$", "$$", "PAR", "W", "S", "L", "EOF"
};
1595
1596 void
put_token(Token t,void * ctx)1597 put_token(Token t, void *ctx)
1598 { PPContext pp = ctx;
1599 static CmdDescr CMD_BEGIN, CMD_END;
1600
1601 if ( !CMD_BEGIN )
1602 { CMD_BEGIN = lookupCommand("begin");
1603 CMD_END = lookupCommand("end");
1604 }
1605
1606 DEBUG(1, output(pp, "[%s]", tok_names[t->type]));
1607
1608 switch(t->type)
1609 { case TOK_CMD:
1610 { Command g = t->value.cmd;
1611 int n;
1612 int args_printed = 0;
1613
1614 outputBlank(pp);
1615 if ( g->command->pre_lines == PRE_COMMENT )
1616 { output(pp, "%\n");
1617 } else
1618 { while(pp->newlines < g->command->pre_lines)
1619 output(pp, "\n");
1620 }
1621 output(pp, "\\%s", g->command->name);
1622 if ( g->flags & CMD_MODIFY )
1623 output(pp, "*");
1624 for(n=0; n<g->command->arg_count; n++)
1625 { if ( g->command->arguments[n].flags & CA_OPTIONAL )
1626 { if ( g->arguments[n] )
1627 { output(pp, "[%s]", g->arguments[n]);
1628 args_printed++;
1629 }
1630 } else
1631 { output(pp, "{%s}", g->arguments[n]);
1632 args_printed++;
1633 }
1634 }
1635 if ( !args_printed )
1636 { if ( isalnum(g->command->name[strlen(g->command->name)-1]) )
1637 output(pp, " ");
1638 }
1639 while(pp->newlines < g->command->post_lines)
1640 output(pp, "\n");
1641
1642 break;
1643 }
1644 case TOK_BEGIN_ENV:
1645 { Environment e = t->value.env;
1646 int n;
1647
1648 outputBlank(pp);
1649 while(pp->newlines < CMD_BEGIN->pre_lines)
1650 output(pp, "\n");
1651 output(pp, "\\begin{%s", e->environment->name);
1652 if ( e->flags & CMD_MODIFY )
1653 output(pp, "*");
1654 output(pp, "}");
1655 for(n=0; n<e->environment->arg_count; n++)
1656 { if ( e->environment->arguments[n].flags & CA_OPTIONAL )
1657 { if ( e->arguments[n] )
1658 output(pp, "[%s]", e->arguments[n]);
1659 } else
1660 { output(pp, "{%s}", e->arguments[n]);
1661 }
1662 }
1663 while(pp->newlines < CMD_BEGIN->post_lines)
1664 output(pp, "\n");
1665
1666 break;
1667 }
1668 case TOK_END_ENV:
1669 { outputBlank(pp);
1670 while(pp->newlines < CMD_END->pre_lines)
1671 output(pp, "\n");
1672 output(pp, "\\begin{%s}", t->value.string);
1673 while(pp->newlines < CMD_END->post_lines)
1674 output(pp, "\n");
1675
1676 break;
1677 }
1678 case TOK_BEGIN_GROUP:
1679 outputBlank(pp);
1680 output(pp, "{");
1681 break;
1682 case TOK_END_GROUP:
1683 outputBlank(pp);
1684 output(pp, "}");
1685 break;
1686 case TOK_MATH:
1687 outputBlank(pp);
1688 output(pp, "$%s$", t->value.string);
1689 break;
1690 case TOK_MATH_ENV:
1691 outputBlank(pp);
1692 output(pp, "$$%s$$", t->value.string);
1693 break;
1694 case TOK_VERB:
1695 outputBlank(pp);
1696 pp->verbatim = VERB_VERBATIM;
1697 output(pp, "\\verb%s%s%s", t->context, t->value.string, t->context);
1698 pp->verbatim = VERB_NORMAL;
1699 break;
1700 case TOK_VERBATIM:
1701
1702 while( pp->newlines < CMD_BEGIN->pre_lines )
1703 output(pp, "\n");
1704 output(pp, "\\begin{%s}", t->context);
1705 pp->verbatim = VERB_VERBATIM;
1706 output(pp, "%s", t->value.string);
1707 pp->verbatim = VERB_NORMAL;
1708 output(pp, "\\end{%s}", t->context);
1709 while( pp->newlines < CMD_BEGIN->post_lines )
1710 output(pp, "\n");
1711 break;
1712 case TOK_PAR:
1713 output(pp, "\n\n");
1714 break;
1715 case TOK_WORD:
1716 { int pendingblank;
1717
1718 if ( pp->last_type == TOK_LINE )
1719 pp->last_type = TOK_SPACE;
1720 pendingblank = (pp->last_type == TOK_SPACE);
1721 outputBlank(pp); /* as space! */
1722 if ( pendingblank && /* no blanks in input: concatenate! */
1723 (int)strlen(t->value.string) + pp->line_pos > pp->right_margin )
1724 nl(pp);
1725 output(pp, "%s", t->value.string);
1726 break;
1727 }
1728 case TOK_LINE:
1729 case TOK_SPACE:
1730 break;
1731 case TOK_EOF:
1732 output(pp, "\n");
1733 break;
1734 default:
1735 assert(0);
1736 }
1737
1738 pp->last_type = t->type;
1739 }
1740
1741 #ifdef TEST
1742
1743 static void
error(int eno,const char * file,int line)1744 error(int eno, const char *file, int line)
1745 { fprintf(stderr, "ERROR: %s:%d: %s\n", file, line, tex_error_strings[eno]);
1746
1747 exit(1);
1748 }
1749
1750
1751 int
main(int argc,char ** argv)1752 main(int argc, char **argv)
1753 { parseCmdSpecs("cmd.spec");
1754
1755 if ( argc == 2 )
1756 { ppcontext pp;
1757
1758 pp.envnesting = 0;
1759 pp.last_type = TOK_EOF;
1760 pp.line_pos = 0;
1761 pp.newlines = 0;
1762 pp.spaces = 0;
1763 pp.verbatim = FALSE;
1764 pp.left_margin = 0;
1765 pp.right_margin = 72;
1766
1767 parseTeXFile(argv[1], put_token, &pp);
1768 }
1769
1770 exit(0);
1771 }
1772
1773 #endif /*TEST*/
1774
1775 #ifdef __SWI_PROLOG__
1776
1777 /*******************************
1778 * PROLOG CONNECTION *
1779 *******************************/
1780
1781 #include <SWI-Prolog.h>
1782
/* Token call-back that builds the Prolog token list (defined below) */
static int build_list(Token t, void *context);

/* State of build_list() while it constructs a Prolog list of tokens.
   `stack' holds the enclosing lists while the contents of a group or
   environment are being collected; popStack() restores them.
*/
typedef struct
{ term_t list;				/* list we are working on */
  term_t head;				/* head (tmp term ref) */
  int	 envnesting;			/* depth of stack */
  int	 prev_type0;			/* type of previous token */
  int	 prev_type1;			/* type of token before that */
  term_t stack[MAXENVNESTING];		/* Pushed environment */
} pl_context, *PlContext;
1793
1794
/* Functor and atom handles used by the Prolog interface.  All of them
   are assigned by initPrologConstants().
*/
static functor_t FUNCTOR_verb1;		/* verb/1 */
static functor_t FUNCTOR_verb2;		/* verb/2 */
static functor_t FUNCTOR_verbatim2;	/* verbatim/2 */
static functor_t FUNCTOR_verbatim1;	/* verbatim/1 */
static functor_t FUNCTOR_pre1;		/* pre/1 */
static functor_t FUNCTOR_dot2;		/* ./2 */
static functor_t FUNCTOR_brace1;	/* {}/1 */
static functor_t FUNCTOR_cmd1;		/* \/1 */
static functor_t FUNCTOR_cmd2;		/* \/2 */
static functor_t FUNCTOR_cmd3;		/* \/3 */
static functor_t FUNCTOR_env2;		/* env/2 */
static functor_t FUNCTOR_env3;		/* env/3 */
static functor_t FUNCTOR_env4;		/* env/4 */
static functor_t FUNCTOR_math1;		/* $/1 */
static functor_t FUNCTOR_mathenv1;	/* $$/1 */
static functor_t FUNCTOR_html1;		/* html/1 */
static functor_t FUNCTOR_html3;		/* html/3 */
static functor_t FUNCTOR_nospace1;	/* nospace/1 */
static atom_t	 ATOM_begin_group;	/* '\{' */
static atom_t	 ATOM_end_group;	/* '\}' */
static atom_t	 ATOM_nbsp;		/* '~' */
static atom_t	 ATOM_par;		/* 'par' */
static atom_t	 ATOM_star;		/* * */
static atom_t	 ATOM_minus;		/* - */
static atom_t	 ATOM_space;		/* ' ' */
static atom_t	 ATOM_nl;		/* '\n' */
#ifndef ATOM_nil			/* only if not already provided as a macro */
static atom_t	 ATOM_nil;		/* [] */
#endif
static atom_t	 ATOM_true;		/* true */
static atom_t	 ATOM_false;		/* false */

#ifndef ATOM_dot			/* only if not already provided as a macro */
#define ATOM_dot PL_new_atom(".")
#endif
1830
1831 static void
initPrologConstants()1832 initPrologConstants()
1833 { FUNCTOR_verb2 = PL_new_functor(PL_new_atom("verb"), 2);
1834 FUNCTOR_verb1 = PL_new_functor(PL_new_atom("verb"), 1);
1835 FUNCTOR_verbatim2 = PL_new_functor(PL_new_atom("verbatim"), 2);
1836 FUNCTOR_verbatim1 = PL_new_functor(PL_new_atom("verbatim"), 1);
1837 FUNCTOR_pre1 = PL_new_functor(PL_new_atom("pre"), 1);
1838 FUNCTOR_dot2 = PL_new_functor(ATOM_dot, 2);
1839 FUNCTOR_brace1 = PL_new_functor(PL_new_atom("{}"), 1);
1840 FUNCTOR_cmd1 = PL_new_functor(PL_new_atom("\\"), 1);
1841 FUNCTOR_cmd2 = PL_new_functor(PL_new_atom("\\"), 2);
1842 FUNCTOR_cmd3 = PL_new_functor(PL_new_atom("\\"), 3);
1843 FUNCTOR_env2 = PL_new_functor(PL_new_atom("env"), 2);
1844 FUNCTOR_env3 = PL_new_functor(PL_new_atom("env"), 3);
1845 FUNCTOR_env4 = PL_new_functor(PL_new_atom("env"), 4);
1846 FUNCTOR_math1 = PL_new_functor(PL_new_atom("$"), 1);
1847 FUNCTOR_mathenv1 = PL_new_functor(PL_new_atom("$$"), 1);
1848 FUNCTOR_html1 = PL_new_functor(PL_new_atom("html"), 1);
1849 FUNCTOR_html3 = PL_new_functor(PL_new_atom("html"), 3);
1850 FUNCTOR_nospace1 = PL_new_functor(PL_new_atom("nospace"), 1);
1851
1852 ATOM_begin_group = PL_new_atom("\\{");
1853 ATOM_end_group = PL_new_atom("\\}");
1854 ATOM_par = PL_new_atom("par");
1855 ATOM_nbsp = PL_new_atom("~");
1856 ATOM_star = PL_new_atom("*");
1857 ATOM_minus = PL_new_atom("-");
1858 ATOM_space = PL_new_atom(" ");
1859 ATOM_nl = PL_new_atom("\n");
1860 #ifndef ATOM_nil
1861 ATOM_nil = PL_new_atom("[]");
1862 #endif
1863 ATOM_true = PL_new_atom("true");
1864 ATOM_false = PL_new_atom("false");
1865 }
1866
/* Pretty-print context used by the Prolog interface: set up by
   pl_tex_tell(), written to by pl_put_tex_token() and reset to stdout
   by pl_tex_told().
*/
static ppcontext ppctx;
1868
1869 static foreign_t
pl_put_tex_token(term_t term)1870 pl_put_tex_token(term_t term)
1871 { token t;
1872 atom_t atom;
1873 functor_t f;
1874 static int last_is_word = FALSE;
1875
1876 t.type = -1;
1877
1878 if ( PL_get_atom(term, &atom) )
1879 { if ( atom == ATOM_begin_group )
1880 { t.type = TOK_BEGIN_GROUP;
1881 t.value.string = "{";
1882 } else if ( atom == ATOM_end_group )
1883 { t.type = TOK_END_GROUP;
1884 t.value.string = "}";
1885 } else if ( atom == ATOM_space )
1886 { t.type = TOK_SPACE;
1887 t.value.string = " ";
1888 } else if ( atom == ATOM_nl )
1889 { t.type = TOK_LINE;
1890 t.value.string = "\n";
1891 } else
1892 { if ( last_is_word ) /* regenerate the space tokens */
1893 { t.type = TOK_SPACE;
1894 t.value.string = " ";
1895
1896 put_token(&t, &ppctx);
1897 } else
1898 last_is_word = TRUE;
1899 t.type = TOK_WORD;
1900 t.value.string = (char *)PL_atom_chars(atom);
1901 }
1902 } else if ( PL_get_functor(term, &f) )
1903 { term_t arg = PL_new_term_ref();
1904 char *s;
1905
1906 if ( f == FUNCTOR_verb2 || f == FUNCTOR_verbatim2 )
1907 { if ( PL_get_arg(1, term, arg) && PL_get_chars(arg, &s, CVT_ATOMIC) )
1908 { t.context = s;
1909
1910 if ( PL_get_arg(2, term, arg) && PL_get_chars(arg, &s, CVT_ATOMIC) )
1911 { t.value.string = s;
1912 t.type = (f == FUNCTOR_verb2 ? TOK_VERB : TOK_VERBATIM);
1913 }
1914 }
1915 } else if ( f == FUNCTOR_cmd1 )
1916 { char *cname;
1917
1918 if ( PL_get_arg(1, term, arg) && PL_get_chars(arg, &cname, CVT_ATOMIC) )
1919 { command g;
1920
1921 t.type = TOK_CMD;
1922 t.value.cmd = &g;
1923 g.flags = 0;
1924 g.arguments = 0;
1925
1926 g.command = lookupCommand(cname);
1927 if ( !g.command )
1928 { fprintf(stderr, "[WARNING: Undefined command: %s]\n", cname);
1929 g.command = newCommand(cname);
1930 }
1931 }
1932 } else if ( f == FUNCTOR_cmd2 || f == FUNCTOR_cmd3 ||
1933 f == FUNCTOR_env2 || f == FUNCTOR_env3 )
1934 { char *cname;
1935 command g;
1936 environment e;
1937 int isenv = (f == FUNCTOR_env2 || f == FUNCTOR_env3);
1938 int ismod = (f == FUNCTOR_cmd3 || f == FUNCTOR_env3);
1939 term_t alist = PL_new_term_ref();
1940
1941 if ( isenv )
1942 { t.type = TOK_BEGIN_ENV;
1943 t.value.env = &e;
1944 e.flags = 0;
1945 } else
1946 { t.type = TOK_CMD;
1947 t.value.cmd = &g;
1948 g.flags = 0;
1949 }
1950
1951 if ( ismod )
1952 { _PL_get_arg(2, term, arg);
1953 if ( PL_get_atom(arg, &atom) && atom == ATOM_star )
1954 { if ( isenv )
1955 e.flags |= CMD_MODIFY;
1956 else
1957 g.flags |= CMD_MODIFY;
1958 }
1959 _PL_get_arg(3, term, alist);
1960 } else
1961 _PL_get_arg(2, term, alist);
1962
1963 _PL_get_arg(1, term, arg);
1964 if ( PL_get_atom_chars(arg, &cname) )
1965 { int n;
1966 term_t a2 = PL_new_term_ref();
1967 int argn;
1968 char **args;
1969
1970 if ( isenv )
1971 { e.environment = lookupEnvironment(cname);
1972 if ( !e.environment )
1973 { fprintf(stderr, "[WARNING: Undefined environment: %s]\n", cname);
1974 e.environment = newEnvironment(cname);
1975 }
1976 argn = e.environment->arg_count;
1977 args = e.arguments = alloca(sizeof(cmd_arg)*argn);
1978 } else
1979 { g.command = lookupCommand(cname);
1980 if ( !g.command )
1981 { fprintf(stderr, "[WARNING: Undefined command: %s]\n", cname);
1982 g.command = newCommand(cname);
1983 }
1984 argn = g.command->arg_count;
1985 args = g.arguments = alloca(sizeof(cmd_arg)*argn);
1986 }
1987
1988 for(n=0; n<argn; n++)
1989 { if ( PL_get_list(alist, arg, alist) &&
1990 PL_get_arg(1, arg, a2) &&
1991 PL_get_chars(a2, &s, CVT_ATOMIC) )
1992 args[n] = s;
1993 else
1994 args[n] = NULL;
1995 }
1996 }
1997 } else if ( f == FUNCTOR_math1 || f == FUNCTOR_mathenv1 )
1998 { term_t arg = PL_new_term_ref();
1999 char *s;
2000
2001 if ( PL_get_arg(1, term, arg) &&
2002 PL_get_chars(arg, &s, CVT_ATOMIC) )
2003 { t.type = (f == FUNCTOR_math1 ? TOK_MATH : TOK_MATH_ENV);
2004 t.value.string = s;
2005 }
2006 }
2007 }
2008
2009 if ( t.type != TOK_WORD )
2010 last_is_word = FALSE;
2011
2012 if ( t.type >= 0 )
2013 { put_token(&t, &ppctx);
2014 PL_succeed;
2015 }
2016
2017 return PL_warning("put_tex_token/1: instantiation error");
2018 }
2019
2020
2021 static void
tex2pl_from_string(const char * str,term_t tokens)2022 tex2pl_from_string(const char *str, term_t tokens)
2023 { pl_context ctx;
2024 Input fd;
2025
2026 ctx.list = PL_copy_term_ref(tokens);
2027 ctx.head = PL_new_term_ref();
2028 ctx.envnesting = 0;
2029 ctx.prev_type0 = TOK_EOF;
2030 ctx.prev_type1 = TOK_EOF;
2031
2032 fd = openInputString(str);
2033 parseTeX(fd, build_list, &ctx);
2034 closeInput(fd);
2035 }
2036
2037
2038 static foreign_t
pl_tex_atom_to_tokens(term_t txt,term_t tokens)2039 pl_tex_atom_to_tokens(term_t txt, term_t tokens)
2040 { char *s;
2041
2042 if ( PL_get_chars(txt, &s, CVT_ALL) )
2043 { tex2pl_from_string(s, tokens);
2044
2045 PL_succeed;
2046 }
2047
2048 PL_fail;
2049 }
2050
2051
2052
2053 static int
build_arguments(term_t alist,int nargs,CmdArg argspec,char ** args)2054 build_arguments(term_t alist, int nargs, CmdArg argspec, char **args)
2055 { int ga = 0; /* goal argument */
2056 term_t tmp = PL_new_term_ref();
2057
2058 for( ; ga < nargs; ga++ )
2059 { int rc;
2060
2061 if ( !PL_unify_list(alist, tmp, alist) )
2062 return FALSE;
2063
2064 if ( argspec[ga].flags & CA_OPTIONAL )
2065 { if ( args[ga] == NULL )
2066 { rc = PL_unify_atom(tmp, ATOM_nil);
2067 } else
2068 { if ( argspec[ga].flags & CA_TEXT )
2069 { term_t arg = PL_new_term_ref();
2070
2071 tex2pl_from_string(args[ga], arg);
2072 rc = PL_unify_term(tmp, /* [text] */
2073 PL_FUNCTOR, FUNCTOR_dot2,
2074 PL_TERM, arg,
2075 PL_ATOM, ATOM_nil);
2076 } else
2077 { rc = PL_unify_term(tmp, /* [text] */
2078 PL_FUNCTOR, FUNCTOR_dot2,
2079 PL_CHARS, args[ga],
2080 PL_ATOM, ATOM_nil);
2081 }
2082 }
2083 } else
2084 { if ( argspec[ga].flags & CA_TEXT )
2085 { term_t arg = PL_new_term_ref();
2086
2087 tex2pl_from_string(args[ga], arg);
2088 rc = PL_unify_term(tmp, /* {text} */
2089 PL_FUNCTOR, FUNCTOR_brace1,
2090 PL_TERM, arg);
2091
2092 } else
2093 { rc = PL_unify_term(tmp, /* {text} */
2094 PL_FUNCTOR, FUNCTOR_brace1,
2095 PL_CHARS, args[ga]);
2096 }
2097 }
2098
2099 if ( !rc )
2100 return rc;
2101 }
2102
2103 return PL_unify_nil(alist);
2104 }
2105
2106
2107 static void
popStack(PlContext ctx)2108 popStack(PlContext ctx)
2109 { if ( ctx->envnesting > 0 )
2110 ctx->list = ctx->stack[--ctx->envnesting];
2111 else
2112 error(ERR_ENV_UNDERFLOW, texfile(), texline());
2113 }
2114
2115
2116 static int
build_list(Token t,void * context)2117 build_list(Token t, void *context)
2118 { PlContext ctx = context;
2119
2120 DEBUG(1, put_token(t, &ppctx));
2121
2122 switch(t->type)
2123 { case TOK_EOF:
2124 return PL_unify_nil(ctx->list);
2125 case TOK_SPACE:
2126 case TOK_LINE:
2127 if ( !emit_space )
2128 goto out;
2129 }
2130
2131 if ( !emit_space )
2132 { if ( (ctx->prev_type0 == TOK_SPACE || ctx->prev_type0 == TOK_LINE) &&
2133 (ctx->prev_type1 != TOK_WORD || t->type != TOK_WORD) )
2134 { atom_t a = (ctx->prev_type0 == TOK_SPACE ? ATOM_space : ATOM_nl);
2135
2136 if ( !PL_unify_list(ctx->list, ctx->head, ctx->list) ||
2137 !PL_unify_atom(ctx->head, a) )
2138 return FALSE;
2139 }
2140 }
2141
2142 switch(t->type)
2143 { case TOK_END_GROUP:
2144 case TOK_END_ENV:
2145 if ( !PL_unify_nil(ctx->list) )
2146 return FALSE;
2147 popStack(ctx);
2148 goto out;
2149 }
2150
2151 if ( !PL_unify_list(ctx->list, ctx->head, ctx->list) )
2152 return FALSE;
2153
2154 switch(t->type)
2155 { case TOK_BEGIN_ENV:
2156 { Environment e = t->value.env;
2157 atom_t modified = (e->flags & CMD_MODIFY ? ATOM_star : ATOM_minus);
2158 term_t clist = PL_new_term_ref();
2159 term_t alist = PL_new_term_ref();
2160 int rc;
2161
2162 if ( e->environment->flags & CMD_MODIFY )
2163 { rc = PL_unify_term(ctx->head,
2164 PL_FUNCTOR, FUNCTOR_env4,
2165 PL_CHARS, e->environment->name,
2166 PL_ATOM, modified,
2167 PL_TERM, alist,
2168 PL_TERM, clist);
2169 } else
2170 { rc = PL_unify_term(ctx->head,
2171 PL_FUNCTOR, FUNCTOR_env3,
2172 PL_CHARS, e->environment->name,
2173 PL_TERM, alist,
2174 PL_TERM, clist);
2175 }
2176
2177 if ( rc )
2178 rc = build_arguments(alist, /* environment arguments */
2179 e->environment->arg_count,
2180 e->environment->arguments,
2181 e->arguments);
2182
2183 if ( !rc )
2184 return FALSE;
2185
2186 PL_reset_term_refs(alist);
2187 /* contents of the environment */
2188 if ( ctx->envnesting >= MAXENVNESTING )
2189 error(ERR_ENV_NESTING, texfile(), texline());
2190 ctx->stack[ctx->envnesting++] = ctx->list;
2191 ctx->list = clist; /* no need to copy */
2192
2193 break;
2194 }
2195 case TOK_CMD:
2196 { Command g = t->value.cmd;
2197 term_t alist = PL_new_term_ref();
2198 term_t modified = (g->flags & CMD_MODIFY ? ATOM_star : ATOM_minus);
2199 int rc;
2200
2201 if ( g->command->flags & CMD_MODIFY )
2202 { rc = PL_unify_term(ctx->head,
2203 PL_FUNCTOR, FUNCTOR_cmd3,
2204 PL_CHARS, g->command->name,
2205 PL_ATOM, modified,
2206 PL_TERM, alist);
2207 } else
2208 { if ( g->command->arg_count == 0 )
2209 { rc = PL_unify_term(ctx->head,
2210 PL_FUNCTOR, FUNCTOR_cmd1,
2211 PL_CHARS, g->command->name);
2212 PL_reset_term_refs(alist);
2213
2214 break;
2215 } else
2216 { rc = PL_unify_term(ctx->head,
2217 PL_FUNCTOR, FUNCTOR_cmd2,
2218 PL_CHARS, g->command->name,
2219 PL_TERM, alist);
2220 }
2221 }
2222
2223 if ( rc )
2224 rc = build_arguments(alist,
2225 g->command->arg_count,
2226 g->command->arguments,
2227 g->arguments);
2228
2229 PL_reset_term_refs(alist);
2230 break;
2231 }
2232 case TOK_BEGIN_GROUP:
2233 if ( ctx->envnesting >= MAXENVNESTING )
2234 error(ERR_ENV_NESTING, texfile(), texline());
2235 ctx->stack[ctx->envnesting++] = ctx->list;
2236 ctx->list = PL_copy_term_ref(ctx->head);
2237 break;
2238 case TOK_MATH:
2239 if ( !PL_unify_term(ctx->head,
2240 PL_FUNCTOR, FUNCTOR_math1,
2241 PL_STRING, t->value.string) )
2242 return FALSE;
2243 break;
2244 case TOK_MATH_ENV:
2245 if ( !PL_unify_term(ctx->head,
2246 PL_FUNCTOR, FUNCTOR_mathenv1,
2247 PL_STRING, t->value.string) )
2248 return FALSE;
2249 break;
2250 case TOK_VERB:
2251 if ( !PL_unify_term(ctx->head,
2252 PL_FUNCTOR, FUNCTOR_verb2,
2253 PL_CHARS, t->context,
2254 PL_STRING, t->value.string) )
2255 return FALSE;
2256 break;
2257 case TOK_VERBATIM:
2258 if ( !PL_unify_term(ctx->head,
2259 PL_FUNCTOR, FUNCTOR_verbatim2,
2260 PL_CHARS, t->context,
2261 PL_STRING, t->value.string) )
2262 return FALSE;
2263 break;
2264 case TOK_PAR:
2265 if ( !PL_unify_term(ctx->head,
2266 PL_FUNCTOR, FUNCTOR_cmd1,
2267 PL_ATOM, ATOM_par) )
2268 return FALSE;
2269 break;
2270 case TOK_WORD:
2271 if ( !PL_unify_atom_chars(ctx->head, t->value.string) )
2272 return FALSE;
2273 break;
2274 case TOK_SPACE:
2275 case TOK_LINE:
2276 if ( !PL_unify_atom(ctx->head, ATOM_space) )
2277 return FALSE;
2278 break;
2279 }
2280
2281 out:
2282 ctx->prev_type1 = ctx->prev_type0;
2283 ctx->prev_type0 = t->type;
2284
2285 return TRUE;
2286 }
2287
2288
2289 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2290 Calls tex:prolog_function(cmd([Star], [Args]))
2291 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2292
2293 static int
cmd_prolog(Command g,Input fd,CallBack func,void * ctx)2294 cmd_prolog(Command g, Input fd, CallBack func, void *ctx)
2295 { fid_t f = PL_open_foreign_frame();
2296 term_t t0 = PL_new_term_ref();
2297 term_t alist = PL_new_term_ref();
2298 term_t modified = (g->flags & CMD_MODIFY ? ATOM_star : ATOM_minus);
2299 predicate_t p = PL_predicate("prolog_function", 1, "tex");
2300 int rc;
2301
2302 if ( g->command->flags & CMD_MODIFY )
2303 { rc = PL_unify_term(t0,
2304 PL_FUNCTOR, FUNCTOR_cmd3,
2305 PL_CHARS, g->command->name,
2306 PL_ATOM, modified,
2307 PL_TERM, alist);
2308 } else
2309 { if ( g->command->arg_count == 0 )
2310 { rc = PL_unify_term(t0,
2311 PL_FUNCTOR, FUNCTOR_cmd1,
2312 PL_CHARS, g->command->name);
2313 } else
2314 { rc = PL_unify_term(t0,
2315 PL_FUNCTOR, FUNCTOR_cmd2,
2316 PL_CHARS, g->command->name,
2317 PL_TERM, alist);
2318 }
2319 }
2320
2321 if ( rc )
2322 rc = build_arguments(alist,
2323 g->command->arg_count,
2324 g->command->arguments,
2325 g->arguments);
2326
2327 if ( !rc )
2328 return FALSE;
2329
2330 PL_call_predicate(NULL, TRUE, p, t0);
2331 PL_discard_foreign_frame(f);
2332
2333 cmd_normal(g, fd, func, ctx);
2334
2335 return TRUE;
2336 }
2337
2338
2339 foreign_t
pl_tex_emit_spaces(term_t old,term_t new)2340 pl_tex_emit_spaces(term_t old, term_t new)
2341 { if ( PL_unify_atom(old, emit_space ? ATOM_true : ATOM_false) )
2342 { atom_t a;
2343
2344 if ( PL_get_atom(new, &a) )
2345 { if ( a == ATOM_true )
2346 { emit_space = 1;
2347 return TRUE;
2348 } else
2349 { emit_space = 0;
2350 return TRUE;
2351 }
2352 }
2353 }
2354
2355 return FALSE;
2356 }
2357
2358
2359 foreign_t
pl_tex_tokens(term_t file,term_t tokens)2360 pl_tex_tokens(term_t file, term_t tokens)
2361 { char *fname;
2362
2363 if ( PL_get_chars(file, &fname, CVT_ALL) )
2364 { pl_context ctx;
2365
2366 ctx.list = tokens;
2367 ctx.head = PL_new_term_ref();
2368 ctx.envnesting = 0;
2369 ctx.prev_type0 = TOK_EOF;
2370 ctx.prev_type1 = TOK_EOF;
2371
2372 parseTeXFile(fname, build_list, &ctx);
2373 PL_succeed;
2374 }
2375
2376 PL_fail;
2377 }
2378
2379
2380 foreign_t
pl_tex_command_property(term_t name,term_t pre,term_t post)2381 pl_tex_command_property(term_t name, term_t pre, term_t post)
2382 { char *cname;
2383
2384 if ( PL_get_atom_chars(name, &cname) )
2385 { CmdDescr cmd = lookupCommand(cname);
2386
2387 if ( cmd &&
2388 PL_unify_integer(pre, cmd->pre_lines) &&
2389 PL_unify_integer(post, cmd->post_lines) )
2390 PL_succeed;
2391 }
2392
2393 PL_fail;
2394 }
2395
2396
2397 foreign_t
pl_tex_debug(term_t old,term_t new)2398 pl_tex_debug(term_t old, term_t new)
2399 { if ( PL_unify_integer(old, debuglevel) &&
2400 PL_get_integer(new, &debuglevel) )
2401 PL_succeed;
2402
2403 PL_fail;
2404 }
2405
2406
2407 foreign_t
pl_tex_tell(term_t file)2408 pl_tex_tell(term_t file)
2409 { char *name;
2410
2411 if ( PL_get_chars(file, &name, CVT_ALL) )
2412 { FILE *fd = (streq(name, "-") ? stdout : fopen(name, "w"));
2413
2414 if ( fd )
2415 { ppctx.envnesting = 0; /* separate predicate? */
2416 ppctx.last_type = TOK_EOF;
2417 ppctx.line_pos = 0;
2418 ppctx.newlines = 0;
2419 ppctx.spaces = 0;
2420 ppctx.verbatim = FALSE;
2421 ppctx.left_margin = 0;
2422 ppctx.right_margin = 72;
2423 ppctx.fd = fd;
2424
2425 PL_succeed;
2426 }
2427 }
2428
2429 PL_fail;
2430 }
2431
2432
2433 foreign_t
pl_tex_told()2434 pl_tex_told()
2435 { fflush(ppctx.fd);
2436 if ( ppctx.fd != stdout )
2437 fclose(ppctx.fd);
2438 ppctx.fd = stdout;
2439
2440 PL_succeed;
2441 }
2442
2443
2444 foreign_t
pl_tex_read_commands(term_t file)2445 pl_tex_read_commands(term_t file)
2446 { char *name;
2447
2448 if ( PL_get_chars(file, &name, CVT_ALL) &&
2449 parseCmdSpecs(name) )
2450 PL_succeed;
2451
2452 PL_fail;
2453 }
2454
2455
2456 foreign_t
pl_tex_declare(term_t spec)2457 pl_tex_declare(term_t spec)
2458 { char *s;
2459
2460 if ( PL_get_chars(spec, &s, CVT_ALL) &&
2461 parseCommandSpec("tex_declare/1", 0, s) )
2462 PL_succeed;
2463
2464 PL_fail;
2465 }
2466
2467
2468 foreign_t
pl_tex_environment_function(term_t env,term_t func)2469 pl_tex_environment_function(term_t env, term_t func)
2470 { char *s;
2471 EnvDescr e;
2472
2473 if ( PL_get_atom_chars(env, &s) &&
2474 (e = lookupEnvironment(s)) &&
2475 e->fname )
2476 return PL_unify_atom_chars(func, e->fname);
2477
2478 PL_fail;
2479 }
2480
2481
2482 foreign_t
pl_tex_command_function(term_t cmd,term_t func)2483 pl_tex_command_function(term_t cmd, term_t func)
2484 { char *s;
2485 CmdDescr c;
2486
2487 if ( PL_get_atom_chars(cmd, &s) &&
2488 (c = lookupCommand(s)) &&
2489 c->fname )
2490 return PL_unify_atom_chars(func, c->fname);
2491
2492 PL_fail;
2493 }
2494
2495
2496 /*******************************
2497 * HTML OUTPUT *
2498 *******************************/
2499
2500 static void
output_n(PPContext pp,const char * s,int l)2501 output_n(PPContext pp, const char *s, int l)
2502 { if ( l > 0 )
2503 { char buf[l+1];
2504
2505 memcpy(buf, s, l);
2506 buf[l] = EOS;
2507 output(pp, "%s", buf);
2508 }
2509 }
2510
2511
2512 static void
output_html(PPContext pp,const char * s)2513 output_html(PPContext pp, const char *s)
2514 { int c;
2515 const char *from = s;
2516
2517 for(; (c=*s); s++)
2518 { switch(c)
2519 { case '<':
2520 output_n(pp, from, s-from);
2521 from = s+1;
2522 output(pp, "%s", "<");
2523 break;
2524 case '>':
2525 output_n(pp, from, s-from);
2526 from = s+1;
2527 output(pp, "%s", ">");
2528 break;
2529 case '&':
2530 output_n(pp, from, s-from);
2531 from = s+1;
2532 output(pp, "%s", "&");
2533 break;
2534 }
2535 }
2536
2537 output_n(pp, from, s-from);
2538 }
2539
2540
2541 static void
put_html_token(Token t,void * ctx)2542 put_html_token(Token t, void *ctx)
2543 { PPContext pp = ctx;
2544
2545 switch(t->type)
2546 { case TOK_CMD:
2547 { outputBlank(pp);
2548 while(pp->newlines < t->prelines)
2549 output(pp, "\n");
2550 output(pp, "%s", t->value.string);
2551 while(pp->newlines < t->postlines)
2552 output(pp, "\n");
2553
2554 break;
2555 }
2556 case TOK_VERBATIM:
2557 { pp->verbatim = VERB_VERBATIM;
2558 output(pp, "%s", t->value.string);
2559 pp->verbatim = VERB_NORMAL;
2560 break;
2561 }
2562 case TOK_PRE:
2563 { pp->verbatim = VERB_PRE;
2564 output(pp, "%s", t->value.string);
2565 pp->verbatim = VERB_NORMAL;
2566 break;
2567 }
2568 case TOK_VERB:
2569 { outputBlank(pp);
2570 pp->verbatim = VERB_VERBATIM;
2571 output(pp, "%s", t->value.string);
2572 pp->verbatim = VERB_NORMAL;
2573
2574 break;
2575 }
2576 case TOK_SPACE:
2577 break;
2578 case TOK_LINE:
2579 break;
2580 case TOK_NOSPACEWORD:
2581 outputBlank(pp);
2582 output_html(pp, t->value.string);
2583 break;
2584 case TOK_WORD:
2585 { int pendingblank;
2586
2587 if ( pp->last_type == TOK_LINE )
2588 pp->last_type = TOK_SPACE;
2589 pendingblank = (pp->last_type == TOK_SPACE);
2590 outputBlank(pp); /* as space! */
2591 if ( pendingblank && /* no blanks in input: concatenate! */
2592 (int)strlen(t->value.string) + pp->line_pos > pp->right_margin )
2593 nl(pp);
2594 output_html(pp, t->value.string);
2595 break;
2596 }
2597 case TOK_EOF:
2598 output(pp, "\n");
2599 break;
2600 default:
2601 assert(0);
2602 }
2603
2604 pp->last_type = t->type;
2605 }
2606
2607 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2608 Control output of HTML data format:
2609
2610 html(Text, [Pre, Post]) Output a command
2611 verbatim(Text) Output verbatim text
2612 verb(Text) Output short text
2613 Atom Output plain text
2614 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2615
2616 static foreign_t
pl_put_html_token(term_t term)2617 pl_put_html_token(term_t term)
2618 { token t;
2619 char *s;
2620 atom_t atom;
2621 static int last_is_word = FALSE;
2622
2623 t.type = -1;
2624
2625 if ( PL_is_functor(term, FUNCTOR_html3) )
2626 { term_t a = PL_new_term_ref();
2627
2628 if ( PL_get_arg(1, term, a) &&
2629 PL_get_chars(a, &s, CVT_ATOMIC) &&
2630 PL_get_arg(2, term, a) &&
2631 PL_get_integer(a, &t.prelines) &&
2632 PL_get_arg(3, term, a) &&
2633 PL_get_integer(a, &t.postlines) )
2634 { t.type = TOK_CMD;
2635 t.value.string = s;
2636 }
2637 } else if ( PL_is_functor(term, FUNCTOR_html1) )
2638 { term_t a = PL_new_term_ref();
2639
2640 if ( PL_get_arg(1, term, a) &&
2641 PL_get_chars(a, &s, CVT_ATOMIC) )
2642 { t.type = TOK_CMD;
2643 t.value.string = s;
2644 t.prelines = t.postlines = 0;
2645 }
2646 } else if ( PL_is_functor(term, FUNCTOR_verbatim1) )
2647 { term_t a = PL_new_term_ref();
2648
2649 if ( PL_get_arg(1, term, a) &&
2650 PL_get_chars(a, &s, CVT_ATOMIC) )
2651 { t.type = TOK_VERBATIM;
2652 t.value.string = s;
2653 }
2654 } else if ( PL_is_functor(term, FUNCTOR_verb1) )
2655 { term_t a = PL_new_term_ref();
2656
2657 if ( PL_get_arg(1, term, a) &&
2658 PL_get_chars(a, &s, CVT_ATOMIC) )
2659 { t.type = TOK_VERB;
2660 t.value.string = s;
2661 }
2662 } else if ( PL_is_functor(term, FUNCTOR_pre1) )
2663 { term_t a = PL_new_term_ref();
2664
2665 if ( PL_get_arg(1, term, a) &&
2666 PL_get_chars(a, &s, CVT_ATOMIC) )
2667 { t.type = TOK_PRE;
2668 t.value.string = s;
2669 }
2670 } else if ( PL_is_functor(term, FUNCTOR_nospace1) )
2671 { term_t a = PL_new_term_ref();
2672
2673 if ( PL_get_arg(1, term, a) &&
2674 PL_get_chars(a, &s, CVT_ATOMIC) )
2675 { t.type = TOK_NOSPACEWORD;
2676 t.value.string = s;
2677 }
2678 } else if ( PL_get_atom(term, &atom) )
2679 { if ( atom == ATOM_space )
2680 { t.type = TOK_SPACE;
2681 t.value.string = " ";
2682 } else if ( atom == ATOM_nl )
2683 { t.type = TOK_LINE;
2684 t.value.string = "\n";
2685 } else
2686 { if ( last_is_word ) /* regenerate the space tokens */
2687 { t.type = TOK_SPACE;
2688 t.value.string = " ";
2689
2690 put_html_token(&t, &ppctx);
2691 } else
2692 last_is_word = TRUE;
2693
2694 t.type = TOK_WORD;
2695 t.value.string = (char *)PL_atom_chars(atom);
2696 }
2697 } else if ( PL_get_chars(term, &s, CVT_ALL) )
2698 { if ( last_is_word ) /* regenerate the space tokens */
2699 { t.type = TOK_SPACE;
2700 t.value.string = " ";
2701
2702 put_html_token(&t, &ppctx);
2703 } else
2704 last_is_word = TRUE;
2705
2706 t.type = TOK_WORD;
2707 t.value.string = s;
2708 }
2709
2710 if ( t.type != TOK_WORD )
2711 last_is_word = FALSE;
2712
2713 if ( t.type >= 0 )
2714 { put_html_token(&t, &ppctx);
2715 PL_succeed;
2716 }
2717
2718 return PL_warning("put_html_token/1: instantiation error");
2719 }
2720
2721 /*******************************
2722 * ERRORS *
2723 *******************************/
2724
2725 static void
error(int eno,const char * file,int line)2726 error(int eno, const char *file, int line)
2727 { fprintf(stderr,
2728 "[TeX tokeniser: %s:%d: %s]\n",
2729 file, line, tex_error_strings[eno]);
2730 switch(eno)
2731 { case ERR_RUNAWAY_ARGUMENT:
2732 case ERR_RUNAWAY_MATH:
2733 { char argstart[50];
2734 strncpy(argstart, texarg, 50);
2735 argstart[49] = EOS;
2736 fprintf(stderr, "Start: \"%s\"\n", argstart);
2737 break;
2738 }
2739 }
2740
2741 exit(1);
2742 }
2743
2744 static void
warn(int eno,const char * file,int line)2745 warn(int eno, const char *file, int line)
2746 { fprintf(stderr,
2747 "WARNING: %s:%d: %s\n",
2748 file, line, tex_error_strings[eno]);
2749 }
2750
2751
2752 extern void install_ps(void);
2753
2754 install_t
install()2755 install()
2756 { initPrologConstants();
2757
2758 PL_register_foreign("tex_tokens", 2, pl_tex_tokens, 0);
2759 PL_register_foreign("tex_command_property", 3, pl_tex_command_property, 0);
2760 PL_register_foreign("put_tex_token", 1, pl_put_tex_token, 0);
2761 PL_register_foreign("put_html_token", 1, pl_put_html_token, 0);
2762 PL_register_foreign("tex_debug", 2, pl_tex_debug, 0);
2763 PL_register_foreign("tex_tell", 1, pl_tex_tell, 0);
2764 PL_register_foreign("tex_told", 0, pl_tex_told, 0);
2765 PL_register_foreign("tex_read_commands", 1, pl_tex_read_commands, 0);
2766 PL_register_foreign("tex_declare", 1, pl_tex_declare, 0);
2767 PL_register_foreign("tex_environment_function",
2768 2, pl_tex_environment_function,
2769 0);
2770 PL_register_foreign("tex_command_function", 2, pl_tex_command_function, 0);
2771 PL_register_foreign("tex_atom_to_tokens", 2, pl_tex_atom_to_tokens, 0);
2772 PL_register_foreign("tex_emit_spaces", 2, pl_tex_emit_spaces, 0);
2773
2774 ppctx.envnesting = 0; /* separate predicate? */
2775 ppctx.last_type = TOK_EOF;
2776 ppctx.line_pos = 0;
2777 ppctx.newlines = 0;
2778 ppctx.spaces = 0;
2779 ppctx.verbatim = FALSE;
2780 ppctx.left_margin = 0;
2781 ppctx.right_margin = 72;
2782 ppctx.fd = stdout;
2783
2784 install_ps();
2785 }
2786
2787 #endif /*__SWI_PROLOG__*/
2788