1 
2 /* parser.c - parser for LaTeX code
3 
4 Copyright (C) 1998-2002 The Free Software Foundation
5 
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
10 
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 GNU General Public License for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 
20 This file is available from http://sourceforge.net/projects/latex2rtf/
21 
22 Authors:
23     1998-2000 Georg Lehner
24     2001-2007 Scott Prahl
25 */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <ctype.h>
31 
32 #include "main.h"
33 #include "commands.h"
34 #include "cfg.h"
35 #include "stack.h"
36 #include "utils.h"
37 #include "parser.h"
38 #include "fonts.h"
39 #include "lengths.h"
40 #include "definitions.h"
41 #include "funct1.h"
42 #include "encodings.h"
43 
/* One entry of the parser's stack of input sources (see g_parser_stack). */
struct InputStackType {
    char *string;           /* read position inside a string source, NULL for a file source */
    char *string_start;     /* start of the duplicated string; this is the pointer that is freed */
    FILE *file;             /* stream of a file source, NULL for a string source */
    char *file_name;        /* name reported for this source */
    int file_line;          /* line number recorded for this source */
};

typedef struct InputStackType InputStackType;
51 
52 #define PARSER_SOURCE_MAX 100
53 #define SCAN_BUFFER_SIZE   5000
54 
55 static InputStackType g_parser_stack[PARSER_SOURCE_MAX];
56 
57 static int g_parser_depth = -1;
58 static char *g_parser_string = "stdin";
59 static FILE *g_parser_file = NULL;
60 static int g_parser_line = 1;
61 static int g_parser_include_level = 0;
62 
63 static char g_parser_currentChar;   /* Global current character */
64 static char g_parser_lastChar;
65 static char g_parser_penultimateChar;
66 static int g_parser_backslashes;
67 
68 #define TRACK_LINE_NUMBER_MAX 10
69 static int g_track_line_number_stack[TRACK_LINE_NUMBER_MAX];
70 static int g_track_line_number = -1;
71 
PushTrackLineNumber(int flag)72 void PushTrackLineNumber(int flag)
73 
74 /***************************************************************************
75  purpose:    set whether or not line numbers should be tracked in LaTeX source file
76 ****************************************************************************/
77 {
78     if (g_track_line_number >= TRACK_LINE_NUMBER_MAX)
79         diagnostics(ERROR, "scan ahead stack too large! Sorry.");
80 
81     g_track_line_number++;
82     g_track_line_number_stack[g_track_line_number] = flag;
83 }
84 
PopTrackLineNumber(void)85 void PopTrackLineNumber(void)
86 
87 /***************************************************************************
88  purpose:    restore last state of line numbers tracking in LaTeX source file
89 ****************************************************************************/
90 {
91     if (g_track_line_number < 0)
92         diagnostics(ERROR, "scan ahead stack too small! Sorry.");
93 
94     g_track_line_number--;
95 }
96 
97 /***************************************************************************
98  purpose:     returns the current line number of the text being processed
99 ****************************************************************************/
CurrentLineNumber(void)100 int CurrentLineNumber(void)
101 {
102     return g_parser_line;
103 }
104 
UpdateLineNumber(char * s)105 void UpdateLineNumber(char *s)
106 
107 /***************************************************************************
108  purpose:    advances the line number for each '\n' in s
109 ****************************************************************************/
110 {
111     if (s == NULL)
112         return;
113 
114     while (*s != '\0') {
115         if (*s == '\n')
116             g_parser_line++;
117         s++;
118     }
119 }
120 
121 /***************************************************************************
122  purpose:     returns the current file descriptor
123 ****************************************************************************/
CurrentFileDescriptor(void)124 int CurrentFileDescriptor(void)
125 {
126     int fd=0;
127     if (g_parser_file)
128         fd = fileno(g_parser_file);
129 
130     return fd;
131 }
132 
CurrentFileName(void)133 char *CurrentFileName(void)
134 
135 /***************************************************************************
136  purpose:     returns the filename of the text being processed
137 ****************************************************************************/
138 {
139     char *s = "(Not set)";
140 
141     if (g_parser_stack[g_parser_depth].file_name)
142         return g_parser_stack[g_parser_depth].file_name;
143     else
144         return s;
145 }
146 
147 /*
148     The following two routines allow parsing of multiple files and strings
149 */
150 
PushSource(const char * filename,const char * string)151 int PushSource(const char *filename, const char *string)
152 
153 /***************************************************************************
154  purpose:     change the source used by getRawTexChar() to either file or string
155               --> pass NULL for unused argument (both NULL means use stdin)
156               --> PushSource duplicates string
157 ****************************************************************************/
158 {
159     char s[50];
160     FILE *p = NULL;
161     char *name = NULL;
162     int i;
163     int line = 1;
164 
165     if (0) {
166         diagnostics(WARNING, "Before PushSource** line=%d, g_parser_depth=%d, g_parser_include_level=%d",
167           g_parser_line, g_parser_depth, g_parser_include_level);
168         for (i = 0; i <= g_parser_depth; i++) {
169             if (g_parser_stack[i].file)
170                 diagnostics(WARNING, "i=%d file   =%s, line=%d", i, g_parser_stack[i].file_name, g_parser_stack[i].file_line);
171 
172             else {
173                 strncpy_printable(s, g_parser_stack[i].string, 25);
174                 diagnostics(WARNING, "i=%d string =%s, line=%d", i, s, g_parser_stack[i].file_line);
175             }
176         }
177     }
178 
179     /* save current values for linenumber and string */
180     if (g_parser_depth >= 0) {
181         g_parser_stack[g_parser_depth].file_line = g_parser_line;
182         g_parser_stack[g_parser_depth].string = g_parser_string;
183     }
184 
185     /* first test to see if we should use stdin */
186     if ((filename == NULL || strcmp(filename, "-") == 0) && string == NULL) {
187         g_parser_include_level++;
188         g_parser_line = 1;
189         name = strdup("stdin");
190         p = stdin;
191 
192         /* if not then try to open a file */
193     } else if (filename) {
194         p = my_fopen((char *)filename, "rb");
195         if (p == NULL)
196             return 1;
197         g_parser_include_level++;
198         g_parser_line = 1;
199         name = strdup(filename);
200 
201     } else {
202         name = CurrentFileName();
203         line = CurrentLineNumber();
204     }
205 
206     g_parser_depth++;
207 
208     if (g_parser_depth >= PARSER_SOURCE_MAX)
209         diagnostics(ERROR, "More than %d PushSource() calls", (int) PARSER_SOURCE_MAX);
210 
211     g_parser_string = (string) ? strdup(string) : NULL;
212     g_parser_stack[g_parser_depth].string = g_parser_string;
213     g_parser_stack[g_parser_depth].string_start = g_parser_string;
214     g_parser_stack[g_parser_depth].file = p;
215     g_parser_stack[g_parser_depth].file_line = line;
216     g_parser_stack[g_parser_depth].file_name = name;
217     g_parser_file = p;
218     g_parser_string = g_parser_stack[g_parser_depth].string;
219 
220     if (g_parser_file) {
221         diagnostics(3, "Opening Source File %s", g_parser_stack[g_parser_depth].file_name);
222     } else {
223         diagnostics(4, "Opening Source string");
224         show_string(5, g_parser_string, "opening");
225     }
226 
227     if (0) {
228         diagnostics(WARNING, "After PushSource** line=%d, g_parser_depth=%d, g_parser_include_level=%d",
229           g_parser_line, g_parser_depth, g_parser_include_level);
230         for (i = 0; i <= g_parser_depth; i++) {
231             if (g_parser_stack[i].file)
232                 diagnostics(WARNING, "i=%d file   =%s, line=%d", i, g_parser_stack[i].file_name, g_parser_stack[i].file_line);
233 
234             else {
235                 strncpy_printable(s, g_parser_stack[i].string, 25);
236                 diagnostics(WARNING, "i=%d string =%s, line=%d", i, s, g_parser_stack[i].file_line);
237             }
238         }
239     }
240     return 0;
241 }
242 
StillSource(void)243 int StillSource(void)
244 
245 /***************************************************************************
246  purpose:     figure out if text remains to be processed
247 ****************************************************************************/
248 {
249     if (g_parser_file)
250         return (!feof(g_parser_file));
251     else
252         return (*g_parser_string != '\0');
253 }
254 
EndSource(void)255 void EndSource(void)
256 {
257     if (g_parser_file)
258         fseek(g_parser_file, 0, SEEK_END);
259     else
260         *g_parser_string = '\0';
261 
262     return;
263 }
264 
PopSource(void)265 void PopSource(void)
266 
267 /***************************************************************************
268  purpose:     return to the previous source
269 ****************************************************************************/
270 {
271     char s[50];
272     int i;
273 
274     if (g_parser_depth < 0) {
275         diagnostics(1, "Hmmm.  More PopSource() calls than PushSource() calls");
276         return;
277     }
278 
279     if (0) {
280         diagnostics(WARNING, "Before PopSource** line=%d, g_parser_depth=%d, g_parser_include_level=%d",
281           g_parser_line, g_parser_depth, g_parser_include_level);
282         for (i = 0; i <= g_parser_depth; i++) {
283             if (g_parser_stack[i].file)
284                 diagnostics(WARNING, "i=%d file   =%s, line=%d", i, g_parser_stack[i].file_name, g_parser_stack[i].file_line);
285 
286             else {
287                 strncpy_printable(s, g_parser_stack[i].string, 25);
288                 diagnostics(WARNING, "i=%d string =%s, line=%d", i, s, g_parser_stack[i].file_line);
289             }
290         }
291     }
292 
293     if (g_parser_file) {
294         diagnostics(3, "Closing Source File '%s'", g_parser_stack[g_parser_depth].file_name);
295         fclose(g_parser_file);
296         free(g_parser_stack[g_parser_depth].file_name);
297         g_parser_stack[g_parser_depth].file_name = NULL;
298         g_parser_include_level--;
299     }
300 
301     if (g_parser_string) {
302         if (strlen(g_parser_stack[g_parser_depth].string_start) < 49)
303             strcpy(s, g_parser_stack[g_parser_depth].string_start);
304         else {
305             strncpy(s, g_parser_stack[g_parser_depth].string_start, 49);
306             s[49] = '\0';
307         }
308 
309         show_string(5, s, "closing");
310         free(g_parser_stack[g_parser_depth].string_start);
311         g_parser_stack[g_parser_depth].string_start = NULL;
312     }
313 
314     g_parser_depth--;
315 
316     if (g_parser_depth >= 0) {
317         g_parser_string = g_parser_stack[g_parser_depth].string;
318         g_parser_file = g_parser_stack[g_parser_depth].file;
319     }
320 
321     if (g_parser_file && 0) {
322         g_parser_line = g_parser_stack[g_parser_depth].file_line;
323     }
324 
325     if (g_parser_file)
326         diagnostics(4, "Resuming Source File '%s'", g_parser_stack[g_parser_depth].file_name);
327     else {
328         diagnostics(5, "Resuming Source string");
329         show_string(5,g_parser_string,"resuming");
330     }
331 
332     if (0) {
333         diagnostics(WARNING, "After PopSource** line=%d, g_parser_depth=%d, g_parser_include_level=%d",
334           g_parser_line, g_parser_depth, g_parser_include_level);
335         for (i = 0; i <= g_parser_depth; i++) {
336             if (g_parser_stack[i].file)
337                 diagnostics(WARNING, "i=%d file   =%s, line=%d", i, g_parser_stack[i].file_name, g_parser_stack[i].file_line);
338 
339             else {
340                 strncpy_printable(s, g_parser_stack[i].string, 25);
341                 diagnostics(WARNING, "i=%d string =%s, line=%d", i, s, g_parser_stack[i].file_line);
342             }
343         }
344     }
345 }
346 
CmdInclude(int code)347 void CmdInclude(int code)
348 
349 /******************************************************************************
350  purpose: handles \input file, \input{file}, \include{file}
351           code == 0 for \include
352           code == 1 for \input
353  ******************************************************************************/
354 {
355     int cNext;
356     char name[100];
357     int i;
358     char *basename=NULL;
359     char *texname=NULL;
360 
361     cNext = getNonSpace();
362 
363     if (cNext == '{') {         /* \input{gnu} or \include{gnu} */
364         ungetTexChar(cNext);
365         basename = getBraceParam();
366 
367     } else {                    /* \input gnu */
368         i = 0;
369         while (cNext != '\0' && !isspace(cNext)) {
370             if (i<99) name[i] = (char) cNext;
371             i++;
372             cNext = getTexChar();
373         }
374 
375         if (i<99)
376             name[i] = '\0';
377         else {
378             name[99] = '\0';
379             diagnostics(WARNING, "\\input filename '%s' more than 100 chars, skipping",name);
380             return;
381         }
382 
383         basename = strdup(name);
384     }
385 
386     if (strstr(basename, "german.sty") != NULL) {
387         GermanMode = TRUE;
388         PushEnvironment(GERMAN_MODE);
389         free(basename);
390         return;
391 
392     } else if (strstr(basename, "french.sty") != NULL) {
393         FrenchMode = TRUE;
394         PushEnvironment(FRENCH_MODE);
395         free(basename);
396         return;
397     }
398 
399     if (basename && strstr(basename, ".tex") == NULL && strstr(basename, ".ltx") == NULL)         /* append .tex if missing */
400         texname = strdup_together(basename, ".tex");
401 
402     if (texname && PushSource(texname, NULL) == 0)            /* Try the .tex name first*/
403         diagnostics(WARNING, "Including file <%s> (.tex appended)", texname);
404 
405     else if (basename && PushSource(basename, NULL) == 0)     /* Try the basename second*/
406         diagnostics(WARNING, "Including file <%s>", basename);
407 
408     /* \include{file} always starts a new page */
409     if (code == 0)
410         PushSource(NULL, "\\pagebreak ");
411 
412     if (basename) free(basename);
413     if (texname)  free(texname);
414 }
415 
416 
/* end-of-line bytes recognised by getRawTexChar(); parenthesised so that they
   expand safely inside any expression */
#define CR ((char) 0x0d)
#define LF ((char) 0x0a)
419 
getParserDepth(void)420 int getParserDepth(void)
421 {
422     return g_parser_depth;
423 }
424 
425 
getRawTexChar(void)426 char getRawTexChar(void)
427 
428 /***************************************************************************
429  purpose:     get the next character from the input stream with minimal
430               filtering  (CRLF or CR or LF ->  \n) and '\t' -> ' '
431               it also keeps track of the line number
432               should only be used by \verb and \verbatim and getTexChar()
433 ****************************************************************************/
434 {
435     int thechar;
436 
437     if (g_parser_file) {
438         thechar = getc(g_parser_file);
439         while (thechar == EOF) {
440             if (!feof(g_parser_file))
441                 diagnostics(ERROR, "Unknown file I/O error reading latex file\n");
442             else if (g_parser_include_level > 1) {
443                 PopSource();    /* go back to parsing parent */
444                 thechar = getRawTexChar();  /* get next char from parent file */
445             } else
446                 thechar = '\0';
447         }
448         if (thechar == CR) {   /* convert CR, CRLF, or LF to \n */
449             thechar = getc(g_parser_file);
450             if (thechar != LF && !feof(g_parser_file))
451                 ungetc(thechar, g_parser_file);
452             thechar = '\n';
453         } else if (thechar == LF)
454             thechar = '\n';
455         else if (thechar == '\t')
456             thechar = ' ';
457 
458         g_parser_currentChar = (char) thechar;
459 
460     } else {
461 
462         if (g_parser_string && *g_parser_string) {
463             thechar = *g_parser_string;
464 
465             /* convert CR, CRLF, or LF to \n */
466             if (thechar == CR) {
467                 g_parser_string++;
468                 thechar = *g_parser_string;
469                 if (thechar != LF)
470                     g_parser_string--;
471                 thechar = '\n';
472             } else if (thechar == LF)
473                 thechar = '\n';
474             else if (thechar == '\t')
475                 thechar = ' ';
476 
477             g_parser_currentChar = thechar;
478             g_parser_string++;
479         }
480         else if (g_parser_depth > 15)
481         {
482              PopSource();    /* go back to parsing parent */
483              g_parser_currentChar = getRawTexChar();  /* get next char from parent file */
484         } else
485             g_parser_currentChar = '\0';
486     }
487 
488     if (g_parser_currentChar == '\n' && g_track_line_number_stack[g_track_line_number])
489         g_parser_line++;
490 
491     g_parser_penultimateChar = g_parser_lastChar;
492     g_parser_lastChar = g_parser_currentChar;
493     if (0) {
494         if (g_parser_currentChar=='\n')
495             diagnostics(5,"getRawTexChar = <\\n>");
496         else if (g_parser_currentChar=='\0')
497             diagnostics(5,"getRawTexChar = <\\0> depth=%d, files=%d", g_parser_depth, g_parser_include_level);
498         else
499             diagnostics(5,"getRawTexChar = <%2c>",g_parser_currentChar);
500     }
501     /* if (g_parser_currentChar=='\0') exit(0);*/
502     return g_parser_currentChar;
503 }
504 
505 #undef CR
506 #undef LF
507 
ungetTexChar(char c)508 void ungetTexChar(char c)
509 
510 /****************************************************************************
511 purpose: rewind the filepointer in the LaTeX-file by one
512  ****************************************************************************/
513 {
514     if (c == '\0')
515         return;
516 
517     if (g_parser_file) {
518 
519         ungetc(c, g_parser_file);
520 
521     } else {
522         g_parser_string--;
523         if (g_parser_string && *g_parser_string) {
524             *g_parser_string = c;
525         }
526     }
527 
528     if (c == '\n' && g_track_line_number_stack[g_track_line_number])
529         g_parser_line--;
530 
531     g_parser_currentChar = g_parser_lastChar;
532     g_parser_lastChar = g_parser_penultimateChar;
533     g_parser_penultimateChar = '\0';    /* no longer know what that it was */
534     g_parser_backslashes = 0;
535     diagnostics(6, "after ungetTexChar=<%c> backslashes=%d line=%ld", c, g_parser_backslashes, g_parser_line);
536 }
537 
skipBOM(int cThis)538 int skipBOM(int cThis)
539 {
540     /* UTF8 Byte Order Mark */
541     if (cThis == 0xEF) {
542     	cThis = getRawTexChar();
543     	if (cThis == 0xBB) {
544      		cThis = getRawTexChar();
545     		if (cThis == 0xBF) {
546     			CmdFontEncoding(ENCODING_UTF8);
547      			cThis = getRawTexChar();
548      			diagnostics(2, "UTF 8 BOM encountered, now assuming UTF8 input");
549      		}
550     	}
551     }
552 
553     /* UTF16 Byte Order Mark */
554     if (cThis == 0xFE) {
555     	cThis = getRawTexChar();
556     	if (cThis == 0xFF)
557      		diagnostics(2, "UTF 16 is not supported, you might try converting to UTF8");
558     }
559 
560     return cThis;
561 }
562 
getTexChar()563 char getTexChar()
564 
565 /***************************************************************************
566  purpose:     get the next character from the input stream
567               This should be the usual place to access the LaTeX file
568               It filters the input stream so that % is handled properly
569 ****************************************************************************/
570 {
571     int cThis;
572 
573     cThis = getRawTexChar();
574     cThis = skipBOM(cThis);
575 
576     if (cThis == '\\')
577         g_parser_backslashes++;
578     else
579         g_parser_backslashes = 0;
580     if (0) {
581         if (cThis=='\n')
582             diagnostics(6,"getRawTexChar = <\\n> backslashes=%d line=%ld", g_parser_backslashes, g_parser_line);
583         else if (cThis=='\0')
584             diagnostics(6,"getRawTexChar = <\\0> backslashes=%d line=%ld", g_parser_backslashes, g_parser_line);
585         else
586             diagnostics(6,"getRawTexChar = <%2c> backslashes=%d line=%ld",cThis, g_parser_backslashes, g_parser_line);
587     }
588     return cThis;
589 }
590 
/****************************************************************************
 purpose: discard input up to and including the next newline, or up to the
          end of the input, whichever comes first.
          Reads with getRawTexChar().
 ****************************************************************************/
void skipToEOL(void)
{
    char c;

    do {
        c = getRawTexChar();
    } while (c != '\0' && c != '\n');
}
603 
/***************************************************************************
 Description: read past spaces and newlines
 returns:     the first character that is neither ' ' nor '\n'
****************************************************************************/
char getNonBlank(void)
{
    char c;

    do {
        c = getTexChar();
    } while (c == ' ' || c == '\n');

    return c;
}
618 
/***************************************************************************
 Description: read past spaces
 returns:     the first character that is not ' ' ('\0' at end of input)
****************************************************************************/
char getNonSpace(void)
{
    char c;

    do {
        c = getTexChar();
    } while (c == ' ');

    return c;
}
631 
/***************************************************************************
 Description: consume spaces so that the next character read is the first
              one that is not ' '
****************************************************************************/
void skipSpaces(void)
{
    char c;

    do {
        c = getTexChar();
    } while (c == ' ');

    ungetTexChar(c);            /* hand the non-space character back */
}
643 
/***************************************************************************
 Description: consume spaces and newlines so that the next character read
              is the first one that is neither
****************************************************************************/
void skipWhiteSpace(void)
{
    ungetTexChar(getNonBlank());
}
652 
653 
/***************************************************************************
 Description: consume a run of characters equal to c
 returns:     how many were consumed; the first different character is
              pushed back
****************************************************************************/
int getSameChar(char c)
{
    int matches = 0;
    char next = getTexChar();

    while (next == c) {
        matches++;
        next = getTexChar();
    }

    ungetTexChar(next);

    return matches;
}
672 
getDelimitedText(char left,char right,int raw)673 char *getDelimitedText(char left, char right, int raw)
674 
675 /******************************************************************************
676   purpose: general scanning routine that allocates and returns a string
677            that is between "left" and "right" that accounts for escaping by '\'
678 
679            Example for getDelimitedText('{','}',TRUE)
680 
681            "the \{ is shown {\it by} a\\} blah blah" ----> "the \{ is shown {\it by} a\\"
682 
683            Note the missing opening brace in the example above
684 
685            It turns out that for getDelimitedText('[',']',TRUE)
686 
687            "the \] is shown {]} a\\] blah blah blah" ----> "the \] is shown {]} a\\"
688 
689  ******************************************************************************/
690 {
691     char buffer[SCAN_BUFFER_SIZE];
692     int size = -1;
693     int lefts_needed = 1;
694     int brace_level = 0;
695     int last_char_was_backslash = FALSE;
696 
697     while (lefts_needed && size < SCAN_BUFFER_SIZE-1) {
698         size++;
699         buffer[size] = (raw) ? getRawTexChar() : getTexChar();
700 
701         if (last_char_was_backslash)  {            /* ignore \{ etc.           */
702             if (buffer[size] == '\\') {            /* two backslashes in a row */
703                 last_char_was_backslash = FALSE;   /* next char is not special */
704                 continue;
705             }
706         }
707 
708         else if (buffer[size] == right && brace_level == 0)
709         	lefts_needed--;
710 
711         else if (buffer[size] == '{')
712         	brace_level++;
713 
714         else if (buffer[size] == '}')
715         	brace_level--;
716 
717         last_char_was_backslash = (buffer[size] == '\\') ? TRUE : FALSE;
718     }
719 
720     buffer[size] = '\0';        /* overwrite final delimeter */
721     if (size == SCAN_BUFFER_SIZE-1) {
722         diagnostics(WARNING, "Could not find closing '%c' in %d chars", right, SCAN_BUFFER_SIZE);
723         return strdup(" NOT FOUND ");
724     }
725 
726     return strdup(buffer);
727 }
728 
parseBrace(void)729 void parseBrace(void)
730 
731 /****************************************************************************
732   Description: Skip text to balancing close brace
733  ****************************************************************************/
734 {
735     char *s = getDelimitedText('{', '}', FALSE);
736 
737     free(s);
738 }
739 
parseBracket(void)740 static void parseBracket(void)
741 
742 /****************************************************************************
743   Description: Skip text to balancing close bracket
744  ****************************************************************************/
745 {
746     char *s = getDelimitedText('[', ']', FALSE);
747 
748     free(s);
749 }
750 
CmdIgnoreParameter(int code)751 void CmdIgnoreParameter(int code)
752 
753 /****************************************************************************
754    Description: Ignore the parameters of a command
755    Example    : CmdIgnoreParameter(21) for \command[opt1]{reg1}{reg2}
756 
757    code is a decimal # of the form "op" where `o' is the number of
758    optional parameters (0-9) and `p' is the # of required parameters.
759 
760    The specified number of parameters is ignored.  The order of the parameters
761    in the LaTeX file does not matter.
762 ****************************************************************************/
763 {
764     int optParmCount = code / 10;
765     int regParmCount = code % 10;
766     char cThis;
767 
768     diagnostics(4, "CmdIgnoreParameter [%d] {%d}", optParmCount, regParmCount);
769 
770     while (regParmCount) {
771         cThis = getNonBlank();
772         switch (cThis) {
773             case '{':
774 
775                 regParmCount--;
776                 parseBrace();
777                 break;
778 
779             case '[':
780 
781                 optParmCount--;
782                 parseBracket();
783                 break;
784 
785             default:
786                 diagnostics(WARNING, "Ignored command missing {} expected %d - found %d", code % 10,
787                   code % 10 - regParmCount);
788                 ungetTexChar(cThis);
789                 return;
790         }
791     }
792 
793     /* Check for trailing optional parameter e.g., \item[label] */
794 
795     if (optParmCount > 0) {
796         cThis = getNonSpace();
797         if (cThis == '[')
798             parseBracket();
799         else {
800             ungetTexChar(cThis);
801             return;
802         }
803     }
804     return;
805 }
806 
getSimpleCommand(void)807 char *getSimpleCommand(void)
808 
809 /**************************************************************************
810      purpose: returns a simple command e.g., \alpha\beta will return "\beta"
811                                                   ^
812                                               \! will return \!
813  **************************************************************************/
814 {
815     char buffer[128];
816     int size;
817 
818     buffer[0] = getTexChar();
819 
820     if (buffer[0] != '\\')
821         return NULL;
822 
823     buffer[1] = getTexChar();
824 
825     for (size = 2; size < 127; size++) {
826         buffer[size] = getRawTexChar(); /* \t \r '%' all end command */
827 
828         if (!isalpha((int) buffer[size])) {
829             ungetTexChar(buffer[size]);
830             break;
831         }
832     }
833 
834     buffer[size] = '\0';
835     if (size == 127) {
836         diagnostics(WARNING, "Misplaced brace.");
837         diagnostics(ERROR, "Cannot find close brace in 127 characters");
838     }
839 
840     diagnostics(5, "getSimpleCommand result <%s>", buffer);
841     return strdup(buffer);
842 }
843 
getBracketParam(void)844 char *getBracketParam(void)
845 
846 /******************************************************************************
847   purpose: return bracketed parameter
848 
849   \item[1]   --->  "1"        \item[]   --->  ""        \item the  --->  NULL
850        ^                           ^                         ^
851   \item [1]  --->  "1"        \item []  --->  ""        \item  the --->  NULL
852        ^                           ^                         ^
853  ******************************************************************************/
854 {
855     char c, *text;
856 
857     c = getNonBlank();
858     PushTrackLineNumber(FALSE);
859 
860     if (c == '[') {
861         text = getDelimitedText('[', ']', FALSE);
862         diagnostics(6, "getBracketParam [%s]", text);
863 
864     } else {
865         ungetTexChar(c);
866         text = NULL;
867         diagnostics(6, "getBracketParam []");
868     }
869 
870     PopTrackLineNumber();
871     return text;
872 }
873 
getBraceParam0(int raw_flag)874 static char *getBraceParam0(int raw_flag)
875 
876 /**************************************************************************
877      purpose: allocates and returns the next parameter in the LaTeX file
878               Examples:  (^ indicates the current file position)
879 
880      \alpha\beta   --->  "\beta"             \bar \alpha   --->  "\alpha"
881            ^                                     ^
882      \bar{text}    --->  "text"              \bar text     --->  "t"
883          ^                                       ^
884     _\alpha        ---> "\alpha"             _{\alpha}     ---> "\alpha"
885      ^                                        ^
886     _2             ---> "2"                  _{2}          ---> "2"
887      ^                                        ^
888  **************************************************************************/
889 {
890     char s[2], *text;
891 
892     s[0] = getNonSpace();       /* skip spaces and one possible newline */
893     if (s[0] == '\n')
894         s[0] = getNonSpace();
895 
896     PushTrackLineNumber(FALSE);
897 
898     if (s[0] == '\\') {
899         ungetTexChar(s[0]);
900         text = getSimpleCommand();
901 
902     } else if (s[0] == '{')
903         text = getDelimitedText('{', '}', raw_flag);
904 
905     else {
906         s[1] = '\0';
907         text = strdup(s);
908     }
909 
910     PopTrackLineNumber();
911     diagnostics(6, "Leaving getBraceParam {%s}", text);
912     return text;
913 }
914 
getBraceParam(void)915 char *getBraceParam(void)
916 {
917     return getBraceParam0(FALSE);
918 }
919 
getBraceRawParam(void)920 char *getBraceRawParam(void)
921 {
922     return getBraceParam0(TRUE);
923 }
924 
void ignoreBraceParam(void)

/**************************************************************************
     purpose: reads the next brace parameter and throws it away
 **************************************************************************/
{
    /* free(NULL) is a no-op, so no guard is needed */
    free(getBraceParam());
}
929 
void ignoreBracketParam(void)

/**************************************************************************
     purpose: reads the next optional bracket parameter (if there is one)
              and throws it away
 **************************************************************************/
{
    /* getBracketParam() yields NULL when no '[' follows; free(NULL) is a no-op */
    free(getBracketParam());
}
934 
935 
getLeftRightParam(void)936 char *getLeftRightParam(void)
937 
938 /**************************************************************************
939      purpose: get text between \left ... \right
940  **************************************************************************/
941 {
942     char text[5000], s, *command;
943     int i = 0;
944     int lrdepth = 1;
945 
946     text[0] = '\0';
947 
948     for (;;) {
949         s = getTexChar();
950         if (s == '\\') {
951             ungetTexChar(s);
952             command = getSimpleCommand();
953             if (strcmp(command, "\\right") == 0) {
954                 lrdepth--;
955                 if (lrdepth == 0) {
956                     free(command);
957                     return strdup(text);
958                 }
959             }
960             my_strlcat(text + i, command, 5000);
961             i += (int) strlen(command);
962             if (i > 4950)
963                 diagnostics(ERROR, "Contents of \\left .. \\right too large.");
964             if (strcmp(command, "\\left") == 0)
965                 lrdepth++;
966             free(command);
967         } else {
968             text[i] = s;
969             i++;
970             text[i] = '\0';
971         }
972     }
973     return NULL;
974 }
975 
976 
977 
978 
getTexUntil(char * target,int raw)979 char *getTexUntil(char *target, int raw)
980 
981 /**************************************************************************
982      purpose: returns the portion of the file to the beginning of target
983      returns: NULL if not found
984  **************************************************************************/
985 {
986     enum { BUFFSIZE = 200000 };
987     char *s;
988     char buffer[BUFFSIZE];
989     int last_i = -1;
990     int i = 0;                  /* size of string that has been read */
991     int j = 0;               /* number of found characters */
992     int end_of_file_reached = FALSE;
993     int len = (int) strlen(target);
994 
995     PushTrackLineNumber(FALSE);
996 
997     diagnostics(5, "getTexUntil target = <%s> raw_search = %d ", target, raw);
998 
999     while (j < len && i < BUFFSIZE) {
1000 
1001         if (i > last_i) {
1002             buffer[i] = (raw) ? getRawTexChar() : getTexChar();
1003             last_i = i;
1004             if (buffer[i] != '\n')
1005                 diagnostics(7, "next char = <%c>, %d, %d, %d", buffer[i], i, j, last_i);
1006             else
1007                 diagnostics(7, "next char = <\\n>");
1008 
1009         }
1010 
1011         if (buffer[i] == '\0') {
1012             end_of_file_reached = TRUE;
1013             diagnostics(7, "end of file reached");
1014             break;
1015         }
1016 
1017         if (buffer[i] != target[j]) {
1018             if (j > 0) {        /* false start, put back what was found */
1019                 diagnostics(8, "failed to match target[%d]=<%c> != buffer[%d]=<%c>", j, target[j], i, buffer[i]);
1020                 i -= j;
1021                 j = 0;
1022             }
1023         } else
1024             j++;
1025 
1026         i++;
1027     }
1028 
1029     if (i == BUFFSIZE)
1030         diagnostics(ERROR, "Could not find <%s> in %d characters \n\
1031         Recompile with larger BUFFSIZE in getTexUntil() in parser.c", target, BUFFSIZE);
1032 
1033     if (!end_of_file_reached)   /* do not include target in returned string */
1034         buffer[i - len] = '\0';
1035     else {
1036         diagnostics(ERROR, "Could not find <%s>", target);
1037         exit(1);
1038     }
1039 
1040     PopTrackLineNumber();
1041 
1042     diagnostics(6, "buffer size =[%d], actual=[%d]", strlen(buffer), i - len);
1043 
1044     s = strdup(buffer);
1045     diagnostics(6, "getTexUntil result = %s", s);
1046     return s;
1047 }
1048 
getSpacedTexUntil(char * target,int raw)1049 char *getSpacedTexUntil(char *target, int raw)
1050 
1051 /**************************************************************************
1052      purpose: returns the portion of the file to the beginning of target
1053 
1054      getSpacedTexUntil("\begin|{|document|}")
1055 
1056      will match the regular expression "\\begin *{ *document *}"
1057  **************************************************************************/
1058 {
1059     enum { BUFFSIZE = 16000 };
1060     char buffer[BUFFSIZE];
1061     char *s;
1062     int buffer_pos, target_pos, target_len, max_buffer_pos, start_pos;
1063 
1064     PushTrackLineNumber(FALSE);
1065 
1066     diagnostics(5, "getSpacedTexUntil target = <%s> raw_search = %d ", target, raw);
1067 
1068     buffer_pos = 0;
1069     target_pos = 0;
1070     start_pos  = 0;
1071     target_len = (int) strlen(target);
1072     max_buffer_pos = -1;
1073 
1074     do {
1075 
1076         /* the next character might already be in the buffer */
1077         if (buffer_pos > max_buffer_pos) {
1078             buffer[buffer_pos] = (raw) ? getRawTexChar() : getTexChar();
1079             max_buffer_pos = buffer_pos;
1080         }
1081 
1082         if (buffer[buffer_pos] == '\0') {
1083             diagnostics(ERROR, "end of file reached before '%s' was found",target);
1084         }
1085 
1086         if (buffer[buffer_pos] == target[target_pos]) {
1087             if (target_pos == 0)
1088                 start_pos = buffer_pos;
1089             target_pos++;
1090         }
1091 
1092         /* does not match next character in target ... */
1093         else if (target[target_pos] != '|') {
1094 
1095             if (target_pos > 0)        /* false start, put back what was found */
1096                 buffer_pos = start_pos;
1097             target_pos = 0;
1098 
1099         /* next character in target is '|' */
1100         } else if (buffer[buffer_pos] != ' ' && buffer[buffer_pos] != '\n') {
1101 
1102             /* next char is non-blank ... either match or reset */
1103             target_pos++;  /* move past wildcard */
1104             if (buffer[buffer_pos] == target[target_pos]) {
1105                 target_pos++;
1106             } else {
1107                 buffer_pos = start_pos;
1108                 target_pos = 0;
1109             }
1110         }
1111 
1112         if (0) {
1113         if (buffer[buffer_pos] != '\n')
1114             diagnostics(WARNING, "this char = <%c>, %d, %d, max=%d", buffer[buffer_pos], buffer_pos, target_pos, max_buffer_pos);
1115         else
1116             diagnostics(WARNING, "this char = <\\n>, %d, %d, max=%d", buffer[buffer_pos], buffer_pos, target_pos, max_buffer_pos);
1117         }
1118 
1119         buffer_pos++;
1120 
1121 
1122         if (buffer_pos == BUFFSIZE)
1123             diagnostics(ERROR, "Could not find <%s> in %d characters \n\
1124             Recompile with larger BUFFSIZE in getTexUntil() in parser.c", target, BUFFSIZE);
1125 
1126     } while (target_pos < target_len);
1127 
1128     /* terminate buffer */
1129     buffer[start_pos] = '\0';
1130 
1131     PopTrackLineNumber();
1132 
1133     s = strdup(buffer);
1134     diagnostics(6, "getSpacedTexUntil result = %s", s);
1135     return s;
1136 }
1137 
getDimension(void)1138 int getDimension(void)
1139 
1140 /**************************************************************************
1141      purpose: reads a TeX dimension and returns size it twips
1142           eg: 3 in, -.013mm, 29 pc, + 42,1 dd, 1234sp
1143 **************************************************************************/
1144 {
1145     char cThis, buffer[20];
1146     int i = 0;
1147     float num;
1148 
1149     skipSpaces();
1150 
1151 /* obtain optional sign */
1152     cThis = getTexChar();
1153 
1154 /* skip "to" */
1155     if (cThis == 't') {
1156         getTexChar();
1157         cThis = getTexChar();
1158     }
1159 
1160 /* skip "spread" */
1161     if (cThis == 's') {
1162         getTexChar();
1163         getTexChar();
1164         getTexChar();
1165         getTexChar();
1166         getTexChar();
1167         cThis = getTexChar();
1168     }
1169 
1170     if (cThis == '-' || cThis == '+') {
1171         buffer[i++] = cThis;
1172         skipSpaces();
1173         cThis = getTexChar();
1174     }
1175 
1176 /* obtain number */
1177     if (cThis == '\\')
1178         buffer[i++] = '1';
1179     else {
1180         while (i < 19 && (isdigit((int) cThis) || cThis == '.' || cThis == ',')) {
1181             if (cThis == ',')
1182                 cThis = '.';
1183             buffer[i++] = cThis;
1184             cThis = getTexChar();
1185         }
1186     }
1187     ungetTexChar(cThis);
1188     buffer[i] = '\0';
1189     diagnostics(4, "getDimension() raw number is <%s>", buffer);
1190 
1191     if (i == 19 || sscanf(buffer, "%f", &num) != 1) {
1192         diagnostics(WARNING, "Screwy number in TeX dimension");
1193         diagnostics(WARNING, "getDimension() number is <%s>", buffer);
1194         return 0;
1195     }
1196 
1197 /*  num *= 2;                    convert pts to twips */
1198 
1199 /* obtain unit of measure */
1200     skipSpaces();
1201     buffer[0] = tolower((int) getTexChar());
1202 
1203     if (buffer[0] == '\0')  /* no units specified ... assume points */
1204         return (int) (num * 20);
1205 
1206 /* skip "true" */
1207     if (buffer[0] == 't') {
1208         getTexChar();
1209         getTexChar();
1210         getTexChar();
1211         skipSpaces();
1212         buffer[0] = tolower((int) getTexChar());
1213     }
1214 
1215     if (buffer[0] != '\\') {
1216         buffer[1] = tolower((int) getTexChar());
1217         buffer[2] = '\0';
1218 
1219         diagnostics(4, "getDimension() dimension is <%s>", buffer);
1220         if (strstr(buffer, "pt"))
1221             return (int) (num * 20);
1222         else if (strstr(buffer, "pc"))
1223             return (int) (num * 12 * 20);
1224         else if (strstr(buffer, "in"))
1225             return (int) (num * 72.27 * 20);
1226         else if (strstr(buffer, "bp"))
1227             return (int) (num * 72.27 / 72 * 20);
1228         else if (strstr(buffer, "cm"))
1229             return (int) (num * 72.27 / 2.54 * 20);
1230         else if (strstr(buffer, "mm"))
1231             return (int) (num * 72.27 / 25.4 * 20);
1232         else if (strstr(buffer, "dd"))
1233             return (int) (num * 1238.0 / 1157.0 * 20);
1234         else if (strstr(buffer, "dd"))
1235             return (int) (num * 1238.0 / 1157 * 20);
1236         else if (strstr(buffer, "cc"))
1237             return (int) (num * 1238.0 / 1157.0 * 12.0 * 20);
1238         else if (strstr(buffer, "sp"))
1239             return (int) (num / 65536.0 * 20);
1240         else if (strstr(buffer, "ex"))
1241             return (int) (num * CurrentFontSize() * 0.5);
1242         else if (strstr(buffer, "em"))
1243             return (int) (num * CurrentFontSize());
1244         else if (strstr(buffer, "in"))
1245             return (int) (num * 72.27 * 20);
1246         else {
1247             ungetTexChar(buffer[1]);
1248             ungetTexChar(buffer[0]);
1249             return (int) num;
1250         }
1251     } else {
1252         char *s, *t;
1253 
1254         ungetTexChar(buffer[0]);
1255         s = getSimpleCommand();
1256         t = s + 1;              /* skip initial backslash */
1257         diagnostics(4, "getDimension() dimension is <%s>", t);
1258         num *= getLength(t);
1259         free(s);
1260         return (int) num;
1261     }
1262 
1263 }
1264 
/***************************************************************************
 purpose: return twips for \\, \\[1pt], \\*[1pt]
          An optional '*' is skipped; the height is taken from the optional
          bracket parameter and is 0 when there is none.
 ***************************************************************************/
int getSlashSlashParam(void)
{
    char next;
    char *bracket;
    int twips;

    /* swallow a '*' if present, otherwise give the character back */
    next = getTexChar();
    if (next != '*')
        ungetTexChar(next);

    bracket = getBracketParam();
    if (bracket == NULL)
        return 0;

    twips = getStringDimension(bracket);
    free(bracket);
    return twips;
}
1285 
1286