1
2 /* parser.c - parser for LaTeX code
3
4 Copyright (C) 1998-2002 The Free Software Foundation
5
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License
8 as published by the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19
20 This file is available from http://sourceforge.net/projects/latex2rtf/
21
22 Authors:
23 1998-2000 Georg Lehner
24 2001-2007 Scott Prahl
25 */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <ctype.h>
31
32 #include "main.h"
33 #include "commands.h"
34 #include "cfg.h"
35 #include "stack.h"
36 #include "utils.h"
37 #include "parser.h"
38 #include "fonts.h"
39 #include "lengths.h"
40 #include "definitions.h"
41 #include "funct1.h"
42 #include "encodings.h"
43
/* Limits used by the parser. */
#define PARSER_SOURCE_MAX 100      /* number of entries in g_parser_stack */
#define SCAN_BUFFER_SIZE 5000      /* scratch buffer size used by getDelimitedText() */
#define TRACK_LINE_NUMBER_MAX 10   /* number of entries in g_track_line_number_stack */

/* One entry of the input-source stack (a file or an in-memory string). */
typedef struct InputStackType {
    char *string;        /* read position saved for a string source */
    char *string_start;  /* start of the duplicated string; this is what PopSource() frees */
    FILE *file;          /* stream for a file source, NULL for a string source */
    char *file_name;     /* name reported by CurrentFileName() */
    int file_line;       /* line number recorded for this entry */
} InputStackType;

/* Stack of input sources and the index of its top entry (-1 when empty). */
static InputStackType g_parser_stack[PARSER_SOURCE_MAX];
static int g_parser_depth = -1;

/* State of the source currently being read. */
static char *g_parser_string = "stdin";
static FILE *g_parser_file = NULL;
static int g_parser_line = 1;
static int g_parser_include_level = 0;

/* The three most recently read characters and the current run of backslashes. */
static char g_parser_currentChar;   /* Global current character */
static char g_parser_lastChar;
static char g_parser_penultimateChar;
static int g_parser_backslashes;

/* Stack of flags saying whether newlines advance g_parser_line (-1 == empty). */
static int g_track_line_number_stack[TRACK_LINE_NUMBER_MAX];
static int g_track_line_number = -1;
71
PushTrackLineNumber(int flag)72 void PushTrackLineNumber(int flag)
73
74 /***************************************************************************
75 purpose: set whether or not line numbers should be tracked in LaTeX source file
76 ****************************************************************************/
77 {
78 if (g_track_line_number >= TRACK_LINE_NUMBER_MAX)
79 diagnostics(ERROR, "scan ahead stack too large! Sorry.");
80
81 g_track_line_number++;
82 g_track_line_number_stack[g_track_line_number] = flag;
83 }
84
PopTrackLineNumber(void)85 void PopTrackLineNumber(void)
86
87 /***************************************************************************
88 purpose: restore last state of line numbers tracking in LaTeX source file
89 ****************************************************************************/
90 {
91 if (g_track_line_number < 0)
92 diagnostics(ERROR, "scan ahead stack too small! Sorry.");
93
94 g_track_line_number--;
95 }
96
97 /***************************************************************************
98 purpose: returns the current line number of the text being processed
99 ****************************************************************************/
CurrentLineNumber(void)100 int CurrentLineNumber(void)
101 {
102 return g_parser_line;
103 }
104
UpdateLineNumber(char * s)105 void UpdateLineNumber(char *s)
106
107 /***************************************************************************
108 purpose: advances the line number for each '\n' in s
109 ****************************************************************************/
110 {
111 if (s == NULL)
112 return;
113
114 while (*s != '\0') {
115 if (*s == '\n')
116 g_parser_line++;
117 s++;
118 }
119 }
120
121 /***************************************************************************
122 purpose: returns the current file descriptor
123 ****************************************************************************/
CurrentFileDescriptor(void)124 int CurrentFileDescriptor(void)
125 {
126 int fd=0;
127 if (g_parser_file)
128 fd = fileno(g_parser_file);
129
130 return fd;
131 }
132
CurrentFileName(void)133 char *CurrentFileName(void)
134
135 /***************************************************************************
136 purpose: returns the filename of the text being processed
137 ****************************************************************************/
138 {
139 char *s = "(Not set)";
140
141 if (g_parser_stack[g_parser_depth].file_name)
142 return g_parser_stack[g_parser_depth].file_name;
143 else
144 return s;
145 }
146
147 /*
148 The following two routines allow parsing of multiple files and strings
149 */
150
PushSource(const char * filename,const char * string)151 int PushSource(const char *filename, const char *string)
152
153 /***************************************************************************
154 purpose: change the source used by getRawTexChar() to either file or string
155 --> pass NULL for unused argument (both NULL means use stdin)
156 --> PushSource duplicates string
157 ****************************************************************************/
158 {
159 char s[50];
160 FILE *p = NULL;
161 char *name = NULL;
162 int i;
163 int line = 1;
164
165 if (0) {
166 diagnostics(WARNING, "Before PushSource** line=%d, g_parser_depth=%d, g_parser_include_level=%d",
167 g_parser_line, g_parser_depth, g_parser_include_level);
168 for (i = 0; i <= g_parser_depth; i++) {
169 if (g_parser_stack[i].file)
170 diagnostics(WARNING, "i=%d file =%s, line=%d", i, g_parser_stack[i].file_name, g_parser_stack[i].file_line);
171
172 else {
173 strncpy_printable(s, g_parser_stack[i].string, 25);
174 diagnostics(WARNING, "i=%d string =%s, line=%d", i, s, g_parser_stack[i].file_line);
175 }
176 }
177 }
178
179 /* save current values for linenumber and string */
180 if (g_parser_depth >= 0) {
181 g_parser_stack[g_parser_depth].file_line = g_parser_line;
182 g_parser_stack[g_parser_depth].string = g_parser_string;
183 }
184
185 /* first test to see if we should use stdin */
186 if ((filename == NULL || strcmp(filename, "-") == 0) && string == NULL) {
187 g_parser_include_level++;
188 g_parser_line = 1;
189 name = strdup("stdin");
190 p = stdin;
191
192 /* if not then try to open a file */
193 } else if (filename) {
194 p = my_fopen((char *)filename, "rb");
195 if (p == NULL)
196 return 1;
197 g_parser_include_level++;
198 g_parser_line = 1;
199 name = strdup(filename);
200
201 } else {
202 name = CurrentFileName();
203 line = CurrentLineNumber();
204 }
205
206 g_parser_depth++;
207
208 if (g_parser_depth >= PARSER_SOURCE_MAX)
209 diagnostics(ERROR, "More than %d PushSource() calls", (int) PARSER_SOURCE_MAX);
210
211 g_parser_string = (string) ? strdup(string) : NULL;
212 g_parser_stack[g_parser_depth].string = g_parser_string;
213 g_parser_stack[g_parser_depth].string_start = g_parser_string;
214 g_parser_stack[g_parser_depth].file = p;
215 g_parser_stack[g_parser_depth].file_line = line;
216 g_parser_stack[g_parser_depth].file_name = name;
217 g_parser_file = p;
218 g_parser_string = g_parser_stack[g_parser_depth].string;
219
220 if (g_parser_file) {
221 diagnostics(3, "Opening Source File %s", g_parser_stack[g_parser_depth].file_name);
222 } else {
223 diagnostics(4, "Opening Source string");
224 show_string(5, g_parser_string, "opening");
225 }
226
227 if (0) {
228 diagnostics(WARNING, "After PushSource** line=%d, g_parser_depth=%d, g_parser_include_level=%d",
229 g_parser_line, g_parser_depth, g_parser_include_level);
230 for (i = 0; i <= g_parser_depth; i++) {
231 if (g_parser_stack[i].file)
232 diagnostics(WARNING, "i=%d file =%s, line=%d", i, g_parser_stack[i].file_name, g_parser_stack[i].file_line);
233
234 else {
235 strncpy_printable(s, g_parser_stack[i].string, 25);
236 diagnostics(WARNING, "i=%d string =%s, line=%d", i, s, g_parser_stack[i].file_line);
237 }
238 }
239 }
240 return 0;
241 }
242
StillSource(void)243 int StillSource(void)
244
245 /***************************************************************************
246 purpose: figure out if text remains to be processed
247 ****************************************************************************/
248 {
249 if (g_parser_file)
250 return (!feof(g_parser_file));
251 else
252 return (*g_parser_string != '\0');
253 }
254
EndSource(void)255 void EndSource(void)
256 {
257 if (g_parser_file)
258 fseek(g_parser_file, 0, SEEK_END);
259 else
260 *g_parser_string = '\0';
261
262 return;
263 }
264
PopSource(void)265 void PopSource(void)
266
267 /***************************************************************************
268 purpose: return to the previous source
269 ****************************************************************************/
270 {
271 char s[50];
272 int i;
273
274 if (g_parser_depth < 0) {
275 diagnostics(1, "Hmmm. More PopSource() calls than PushSource() calls");
276 return;
277 }
278
279 if (0) {
280 diagnostics(WARNING, "Before PopSource** line=%d, g_parser_depth=%d, g_parser_include_level=%d",
281 g_parser_line, g_parser_depth, g_parser_include_level);
282 for (i = 0; i <= g_parser_depth; i++) {
283 if (g_parser_stack[i].file)
284 diagnostics(WARNING, "i=%d file =%s, line=%d", i, g_parser_stack[i].file_name, g_parser_stack[i].file_line);
285
286 else {
287 strncpy_printable(s, g_parser_stack[i].string, 25);
288 diagnostics(WARNING, "i=%d string =%s, line=%d", i, s, g_parser_stack[i].file_line);
289 }
290 }
291 }
292
293 if (g_parser_file) {
294 diagnostics(3, "Closing Source File '%s'", g_parser_stack[g_parser_depth].file_name);
295 fclose(g_parser_file);
296 free(g_parser_stack[g_parser_depth].file_name);
297 g_parser_stack[g_parser_depth].file_name = NULL;
298 g_parser_include_level--;
299 }
300
301 if (g_parser_string) {
302 if (strlen(g_parser_stack[g_parser_depth].string_start) < 49)
303 strcpy(s, g_parser_stack[g_parser_depth].string_start);
304 else {
305 strncpy(s, g_parser_stack[g_parser_depth].string_start, 49);
306 s[49] = '\0';
307 }
308
309 show_string(5, s, "closing");
310 free(g_parser_stack[g_parser_depth].string_start);
311 g_parser_stack[g_parser_depth].string_start = NULL;
312 }
313
314 g_parser_depth--;
315
316 if (g_parser_depth >= 0) {
317 g_parser_string = g_parser_stack[g_parser_depth].string;
318 g_parser_file = g_parser_stack[g_parser_depth].file;
319 }
320
321 if (g_parser_file && 0) {
322 g_parser_line = g_parser_stack[g_parser_depth].file_line;
323 }
324
325 if (g_parser_file)
326 diagnostics(4, "Resuming Source File '%s'", g_parser_stack[g_parser_depth].file_name);
327 else {
328 diagnostics(5, "Resuming Source string");
329 show_string(5,g_parser_string,"resuming");
330 }
331
332 if (0) {
333 diagnostics(WARNING, "After PopSource** line=%d, g_parser_depth=%d, g_parser_include_level=%d",
334 g_parser_line, g_parser_depth, g_parser_include_level);
335 for (i = 0; i <= g_parser_depth; i++) {
336 if (g_parser_stack[i].file)
337 diagnostics(WARNING, "i=%d file =%s, line=%d", i, g_parser_stack[i].file_name, g_parser_stack[i].file_line);
338
339 else {
340 strncpy_printable(s, g_parser_stack[i].string, 25);
341 diagnostics(WARNING, "i=%d string =%s, line=%d", i, s, g_parser_stack[i].file_line);
342 }
343 }
344 }
345 }
346
CmdInclude(int code)347 void CmdInclude(int code)
348
349 /******************************************************************************
350 purpose: handles \input file, \input{file}, \include{file}
351 code == 0 for \include
352 code == 1 for \input
353 ******************************************************************************/
354 {
355 int cNext;
356 char name[100];
357 int i;
358 char *basename=NULL;
359 char *texname=NULL;
360
361 cNext = getNonSpace();
362
363 if (cNext == '{') { /* \input{gnu} or \include{gnu} */
364 ungetTexChar(cNext);
365 basename = getBraceParam();
366
367 } else { /* \input gnu */
368 i = 0;
369 while (cNext != '\0' && !isspace(cNext)) {
370 if (i<99) name[i] = (char) cNext;
371 i++;
372 cNext = getTexChar();
373 }
374
375 if (i<99)
376 name[i] = '\0';
377 else {
378 name[99] = '\0';
379 diagnostics(WARNING, "\\input filename '%s' more than 100 chars, skipping",name);
380 return;
381 }
382
383 basename = strdup(name);
384 }
385
386 if (strstr(basename, "german.sty") != NULL) {
387 GermanMode = TRUE;
388 PushEnvironment(GERMAN_MODE);
389 free(basename);
390 return;
391
392 } else if (strstr(basename, "french.sty") != NULL) {
393 FrenchMode = TRUE;
394 PushEnvironment(FRENCH_MODE);
395 free(basename);
396 return;
397 }
398
399 if (basename && strstr(basename, ".tex") == NULL && strstr(basename, ".ltx") == NULL) /* append .tex if missing */
400 texname = strdup_together(basename, ".tex");
401
402 if (texname && PushSource(texname, NULL) == 0) /* Try the .tex name first*/
403 diagnostics(WARNING, "Including file <%s> (.tex appended)", texname);
404
405 else if (basename && PushSource(basename, NULL) == 0) /* Try the basename second*/
406 diagnostics(WARNING, "Including file <%s>", basename);
407
408 /* \include{file} always starts a new page */
409 if (code == 0)
410 PushSource(NULL, "\\pagebreak ");
411
412 if (basename) free(basename);
413 if (texname) free(texname);
414 }
415
416
417 #define CR (char) 0x0d
418 #define LF (char) 0x0a
419
getParserDepth(void)420 int getParserDepth(void)
421 {
422 return g_parser_depth;
423 }
424
425
getRawTexChar(void)426 char getRawTexChar(void)
427
428 /***************************************************************************
429 purpose: get the next character from the input stream with minimal
430 filtering (CRLF or CR or LF -> \n) and '\t' -> ' '
431 it also keeps track of the line number
432 should only be used by \verb and \verbatim and getTexChar()
433 ****************************************************************************/
434 {
435 int thechar;
436
437 if (g_parser_file) {
438 thechar = getc(g_parser_file);
439 while (thechar == EOF) {
440 if (!feof(g_parser_file))
441 diagnostics(ERROR, "Unknown file I/O error reading latex file\n");
442 else if (g_parser_include_level > 1) {
443 PopSource(); /* go back to parsing parent */
444 thechar = getRawTexChar(); /* get next char from parent file */
445 } else
446 thechar = '\0';
447 }
448 if (thechar == CR) { /* convert CR, CRLF, or LF to \n */
449 thechar = getc(g_parser_file);
450 if (thechar != LF && !feof(g_parser_file))
451 ungetc(thechar, g_parser_file);
452 thechar = '\n';
453 } else if (thechar == LF)
454 thechar = '\n';
455 else if (thechar == '\t')
456 thechar = ' ';
457
458 g_parser_currentChar = (char) thechar;
459
460 } else {
461
462 if (g_parser_string && *g_parser_string) {
463 thechar = *g_parser_string;
464
465 /* convert CR, CRLF, or LF to \n */
466 if (thechar == CR) {
467 g_parser_string++;
468 thechar = *g_parser_string;
469 if (thechar != LF)
470 g_parser_string--;
471 thechar = '\n';
472 } else if (thechar == LF)
473 thechar = '\n';
474 else if (thechar == '\t')
475 thechar = ' ';
476
477 g_parser_currentChar = thechar;
478 g_parser_string++;
479 }
480 else if (g_parser_depth > 15)
481 {
482 PopSource(); /* go back to parsing parent */
483 g_parser_currentChar = getRawTexChar(); /* get next char from parent file */
484 } else
485 g_parser_currentChar = '\0';
486 }
487
488 if (g_parser_currentChar == '\n' && g_track_line_number_stack[g_track_line_number])
489 g_parser_line++;
490
491 g_parser_penultimateChar = g_parser_lastChar;
492 g_parser_lastChar = g_parser_currentChar;
493 if (0) {
494 if (g_parser_currentChar=='\n')
495 diagnostics(5,"getRawTexChar = <\\n>");
496 else if (g_parser_currentChar=='\0')
497 diagnostics(5,"getRawTexChar = <\\0> depth=%d, files=%d", g_parser_depth, g_parser_include_level);
498 else
499 diagnostics(5,"getRawTexChar = <%2c>",g_parser_currentChar);
500 }
501 /* if (g_parser_currentChar=='\0') exit(0);*/
502 return g_parser_currentChar;
503 }
504
505 #undef CR
506 #undef LF
507
ungetTexChar(char c)508 void ungetTexChar(char c)
509
510 /****************************************************************************
511 purpose: rewind the filepointer in the LaTeX-file by one
512 ****************************************************************************/
513 {
514 if (c == '\0')
515 return;
516
517 if (g_parser_file) {
518
519 ungetc(c, g_parser_file);
520
521 } else {
522 g_parser_string--;
523 if (g_parser_string && *g_parser_string) {
524 *g_parser_string = c;
525 }
526 }
527
528 if (c == '\n' && g_track_line_number_stack[g_track_line_number])
529 g_parser_line--;
530
531 g_parser_currentChar = g_parser_lastChar;
532 g_parser_lastChar = g_parser_penultimateChar;
533 g_parser_penultimateChar = '\0'; /* no longer know what that it was */
534 g_parser_backslashes = 0;
535 diagnostics(6, "after ungetTexChar=<%c> backslashes=%d line=%ld", c, g_parser_backslashes, g_parser_line);
536 }
537
skipBOM(int cThis)538 int skipBOM(int cThis)
539 {
540 /* UTF8 Byte Order Mark */
541 if (cThis == 0xEF) {
542 cThis = getRawTexChar();
543 if (cThis == 0xBB) {
544 cThis = getRawTexChar();
545 if (cThis == 0xBF) {
546 CmdFontEncoding(ENCODING_UTF8);
547 cThis = getRawTexChar();
548 diagnostics(2, "UTF 8 BOM encountered, now assuming UTF8 input");
549 }
550 }
551 }
552
553 /* UTF16 Byte Order Mark */
554 if (cThis == 0xFE) {
555 cThis = getRawTexChar();
556 if (cThis == 0xFF)
557 diagnostics(2, "UTF 16 is not supported, you might try converting to UTF8");
558 }
559
560 return cThis;
561 }
562
getTexChar()563 char getTexChar()
564
565 /***************************************************************************
566 purpose: get the next character from the input stream
567 This should be the usual place to access the LaTeX file
568 It filters the input stream so that % is handled properly
569 ****************************************************************************/
570 {
571 int cThis;
572
573 cThis = getRawTexChar();
574 cThis = skipBOM(cThis);
575
576 if (cThis == '\\')
577 g_parser_backslashes++;
578 else
579 g_parser_backslashes = 0;
580 if (0) {
581 if (cThis=='\n')
582 diagnostics(6,"getRawTexChar = <\\n> backslashes=%d line=%ld", g_parser_backslashes, g_parser_line);
583 else if (cThis=='\0')
584 diagnostics(6,"getRawTexChar = <\\0> backslashes=%d line=%ld", g_parser_backslashes, g_parser_line);
585 else
586 diagnostics(6,"getRawTexChar = <%2c> backslashes=%d line=%ld",cThis, g_parser_backslashes, g_parser_line);
587 }
588 return cThis;
589 }
590
/****************************************************************************
 purpose: discard raw input up to and including the next newline, stopping
          early if the input ends.  Uses getRawTexChar().
 ****************************************************************************/
void skipToEOL(void)
{
    char ch;

    do {
        ch = getRawTexChar();
    } while (ch != '\0' && ch != '\n');
}
603
/***************************************************************************
 Description: read characters until one is found that is neither a space
              nor a newline, and return it
 ****************************************************************************/
char getNonBlank(void)
{
    char ch;

    do {
        ch = getTexChar();
    } while (ch == ' ' || ch == '\n');

    return ch;
}
618
/***************************************************************************
 Description: read characters until one is found that is not a space, and
              return it ('\0' at the end of the input)
 ****************************************************************************/
char getNonSpace(void)
{
    char ch;

    for (;;) {
        ch = getTexChar();
        if (ch != ' ')
            return ch;
    }
}
631
/***************************************************************************
 Description: consume spaces, leaving the first non-space character to be
              read next
 ****************************************************************************/
void skipSpaces(void)
{
    char ch;

    do {
        ch = getTexChar();
    } while (ch == ' ');

    ungetTexChar(ch);
}
643
/***************************************************************************
 Description: consume spaces and newlines, leaving the first other
              character to be read next
 ****************************************************************************/
void skipWhiteSpace(void)
{
    char first_visible;

    first_visible = getNonBlank();
    ungetTexChar(first_visible);
}
652
653
/***************************************************************************
 Description: consume a run of characters equal to c
 returns:     the length of the run; the first differing character is pushed
              back so it is read next
 ****************************************************************************/
int getSameChar(char c)
{
    int run_length = 0;
    char ch = getTexChar();

    while (ch == c) {
        run_length++;
        ch = getTexChar();
    }

    ungetTexChar(ch);
    return run_length;
}
672
getDelimitedText(char left,char right,int raw)673 char *getDelimitedText(char left, char right, int raw)
674
675 /******************************************************************************
676 purpose: general scanning routine that allocates and returns a string
677 that is between "left" and "right" that accounts for escaping by '\'
678
679 Example for getDelimitedText('{','}',TRUE)
680
681 "the \{ is shown {\it by} a\\} blah blah" ----> "the \{ is shown {\it by} a\\"
682
683 Note the missing opening brace in the example above
684
685 It turns out that for getDelimitedText('[',']',TRUE)
686
687 "the \] is shown {]} a\\] blah blah blah" ----> "the \] is shown {]} a\\"
688
689 ******************************************************************************/
690 {
691 char buffer[SCAN_BUFFER_SIZE];
692 int size = -1;
693 int lefts_needed = 1;
694 int brace_level = 0;
695 int last_char_was_backslash = FALSE;
696
697 while (lefts_needed && size < SCAN_BUFFER_SIZE-1) {
698 size++;
699 buffer[size] = (raw) ? getRawTexChar() : getTexChar();
700
701 if (last_char_was_backslash) { /* ignore \{ etc. */
702 if (buffer[size] == '\\') { /* two backslashes in a row */
703 last_char_was_backslash = FALSE; /* next char is not special */
704 continue;
705 }
706 }
707
708 else if (buffer[size] == right && brace_level == 0)
709 lefts_needed--;
710
711 else if (buffer[size] == '{')
712 brace_level++;
713
714 else if (buffer[size] == '}')
715 brace_level--;
716
717 last_char_was_backslash = (buffer[size] == '\\') ? TRUE : FALSE;
718 }
719
720 buffer[size] = '\0'; /* overwrite final delimeter */
721 if (size == SCAN_BUFFER_SIZE-1) {
722 diagnostics(WARNING, "Could not find closing '%c' in %d chars", right, SCAN_BUFFER_SIZE);
723 return strdup(" NOT FOUND ");
724 }
725
726 return strdup(buffer);
727 }
728
parseBrace(void)729 void parseBrace(void)
730
731 /****************************************************************************
732 Description: Skip text to balancing close brace
733 ****************************************************************************/
734 {
735 char *s = getDelimitedText('{', '}', FALSE);
736
737 free(s);
738 }
739
parseBracket(void)740 static void parseBracket(void)
741
742 /****************************************************************************
743 Description: Skip text to balancing close bracket
744 ****************************************************************************/
745 {
746 char *s = getDelimitedText('[', ']', FALSE);
747
748 free(s);
749 }
750
CmdIgnoreParameter(int code)751 void CmdIgnoreParameter(int code)
752
753 /****************************************************************************
754 Description: Ignore the parameters of a command
755 Example : CmdIgnoreParameter(21) for \command[opt1]{reg1}{reg2}
756
757 code is a decimal # of the form "op" where `o' is the number of
758 optional parameters (0-9) and `p' is the # of required parameters.
759
760 The specified number of parameters is ignored. The order of the parameters
761 in the LaTeX file does not matter.
762 ****************************************************************************/
763 {
764 int optParmCount = code / 10;
765 int regParmCount = code % 10;
766 char cThis;
767
768 diagnostics(4, "CmdIgnoreParameter [%d] {%d}", optParmCount, regParmCount);
769
770 while (regParmCount) {
771 cThis = getNonBlank();
772 switch (cThis) {
773 case '{':
774
775 regParmCount--;
776 parseBrace();
777 break;
778
779 case '[':
780
781 optParmCount--;
782 parseBracket();
783 break;
784
785 default:
786 diagnostics(WARNING, "Ignored command missing {} expected %d - found %d", code % 10,
787 code % 10 - regParmCount);
788 ungetTexChar(cThis);
789 return;
790 }
791 }
792
793 /* Check for trailing optional parameter e.g., \item[label] */
794
795 if (optParmCount > 0) {
796 cThis = getNonSpace();
797 if (cThis == '[')
798 parseBracket();
799 else {
800 ungetTexChar(cThis);
801 return;
802 }
803 }
804 return;
805 }
806
getSimpleCommand(void)807 char *getSimpleCommand(void)
808
809 /**************************************************************************
810 purpose: returns a simple command e.g., \alpha\beta will return "\beta"
811 ^
812 \! will return \!
813 **************************************************************************/
814 {
815 char buffer[128];
816 int size;
817
818 buffer[0] = getTexChar();
819
820 if (buffer[0] != '\\')
821 return NULL;
822
823 buffer[1] = getTexChar();
824
825 for (size = 2; size < 127; size++) {
826 buffer[size] = getRawTexChar(); /* \t \r '%' all end command */
827
828 if (!isalpha((int) buffer[size])) {
829 ungetTexChar(buffer[size]);
830 break;
831 }
832 }
833
834 buffer[size] = '\0';
835 if (size == 127) {
836 diagnostics(WARNING, "Misplaced brace.");
837 diagnostics(ERROR, "Cannot find close brace in 127 characters");
838 }
839
840 diagnostics(5, "getSimpleCommand result <%s>", buffer);
841 return strdup(buffer);
842 }
843
getBracketParam(void)844 char *getBracketParam(void)
845
846 /******************************************************************************
847 purpose: return bracketed parameter
848
849 \item[1] ---> "1" \item[] ---> "" \item the ---> NULL
850 ^ ^ ^
851 \item [1] ---> "1" \item [] ---> "" \item the ---> NULL
852 ^ ^ ^
853 ******************************************************************************/
854 {
855 char c, *text;
856
857 c = getNonBlank();
858 PushTrackLineNumber(FALSE);
859
860 if (c == '[') {
861 text = getDelimitedText('[', ']', FALSE);
862 diagnostics(6, "getBracketParam [%s]", text);
863
864 } else {
865 ungetTexChar(c);
866 text = NULL;
867 diagnostics(6, "getBracketParam []");
868 }
869
870 PopTrackLineNumber();
871 return text;
872 }
873
getBraceParam0(int raw_flag)874 static char *getBraceParam0(int raw_flag)
875
876 /**************************************************************************
877 purpose: allocates and returns the next parameter in the LaTeX file
878 Examples: (^ indicates the current file position)
879
880 \alpha\beta ---> "\beta" \bar \alpha ---> "\alpha"
881 ^ ^
882 \bar{text} ---> "text" \bar text ---> "t"
883 ^ ^
884 _\alpha ---> "\alpha" _{\alpha} ---> "\alpha"
885 ^ ^
886 _2 ---> "2" _{2} ---> "2"
887 ^ ^
888 **************************************************************************/
889 {
890 char s[2], *text;
891
892 s[0] = getNonSpace(); /* skip spaces and one possible newline */
893 if (s[0] == '\n')
894 s[0] = getNonSpace();
895
896 PushTrackLineNumber(FALSE);
897
898 if (s[0] == '\\') {
899 ungetTexChar(s[0]);
900 text = getSimpleCommand();
901
902 } else if (s[0] == '{')
903 text = getDelimitedText('{', '}', raw_flag);
904
905 else {
906 s[1] = '\0';
907 text = strdup(s);
908 }
909
910 PopTrackLineNumber();
911 diagnostics(6, "Leaving getBraceParam {%s}", text);
912 return text;
913 }
914
getBraceParam(void)915 char *getBraceParam(void)
916 {
917 return getBraceParam0(FALSE);
918 }
919
getBraceRawParam(void)920 char *getBraceRawParam(void)
921 {
922 return getBraceParam0(TRUE);
923 }
924
/**************************************************************************
 purpose: read the next parameter with getBraceParam() and discard it
 **************************************************************************/
void ignoreBraceParam(void)
{
    free(getBraceParam());      /* free(NULL) is a no-op */
}
929
/**************************************************************************
 purpose: read an optional bracketed parameter with getBracketParam() and
          discard it
 **************************************************************************/
void ignoreBracketParam(void)
{
    free(getBracketParam());    /* free(NULL) is a no-op */
}
934
935
getLeftRightParam(void)936 char *getLeftRightParam(void)
937
938 /**************************************************************************
939 purpose: get text between \left ... \right
940 **************************************************************************/
941 {
942 char text[5000], s, *command;
943 int i = 0;
944 int lrdepth = 1;
945
946 text[0] = '\0';
947
948 for (;;) {
949 s = getTexChar();
950 if (s == '\\') {
951 ungetTexChar(s);
952 command = getSimpleCommand();
953 if (strcmp(command, "\\right") == 0) {
954 lrdepth--;
955 if (lrdepth == 0) {
956 free(command);
957 return strdup(text);
958 }
959 }
960 my_strlcat(text + i, command, 5000);
961 i += (int) strlen(command);
962 if (i > 4950)
963 diagnostics(ERROR, "Contents of \\left .. \\right too large.");
964 if (strcmp(command, "\\left") == 0)
965 lrdepth++;
966 free(command);
967 } else {
968 text[i] = s;
969 i++;
970 text[i] = '\0';
971 }
972 }
973 return NULL;
974 }
975
976
977
978
getTexUntil(char * target,int raw)979 char *getTexUntil(char *target, int raw)
980
981 /**************************************************************************
982 purpose: returns the portion of the file to the beginning of target
983 returns: NULL if not found
984 **************************************************************************/
985 {
986 enum { BUFFSIZE = 200000 };
987 char *s;
988 char buffer[BUFFSIZE];
989 int last_i = -1;
990 int i = 0; /* size of string that has been read */
991 int j = 0; /* number of found characters */
992 int end_of_file_reached = FALSE;
993 int len = (int) strlen(target);
994
995 PushTrackLineNumber(FALSE);
996
997 diagnostics(5, "getTexUntil target = <%s> raw_search = %d ", target, raw);
998
999 while (j < len && i < BUFFSIZE) {
1000
1001 if (i > last_i) {
1002 buffer[i] = (raw) ? getRawTexChar() : getTexChar();
1003 last_i = i;
1004 if (buffer[i] != '\n')
1005 diagnostics(7, "next char = <%c>, %d, %d, %d", buffer[i], i, j, last_i);
1006 else
1007 diagnostics(7, "next char = <\\n>");
1008
1009 }
1010
1011 if (buffer[i] == '\0') {
1012 end_of_file_reached = TRUE;
1013 diagnostics(7, "end of file reached");
1014 break;
1015 }
1016
1017 if (buffer[i] != target[j]) {
1018 if (j > 0) { /* false start, put back what was found */
1019 diagnostics(8, "failed to match target[%d]=<%c> != buffer[%d]=<%c>", j, target[j], i, buffer[i]);
1020 i -= j;
1021 j = 0;
1022 }
1023 } else
1024 j++;
1025
1026 i++;
1027 }
1028
1029 if (i == BUFFSIZE)
1030 diagnostics(ERROR, "Could not find <%s> in %d characters \n\
1031 Recompile with larger BUFFSIZE in getTexUntil() in parser.c", target, BUFFSIZE);
1032
1033 if (!end_of_file_reached) /* do not include target in returned string */
1034 buffer[i - len] = '\0';
1035 else {
1036 diagnostics(ERROR, "Could not find <%s>", target);
1037 exit(1);
1038 }
1039
1040 PopTrackLineNumber();
1041
1042 diagnostics(6, "buffer size =[%d], actual=[%d]", strlen(buffer), i - len);
1043
1044 s = strdup(buffer);
1045 diagnostics(6, "getTexUntil result = %s", s);
1046 return s;
1047 }
1048
getSpacedTexUntil(char * target,int raw)1049 char *getSpacedTexUntil(char *target, int raw)
1050
1051 /**************************************************************************
1052 purpose: returns the portion of the file to the beginning of target
1053
1054 getSpacedTexUntil("\begin|{|document|}")
1055
1056 will match the regular expression "\\begin *{ *document *}"
1057 **************************************************************************/
1058 {
1059 enum { BUFFSIZE = 16000 };
1060 char buffer[BUFFSIZE];
1061 char *s;
1062 int buffer_pos, target_pos, target_len, max_buffer_pos, start_pos;
1063
1064 PushTrackLineNumber(FALSE);
1065
1066 diagnostics(5, "getSpacedTexUntil target = <%s> raw_search = %d ", target, raw);
1067
1068 buffer_pos = 0;
1069 target_pos = 0;
1070 start_pos = 0;
1071 target_len = (int) strlen(target);
1072 max_buffer_pos = -1;
1073
1074 do {
1075
1076 /* the next character might already be in the buffer */
1077 if (buffer_pos > max_buffer_pos) {
1078 buffer[buffer_pos] = (raw) ? getRawTexChar() : getTexChar();
1079 max_buffer_pos = buffer_pos;
1080 }
1081
1082 if (buffer[buffer_pos] == '\0') {
1083 diagnostics(ERROR, "end of file reached before '%s' was found",target);
1084 }
1085
1086 if (buffer[buffer_pos] == target[target_pos]) {
1087 if (target_pos == 0)
1088 start_pos = buffer_pos;
1089 target_pos++;
1090 }
1091
1092 /* does not match next character in target ... */
1093 else if (target[target_pos] != '|') {
1094
1095 if (target_pos > 0) /* false start, put back what was found */
1096 buffer_pos = start_pos;
1097 target_pos = 0;
1098
1099 /* next character in target is '|' */
1100 } else if (buffer[buffer_pos] != ' ' && buffer[buffer_pos] != '\n') {
1101
1102 /* next char is non-blank ... either match or reset */
1103 target_pos++; /* move past wildcard */
1104 if (buffer[buffer_pos] == target[target_pos]) {
1105 target_pos++;
1106 } else {
1107 buffer_pos = start_pos;
1108 target_pos = 0;
1109 }
1110 }
1111
1112 if (0) {
1113 if (buffer[buffer_pos] != '\n')
1114 diagnostics(WARNING, "this char = <%c>, %d, %d, max=%d", buffer[buffer_pos], buffer_pos, target_pos, max_buffer_pos);
1115 else
1116 diagnostics(WARNING, "this char = <\\n>, %d, %d, max=%d", buffer[buffer_pos], buffer_pos, target_pos, max_buffer_pos);
1117 }
1118
1119 buffer_pos++;
1120
1121
1122 if (buffer_pos == BUFFSIZE)
1123 diagnostics(ERROR, "Could not find <%s> in %d characters \n\
1124 Recompile with larger BUFFSIZE in getTexUntil() in parser.c", target, BUFFSIZE);
1125
1126 } while (target_pos < target_len);
1127
1128 /* terminate buffer */
1129 buffer[start_pos] = '\0';
1130
1131 PopTrackLineNumber();
1132
1133 s = strdup(buffer);
1134 diagnostics(6, "getSpacedTexUntil result = %s", s);
1135 return s;
1136 }
1137
getDimension(void)1138 int getDimension(void)
1139
1140 /**************************************************************************
1141 purpose: reads a TeX dimension and returns size it twips
1142 eg: 3 in, -.013mm, 29 pc, + 42,1 dd, 1234sp
1143 **************************************************************************/
1144 {
1145 char cThis, buffer[20];
1146 int i = 0;
1147 float num;
1148
1149 skipSpaces();
1150
1151 /* obtain optional sign */
1152 cThis = getTexChar();
1153
1154 /* skip "to" */
1155 if (cThis == 't') {
1156 getTexChar();
1157 cThis = getTexChar();
1158 }
1159
1160 /* skip "spread" */
1161 if (cThis == 's') {
1162 getTexChar();
1163 getTexChar();
1164 getTexChar();
1165 getTexChar();
1166 getTexChar();
1167 cThis = getTexChar();
1168 }
1169
1170 if (cThis == '-' || cThis == '+') {
1171 buffer[i++] = cThis;
1172 skipSpaces();
1173 cThis = getTexChar();
1174 }
1175
1176 /* obtain number */
1177 if (cThis == '\\')
1178 buffer[i++] = '1';
1179 else {
1180 while (i < 19 && (isdigit((int) cThis) || cThis == '.' || cThis == ',')) {
1181 if (cThis == ',')
1182 cThis = '.';
1183 buffer[i++] = cThis;
1184 cThis = getTexChar();
1185 }
1186 }
1187 ungetTexChar(cThis);
1188 buffer[i] = '\0';
1189 diagnostics(4, "getDimension() raw number is <%s>", buffer);
1190
1191 if (i == 19 || sscanf(buffer, "%f", &num) != 1) {
1192 diagnostics(WARNING, "Screwy number in TeX dimension");
1193 diagnostics(WARNING, "getDimension() number is <%s>", buffer);
1194 return 0;
1195 }
1196
1197 /* num *= 2; convert pts to twips */
1198
1199 /* obtain unit of measure */
1200 skipSpaces();
1201 buffer[0] = tolower((int) getTexChar());
1202
1203 if (buffer[0] == '\0') /* no units specified ... assume points */
1204 return (int) (num * 20);
1205
1206 /* skip "true" */
1207 if (buffer[0] == 't') {
1208 getTexChar();
1209 getTexChar();
1210 getTexChar();
1211 skipSpaces();
1212 buffer[0] = tolower((int) getTexChar());
1213 }
1214
1215 if (buffer[0] != '\\') {
1216 buffer[1] = tolower((int) getTexChar());
1217 buffer[2] = '\0';
1218
1219 diagnostics(4, "getDimension() dimension is <%s>", buffer);
1220 if (strstr(buffer, "pt"))
1221 return (int) (num * 20);
1222 else if (strstr(buffer, "pc"))
1223 return (int) (num * 12 * 20);
1224 else if (strstr(buffer, "in"))
1225 return (int) (num * 72.27 * 20);
1226 else if (strstr(buffer, "bp"))
1227 return (int) (num * 72.27 / 72 * 20);
1228 else if (strstr(buffer, "cm"))
1229 return (int) (num * 72.27 / 2.54 * 20);
1230 else if (strstr(buffer, "mm"))
1231 return (int) (num * 72.27 / 25.4 * 20);
1232 else if (strstr(buffer, "dd"))
1233 return (int) (num * 1238.0 / 1157.0 * 20);
1234 else if (strstr(buffer, "dd"))
1235 return (int) (num * 1238.0 / 1157 * 20);
1236 else if (strstr(buffer, "cc"))
1237 return (int) (num * 1238.0 / 1157.0 * 12.0 * 20);
1238 else if (strstr(buffer, "sp"))
1239 return (int) (num / 65536.0 * 20);
1240 else if (strstr(buffer, "ex"))
1241 return (int) (num * CurrentFontSize() * 0.5);
1242 else if (strstr(buffer, "em"))
1243 return (int) (num * CurrentFontSize());
1244 else if (strstr(buffer, "in"))
1245 return (int) (num * 72.27 * 20);
1246 else {
1247 ungetTexChar(buffer[1]);
1248 ungetTexChar(buffer[0]);
1249 return (int) num;
1250 }
1251 } else {
1252 char *s, *t;
1253
1254 ungetTexChar(buffer[0]);
1255 s = getSimpleCommand();
1256 t = s + 1; /* skip initial backslash */
1257 diagnostics(4, "getDimension() dimension is <%s>", t);
1258 num *= getLength(t);
1259 free(s);
1260 return (int) num;
1261 }
1262
1263 }
1264
/***************************************************************************
     purpose: return twips for \\, \\[1pt], \\*[1pt]

     An optional '*' directly after the \\ is consumed; any other
     character is pushed back.  The optional [..] argument is then
     converted with getStringDimension().

     returns: the converted height, or 0 when no bracket argument exists
 ***************************************************************************/
int getSlashSlashParam(void)
{
    char star;
    char *bracket_text;
    int twips;

    /* swallow the star of the \\* form, otherwise leave the input alone */
    star = getTexChar();
    if (star != '*')
        ungetTexChar(star);

    bracket_text = getBracketParam();
    if (bracket_text == NULL)
        return 0;

    twips = getStringDimension(bracket_text);
    free(bracket_text);
    return twips;
}
1285
1286