1 /* ***** BEGIN LICENSE BLOCK *****
2  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3  *
4  * The contents of this file are subject to the Mozilla Public License Version
5  * 1.1 (the "License"); you may not use this file except in compliance with
6  * the License. You may obtain a copy of the License at
7  * http://www.mozilla.org/MPL/
8  *
9  * Software distributed under the License is distributed on an "AS IS" basis,
10  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11  * for the specific language governing rights and limitations under the
12  * License.
13  *
14  * The Original Code is Hunspell, based on MySpell.
15  *
16  * The Initial Developers of the Original Code are
17  * Kevin Hendricks (MySpell) and Németh László (Hunspell).
18  * Portions created by the Initial Developers are Copyright (C) 2002-2005
19  * the Initial Developers. All Rights Reserved.
20  *
21  * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
22  * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
23  * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
24  * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
25  * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
26  *
27  * Alternatively, the contents of this file may be used under the terms of
28  * either the GNU General Public License Version 2 or later (the "GPL"), or
29  * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30  * in which case the provisions of the GPL or the LGPL are applicable instead
31  * of those above. If you wish to allow use of your version of this file only
32  * under the terms of either the GPL or the LGPL, and not to allow others to
33  * use your version of this file under the terms of the MPL, indicate your
34  * decision by deleting the provisions above and replace them with the notice
35  * and other provisions required by the GPL or the LGPL. If you do not delete
36  * the provisions above, a recipient may use your version of this file under
37  * the terms of any one of the MPL, the GPL or the LGPL.
38  *
39  * ***** END LICENSE BLOCK ***** */
40 
41 #include <cstdlib>
42 #include <cstring>
43 #include <cstdio>
44 #include <ctype.h>
45 
46 #include "../hunspell/csutil.hxx"
47 #include "latexparser.hxx"
48 
49 #ifndef W32
50 using namespace std;
51 #endif
52 
/*
 * LaTeX constructs that the tokenizer skips instead of handing to the
 * spell checker.
 *
 *   pat[0]  text that opens the construct.
 *   pat[1]  text that closes it, or NULL when the construct is a command
 *           whose brace-delimited arguments are skipped instead.
 *   arg     number of {...} arguments to skip; only read when pat[1] is
 *           NULL.
 *
 * look_pattern() lower-cases the input before comparing, so every pattern
 * is spelled in lower case.  It also returns the FIRST row that matches as
 * a prefix, so row order is significant: "$$" must stay ahead of "$",
 * "\\includeonly" ahead of "\\include", and the specific "\\begin{...}"
 * rows ahead of the generic "\\begin".  Rows marked "shadowed" below can
 * never be selected because an earlier, shorter row matches first; they are
 * kept as documentation of the commands that are covered.
 */
static struct {
  const char* pat[2];
  int arg;
} PATTERN[] = {{{"\\(", "\\)"}, 0},
               {{"$$", "$$"}, 0},
               {{"$", "$"}, 0},
               {{"\\begin{math}", "\\end{math}"}, 0},
               {{"\\[", "\\]"}, 0},
               {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
               {{"\\begin{equation}", "\\end{equation}"}, 0},
               {{"\\begin{equation*}", "\\end{equation*}"}, 0},
               {{"\\cite", NULL}, 1},
               {{"\\nocite", NULL}, 1},
               {{"\\index", NULL}, 1},
               {{"\\label", NULL}, 1},
               {{"\\ref", NULL}, 1},
               {{"\\pageref", NULL}, 1},
               {{"\\autoref", NULL}, 1},
               {{"\\parbox", NULL}, 1},
               {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
               {{"\\verb+", "+"}, 0},
               {{"\\verb|", "|"}, 0},
               {{"\\verb#", "#"}, 0},
               {{"\\verb*", "*"}, 0},
               {{"\\documentstyle", "\\begin{document}"}, 0},
               {{"\\documentclass", "\\begin{document}"}, 0},
               //	{ { "\\documentclass", NULL } , 1 },
               {{"\\usepackage", NULL}, 1},
               {{"\\includeonly", NULL}, 1},
               {{"\\include", NULL}, 1},
               {{"\\input", NULL}, 1},
               {{"\\vspace", NULL}, 1},
               {{"\\setlength", NULL}, 2},
               {{"\\addtolength", NULL}, 2},
               {{"\\settowidth", NULL}, 2},
               {{"\\rule", NULL}, 2},
               {{"\\hspace", NULL}, 1},
               {{"\\\\[", "]"}, 0},
               {{"\\pagebreak[", "]"}, 0},
               {{"\\nopagebreak[", "]"}, 0},
               {{"\\enlargethispage", NULL}, 1},
               {{"\\begin{tabular}", NULL}, 1},
               {{"\\addcontentsline", NULL}, 2},
               {{"\\begin{thebibliography}", NULL}, 1},
               {{"\\bibliography", NULL}, 1},
               {{"\\bibliographystyle", NULL}, 1},  // shadowed by "\\bibliography"
               {{"\\bibitem", NULL}, 1},
               {{"\\begin", NULL}, 1},
               {{"\\end", NULL}, 1},
               {{"\\pagestyle", NULL}, 1},
               {{"\\pagenumbering", NULL}, 1},
               {{"\\thispagestyle", NULL}, 1},
               {{"\\newtheorem", NULL}, 2},
               {{"\\newcommand", NULL}, 2},
               {{"\\renewcommand", NULL}, 2},
               {{"\\setcounter", NULL}, 2},
               {{"\\addtocounter", NULL}, 1},
               {{"\\stepcounter", NULL}, 1},
               {{"\\selectlanguage", NULL}, 1},
               {{"\\inputencoding", NULL}, 1},  // shadowed by "\\input"
               {{"\\hyphenation", NULL}, 1},
               {{"\\definecolor", NULL}, 3},
               {{"\\color", NULL}, 1},
               {{"\\textcolor", NULL}, 1},
               {{"\\pagecolor", NULL}, 1},
               // shadowed by "\\color": input "\colorbox" is handled with
               // arg 1, so the 2 below is never applied.
               {{"\\colorbox", NULL}, 2},
               {{"\\fcolorbox", NULL}, 2},
               {{"\\declaregraphicsextensions", NULL}, 1},
               {{"\\psfig", NULL}, 1},
               {{"\\url", NULL}, 1},
               {{"\\eqref", NULL}, 1},
               {{"\\vskip", NULL}, 1},
               {{"\\vglue", NULL}, 1},
               // NOTE(review): a bare '' starts argument skipping like a
               // command would - confirm this is intended.
               {{"\'\'", NULL}, 1}};

// Number of rows in PATTERN.
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
130 
LaTeXParser(const char * wordchars)131 LaTeXParser::LaTeXParser(const char* wordchars)
132     : TextParser(wordchars)
133     , pattern_num(0), depth(0), arg(0), opt(0) {
134 }
135 
LaTeXParser(const w_char * wordchars,int len)136 LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
137     : TextParser(wordchars, len)
138     , pattern_num(0), depth(0), arg(0), opt(0) {
139 }
140 
~LaTeXParser()141 LaTeXParser::~LaTeXParser() {}
142 
look_pattern(int col)143 int LaTeXParser::look_pattern(int col) {
144   for (unsigned int i = 0; i < PATTERN_LEN; i++) {
145     const char* j = line[actual].c_str() + head;
146     const char* k = PATTERN[i].pat[col];
147     if (!k)
148       continue;
149     while ((*k != '\0') && (tolower(*j) == *k)) {
150       j++;
151       k++;
152     }
153     if (*k == '\0')
154       return i;
155   }
156   return -1;
157 }
158 
159 /*
160  * LaTeXParser
161  *
162  * state 0: not wordchar
163  * state 1: wordchar
164  * state 2: comments
165  * state 3: commands
166  * state 4: commands with arguments
167  * state 5: % comment
168  *
169  */
170 
next_token(std::string & t)171 bool LaTeXParser::next_token(std::string& t) {
172   t.clear();
173   int i;
174   int slash = 0;
175   int apostrophe;
176   for (;;) {
177     // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
178     // %s\n",depth,state,arg,line[actual]+head);
179 
180     switch (state) {
181       case 0:  // non word chars
182         if ((pattern_num = look_pattern(0)) != -1) {
183           if (PATTERN[pattern_num].pat[1]) {
184             state = 2;
185           } else {
186             state = 4;
187             depth = 0;
188             arg = 0;
189             opt = 1;
190           }
191           head += strlen(PATTERN[pattern_num].pat[0]) - 1;
192         } else if (line[actual][head] == '%') {
193           state = 5;
194         } else if (is_wordchar(line[actual].c_str() + head)) {
195           state = 1;
196           token = head;
197         } else if (line[actual][head] == '\\') {
198           if (line[actual][head + 1] == '\\' ||   // \\ (linebreak)
199               (line[actual][head + 1] == '$') ||  // \$ (dollar sign)
200               (line[actual][head + 1] == '%')) {  // \% (percent)
201             head++;
202             break;
203           }
204           state = 3;
205         }
206         break;
207       case 1:  // wordchar
208         apostrophe = 0;
209         if (!is_wordchar(line[actual].c_str() + head) ||
210             (line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
211              ++apostrophe)) {
212           state = 0;
213           bool ok = alloc_token(token, &head, t);
214           if (apostrophe)
215             head += 2;
216           if (ok)
217             return true;
218         }
219         break;
220       case 2:  // comment, labels, etc
221         if (((i = look_pattern(1)) != -1) &&
222             (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
223           state = 0;
224           head += strlen(PATTERN[pattern_num].pat[1]) - 1;
225         }
226         break;
227       case 3:  // command
228         if ((tolower(line[actual][head]) < 'a') ||
229             (tolower(line[actual][head]) > 'z')) {
230           state = 0;
231           head--;
232         }
233         break;
234       case 4:  // command with arguments
235         if (slash && (line[actual][head] != '\0')) {
236           slash = 0;
237           head++;
238           break;
239         } else if (line[actual][head] == '\\') {
240           slash = 1;
241         } else if ((line[actual][head] == '{') ||
242                    ((opt) && (line[actual][head] == '['))) {
243           depth++;
244           opt = 0;
245         } else if (line[actual][head] == '}') {
246           depth--;
247           if (depth == 0) {
248             opt = 1;
249             arg++;
250           }
251           if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
252               (depth < 0)) {
253             state = 0;  // XXX not handles the last optional arg.
254           }
255         } else if (line[actual][head] == ']')
256           depth--;
257     }  // case
258     if (next_char(line[actual].c_str(), &head)) {
259       if (state == 5)
260         state = 0;
261       return false;
262     }
263   }
264 }
265