1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
3 *
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
8 *
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
13 *
14 * The Original Code is Hunspell, based on MySpell.
15 *
16 * The Initial Developers of the Original Code are
17 * Kevin Hendricks (MySpell) and Németh László (Hunspell).
18 * Portions created by the Initial Developers are Copyright (C) 2002-2005
19 * the Initial Developers. All Rights Reserved.
20 *
21 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
22 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
23 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
24 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
25 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
26 *
27 * Alternatively, the contents of this file may be used under the terms of
28 * either the GNU General Public License Version 2 or later (the "GPL"), or
29 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30 * in which case the provisions of the GPL or the LGPL are applicable instead
31 * of those above. If you wish to allow use of your version of this file only
32 * under the terms of either the GPL or the LGPL, and not to allow others to
33 * use your version of this file under the terms of the MPL, indicate your
34 * decision by deleting the provisions above and replace them with the notice
35 * and other provisions required by the GPL or the LGPL. If you do not delete
36 * the provisions above, a recipient may use your version of this file under
37 * the terms of any one of the MPL, the GPL or the LGPL.
38 *
39 * ***** END LICENSE BLOCK ***** */
40
41 #include <cstdlib>
42 #include <cstring>
43 #include <cstdio>
44 #include <ctype.h>
45
46 #include "../hunspell/csutil.hxx"
47 #include "latexparser.hxx"
48
49 #ifndef W32
50 using namespace std;
51 #endif
52
// Table of LaTeX constructs that the parser skips instead of tokenizing.
//
// pat[0] is the text that opens the construct. When pat[1] is non-NULL it is
// the text that closes the construct and next_token() skips everything up to
// it (state 2). When pat[1] is NULL, `arg` is the number of brace-delimited
// arguments next_token() skips after the command (state 4).
//
// look_pattern() scans this table from the top and returns the first entry
// that matches, so where one opening text is a prefix of another the earlier
// entry wins (for example "$$" is listed before "$"). The input is folded to
// lower case before it is compared, so every pattern is written in lower
// case.
static struct {
  const char* pat[2];  // pat[0]: opening text; pat[1]: closing text or NULL
  int arg;             // argument count to skip; only used when pat[1] is NULL
} PATTERN[] = {{{"\\(", "\\)"}, 0},
               {{"$$", "$$"}, 0},
               {{"$", "$"}, 0},
               {{"\\begin{math}", "\\end{math}"}, 0},
               {{"\\[", "\\]"}, 0},
               {{"\\begin{displaymath}", "\\end{displaymath}"}, 0},
               {{"\\begin{equation}", "\\end{equation}"}, 0},
               {{"\\begin{equation*}", "\\end{equation*}"}, 0},
               {{"\\cite", NULL}, 1},
               {{"\\nocite", NULL}, 1},
               {{"\\index", NULL}, 1},
               {{"\\label", NULL}, 1},
               {{"\\ref", NULL}, 1},
               {{"\\pageref", NULL}, 1},
               {{"\\autoref", NULL}, 1},
               {{"\\parbox", NULL}, 1},
               {{"\\begin{verbatim}", "\\end{verbatim}"}, 0},
               {{"\\verb+", "+"}, 0},
               {{"\\verb|", "|"}, 0},
               {{"\\verb#", "#"}, 0},
               {{"\\verb*", "*"}, 0},
               {{"\\documentstyle", "\\begin{document}"}, 0},
               {{"\\documentclass", "\\begin{document}"}, 0},
               //  { { "\\documentclass", NULL } , 1 },
               {{"\\usepackage", NULL}, 1},
               {{"\\includeonly", NULL}, 1},
               {{"\\include", NULL}, 1},
               {{"\\input", NULL}, 1},
               {{"\\vspace", NULL}, 1},
               {{"\\setlength", NULL}, 2},
               {{"\\addtolength", NULL}, 2},
               {{"\\settowidth", NULL}, 2},
               {{"\\rule", NULL}, 2},
               {{"\\hspace", NULL}, 1},
               {{"\\\\[", "]"}, 0},
               {{"\\pagebreak[", "]"}, 0},
               {{"\\nopagebreak[", "]"}, 0},
               {{"\\enlargethispage", NULL}, 1},
               {{"\\begin{tabular}", NULL}, 1},
               {{"\\addcontentsline", NULL}, 2},
               {{"\\begin{thebibliography}", NULL}, 1},
               {{"\\bibliography", NULL}, 1},
               // NOTE(review): never selected, "\\bibliography" above matches
               // first.
               {{"\\bibliographystyle", NULL}, 1},
               {{"\\bibitem", NULL}, 1},
               {{"\\begin", NULL}, 1},
               {{"\\end", NULL}, 1},
               {{"\\pagestyle", NULL}, 1},
               {{"\\pagenumbering", NULL}, 1},
               {{"\\thispagestyle", NULL}, 1},
               {{"\\newtheorem", NULL}, 2},
               {{"\\newcommand", NULL}, 2},
               {{"\\renewcommand", NULL}, 2},
               {{"\\setcounter", NULL}, 2},
               {{"\\addtocounter", NULL}, 1},
               {{"\\stepcounter", NULL}, 1},
               {{"\\selectlanguage", NULL}, 1},
               // NOTE(review): never selected, "\\input" above matches first.
               {{"\\inputencoding", NULL}, 1},
               {{"\\hyphenation", NULL}, 1},
               {{"\\definecolor", NULL}, 3},
               {{"\\color", NULL}, 1},
               {{"\\textcolor", NULL}, 1},
               {{"\\pagecolor", NULL}, 1},
               // NOTE(review): never selected, "\\color" above matches first,
               // so only one argument is skipped for this command.
               {{"\\colorbox", NULL}, 2},
               {{"\\fcolorbox", NULL}, 2},
               {{"\\declaregraphicsextensions", NULL}, 1},
               {{"\\psfig", NULL}, 1},
               {{"\\url", NULL}, 1},
               {{"\\eqref", NULL}, 1},
               {{"\\vskip", NULL}, 1},
               {{"\\vglue", NULL}, 1},
               {{"\'\'", NULL}, 1}};

// Number of entries in PATTERN.
#define PATTERN_LEN (sizeof(PATTERN) / sizeof(PATTERN[0]))
130
LaTeXParser(const char * wordchars)131 LaTeXParser::LaTeXParser(const char* wordchars)
132 : TextParser(wordchars)
133 , pattern_num(0), depth(0), arg(0), opt(0) {
134 }
135
LaTeXParser(const w_char * wordchars,int len)136 LaTeXParser::LaTeXParser(const w_char* wordchars, int len)
137 : TextParser(wordchars, len)
138 , pattern_num(0), depth(0), arg(0), opt(0) {
139 }
140
~LaTeXParser()141 LaTeXParser::~LaTeXParser() {}
142
look_pattern(int col)143 int LaTeXParser::look_pattern(int col) {
144 for (unsigned int i = 0; i < PATTERN_LEN; i++) {
145 const char* j = line[actual].c_str() + head;
146 const char* k = PATTERN[i].pat[col];
147 if (!k)
148 continue;
149 while ((*k != '\0') && (tolower(*j) == *k)) {
150 j++;
151 k++;
152 }
153 if (*k == '\0')
154 return i;
155 }
156 return -1;
157 }
158
/*
 * LaTeXParser
 *
 * state 0: not wordchar
 * state 1: wordchar
 * state 2: skipped region that ends at a closing pattern (math, verbatim, ...)
 * state 3: commands
 * state 4: commands with arguments
 * state 5: % comment
 *
 */
170
next_token(std::string & t)171 bool LaTeXParser::next_token(std::string& t) {
172 t.clear();
173 int i;
174 int slash = 0;
175 int apostrophe;
176 for (;;) {
177 // fprintf(stderr,"depth: %d, state: %d, , arg: %d, token:
178 // %s\n",depth,state,arg,line[actual]+head);
179
180 switch (state) {
181 case 0: // non word chars
182 if ((pattern_num = look_pattern(0)) != -1) {
183 if (PATTERN[pattern_num].pat[1]) {
184 state = 2;
185 } else {
186 state = 4;
187 depth = 0;
188 arg = 0;
189 opt = 1;
190 }
191 head += strlen(PATTERN[pattern_num].pat[0]) - 1;
192 } else if (line[actual][head] == '%') {
193 state = 5;
194 } else if (is_wordchar(line[actual].c_str() + head)) {
195 state = 1;
196 token = head;
197 } else if (line[actual][head] == '\\') {
198 if (line[actual][head + 1] == '\\' || // \\ (linebreak)
199 (line[actual][head + 1] == '$') || // \$ (dollar sign)
200 (line[actual][head + 1] == '%')) { // \% (percent)
201 head++;
202 break;
203 }
204 state = 3;
205 }
206 break;
207 case 1: // wordchar
208 apostrophe = 0;
209 if (!is_wordchar(line[actual].c_str() + head) ||
210 (line[actual][head] == '\'' && line[actual][head + 1] == '\'' &&
211 ++apostrophe)) {
212 state = 0;
213 bool ok = alloc_token(token, &head, t);
214 if (apostrophe)
215 head += 2;
216 if (ok)
217 return true;
218 }
219 break;
220 case 2: // comment, labels, etc
221 if (((i = look_pattern(1)) != -1) &&
222 (strcmp(PATTERN[i].pat[1], PATTERN[pattern_num].pat[1]) == 0)) {
223 state = 0;
224 head += strlen(PATTERN[pattern_num].pat[1]) - 1;
225 }
226 break;
227 case 3: // command
228 if ((tolower(line[actual][head]) < 'a') ||
229 (tolower(line[actual][head]) > 'z')) {
230 state = 0;
231 head--;
232 }
233 break;
234 case 4: // command with arguments
235 if (slash && (line[actual][head] != '\0')) {
236 slash = 0;
237 head++;
238 break;
239 } else if (line[actual][head] == '\\') {
240 slash = 1;
241 } else if ((line[actual][head] == '{') ||
242 ((opt) && (line[actual][head] == '['))) {
243 depth++;
244 opt = 0;
245 } else if (line[actual][head] == '}') {
246 depth--;
247 if (depth == 0) {
248 opt = 1;
249 arg++;
250 }
251 if (((depth == 0) && (arg == PATTERN[pattern_num].arg)) ||
252 (depth < 0)) {
253 state = 0; // XXX not handles the last optional arg.
254 }
255 } else if (line[actual][head] == ']')
256 depth--;
257 } // case
258 if (next_char(line[actual].c_str(), &head)) {
259 if (state == 5)
260 state = 0;
261 return false;
262 }
263 }
264 }
265