// Scintilla source code edit control
/** @file LexLaTeX.cxx
 ** Lexer for LaTeX2e.
 **/
// Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
// The License.txt file describes the conditions under which this software may be distributed.

// Modified by G. HU in 2013. Added folding, syntax highlighting inside math environments, and changed some minor behaviors.
9
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 #include <vector>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #include "PropSetSimple.h"
23 #include "WordList.h"
24 #include "LexAccessor.h"
25 #include "Accessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "LexerBase.h"
30
31 using namespace Scintilla;
32
33 using namespace std;
34
35 struct latexFoldSave {
latexFoldSavelatexFoldSave36 latexFoldSave() : structLev(0) {
37 for (int i = 0; i < 8; ++i) openBegins[i] = 0;
38 }
latexFoldSavelatexFoldSave39 latexFoldSave(const latexFoldSave &save) : structLev(save.structLev) {
40 for (int i = 0; i < 8; ++i) openBegins[i] = save.openBegins[i];
41 }
operator =latexFoldSave42 latexFoldSave &operator=(const latexFoldSave &save) {
43 if (this != &save) {
44 structLev = save.structLev;
45 for (int i = 0; i < 8; ++i) openBegins[i] = save.openBegins[i];
46 }
47 return *this;
48 }
49 int openBegins[8];
50 Sci_Position structLev;
51 };
52
53 class LexerLaTeX : public LexerBase {
54 private:
55 vector<int> modes;
setMode(Sci_Position line,int mode)56 void setMode(Sci_Position line, int mode) {
57 if (line >= static_cast<Sci_Position>(modes.size())) modes.resize(line + 1, 0);
58 modes[line] = mode;
59 }
getMode(Sci_Position line)60 int getMode(Sci_Position line) {
61 if (line >= 0 && line < static_cast<Sci_Position>(modes.size())) return modes[line];
62 return 0;
63 }
truncModes(Sci_Position numLines)64 void truncModes(Sci_Position numLines) {
65 if (static_cast<Sci_Position>(modes.size()) > numLines * 2 + 256)
66 modes.resize(numLines + 128);
67 }
68
69 vector<latexFoldSave> saves;
setSave(Sci_Position line,const latexFoldSave & save)70 void setSave(Sci_Position line, const latexFoldSave &save) {
71 if (line >= static_cast<Sci_Position>(saves.size())) saves.resize(line + 1);
72 saves[line] = save;
73 }
getSave(Sci_Position line,latexFoldSave & save)74 void getSave(Sci_Position line, latexFoldSave &save) {
75 if (line >= 0 && line < static_cast<Sci_Position>(saves.size())) save = saves[line];
76 else {
77 save.structLev = 0;
78 for (int i = 0; i < 8; ++i) save.openBegins[i] = 0;
79 }
80 }
truncSaves(Sci_Position numLines)81 void truncSaves(Sci_Position numLines) {
82 if (static_cast<Sci_Position>(saves.size()) > numLines * 2 + 256)
83 saves.resize(numLines + 128);
84 }
85 public:
LexerFactoryLaTeX()86 static ILexer *LexerFactoryLaTeX() {
87 return new LexerLaTeX();
88 }
89 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
90 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
91 };
92
/// True for the characters that form a "special" escape when preceded by a
/// backslash: # $ % & _ { } and the space character.
static bool latexIsSpecial(int ch) {
    switch (ch) {
    case '#':
    case '$':
    case '%':
    case '&':
    case '_':
    case '{':
    case '}':
    case ' ':
        return true;
    default:
        return false;
    }
}
97
/// True for horizontal white space: a space or a tab.
static bool latexIsBlank(int ch) {
    switch (ch) {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
101
/// True for a space, a tab, or either line-end character (CR or LF).
static bool latexIsBlankAndNL(int ch) {
    switch (ch) {
    case ' ':
    case '\t':
    case '\r':
    case '\n':
        return true;
    default:
        return false;
    }
}
105
/// True only for the ASCII letters A-Z and a-z.
/// The ranges are tested directly so the answer does not depend on the C
/// locale in force and is well defined for any int, including negative
/// values produced by sign-extended bytes.
static bool latexIsLetter(int ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}
109
latexIsTagValid(Sci_Position & i,Sci_Position l,Accessor & styler)110 static bool latexIsTagValid(Sci_Position &i, Sci_Position l, Accessor &styler) {
111 while (i < l) {
112 if (styler.SafeGetCharAt(i) == '{') {
113 while (i < l) {
114 i++;
115 if (styler.SafeGetCharAt(i) == '}') {
116 return true;
117 } else if (!latexIsLetter(styler.SafeGetCharAt(i)) &&
118 styler.SafeGetCharAt(i)!='*') {
119 return false;
120 }
121 }
122 } else if (!latexIsBlank(styler.SafeGetCharAt(i))) {
123 return false;
124 }
125 i++;
126 }
127 return false;
128 }
129
latexNextNotBlankIs(Sci_Position i,Accessor & styler,char needle)130 static bool latexNextNotBlankIs(Sci_Position i, Accessor &styler, char needle) {
131 char ch;
132 while (i < styler.Length()) {
133 ch = styler.SafeGetCharAt(i);
134 if (!latexIsBlankAndNL(ch) && ch != '*') {
135 if (ch == needle)
136 return true;
137 else
138 return false;
139 }
140 i++;
141 }
142 return false;
143 }
144
latexLastWordIs(Sci_Position start,Accessor & styler,const char * needle)145 static bool latexLastWordIs(Sci_Position start, Accessor &styler, const char *needle) {
146 Sci_PositionU i = 0;
147 Sci_PositionU l = static_cast<Sci_PositionU>(strlen(needle));
148 Sci_Position ini = start-l+1;
149 char s[32];
150
151 while (i < l && i < 31) {
152 s[i] = styler.SafeGetCharAt(ini + i);
153 i++;
154 }
155 s[i] = '\0';
156
157 return (strcmp(s, needle) == 0);
158 }
159
latexLastWordIsMathEnv(Sci_Position pos,Accessor & styler)160 static bool latexLastWordIsMathEnv(Sci_Position pos, Accessor &styler) {
161 Sci_Position i, j;
162 char s[32];
163 const char *mathEnvs[] = { "align", "alignat", "flalign", "gather",
164 "multiline", "displaymath", "eqnarray", "equation" };
165 if (styler.SafeGetCharAt(pos) != '}') return false;
166 for (i = pos - 1; i >= 0; --i) {
167 if (styler.SafeGetCharAt(i) == '{') break;
168 if (pos - i >= 20) return false;
169 }
170 if (i < 0 || i == pos - 1) return false;
171 ++i;
172 for (j = 0; i + j < pos; ++j)
173 s[j] = styler.SafeGetCharAt(i + j);
174 s[j] = '\0';
175 if (j == 0) return false;
176 if (s[j - 1] == '*') s[--j] = '\0';
177 for (i = 0; i < static_cast<int>(sizeof(mathEnvs) / sizeof(const char *)); ++i)
178 if (strcmp(s, mathEnvs[i]) == 0) return true;
179 return false;
180 }
181
latexStateReset(int & mode,int & state)182 static inline void latexStateReset(int &mode, int &state) {
183 switch (mode) {
184 case 1: state = SCE_L_MATH; break;
185 case 2: state = SCE_L_MATH2; break;
186 default: state = SCE_L_DEFAULT; break;
187 }
188 }
189
190 // There are cases not handled correctly, like $abcd\textrm{what is $x+y$}z+w$.
191 // But I think it's already good enough.
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)192 void SCI_METHOD LexerLaTeX::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
193 // startPos is assumed to be the first character of a line
194 Accessor styler(pAccess, &props);
195 styler.StartAt(startPos);
196 int mode = getMode(styler.GetLine(startPos) - 1);
197 int state = initStyle;
198 if (state == SCE_L_ERROR || state == SCE_L_SHORTCMD || state == SCE_L_SPECIAL) // should not happen
199 latexStateReset(mode, state);
200
201 char chNext = styler.SafeGetCharAt(startPos);
202 char chVerbatimDelim = '\0';
203 styler.StartSegment(startPos);
204 Sci_Position lengthDoc = startPos + length;
205
206 for (Sci_Position i = startPos; i < lengthDoc; i++) {
207 char ch = chNext;
208 chNext = styler.SafeGetCharAt(i + 1);
209
210 if (styler.IsLeadByte(ch)) {
211 i++;
212 chNext = styler.SafeGetCharAt(i + 1);
213 continue;
214 }
215
216 if (ch == '\r' || ch == '\n')
217 setMode(styler.GetLine(i), mode);
218
219 switch (state) {
220 case SCE_L_DEFAULT :
221 switch (ch) {
222 case '\\' :
223 styler.ColourTo(i - 1, state);
224 if (latexIsLetter(chNext)) {
225 state = SCE_L_COMMAND;
226 } else if (latexIsSpecial(chNext)) {
227 styler.ColourTo(i + 1, SCE_L_SPECIAL);
228 i++;
229 chNext = styler.SafeGetCharAt(i + 1);
230 } else if (chNext == '\r' || chNext == '\n') {
231 styler.ColourTo(i, SCE_L_ERROR);
232 } else if (IsASCII(chNext)) {
233 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
234 if (chNext == '(') {
235 mode = 1;
236 state = SCE_L_MATH;
237 } else if (chNext == '[') {
238 mode = 2;
239 state = SCE_L_MATH2;
240 }
241 i++;
242 chNext = styler.SafeGetCharAt(i + 1);
243 }
244 break;
245 case '$' :
246 styler.ColourTo(i - 1, state);
247 if (chNext == '$') {
248 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
249 mode = 2;
250 state = SCE_L_MATH2;
251 i++;
252 chNext = styler.SafeGetCharAt(i + 1);
253 } else {
254 styler.ColourTo(i, SCE_L_SHORTCMD);
255 mode = 1;
256 state = SCE_L_MATH;
257 }
258 break;
259 case '%' :
260 styler.ColourTo(i - 1, state);
261 state = SCE_L_COMMENT;
262 break;
263 }
264 break;
265 // These 3 will never be reached.
266 case SCE_L_ERROR:
267 case SCE_L_SPECIAL:
268 case SCE_L_SHORTCMD:
269 break;
270 case SCE_L_COMMAND :
271 if (!latexIsLetter(chNext)) {
272 styler.ColourTo(i, state);
273 if (latexNextNotBlankIs(i + 1, styler, '[' )) {
274 state = SCE_L_CMDOPT;
275 } else if (latexLastWordIs(i, styler, "\\begin")) {
276 state = SCE_L_TAG;
277 } else if (latexLastWordIs(i, styler, "\\end")) {
278 state = SCE_L_TAG2;
279 } else if (latexLastWordIs(i, styler, "\\verb") && chNext != '*' && chNext != ' ') {
280 chVerbatimDelim = chNext;
281 state = SCE_L_VERBATIM;
282 } else {
283 latexStateReset(mode, state);
284 }
285 }
286 break;
287 case SCE_L_CMDOPT :
288 if (ch == ']') {
289 styler.ColourTo(i, state);
290 latexStateReset(mode, state);
291 }
292 break;
293 case SCE_L_TAG :
294 if (latexIsTagValid(i, lengthDoc, styler)) {
295 styler.ColourTo(i, state);
296 latexStateReset(mode, state);
297 if (latexLastWordIs(i, styler, "{verbatim}")) {
298 state = SCE_L_VERBATIM;
299 } else if (latexLastWordIs(i, styler, "{comment}")) {
300 state = SCE_L_COMMENT2;
301 } else if (latexLastWordIs(i, styler, "{math}") && mode == 0) {
302 mode = 1;
303 state = SCE_L_MATH;
304 } else if (latexLastWordIsMathEnv(i, styler) && mode == 0) {
305 mode = 2;
306 state = SCE_L_MATH2;
307 }
308 } else {
309 styler.ColourTo(i, SCE_L_ERROR);
310 latexStateReset(mode, state);
311 ch = styler.SafeGetCharAt(i);
312 if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
313 }
314 chNext = styler.SafeGetCharAt(i+1);
315 break;
316 case SCE_L_TAG2 :
317 if (latexIsTagValid(i, lengthDoc, styler)) {
318 styler.ColourTo(i, state);
319 latexStateReset(mode, state);
320 } else {
321 styler.ColourTo(i, SCE_L_ERROR);
322 latexStateReset(mode, state);
323 ch = styler.SafeGetCharAt(i);
324 if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
325 }
326 chNext = styler.SafeGetCharAt(i+1);
327 break;
328 case SCE_L_MATH :
329 switch (ch) {
330 case '\\' :
331 styler.ColourTo(i - 1, state);
332 if (latexIsLetter(chNext)) {
333 Sci_Position match = i + 3;
334 if (latexLastWordIs(match, styler, "\\end")) {
335 match++;
336 if (latexIsTagValid(match, lengthDoc, styler)) {
337 if (latexLastWordIs(match, styler, "{math}"))
338 mode = 0;
339 }
340 }
341 state = SCE_L_COMMAND;
342 } else if (latexIsSpecial(chNext)) {
343 styler.ColourTo(i + 1, SCE_L_SPECIAL);
344 i++;
345 chNext = styler.SafeGetCharAt(i + 1);
346 } else if (chNext == '\r' || chNext == '\n') {
347 styler.ColourTo(i, SCE_L_ERROR);
348 } else if (IsASCII(chNext)) {
349 if (chNext == ')') {
350 mode = 0;
351 state = SCE_L_DEFAULT;
352 }
353 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
354 i++;
355 chNext = styler.SafeGetCharAt(i + 1);
356 }
357 break;
358 case '$' :
359 styler.ColourTo(i - 1, state);
360 styler.ColourTo(i, SCE_L_SHORTCMD);
361 mode = 0;
362 state = SCE_L_DEFAULT;
363 break;
364 case '%' :
365 styler.ColourTo(i - 1, state);
366 state = SCE_L_COMMENT;
367 break;
368 }
369 break;
370 case SCE_L_MATH2 :
371 switch (ch) {
372 case '\\' :
373 styler.ColourTo(i - 1, state);
374 if (latexIsLetter(chNext)) {
375 Sci_Position match = i + 3;
376 if (latexLastWordIs(match, styler, "\\end")) {
377 match++;
378 if (latexIsTagValid(match, lengthDoc, styler)) {
379 if (latexLastWordIsMathEnv(match, styler))
380 mode = 0;
381 }
382 }
383 state = SCE_L_COMMAND;
384 } else if (latexIsSpecial(chNext)) {
385 styler.ColourTo(i + 1, SCE_L_SPECIAL);
386 i++;
387 chNext = styler.SafeGetCharAt(i + 1);
388 } else if (chNext == '\r' || chNext == '\n') {
389 styler.ColourTo(i, SCE_L_ERROR);
390 } else if (IsASCII(chNext)) {
391 if (chNext == ']') {
392 mode = 0;
393 state = SCE_L_DEFAULT;
394 }
395 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
396 i++;
397 chNext = styler.SafeGetCharAt(i + 1);
398 }
399 break;
400 case '$' :
401 styler.ColourTo(i - 1, state);
402 if (chNext == '$') {
403 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
404 i++;
405 chNext = styler.SafeGetCharAt(i + 1);
406 mode = 0;
407 state = SCE_L_DEFAULT;
408 } else { // This may not be an error, e.g. \begin{equation}\text{$a$}\end{equation}
409 styler.ColourTo(i, SCE_L_SHORTCMD);
410 }
411 break;
412 case '%' :
413 styler.ColourTo(i - 1, state);
414 state = SCE_L_COMMENT;
415 break;
416 }
417 break;
418 case SCE_L_COMMENT :
419 if (ch == '\r' || ch == '\n') {
420 styler.ColourTo(i - 1, state);
421 latexStateReset(mode, state);
422 }
423 break;
424 case SCE_L_COMMENT2 :
425 if (ch == '\\') {
426 Sci_Position match = i + 3;
427 if (latexLastWordIs(match, styler, "\\end")) {
428 match++;
429 if (latexIsTagValid(match, lengthDoc, styler)) {
430 if (latexLastWordIs(match, styler, "{comment}")) {
431 styler.ColourTo(i - 1, state);
432 state = SCE_L_COMMAND;
433 }
434 }
435 }
436 }
437 break;
438 case SCE_L_VERBATIM :
439 if (ch == '\\') {
440 Sci_Position match = i + 3;
441 if (latexLastWordIs(match, styler, "\\end")) {
442 match++;
443 if (latexIsTagValid(match, lengthDoc, styler)) {
444 if (latexLastWordIs(match, styler, "{verbatim}")) {
445 styler.ColourTo(i - 1, state);
446 state = SCE_L_COMMAND;
447 }
448 }
449 }
450 } else if (chNext == chVerbatimDelim) {
451 styler.ColourTo(i + 1, state);
452 latexStateReset(mode, state);
453 chVerbatimDelim = '\0';
454 i++;
455 chNext = styler.SafeGetCharAt(i + 1);
456 } else if (chVerbatimDelim != '\0' && (ch == '\n' || ch == '\r')) {
457 styler.ColourTo(i, SCE_L_ERROR);
458 latexStateReset(mode, state);
459 chVerbatimDelim = '\0';
460 }
461 break;
462 }
463 }
464 if (lengthDoc == styler.Length()) truncModes(styler.GetLine(lengthDoc - 1));
465 styler.ColourTo(lengthDoc - 1, state);
466 styler.Flush();
467 }
468
latexFoldSaveToInt(const latexFoldSave & save)469 static int latexFoldSaveToInt(const latexFoldSave &save) {
470 int sum = 0;
471 for (int i = 0; i <= save.structLev; ++i)
472 sum += save.openBegins[i];
473 return ((sum + save.structLev + SC_FOLDLEVELBASE) & SC_FOLDLEVELNUMBERMASK);
474 }
475
476 // Change folding state while processing a line
477 // Return the level before the first relevant command
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)478 void SCI_METHOD LexerLaTeX::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) {
479 const char *structWords[7] = {"part", "chapter", "section", "subsection",
480 "subsubsection", "paragraph", "subparagraph"};
481 Accessor styler(pAccess, &props);
482 Sci_PositionU endPos = startPos + length;
483 Sci_Position curLine = styler.GetLine(startPos);
484 latexFoldSave save;
485 getSave(curLine - 1, save);
486 do {
487 char ch, buf[16];
488 Sci_Position i, j;
489 int lev = -1;
490 bool needFold = false;
491 for (i = static_cast<Sci_Position>(startPos); i < static_cast<Sci_Position>(endPos); ++i) {
492 ch = styler.SafeGetCharAt(i);
493 if (ch == '\r' || ch == '\n') break;
494 if (ch != '\\' || styler.StyleAt(i) != SCE_L_COMMAND) continue;
495 for (j = 0; j < 15 && i + 1 < static_cast<Sci_Position>(endPos); ++j, ++i) {
496 buf[j] = styler.SafeGetCharAt(i + 1);
497 if (!latexIsLetter(buf[j])) break;
498 }
499 buf[j] = '\0';
500 if (strcmp(buf, "begin") == 0) {
501 if (lev < 0) lev = latexFoldSaveToInt(save);
502 ++save.openBegins[save.structLev];
503 needFold = true;
504 }
505 else if (strcmp(buf, "end") == 0) {
506 while (save.structLev > 0 && save.openBegins[save.structLev] == 0)
507 --save.structLev;
508 if (lev < 0) lev = latexFoldSaveToInt(save);
509 if (save.openBegins[save.structLev] > 0) --save.openBegins[save.structLev];
510 }
511 else {
512 for (j = 0; j < 7; ++j)
513 if (strcmp(buf, structWords[j]) == 0) break;
514 if (j >= 7) continue;
515 save.structLev = j; // level before the command
516 for (j = save.structLev + 1; j < 8; ++j) {
517 save.openBegins[save.structLev] += save.openBegins[j];
518 save.openBegins[j] = 0;
519 }
520 if (lev < 0) lev = latexFoldSaveToInt(save);
521 ++save.structLev; // level after the command
522 needFold = true;
523 }
524 }
525 if (lev < 0) lev = latexFoldSaveToInt(save);
526 if (needFold) lev |= SC_FOLDLEVELHEADERFLAG;
527 styler.SetLevel(curLine, lev);
528 setSave(curLine, save);
529 ++curLine;
530 startPos = styler.LineStart(curLine);
531 if (static_cast<Sci_Position>(startPos) == styler.Length()) {
532 lev = latexFoldSaveToInt(save);
533 styler.SetLevel(curLine, lev);
534 setSave(curLine, save);
535 truncSaves(curLine);
536 }
537 } while (startPos < endPos);
538 styler.Flush();
539 }
540
// Word list descriptions passed to the LexerModule below: the array holds
// nothing but a single null entry.
static const char *const emptyWordListDesc[] = {
    nullptr
};
544
545 LexerModule lmLatex(SCLEX_LATEX, LexerLaTeX::LexerFactoryLaTeX, "latex", emptyWordListDesc);
546