1 // Scintilla source code edit control
2 /** @file LexLaTeX.cxx
3 ** Lexer for LaTeX2e.
4 **/
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
// Modified by G. HU in 2013. Added folding, syntax highlighting inside math environments, and changed some minor behaviors.
9
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 #include <vector>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #include "PropSetSimple.h"
23 #include "WordList.h"
24 #include "LexAccessor.h"
25 #include "Accessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "DefaultLexer.h"
30 #include "LexerBase.h"
31
32 using namespace Scintilla;
33
34 using namespace std;
35
36 struct latexFoldSave {
latexFoldSavelatexFoldSave37 latexFoldSave() : structLev(0) {
38 for (int i = 0; i < 8; ++i) openBegins[i] = 0;
39 }
latexFoldSavelatexFoldSave40 latexFoldSave(const latexFoldSave &save) : structLev(save.structLev) {
41 for (int i = 0; i < 8; ++i) openBegins[i] = save.openBegins[i];
42 }
operator =latexFoldSave43 latexFoldSave &operator=(const latexFoldSave &save) {
44 if (this != &save) {
45 structLev = save.structLev;
46 for (int i = 0; i < 8; ++i) openBegins[i] = save.openBegins[i];
47 }
48 return *this;
49 }
50 int openBegins[8];
51 Sci_Position structLev;
52 };
53
54 class LexerLaTeX : public LexerBase {
55 private:
56 vector<int> modes;
setMode(Sci_Position line,int mode)57 void setMode(Sci_Position line, int mode) {
58 if (line >= static_cast<Sci_Position>(modes.size())) modes.resize(line + 1, 0);
59 modes[line] = mode;
60 }
getMode(Sci_Position line)61 int getMode(Sci_Position line) {
62 if (line >= 0 && line < static_cast<Sci_Position>(modes.size())) return modes[line];
63 return 0;
64 }
truncModes(Sci_Position numLines)65 void truncModes(Sci_Position numLines) {
66 if (static_cast<Sci_Position>(modes.size()) > numLines * 2 + 256)
67 modes.resize(numLines + 128);
68 }
69
70 vector<latexFoldSave> saves;
setSave(Sci_Position line,const latexFoldSave & save)71 void setSave(Sci_Position line, const latexFoldSave &save) {
72 if (line >= static_cast<Sci_Position>(saves.size())) saves.resize(line + 1);
73 saves[line] = save;
74 }
getSave(Sci_Position line,latexFoldSave & save)75 void getSave(Sci_Position line, latexFoldSave &save) {
76 if (line >= 0 && line < static_cast<Sci_Position>(saves.size())) save = saves[line];
77 else {
78 save.structLev = 0;
79 for (int i = 0; i < 8; ++i) save.openBegins[i] = 0;
80 }
81 }
truncSaves(Sci_Position numLines)82 void truncSaves(Sci_Position numLines) {
83 if (static_cast<Sci_Position>(saves.size()) > numLines * 2 + 256)
84 saves.resize(numLines + 128);
85 }
86 public:
LexerFactoryLaTeX()87 static ILexer5 *LexerFactoryLaTeX() {
88 return new LexerLaTeX();
89 }
90 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
91 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
92
93 // ILexer5 methods
GetName()94 const char * SCI_METHOD GetName() override {
95 return "latex";
96 }
GetIdentifier()97 int SCI_METHOD GetIdentifier() override {
98 return SCLEX_LATEX;
99 }
100 };
101
// True for the characters that form a "special" sequence when they follow a
// backslash: # $ % & _ { } and the space.
static bool latexIsSpecial(int ch) {
	switch (ch) {
	case '#':
	case '$':
	case '%':
	case '&':
	case '_':
	case '{':
	case '}':
	case ' ':
		return true;
	default:
		return false;
	}
}
106
// True for a space or a horizontal tab.
static bool latexIsBlank(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
110
// True for a space, a horizontal tab, a carriage return or a line feed.
static bool latexIsBlankAndNL(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
114
latexIsLetter(int ch)115 static bool latexIsLetter(int ch) {
116 return IsASCII(ch) && isalpha(ch);
117 }
118
latexIsTagValid(Sci_Position & i,Sci_Position l,Accessor & styler)119 static bool latexIsTagValid(Sci_Position &i, Sci_Position l, Accessor &styler) {
120 while (i < l) {
121 if (styler.SafeGetCharAt(i) == '{') {
122 while (i < l) {
123 i++;
124 if (styler.SafeGetCharAt(i) == '}') {
125 return true;
126 } else if (!latexIsLetter(styler.SafeGetCharAt(i)) &&
127 styler.SafeGetCharAt(i)!='*') {
128 return false;
129 }
130 }
131 } else if (!latexIsBlank(styler.SafeGetCharAt(i))) {
132 return false;
133 }
134 i++;
135 }
136 return false;
137 }
138
latexNextNotBlankIs(Sci_Position i,Accessor & styler,char needle)139 static bool latexNextNotBlankIs(Sci_Position i, Accessor &styler, char needle) {
140 char ch;
141 while (i < styler.Length()) {
142 ch = styler.SafeGetCharAt(i);
143 if (!latexIsBlankAndNL(ch) && ch != '*') {
144 if (ch == needle)
145 return true;
146 else
147 return false;
148 }
149 i++;
150 }
151 return false;
152 }
153
latexLastWordIs(Sci_Position start,Accessor & styler,const char * needle)154 static bool latexLastWordIs(Sci_Position start, Accessor &styler, const char *needle) {
155 Sci_PositionU i = 0;
156 Sci_PositionU l = static_cast<Sci_PositionU>(strlen(needle));
157 Sci_Position ini = start-l+1;
158 char s[32];
159
160 while (i < l && i < 31) {
161 s[i] = styler.SafeGetCharAt(ini + i);
162 i++;
163 }
164 s[i] = '\0';
165
166 return (strcmp(s, needle) == 0);
167 }
168
latexLastWordIsMathEnv(Sci_Position pos,Accessor & styler)169 static bool latexLastWordIsMathEnv(Sci_Position pos, Accessor &styler) {
170 Sci_Position i, j;
171 char s[32];
172 const char *mathEnvs[] = { "align", "alignat", "flalign", "gather",
173 "multiline", "displaymath", "eqnarray", "equation" };
174 if (styler.SafeGetCharAt(pos) != '}') return false;
175 for (i = pos - 1; i >= 0; --i) {
176 if (styler.SafeGetCharAt(i) == '{') break;
177 if (pos - i >= 20) return false;
178 }
179 if (i < 0 || i == pos - 1) return false;
180 ++i;
181 for (j = 0; i + j < pos; ++j)
182 s[j] = styler.SafeGetCharAt(i + j);
183 s[j] = '\0';
184 if (j == 0) return false;
185 if (s[j - 1] == '*') s[--j] = '\0';
186 for (i = 0; i < static_cast<int>(sizeof(mathEnvs) / sizeof(const char *)); ++i)
187 if (strcmp(s, mathEnvs[i]) == 0) return true;
188 return false;
189 }
190
latexStateReset(int & mode,int & state)191 static inline void latexStateReset(int &mode, int &state) {
192 switch (mode) {
193 case 1: state = SCE_L_MATH; break;
194 case 2: state = SCE_L_MATH2; break;
195 default: state = SCE_L_DEFAULT; break;
196 }
197 }
198
199 // There are cases not handled correctly, like $abcd\textrm{what is $x+y$}z+w$.
200 // But I think it's already good enough.
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)201 void SCI_METHOD LexerLaTeX::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
202 // startPos is assumed to be the first character of a line
203 Accessor styler(pAccess, &props);
204 styler.StartAt(startPos);
205 int mode = getMode(styler.GetLine(startPos) - 1);
206 int state = initStyle;
207 if (state == SCE_L_ERROR || state == SCE_L_SHORTCMD || state == SCE_L_SPECIAL) // should not happen
208 latexStateReset(mode, state);
209
210 char chNext = styler.SafeGetCharAt(startPos);
211 char chVerbatimDelim = '\0';
212 styler.StartSegment(startPos);
213 Sci_Position lengthDoc = startPos + length;
214
215 for (Sci_Position i = startPos; i < lengthDoc; i++) {
216 char ch = chNext;
217 chNext = styler.SafeGetCharAt(i + 1);
218
219 if (styler.IsLeadByte(ch)) {
220 i++;
221 chNext = styler.SafeGetCharAt(i + 1);
222 continue;
223 }
224
225 if (ch == '\r' || ch == '\n')
226 setMode(styler.GetLine(i), mode);
227
228 switch (state) {
229 case SCE_L_DEFAULT :
230 switch (ch) {
231 case '\\' :
232 styler.ColourTo(i - 1, state);
233 if (latexIsLetter(chNext)) {
234 state = SCE_L_COMMAND;
235 } else if (latexIsSpecial(chNext)) {
236 styler.ColourTo(i + 1, SCE_L_SPECIAL);
237 i++;
238 chNext = styler.SafeGetCharAt(i + 1);
239 } else if (chNext == '\r' || chNext == '\n') {
240 styler.ColourTo(i, SCE_L_ERROR);
241 } else if (IsASCII(chNext)) {
242 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
243 if (chNext == '(') {
244 mode = 1;
245 state = SCE_L_MATH;
246 } else if (chNext == '[') {
247 mode = 2;
248 state = SCE_L_MATH2;
249 }
250 i++;
251 chNext = styler.SafeGetCharAt(i + 1);
252 }
253 break;
254 case '$' :
255 styler.ColourTo(i - 1, state);
256 if (chNext == '$') {
257 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
258 mode = 2;
259 state = SCE_L_MATH2;
260 i++;
261 chNext = styler.SafeGetCharAt(i + 1);
262 } else {
263 styler.ColourTo(i, SCE_L_SHORTCMD);
264 mode = 1;
265 state = SCE_L_MATH;
266 }
267 break;
268 case '%' :
269 styler.ColourTo(i - 1, state);
270 state = SCE_L_COMMENT;
271 break;
272 }
273 break;
274 // These 3 will never be reached.
275 case SCE_L_ERROR:
276 case SCE_L_SPECIAL:
277 case SCE_L_SHORTCMD:
278 break;
279 case SCE_L_COMMAND :
280 if (!latexIsLetter(chNext)) {
281 styler.ColourTo(i, state);
282 if (latexNextNotBlankIs(i + 1, styler, '[' )) {
283 state = SCE_L_CMDOPT;
284 } else if (latexLastWordIs(i, styler, "\\begin")) {
285 state = SCE_L_TAG;
286 } else if (latexLastWordIs(i, styler, "\\end")) {
287 state = SCE_L_TAG2;
288 } else if (latexLastWordIs(i, styler, "\\verb") && chNext != '*' && chNext != ' ') {
289 chVerbatimDelim = chNext;
290 state = SCE_L_VERBATIM;
291 } else {
292 latexStateReset(mode, state);
293 }
294 }
295 break;
296 case SCE_L_CMDOPT :
297 if (ch == ']') {
298 styler.ColourTo(i, state);
299 latexStateReset(mode, state);
300 }
301 break;
302 case SCE_L_TAG :
303 if (latexIsTagValid(i, lengthDoc, styler)) {
304 styler.ColourTo(i, state);
305 latexStateReset(mode, state);
306 if (latexLastWordIs(i, styler, "{verbatim}")) {
307 state = SCE_L_VERBATIM;
308 } else if (latexLastWordIs(i, styler, "{comment}")) {
309 state = SCE_L_COMMENT2;
310 } else if (latexLastWordIs(i, styler, "{math}") && mode == 0) {
311 mode = 1;
312 state = SCE_L_MATH;
313 } else if (latexLastWordIsMathEnv(i, styler) && mode == 0) {
314 mode = 2;
315 state = SCE_L_MATH2;
316 }
317 } else {
318 styler.ColourTo(i, SCE_L_ERROR);
319 latexStateReset(mode, state);
320 ch = styler.SafeGetCharAt(i);
321 if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
322 }
323 chNext = styler.SafeGetCharAt(i+1);
324 break;
325 case SCE_L_TAG2 :
326 if (latexIsTagValid(i, lengthDoc, styler)) {
327 styler.ColourTo(i, state);
328 latexStateReset(mode, state);
329 } else {
330 styler.ColourTo(i, SCE_L_ERROR);
331 latexStateReset(mode, state);
332 ch = styler.SafeGetCharAt(i);
333 if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
334 }
335 chNext = styler.SafeGetCharAt(i+1);
336 break;
337 case SCE_L_MATH :
338 switch (ch) {
339 case '\\' :
340 styler.ColourTo(i - 1, state);
341 if (latexIsLetter(chNext)) {
342 Sci_Position match = i + 3;
343 if (latexLastWordIs(match, styler, "\\end")) {
344 match++;
345 if (latexIsTagValid(match, lengthDoc, styler)) {
346 if (latexLastWordIs(match, styler, "{math}"))
347 mode = 0;
348 }
349 }
350 state = SCE_L_COMMAND;
351 } else if (latexIsSpecial(chNext)) {
352 styler.ColourTo(i + 1, SCE_L_SPECIAL);
353 i++;
354 chNext = styler.SafeGetCharAt(i + 1);
355 } else if (chNext == '\r' || chNext == '\n') {
356 styler.ColourTo(i, SCE_L_ERROR);
357 } else if (IsASCII(chNext)) {
358 if (chNext == ')') {
359 mode = 0;
360 state = SCE_L_DEFAULT;
361 }
362 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
363 i++;
364 chNext = styler.SafeGetCharAt(i + 1);
365 }
366 break;
367 case '$' :
368 styler.ColourTo(i - 1, state);
369 styler.ColourTo(i, SCE_L_SHORTCMD);
370 mode = 0;
371 state = SCE_L_DEFAULT;
372 break;
373 case '%' :
374 styler.ColourTo(i - 1, state);
375 state = SCE_L_COMMENT;
376 break;
377 }
378 break;
379 case SCE_L_MATH2 :
380 switch (ch) {
381 case '\\' :
382 styler.ColourTo(i - 1, state);
383 if (latexIsLetter(chNext)) {
384 Sci_Position match = i + 3;
385 if (latexLastWordIs(match, styler, "\\end")) {
386 match++;
387 if (latexIsTagValid(match, lengthDoc, styler)) {
388 if (latexLastWordIsMathEnv(match, styler))
389 mode = 0;
390 }
391 }
392 state = SCE_L_COMMAND;
393 } else if (latexIsSpecial(chNext)) {
394 styler.ColourTo(i + 1, SCE_L_SPECIAL);
395 i++;
396 chNext = styler.SafeGetCharAt(i + 1);
397 } else if (chNext == '\r' || chNext == '\n') {
398 styler.ColourTo(i, SCE_L_ERROR);
399 } else if (IsASCII(chNext)) {
400 if (chNext == ']') {
401 mode = 0;
402 state = SCE_L_DEFAULT;
403 }
404 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
405 i++;
406 chNext = styler.SafeGetCharAt(i + 1);
407 }
408 break;
409 case '$' :
410 styler.ColourTo(i - 1, state);
411 if (chNext == '$') {
412 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
413 i++;
414 chNext = styler.SafeGetCharAt(i + 1);
415 mode = 0;
416 state = SCE_L_DEFAULT;
417 } else { // This may not be an error, e.g. \begin{equation}\text{$a$}\end{equation}
418 styler.ColourTo(i, SCE_L_SHORTCMD);
419 }
420 break;
421 case '%' :
422 styler.ColourTo(i - 1, state);
423 state = SCE_L_COMMENT;
424 break;
425 }
426 break;
427 case SCE_L_COMMENT :
428 if (ch == '\r' || ch == '\n') {
429 styler.ColourTo(i - 1, state);
430 latexStateReset(mode, state);
431 }
432 break;
433 case SCE_L_COMMENT2 :
434 if (ch == '\\') {
435 Sci_Position match = i + 3;
436 if (latexLastWordIs(match, styler, "\\end")) {
437 match++;
438 if (latexIsTagValid(match, lengthDoc, styler)) {
439 if (latexLastWordIs(match, styler, "{comment}")) {
440 styler.ColourTo(i - 1, state);
441 state = SCE_L_COMMAND;
442 }
443 }
444 }
445 }
446 break;
447 case SCE_L_VERBATIM :
448 if (ch == '\\') {
449 Sci_Position match = i + 3;
450 if (latexLastWordIs(match, styler, "\\end")) {
451 match++;
452 if (latexIsTagValid(match, lengthDoc, styler)) {
453 if (latexLastWordIs(match, styler, "{verbatim}")) {
454 styler.ColourTo(i - 1, state);
455 state = SCE_L_COMMAND;
456 }
457 }
458 }
459 } else if (chNext == chVerbatimDelim) {
460 styler.ColourTo(i + 1, state);
461 latexStateReset(mode, state);
462 chVerbatimDelim = '\0';
463 i++;
464 chNext = styler.SafeGetCharAt(i + 1);
465 } else if (chVerbatimDelim != '\0' && (ch == '\n' || ch == '\r')) {
466 styler.ColourTo(i, SCE_L_ERROR);
467 latexStateReset(mode, state);
468 chVerbatimDelim = '\0';
469 }
470 break;
471 }
472 }
473 if (lengthDoc == styler.Length()) truncModes(styler.GetLine(lengthDoc - 1));
474 styler.ColourTo(lengthDoc - 1, state);
475 styler.Flush();
476 }
477
latexFoldSaveToInt(const latexFoldSave & save)478 static int latexFoldSaveToInt(const latexFoldSave &save) {
479 int sum = 0;
480 for (int i = 0; i <= save.structLev; ++i)
481 sum += save.openBegins[i];
482 return ((sum + save.structLev + SC_FOLDLEVELBASE) & SC_FOLDLEVELNUMBERMASK);
483 }
484
485 // Change folding state while processing a line
486 // Return the level before the first relevant command
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)487 void SCI_METHOD LexerLaTeX::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) {
488 const char *structWords[7] = {"part", "chapter", "section", "subsection",
489 "subsubsection", "paragraph", "subparagraph"};
490 Accessor styler(pAccess, &props);
491 Sci_PositionU endPos = startPos + length;
492 Sci_Position curLine = styler.GetLine(startPos);
493 latexFoldSave save;
494 getSave(curLine - 1, save);
495 do {
496 char ch, buf[16];
497 Sci_Position i, j;
498 int lev = -1;
499 bool needFold = false;
500 for (i = static_cast<Sci_Position>(startPos); i < static_cast<Sci_Position>(endPos); ++i) {
501 ch = styler.SafeGetCharAt(i);
502 if (ch == '\r' || ch == '\n') break;
503 if (ch != '\\' || styler.StyleAt(i) != SCE_L_COMMAND) continue;
504 for (j = 0; j < 15 && i + 1 < static_cast<Sci_Position>(endPos); ++j, ++i) {
505 buf[j] = styler.SafeGetCharAt(i + 1);
506 if (!latexIsLetter(buf[j])) break;
507 }
508 buf[j] = '\0';
509 if (strcmp(buf, "begin") == 0) {
510 if (lev < 0) lev = latexFoldSaveToInt(save);
511 ++save.openBegins[save.structLev];
512 needFold = true;
513 }
514 else if (strcmp(buf, "end") == 0) {
515 while (save.structLev > 0 && save.openBegins[save.structLev] == 0)
516 --save.structLev;
517 if (lev < 0) lev = latexFoldSaveToInt(save);
518 if (save.openBegins[save.structLev] > 0) --save.openBegins[save.structLev];
519 }
520 else {
521 for (j = 0; j < 7; ++j)
522 if (strcmp(buf, structWords[j]) == 0) break;
523 if (j >= 7) continue;
524 save.structLev = j; // level before the command
525 for (j = save.structLev + 1; j < 8; ++j) {
526 save.openBegins[save.structLev] += save.openBegins[j];
527 save.openBegins[j] = 0;
528 }
529 if (lev < 0) lev = latexFoldSaveToInt(save);
530 ++save.structLev; // level after the command
531 needFold = true;
532 }
533 }
534 if (lev < 0) lev = latexFoldSaveToInt(save);
535 if (needFold) lev |= SC_FOLDLEVELHEADERFLAG;
536 styler.SetLevel(curLine, lev);
537 setSave(curLine, save);
538 ++curLine;
539 startPos = styler.LineStart(curLine);
540 if (static_cast<Sci_Position>(startPos) == styler.Length()) {
541 lev = latexFoldSaveToInt(save);
542 styler.SetLevel(curLine, lev);
543 setSave(curLine, save);
544 truncSaves(curLine);
545 }
546 } while (startPos < endPos);
547 styler.Flush();
548 }
549
550 static const char *const emptyWordListDesc[] = {
551 0
552 };
553
554 LexerModule lmLatex(SCLEX_LATEX, LexerLaTeX::LexerFactoryLaTeX, "latex", emptyWordListDesc);
555