1 // Scintilla source code edit control
2 /** @file LexLaTeX.cxx
3 ** Lexer for LaTeX2e.
4 **/
5 // Copyright 1998-2001 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
// Modified by G. HU in 2013. Added folding, syntax highlighting inside math environments, and changed some minor behaviors.
9
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 #include <vector>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #include "PropSetSimple.h"
23 #include "WordList.h"
24 #include "LexAccessor.h"
25 #include "Accessor.h"
26 #include "StyleContext.h"
27 #include "CharacterSet.h"
28 #include "LexerModule.h"
29 #include "DefaultLexer.h"
30 #include "LexerBase.h"
31
32 using namespace Scintilla;
33
34 using namespace std;
35
36 struct latexFoldSave {
latexFoldSavelatexFoldSave37 latexFoldSave() : structLev(0) {
38 for (int i = 0; i < 8; ++i) openBegins[i] = 0;
39 }
latexFoldSavelatexFoldSave40 latexFoldSave(const latexFoldSave &save) : structLev(save.structLev) {
41 for (int i = 0; i < 8; ++i) openBegins[i] = save.openBegins[i];
42 }
operator =latexFoldSave43 latexFoldSave &operator=(const latexFoldSave &save) {
44 if (this != &save) {
45 structLev = save.structLev;
46 for (int i = 0; i < 8; ++i) openBegins[i] = save.openBegins[i];
47 }
48 return *this;
49 }
50 int openBegins[8];
51 Sci_Position structLev;
52 };
53
54 class LexerLaTeX : public LexerBase {
55 private:
56 vector<int> modes;
setMode(Sci_Position line,int mode)57 void setMode(Sci_Position line, int mode) {
58 if (line >= static_cast<Sci_Position>(modes.size())) modes.resize(line + 1, 0);
59 modes[line] = mode;
60 }
getMode(Sci_Position line)61 int getMode(Sci_Position line) {
62 if (line >= 0 && line < static_cast<Sci_Position>(modes.size())) return modes[line];
63 return 0;
64 }
truncModes(Sci_Position numLines)65 void truncModes(Sci_Position numLines) {
66 if (static_cast<Sci_Position>(modes.size()) > numLines * 2 + 256)
67 modes.resize(numLines + 128);
68 }
69
70 vector<latexFoldSave> saves;
setSave(Sci_Position line,const latexFoldSave & save)71 void setSave(Sci_Position line, const latexFoldSave &save) {
72 if (line >= static_cast<Sci_Position>(saves.size())) saves.resize(line + 1);
73 saves[line] = save;
74 }
getSave(Sci_Position line,latexFoldSave & save)75 void getSave(Sci_Position line, latexFoldSave &save) {
76 if (line >= 0 && line < static_cast<Sci_Position>(saves.size())) save = saves[line];
77 else {
78 save.structLev = 0;
79 for (int i = 0; i < 8; ++i) save.openBegins[i] = 0;
80 }
81 }
truncSaves(Sci_Position numLines)82 void truncSaves(Sci_Position numLines) {
83 if (static_cast<Sci_Position>(saves.size()) > numLines * 2 + 256)
84 saves.resize(numLines + 128);
85 }
86 public:
LexerFactoryLaTeX()87 static ILexer5 *LexerFactoryLaTeX() {
88 return new LexerLaTeX();
89 }
90 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
91 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
92
93 // ILexer5 methods
GetName()94 const char * SCI_METHOD GetName() override {
95 return "latex";
96 }
GetIdentifier()97 int SCI_METHOD GetIdentifier() override {
98 return SCLEX_LATEX;
99 }
100 };
101
// True for the characters that form a "special" escape when they directly
// follow a backslash: # $ % & _ { } and the space character.
static bool latexIsSpecial(int ch) {
	switch (ch) {
	case '#':
	case '$':
	case '%':
	case '&':
	case '_':
	case '{':
	case '}':
	case ' ':
		return true;
	default:
		return false;
	}
}
106
// True for horizontal whitespace only: space or tab.
static bool latexIsBlank(int ch) {
	if (ch == ' ')
		return true;
	return ch == '\t';
}
110
// True for space, tab, carriage return and line feed.
static bool latexIsBlankAndNL(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
114
latexIsLetter(int ch)115 static bool latexIsLetter(int ch) {
116 return IsASCII(ch) && isalpha(ch);
117 }
118
latexIsTagValid(Sci_Position & i,Sci_Position l,Accessor & styler)119 static bool latexIsTagValid(Sci_Position &i, Sci_Position l, Accessor &styler) {
120 while (i < l) {
121 if (styler.SafeGetCharAt(i) == '{') {
122 while (i < l) {
123 i++;
124 if (styler.SafeGetCharAt(i) == '}') {
125 return true;
126 } else if (!latexIsLetter(styler.SafeGetCharAt(i)) &&
127 styler.SafeGetCharAt(i)!='*') {
128 return false;
129 }
130 }
131 } else if (!latexIsBlank(styler.SafeGetCharAt(i))) {
132 return false;
133 }
134 i++;
135 }
136 return false;
137 }
138
latexNextNotBlankIs(Sci_Position i,Accessor & styler,char needle)139 static bool latexNextNotBlankIs(Sci_Position i, Accessor &styler, char needle) {
140 char ch;
141 while (i < styler.Length()) {
142 ch = styler.SafeGetCharAt(i);
143 if (!latexIsBlankAndNL(ch) && ch != '*') {
144 if (ch == needle)
145 return true;
146 else
147 return false;
148 }
149 i++;
150 }
151 return false;
152 }
153
latexLastWordIs(Sci_Position start,Accessor & styler,const char * needle)154 static bool latexLastWordIs(Sci_Position start, Accessor &styler, const char *needle) {
155 Sci_PositionU i = 0;
156 Sci_PositionU l = static_cast<Sci_PositionU>(strlen(needle));
157 Sci_Position ini = start-l+1;
158 char s[32];
159
160 while (i < l && i < 31) {
161 s[i] = styler.SafeGetCharAt(ini + i);
162 i++;
163 }
164 s[i] = '\0';
165
166 return (strcmp(s, needle) == 0);
167 }
168
latexLastWordIsMathEnv(Sci_Position pos,Accessor & styler)169 static bool latexLastWordIsMathEnv(Sci_Position pos, Accessor &styler) {
170 Sci_Position i, j;
171 char s[32];
172 const char *mathEnvs[] = { "align", "alignat", "flalign", "gather",
173 "multiline", "displaymath", "eqnarray", "equation" };
174 if (styler.SafeGetCharAt(pos) != '}') return false;
175 for (i = pos - 1; i >= 0; --i) {
176 if (styler.SafeGetCharAt(i) == '{') break;
177 if (pos - i >= 20) return false;
178 }
179 if (i < 0 || i == pos - 1) return false;
180 ++i;
181 for (j = 0; i + j < pos; ++j)
182 s[j] = styler.SafeGetCharAt(i + j);
183 s[j] = '\0';
184 if (j == 0) return false;
185 if (s[j - 1] == '*') s[--j] = '\0';
186 for (i = 0; i < static_cast<int>(sizeof(mathEnvs) / sizeof(const char *)); ++i)
187 if (strcmp(s, mathEnvs[i]) == 0) return true;
188 return false;
189 }
190
latexStateReset(int & mode,int & state)191 static inline void latexStateReset(int &mode, int &state) {
192 switch (mode) {
193 case 1: state = SCE_L_MATH; break;
194 case 2: state = SCE_L_MATH2; break;
195 default: state = SCE_L_DEFAULT; break;
196 }
197 }
198
199 // There are cases not handled correctly, like $abcd\textrm{what is $x+y$}z+w$.
200 // But I think it's already good enough.
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)201 void SCI_METHOD LexerLaTeX::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
202 // startPos is assumed to be the first character of a line
203 Accessor styler(pAccess, &props);
204 styler.StartAt(startPos);
205 int mode = getMode(styler.GetLine(startPos) - 1);
206 int state = initStyle;
207 if (state == SCE_L_ERROR || state == SCE_L_SHORTCMD || state == SCE_L_SPECIAL) // should not happen
208 latexStateReset(mode, state);
209
210 char chNext = styler.SafeGetCharAt(startPos);
211 char chVerbatimDelim = '\0';
212 styler.StartSegment(startPos);
213 Sci_Position lengthDoc = startPos + length;
214
215 for (Sci_Position i = startPos; i < lengthDoc; i++) {
216 char ch = chNext;
217 chNext = styler.SafeGetCharAt(i + 1);
218
219 if (styler.IsLeadByte(ch)) {
220 i++;
221 chNext = styler.SafeGetCharAt(i + 1);
222 continue;
223 }
224
225 if (ch == '\r' || ch == '\n')
226 setMode(styler.GetLine(i), mode);
227
228 switch (state) {
229 case SCE_L_DEFAULT :
230 switch (ch) {
231 case '\\' :
232 styler.ColourTo(i - 1, state);
233 if (latexIsLetter(chNext)) {
234 state = SCE_L_COMMAND;
235 } else if (latexIsSpecial(chNext)) {
236 styler.ColourTo(i + 1, SCE_L_SPECIAL);
237 i++;
238 chNext = styler.SafeGetCharAt(i + 1);
239 } else if (chNext == '\r' || chNext == '\n') {
240 styler.ColourTo(i, SCE_L_ERROR);
241 } else if (IsASCII(chNext)) {
242 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
243 if (chNext == '(') {
244 mode = 1;
245 state = SCE_L_MATH;
246 } else if (chNext == '[') {
247 mode = 2;
248 state = SCE_L_MATH2;
249 }
250 i++;
251 chNext = styler.SafeGetCharAt(i + 1);
252 }
253 break;
254 case '$' :
255 styler.ColourTo(i - 1, state);
256 if (chNext == '$') {
257 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
258 mode = 2;
259 state = SCE_L_MATH2;
260 i++;
261 chNext = styler.SafeGetCharAt(i + 1);
262 } else {
263 styler.ColourTo(i, SCE_L_SHORTCMD);
264 mode = 1;
265 state = SCE_L_MATH;
266 }
267 break;
268 case '%' :
269 styler.ColourTo(i - 1, state);
270 state = SCE_L_COMMENT;
271 break;
272 }
273 break;
274 // These 3 will never be reached.
275 case SCE_L_ERROR:
276 case SCE_L_SPECIAL:
277 case SCE_L_SHORTCMD:
278 break;
279 case SCE_L_COMMAND :
280 if (!latexIsLetter(chNext)) {
281 styler.ColourTo(i, state);
282 if (latexNextNotBlankIs(i + 1, styler, '[' )) {
283 state = SCE_L_CMDOPT;
284 } else if (latexLastWordIs(i, styler, "\\begin")) {
285 state = SCE_L_TAG;
286 } else if (latexLastWordIs(i, styler, "\\end")) {
287 state = SCE_L_TAG2;
288 } else if (latexLastWordIs(i, styler, "\\verb") && chNext != '*' && chNext != ' ') {
289 chVerbatimDelim = chNext;
290 state = SCE_L_VERBATIM;
291 } else {
292 latexStateReset(mode, state);
293 }
294 }
295 break;
296 case SCE_L_CMDOPT :
297 if (ch == ']') {
298 styler.ColourTo(i, state);
299 latexStateReset(mode, state);
300 }
301 break;
302 case SCE_L_TAG :
303 if (latexIsTagValid(i, lengthDoc, styler)) {
304 styler.ColourTo(i, state);
305 latexStateReset(mode, state);
306 if (latexLastWordIs(i, styler, "{verbatim}")) {
307 state = SCE_L_VERBATIM;
308 } else if (latexLastWordIs(i, styler, "{lstlisting}")) {
309 state = SCE_L_VERBATIM;
310 } else if (latexLastWordIs(i, styler, "{comment}")) {
311 state = SCE_L_COMMENT2;
312 } else if (latexLastWordIs(i, styler, "{math}") && mode == 0) {
313 mode = 1;
314 state = SCE_L_MATH;
315 } else if (latexLastWordIsMathEnv(i, styler) && mode == 0) {
316 mode = 2;
317 state = SCE_L_MATH2;
318 }
319 } else {
320 styler.ColourTo(i, SCE_L_ERROR);
321 latexStateReset(mode, state);
322 ch = styler.SafeGetCharAt(i);
323 if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
324 }
325 chNext = styler.SafeGetCharAt(i+1);
326 break;
327 case SCE_L_TAG2 :
328 if (latexIsTagValid(i, lengthDoc, styler)) {
329 styler.ColourTo(i, state);
330 latexStateReset(mode, state);
331 } else {
332 styler.ColourTo(i, SCE_L_ERROR);
333 latexStateReset(mode, state);
334 ch = styler.SafeGetCharAt(i);
335 if (ch == '\r' || ch == '\n') setMode(styler.GetLine(i), mode);
336 }
337 chNext = styler.SafeGetCharAt(i+1);
338 break;
339 case SCE_L_MATH :
340 switch (ch) {
341 case '\\' :
342 styler.ColourTo(i - 1, state);
343 if (latexIsLetter(chNext)) {
344 Sci_Position match = i + 3;
345 if (latexLastWordIs(match, styler, "\\end")) {
346 match++;
347 if (latexIsTagValid(match, lengthDoc, styler)) {
348 if (latexLastWordIs(match, styler, "{math}"))
349 mode = 0;
350 }
351 }
352 state = SCE_L_COMMAND;
353 } else if (latexIsSpecial(chNext)) {
354 styler.ColourTo(i + 1, SCE_L_SPECIAL);
355 i++;
356 chNext = styler.SafeGetCharAt(i + 1);
357 } else if (chNext == '\r' || chNext == '\n') {
358 styler.ColourTo(i, SCE_L_ERROR);
359 } else if (IsASCII(chNext)) {
360 if (chNext == ')') {
361 mode = 0;
362 state = SCE_L_DEFAULT;
363 }
364 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
365 i++;
366 chNext = styler.SafeGetCharAt(i + 1);
367 }
368 break;
369 case '$' :
370 styler.ColourTo(i - 1, state);
371 styler.ColourTo(i, SCE_L_SHORTCMD);
372 mode = 0;
373 state = SCE_L_DEFAULT;
374 break;
375 case '%' :
376 styler.ColourTo(i - 1, state);
377 state = SCE_L_COMMENT;
378 break;
379 }
380 break;
381 case SCE_L_MATH2 :
382 switch (ch) {
383 case '\\' :
384 styler.ColourTo(i - 1, state);
385 if (latexIsLetter(chNext)) {
386 Sci_Position match = i + 3;
387 if (latexLastWordIs(match, styler, "\\end")) {
388 match++;
389 if (latexIsTagValid(match, lengthDoc, styler)) {
390 if (latexLastWordIsMathEnv(match, styler))
391 mode = 0;
392 }
393 }
394 state = SCE_L_COMMAND;
395 } else if (latexIsSpecial(chNext)) {
396 styler.ColourTo(i + 1, SCE_L_SPECIAL);
397 i++;
398 chNext = styler.SafeGetCharAt(i + 1);
399 } else if (chNext == '\r' || chNext == '\n') {
400 styler.ColourTo(i, SCE_L_ERROR);
401 } else if (IsASCII(chNext)) {
402 if (chNext == ']') {
403 mode = 0;
404 state = SCE_L_DEFAULT;
405 }
406 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
407 i++;
408 chNext = styler.SafeGetCharAt(i + 1);
409 }
410 break;
411 case '$' :
412 styler.ColourTo(i - 1, state);
413 if (chNext == '$') {
414 styler.ColourTo(i + 1, SCE_L_SHORTCMD);
415 i++;
416 chNext = styler.SafeGetCharAt(i + 1);
417 mode = 0;
418 state = SCE_L_DEFAULT;
419 } else { // This may not be an error, e.g. \begin{equation}\text{$a$}\end{equation}
420 styler.ColourTo(i, SCE_L_SHORTCMD);
421 }
422 break;
423 case '%' :
424 styler.ColourTo(i - 1, state);
425 state = SCE_L_COMMENT;
426 break;
427 }
428 break;
429 case SCE_L_COMMENT :
430 if (ch == '\r' || ch == '\n') {
431 styler.ColourTo(i - 1, state);
432 latexStateReset(mode, state);
433 }
434 break;
435 case SCE_L_COMMENT2 :
436 if (ch == '\\') {
437 Sci_Position match = i + 3;
438 if (latexLastWordIs(match, styler, "\\end")) {
439 match++;
440 if (latexIsTagValid(match, lengthDoc, styler)) {
441 if (latexLastWordIs(match, styler, "{comment}")) {
442 styler.ColourTo(i - 1, state);
443 state = SCE_L_COMMAND;
444 }
445 }
446 }
447 }
448 break;
449 case SCE_L_VERBATIM :
450 if (ch == '\\') {
451 Sci_Position match = i + 3;
452 if (latexLastWordIs(match, styler, "\\end")) {
453 match++;
454 if (latexIsTagValid(match, lengthDoc, styler)) {
455 if (latexLastWordIs(match, styler, "{verbatim}")) {
456 styler.ColourTo(i - 1, state);
457 state = SCE_L_COMMAND;
458 } else if (latexLastWordIs(match, styler, "{lstlisting}")) {
459 styler.ColourTo(i - 1, state);
460 state = SCE_L_COMMAND;
461 }
462 }
463 }
464 } else if (chNext == chVerbatimDelim) {
465 styler.ColourTo(i + 1, state);
466 latexStateReset(mode, state);
467 chVerbatimDelim = '\0';
468 i++;
469 chNext = styler.SafeGetCharAt(i + 1);
470 } else if (chVerbatimDelim != '\0' && (ch == '\n' || ch == '\r')) {
471 styler.ColourTo(i, SCE_L_ERROR);
472 latexStateReset(mode, state);
473 chVerbatimDelim = '\0';
474 }
475 break;
476 }
477 }
478 if (lengthDoc == styler.Length()) truncModes(styler.GetLine(lengthDoc - 1));
479 styler.ColourTo(lengthDoc - 1, state);
480 styler.Flush();
481 }
482
latexFoldSaveToInt(const latexFoldSave & save)483 static int latexFoldSaveToInt(const latexFoldSave &save) {
484 int sum = 0;
485 for (int i = 0; i <= save.structLev; ++i)
486 sum += save.openBegins[i];
487 return ((sum + save.structLev + SC_FOLDLEVELBASE) & SC_FOLDLEVELNUMBERMASK);
488 }
489
490 // Change folding state while processing a line
491 // Return the level before the first relevant command
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)492 void SCI_METHOD LexerLaTeX::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess) {
493 const char *structWords[7] = {"part", "chapter", "section", "subsection",
494 "subsubsection", "paragraph", "subparagraph"};
495 Accessor styler(pAccess, &props);
496 Sci_PositionU endPos = startPos + length;
497 Sci_Position curLine = styler.GetLine(startPos);
498 latexFoldSave save;
499 getSave(curLine - 1, save);
500 do {
501 char ch, buf[16];
502 Sci_Position i, j;
503 int lev = -1;
504 bool needFold = false;
505 for (i = static_cast<Sci_Position>(startPos); i < static_cast<Sci_Position>(endPos); ++i) {
506 ch = styler.SafeGetCharAt(i);
507 if (ch == '\r' || ch == '\n') break;
508 if (ch != '\\' || styler.StyleAt(i) != SCE_L_COMMAND) continue;
509 for (j = 0; j < 15 && i + 1 < static_cast<Sci_Position>(endPos); ++j, ++i) {
510 buf[j] = styler.SafeGetCharAt(i + 1);
511 if (!latexIsLetter(buf[j])) break;
512 }
513 buf[j] = '\0';
514 if (strcmp(buf, "begin") == 0) {
515 if (lev < 0) lev = latexFoldSaveToInt(save);
516 ++save.openBegins[save.structLev];
517 needFold = true;
518 }
519 else if (strcmp(buf, "end") == 0) {
520 while (save.structLev > 0 && save.openBegins[save.structLev] == 0)
521 --save.structLev;
522 if (lev < 0) lev = latexFoldSaveToInt(save);
523 if (save.openBegins[save.structLev] > 0) --save.openBegins[save.structLev];
524 }
525 else {
526 for (j = 0; j < 7; ++j)
527 if (strcmp(buf, structWords[j]) == 0) break;
528 if (j >= 7) continue;
529 save.structLev = j; // level before the command
530 for (j = save.structLev + 1; j < 8; ++j) {
531 save.openBegins[save.structLev] += save.openBegins[j];
532 save.openBegins[j] = 0;
533 }
534 if (lev < 0) lev = latexFoldSaveToInt(save);
535 ++save.structLev; // level after the command
536 needFold = true;
537 }
538 }
539 if (lev < 0) lev = latexFoldSaveToInt(save);
540 if (needFold) lev |= SC_FOLDLEVELHEADERFLAG;
541 styler.SetLevel(curLine, lev);
542 setSave(curLine, save);
543 ++curLine;
544 startPos = styler.LineStart(curLine);
545 if (static_cast<Sci_Position>(startPos) == styler.Length()) {
546 lev = latexFoldSaveToInt(save);
547 styler.SetLevel(curLine, lev);
548 setSave(curLine, save);
549 truncSaves(curLine);
550 }
551 } while (startPos < endPos);
552 styler.Flush();
553 }
554
555 static const char *const emptyWordListDesc[] = {
556 0
557 };
558
559 LexerModule lmLatex(SCLEX_LATEX, LexerLaTeX::LexerFactoryLaTeX, "latex", emptyWordListDesc);
560