1 // Scintilla source code edit control
2 
3 // File: LexTeX.cxx - general context conformant tex coloring scheme
4 // Author: Hans Hagen - PRAGMA ADE - Hasselt NL - www.pragma-ade.com
5 // Version: September 28, 2003
6 
7 // Copyright: 1998-2003 by Neil Hodgson <neilh@scintilla.org>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
// This lexer is derived from the one written for the texwork environment (1999++) which in
// turn is inspired by texedit (1991++) which finds its roots in wdt (1986).
12 
13 // If you run into strange boundary cases, just tell me and I'll look into it.
14 
15 
16 // TeX Folding code added by instanton (soft_share@126.com) with borrowed code from VisualTeX source by Alex Romanenko.
17 // Version: June 22, 2007
18 
19 #include <stdlib.h>
20 #include <string.h>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <assert.h>
24 #include <ctype.h>
25 
26 #include "ILexer.h"
27 #include "Scintilla.h"
28 #include "SciLexer.h"
29 
30 #include "WordList.h"
31 #include "LexAccessor.h"
32 #include "Accessor.h"
33 #include "StyleContext.h"
34 #include "CharacterSet.h"
35 #include "LexerModule.h"
36 
37 using namespace Scintilla;
38 
39 // val SCE_TEX_DEFAULT = 0
40 // val SCE_TEX_SPECIAL = 1
41 // val SCE_TEX_GROUP   = 2
42 // val SCE_TEX_SYMBOL  = 3
43 // val SCE_TEX_COMMAND = 4
44 // val SCE_TEX_TEXT    = 5
45 
46 // Definitions in SciTEGlobal.properties:
47 //
48 // TeX Highlighting
49 //
50 // # Default
51 // style.tex.0=fore:#7F7F00
52 // # Special
53 // style.tex.1=fore:#007F7F
54 // # Group
55 // style.tex.2=fore:#880000
56 // # Symbol
57 // style.tex.3=fore:#7F7F00
58 // # Command
59 // style.tex.4=fore:#008800
60 // # Text
61 // style.tex.5=fore:#000000
62 
63 // lexer.tex.interface.default=0
64 // lexer.tex.comment.process=0
65 
66 // todo: lexer.tex.auto.if
67 
68 // Auxiliary functions:
69 
endOfLine(Accessor & styler,Sci_PositionU i)70 static inline bool endOfLine(Accessor &styler, Sci_PositionU i) {
71 	return
72       (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n')) ;
73 }
74 
// Class zero: the percent sign. ColouriseTeXDoc treats it as the start of a comment.
static inline bool isTeXzero(int ch) {
	return ch == '%' ;
}
79 
// Class one: brackets, parentheses, angle brackets, '=', '#' and the double quote.
// ColouriseTeXDoc styles these characters as SCE_TEX_SPECIAL.
static inline bool isTeXone(int ch) {
	switch (ch) {
	case '[': case ']':
	case '(': case ')':
	case '<': case '>':
	case '=': case '#':
	case '"':
		return true ;
	default:
		return false ;
	}
}
86 
// Class two: the grouping characters '{' and '}' plus the math shift '$'.
// ColouriseTeXDoc styles these characters as SCE_TEX_GROUP.
static inline bool isTeXtwo(int ch) {
	switch (ch) {
	case '{':
	case '}':
	case '$':
		return true ;
	default:
		return false ;
	}
}
91 
// Class three: symbol characters, styled as SCE_TEX_SYMBOL by ColouriseTeXDoc.
// The set also contains '"' and '%'; ColouriseTeXDoc tests isTeXzero and
// isTeXone before this class, so those two are claimed there first.
static inline bool isTeXthree(int ch) {
	switch (ch) {
	case '~': case '^': case '_': case '&':
	case '-': case '+': case '"': case '`':
	case '/': case '|': case '%':
		return true ;
	default:
		return false ;
	}
}
98 
// Class four: the backslash, which makes ColouriseTeXDoc enter SCE_TEX_COMMAND.
static inline bool isTeXfour(int ch) {
	return ch == '\\' ;
}
103 
// Class five: ASCII letters plus '@', '!' and '?'.
// While ColouriseTeXDoc is inside a command, these characters extend the
// command name; any other character ends it.
static inline bool isTeXfive(int ch) {
	const bool lowerCase = (ch >= 'a') && (ch <= 'z') ;
	const bool upperCase = (ch >= 'A') && (ch <= 'Z') ;
	return lowerCase || upperCase || (ch == '@') || (ch == '!') || (ch == '?') ;
}
109 
// Class six: the space character (tabs and line ends are not included).
static inline bool isTeXsix(int ch) {
	return ch == ' ' ;
}
114 
// Class seven: the caret. ColouriseTeXDoc checks for two carets in a row
// ("^^") and handles that pair separately from a single '^'.
static inline bool isTeXseven(int ch) {
	return ch == '^' ;
}
119 
120 // Interface determination
121 
CheckTeXInterface(Sci_PositionU startPos,Sci_Position length,Accessor & styler,int defaultInterface)122 static int CheckTeXInterface(
123     Sci_PositionU startPos,
124     Sci_Position length,
125     Accessor &styler,
126 	int defaultInterface) {
127 
128     char lineBuffer[1024] ;
129 	Sci_PositionU linePos = 0 ;
130 
131     // some day we can make something lexer.tex.mapping=(all,0)(nl,1)(en,2)...
132 
133     if (styler.SafeGetCharAt(0) == '%') {
134         for (Sci_PositionU i = 0; i < startPos + length; i++) {
135             lineBuffer[linePos++] = styler.SafeGetCharAt(i) ;
136             if (endOfLine(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
137                 lineBuffer[linePos] = '\0';
138                 if (strstr(lineBuffer, "interface=all")) {
139                     return 0 ;
140 				} else if (strstr(lineBuffer, "interface=tex")) {
141                     return 1 ;
142                 } else if (strstr(lineBuffer, "interface=nl")) {
143                     return 2 ;
144                 } else if (strstr(lineBuffer, "interface=en")) {
145                     return 3 ;
146                 } else if (strstr(lineBuffer, "interface=de")) {
147                     return 4 ;
148                 } else if (strstr(lineBuffer, "interface=cz")) {
149                     return 5 ;
150                 } else if (strstr(lineBuffer, "interface=it")) {
151                     return 6 ;
152                 } else if (strstr(lineBuffer, "interface=ro")) {
153                     return 7 ;
154                 } else if (strstr(lineBuffer, "interface=latex")) {
155 					// we will move latex cum suis up to 91+ when more keyword lists are supported
156                     return 8 ;
157 				} else if (styler.SafeGetCharAt(1) == 'D' && strstr(lineBuffer, "%D \\module")) {
158 					// better would be to limit the search to just one line
159 					return 3 ;
160                 } else {
161                     return defaultInterface ;
162                 }
163             }
164 		}
165     }
166 
167     return defaultInterface ;
168 }
169 
ColouriseTeXDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * keywordlists[],Accessor & styler)170 static void ColouriseTeXDoc(
171     Sci_PositionU startPos,
172     Sci_Position length,
173     int,
174     WordList *keywordlists[],
175     Accessor &styler) {
176 
177 	styler.StartAt(startPos) ;
178 	styler.StartSegment(startPos) ;
179 
180 	bool processComment   = styler.GetPropertyInt("lexer.tex.comment.process",   0) == 1 ;
181 	bool useKeywords      = styler.GetPropertyInt("lexer.tex.use.keywords",      1) == 1 ;
182 	bool autoIf           = styler.GetPropertyInt("lexer.tex.auto.if",           1) == 1 ;
183 	int  defaultInterface = styler.GetPropertyInt("lexer.tex.interface.default", 1) ;
184 
185 	char key[100] ;
186 	int  k ;
187 	bool newifDone = false ;
188 	bool inComment = false ;
189 
190 	int currentInterface = CheckTeXInterface(startPos,length,styler,defaultInterface) ;
191 
192     if (currentInterface == 0) {
193         useKeywords = false ;
194         currentInterface = 1 ;
195     }
196 
197     WordList &keywords = *keywordlists[currentInterface-1] ;
198 
199 	StyleContext sc(startPos, length, SCE_TEX_TEXT, styler);
200 
201 	bool going = sc.More() ; // needed because of a fuzzy end of file state
202 
203 	for (; going; sc.Forward()) {
204 
205 		if (! sc.More()) { going = false ; } // we need to go one behind the end of text
206 
207 		if (inComment) {
208 			if (sc.atLineEnd) {
209 				sc.SetState(SCE_TEX_TEXT) ;
210 				newifDone = false ;
211 				inComment = false ;
212 			}
213 		} else {
214 			if (! isTeXfive(sc.ch)) {
215 				if (sc.state == SCE_TEX_COMMAND) {
216 					if (sc.LengthCurrent() == 1) { // \<noncstoken>
217 						if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) {
218 							sc.Forward(2) ; // \^^ and \^^<token>
219 						}
220 						sc.ForwardSetState(SCE_TEX_TEXT) ;
221 					} else {
222 						sc.GetCurrent(key, sizeof(key)-1) ;
223 						k = static_cast<int>(strlen(key)) ;
224 						memmove(key,key+1,k) ; // shift left over escape token
225 						key[k] = '\0' ;
226 						k-- ;
227 						if (! keywords || ! useKeywords) {
228 							sc.SetState(SCE_TEX_COMMAND) ;
229 							newifDone = false ;
230 						} else if (k == 1) { //\<cstoken>
231 							sc.SetState(SCE_TEX_COMMAND) ;
232 							newifDone = false ;
233 						} else if (keywords.InList(key)) {
234     						sc.SetState(SCE_TEX_COMMAND) ;
235 							newifDone = autoIf && (strcmp(key,"newif") == 0) ;
236 						} else if (autoIf && ! newifDone && (key[0] == 'i') && (key[1] == 'f') && keywords.InList("if")) {
237 	    					sc.SetState(SCE_TEX_COMMAND) ;
238 						} else {
239 							sc.ChangeState(SCE_TEX_TEXT) ;
240 							sc.SetState(SCE_TEX_TEXT) ;
241 							newifDone = false ;
242 						}
243 					}
244 				}
245 				if (isTeXzero(sc.ch)) {
246 					sc.SetState(SCE_TEX_SYMBOL);
247 
248 					if (!endOfLine(styler,sc.currentPos + 1))
249 						sc.ForwardSetState(SCE_TEX_DEFAULT) ;
250 
251 					inComment = ! processComment ;
252 					newifDone = false ;
253 				} else if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) {
254 					sc.SetState(SCE_TEX_TEXT) ;
255 					sc.ForwardSetState(SCE_TEX_TEXT) ;
256 				} else if (isTeXone(sc.ch)) {
257 					sc.SetState(SCE_TEX_SPECIAL) ;
258 					newifDone = false ;
259 				} else if (isTeXtwo(sc.ch)) {
260 					sc.SetState(SCE_TEX_GROUP) ;
261 					newifDone = false ;
262 				} else if (isTeXthree(sc.ch)) {
263 					sc.SetState(SCE_TEX_SYMBOL) ;
264 					newifDone = false ;
265 				} else if (isTeXfour(sc.ch)) {
266 					sc.SetState(SCE_TEX_COMMAND) ;
267 				} else if (isTeXsix(sc.ch)) {
268 					sc.SetState(SCE_TEX_TEXT) ;
269 				} else if (sc.atLineEnd) {
270 					sc.SetState(SCE_TEX_TEXT) ;
271 					newifDone = false ;
272 					inComment = false ;
273 				} else {
274 					sc.SetState(SCE_TEX_TEXT) ;
275 				}
276 			} else if (sc.state != SCE_TEX_COMMAND) {
277 				sc.SetState(SCE_TEX_TEXT) ;
278 			}
279 		}
280 	}
281 	sc.ChangeState(SCE_TEX_TEXT) ;
282 	sc.Complete();
283 
284 }
285 
286 
// Returns true for the ASCII decimal digits '0' to '9'.
static inline bool isNumber(int ch) {
	return (ch >= '0') && (ch <= '9');
}
293 
// Returns true for the ASCII letters a-z and A-Z only.
static inline bool isWordChar(int ch) {
	const bool lowerCase = (ch >= 'a') && (ch <= 'z');
	const bool upperCase = (ch >= 'A') && (ch <= 'Z');
	return lowerCase || upperCase;
}
297 
ParseTeXCommand(Sci_PositionU pos,Accessor & styler,char * command)298 static Sci_Position ParseTeXCommand(Sci_PositionU pos, Accessor &styler, char *command)
299 {
300   Sci_Position length=0;
301   char ch=styler.SafeGetCharAt(pos+1);
302 
303   if(ch==',' || ch==':' || ch==';' || ch=='%'){
304       command[0]=ch;
305       command[1]=0;
306 	  return 1;
307   }
308 
309   // find end
310      while(isWordChar(ch) && !isNumber(ch) && ch!='_' && ch!='.' && length<100){
311           command[length]=ch;
312           length++;
313           ch=styler.SafeGetCharAt(pos+length+1);
314      }
315 
316   command[length]='\0';
317   if(!length) return 0;
318   return length+1;
319 }
320 
// Classifies a command name (without the backslash) that takes part in an
// opener/closer pair.
// Returns +1 for an opener, -1 for a closer and 0 for anything else.
// Names beginning with a digit or '.' are never classified.
static int classifyFoldPointTeXPaired(const char* s) {
	// isdigit() is only defined for unsigned char values (and EOF), so the
	// first character is converted before the call.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.'))
		return 0;

	int lev = 0;

	// Openers; "start...", "Start..." and "if..." match on the prefix alone.
	if (strcmp(s,"begin")==0 || strcmp(s,"FoldStart")==0 ||
		strcmp(s,"abstract")==0 || strcmp(s,"unprotect")==0 ||
		strcmp(s,"title")==0 || strncmp(s,"start",5)==0 || strncmp(s,"Start",5)==0 ||
		strcmp(s,"documentclass")==0 || strncmp(s,"if",2)==0)
		lev = 1;

	// Closers; "stop..." and "Stop..." match on the prefix alone.
	if (strcmp(s,"end")==0 || strcmp(s,"FoldStop")==0 ||
		strcmp(s,"maketitle")==0 || strcmp(s,"protect")==0 ||
		strncmp(s,"stop",4)==0 || strncmp(s,"Stop",4)==0 ||
		strcmp(s,"fi")==0)
		lev = -1;

	return lev;
}
339 
// Classifies a command name (without the backslash) that opens a fold but has
// no matching closing command, such as sectioning commands and definitions.
// Returns +1 when s is exactly one of the listed names, 0 otherwise.
// Names beginning with a digit or '.' are never classified.
static int classifyFoldPointTeXUnpaired(const char* s) {
	// isdigit() is only defined for unsigned char values (and EOF), so the
	// first character is converted before the call.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.'))
		return 0;

	static const char * const unpairedOpeners[] = {
		"part", "chapter", "section", "subsection", "subsubsection",
		"CJKfamily", "appendix",
		"Topic", "topic", "subject", "subsubject",
		"def", "gdef", "edef", "xdef",
		"framed", "frame",
		"foilhead", "overlays", "slide",
	};

	for (size_t n = 0; n < sizeof(unpairedOpeners) / sizeof(unpairedOpeners[0]); n++) {
		if (strcmp(s, unpairedOpeners[n]) == 0)
			return 1;
	}
	return 0;
}
362 
IsTeXCommentLine(Sci_Position line,Accessor & styler)363 static bool IsTeXCommentLine(Sci_Position line, Accessor &styler) {
364 	Sci_Position pos = styler.LineStart(line);
365 	Sci_Position eol_pos = styler.LineStart(line + 1) - 1;
366 
367 	Sci_Position startpos = pos;
368 
369 	while (startpos<eol_pos){
370 		char ch = styler[startpos];
371 		if (ch!='%' && ch!=' ') return false;
372 		else if (ch=='%') return true;
373 		startpos++;
374 	}
375 
376 	return false;
377 }
378 
// FoldTexDoc: borrowed from VisualTeX, with modifications
380 
FoldTexDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)381 static void FoldTexDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *[], Accessor &styler)
382 {
383 	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
384 	Sci_PositionU endPos = startPos+length;
385 	int visibleChars=0;
386 	Sci_Position lineCurrent=styler.GetLine(startPos);
387 	int levelPrev=styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
388 	int levelCurrent=levelPrev;
389 	char chNext=styler[startPos];
390 	char buffer[100]="";
391 
392 	for (Sci_PositionU i=startPos; i < endPos; i++) {
393 		char ch=chNext;
394 		chNext=styler.SafeGetCharAt(i+1);
395 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
396 
397         if(ch=='\\') {
398             ParseTeXCommand(i, styler, buffer);
399 			levelCurrent += classifyFoldPointTeXPaired(buffer)+classifyFoldPointTeXUnpaired(buffer);
400 		}
401 
402 		if (levelCurrent > SC_FOLDLEVELBASE && ((ch == '\r' || ch=='\n') && (chNext == '\\'))) {
403             ParseTeXCommand(i+1, styler, buffer);
404 			levelCurrent -= classifyFoldPointTeXUnpaired(buffer);
405 		}
406 
407 	char chNext2;
408 	char chNext3;
409 	char chNext4;
410 	char chNext5;
411 	chNext2=styler.SafeGetCharAt(i+2);
412 	chNext3=styler.SafeGetCharAt(i+3);
413 	chNext4=styler.SafeGetCharAt(i+4);
414 	chNext5=styler.SafeGetCharAt(i+5);
415 
416 	bool atEOfold = (ch == '%') &&
417 			(chNext == '%') && (chNext2=='}') &&
418 				(chNext3=='}')&& (chNext4=='-')&& (chNext5=='-');
419 
420 	bool atBOfold = (ch == '%') &&
421 			(chNext == '%') && (chNext2=='-') &&
422 				(chNext3=='-')&& (chNext4=='{')&& (chNext5=='{');
423 
424 	if(atBOfold){
425 		levelCurrent+=1;
426 	}
427 
428 	if(atEOfold){
429 		levelCurrent-=1;
430 	}
431 
432 	if(ch=='\\' && chNext=='['){
433 		levelCurrent+=1;
434 	}
435 
436 	if(ch=='\\' && chNext==']'){
437 		levelCurrent-=1;
438 	}
439 
440 	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
441 
442 	if (foldComment && atEOL && IsTeXCommentLine(lineCurrent, styler))
443         {
444             if (lineCurrent==0 && IsTeXCommentLine(lineCurrent + 1, styler)
445 				)
446                 levelCurrent++;
447             else if (lineCurrent!=0 && !IsTeXCommentLine(lineCurrent - 1, styler)
448                && IsTeXCommentLine(lineCurrent + 1, styler)
449 				)
450                 levelCurrent++;
451             else if (lineCurrent!=0 && IsTeXCommentLine(lineCurrent - 1, styler) &&
452                      !IsTeXCommentLine(lineCurrent+1, styler))
453                 levelCurrent--;
454         }
455 
456 //---------------------------------------------------------------------------------------------
457 
458 		if (atEOL) {
459 			int lev = levelPrev;
460 			if (visibleChars == 0 && foldCompact)
461 				lev |= SC_FOLDLEVELWHITEFLAG;
462 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
463 				lev |= SC_FOLDLEVELHEADERFLAG;
464 			if (lev != styler.LevelAt(lineCurrent)) {
465 				styler.SetLevel(lineCurrent, lev);
466 			}
467 			lineCurrent++;
468 			levelPrev = levelCurrent;
469 			visibleChars = 0;
470 		}
471 
472 		if (!isspacechar(ch))
473 			visibleChars++;
474 	}
475 
476 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
477 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
478 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
479 }
480 
481 
482 
483 
// Descriptions of the keyword lists, in keyword-list order; the array is
// terminated by a null entry.
static const char * const texWordListDesc[] = {
	"TeX, eTeX, pdfTeX, Omega",
	"ConTeXt Dutch",
	"ConTeXt English",
	"ConTeXt German",
	"ConTeXt Czech",
	"ConTeXt Italian",
	"ConTeXt Romanian",
	0,
} ;
494 
495 LexerModule lmTeX(SCLEX_TEX,   ColouriseTeXDoc, "tex", FoldTexDoc, texWordListDesc);
496