1 // Scintilla source code edit control
2 
3 // File: LexTeX.cxx - general context conformant tex coloring scheme
4 // Author: Hans Hagen - PRAGMA ADE - Hasselt NL - www.pragma-ade.com
5 // Version: September 28, 2003
6 
7 // Copyright: 1998-2003 by Neil Hodgson <neilh@scintilla.org>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
// This lexer is derived from the one written for the texwork environment (1999++) which in
// turn is inspired by texedit (1991++) which finds its roots in wdt (1986).
12 
13 // If you run into strange boundary cases, just tell me and I'll look into it.
14 
15 
16 // TeX Folding code added by instanton (soft_share@126.com) with borrowed code from VisualTeX source by Alex Romanenko.
17 // Version: June 22, 2007
18 
19 #include <stdlib.h>
20 #include <string.h>
21 #include <stdio.h>
22 #include <stdarg.h>
23 #include <assert.h>
24 #include <ctype.h>
25 
26 #include "ILexer.h"
27 #include "Scintilla.h"
28 #include "SciLexer.h"
29 
30 #include "WordList.h"
31 #include "LexAccessor.h"
32 #include "Accessor.h"
33 #include "StyleContext.h"
34 #include "CharacterSet.h"
35 #include "LexerModule.h"
36 
37 #ifdef SCI_NAMESPACE
38 using namespace Scintilla;
39 #endif
40 
41 // val SCE_TEX_DEFAULT = 0
42 // val SCE_TEX_SPECIAL = 1
43 // val SCE_TEX_GROUP   = 2
44 // val SCE_TEX_SYMBOL  = 3
45 // val SCE_TEX_COMMAND = 4
46 // val SCE_TEX_TEXT    = 5
47 
48 // Definitions in SciTEGlobal.properties:
49 //
50 // TeX Highlighting
51 //
52 // # Default
53 // style.tex.0=fore:#7F7F00
54 // # Special
55 // style.tex.1=fore:#007F7F
56 // # Group
57 // style.tex.2=fore:#880000
58 // # Symbol
59 // style.tex.3=fore:#7F7F00
60 // # Command
61 // style.tex.4=fore:#008800
62 // # Text
63 // style.tex.5=fore:#000000
64 
65 // lexer.tex.interface.default=0
66 // lexer.tex.comment.process=0
67 
68 // todo: lexer.tex.auto.if
69 
70 // Auxiliary functions:
71 
endOfLine(Accessor & styler,unsigned int i)72 static inline bool endOfLine(Accessor &styler, unsigned int i) {
73 	return
74       (styler[i] == '\n') || ((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n')) ;
75 }
76 
// Class zero: the percent sign (ColouriseTeXDoc starts a comment on it).
static inline bool isTeXzero(int ch) {
	switch (ch) {
		case '%':
			return true ;
		default:
			return false ;
	}
}
81 
// Class one: brackets, parentheses, angle brackets, '=', '#' and the double
// quote (ColouriseTeXDoc styles these as SCE_TEX_SPECIAL).
static inline bool isTeXone(int ch) {
	switch (ch) {
		case '[': case ']':
		case '=': case '#':
		case '(': case ')':
		case '<': case '>':
		case '"':
			return true ;
		default:
			return false ;
	}
}
88 
// Class two: braces and the dollar sign (ColouriseTeXDoc styles these as
// SCE_TEX_GROUP).
static inline bool isTeXtwo(int ch) {
	switch (ch) {
		case '{':
		case '}':
		case '$':
			return true ;
		default:
			return false ;
	}
}
93 
// Class three: symbol characters (ColouriseTeXDoc styles these as
// SCE_TEX_SYMBOL).
static inline bool isTeXthree(int ch) {
	switch (ch) {
		case '~': case '^': case '_': case '&':
		case '-': case '+': case '"': case '`':
		case '/': case '|': case '%':
			return true ;
		default:
			return false ;
	}
}
100 
// Class four: the backslash (ColouriseTeXDoc starts a command on it).
static inline bool isTeXfour(int ch) {
	switch (ch) {
		case '\\':
			return true ;
		default:
			return false ;
	}
}
105 
// Class five: ASCII letters plus '@', '!' and '?' (the characters
// ColouriseTeXDoc lets continue a command name).
static inline bool isTeXfive(int ch) {
	if ((ch >= 'a') && (ch <= 'z')) {
		return true ;
	}
	if ((ch >= 'A') && (ch <= 'Z')) {
		return true ;
	}
	switch (ch) {
		case '@':
		case '!':
		case '?':
			return true ;
		default:
			return false ;
	}
}
111 
// Class six: the space character.
static inline bool isTeXsix(int ch) {
	switch (ch) {
		case ' ':
			return true ;
		default:
			return false ;
	}
}
116 
// Class seven: the caret (ColouriseTeXDoc looks for two in a row, "^^").
static inline bool isTeXseven(int ch) {
	switch (ch) {
		case '^':
			return true ;
		default:
			return false ;
	}
}
121 
122 // Interface determination
123 
CheckTeXInterface(unsigned int startPos,int length,Accessor & styler,int defaultInterface)124 static int CheckTeXInterface(
125     unsigned int startPos,
126     int length,
127     Accessor &styler,
128 	int defaultInterface) {
129 
130     char lineBuffer[1024] ;
131 	unsigned int linePos = 0 ;
132 
133     // some day we can make something lexer.tex.mapping=(all,0)(nl,1)(en,2)...
134 
135     if (styler.SafeGetCharAt(0) == '%') {
136         for (unsigned int i = 0; i < startPos + length; i++) {
137             lineBuffer[linePos++] = styler.SafeGetCharAt(i) ;
138             if (endOfLine(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
139                 lineBuffer[linePos] = '\0';
140                 if (strstr(lineBuffer, "interface=all")) {
141                     return 0 ;
142 				} else if (strstr(lineBuffer, "interface=tex")) {
143                     return 1 ;
144                 } else if (strstr(lineBuffer, "interface=nl")) {
145                     return 2 ;
146                 } else if (strstr(lineBuffer, "interface=en")) {
147                     return 3 ;
148                 } else if (strstr(lineBuffer, "interface=de")) {
149                     return 4 ;
150                 } else if (strstr(lineBuffer, "interface=cz")) {
151                     return 5 ;
152                 } else if (strstr(lineBuffer, "interface=it")) {
153                     return 6 ;
154                 } else if (strstr(lineBuffer, "interface=ro")) {
155                     return 7 ;
156                 } else if (strstr(lineBuffer, "interface=latex")) {
157 					// we will move latex cum suis up to 91+ when more keyword lists are supported
158                     return 8 ;
159 				} else if (styler.SafeGetCharAt(1) == 'D' && strstr(lineBuffer, "%D \\module")) {
160 					// better would be to limit the search to just one line
161 					return 3 ;
162                 } else {
163                     return defaultInterface ;
164                 }
165             }
166 		}
167     }
168 
169     return defaultInterface ;
170 }
171 
ColouriseTeXDoc(unsigned int startPos,int length,int,WordList * keywordlists[],Accessor & styler)172 static void ColouriseTeXDoc(
173     unsigned int startPos,
174     int length,
175     int,
176     WordList *keywordlists[],
177     Accessor &styler) {
178 
179 	styler.StartAt(startPos) ;
180 	styler.StartSegment(startPos) ;
181 
182 	bool processComment   = styler.GetPropertyInt("lexer.tex.comment.process",   0) == 1 ;
183 	bool useKeywords      = styler.GetPropertyInt("lexer.tex.use.keywords",      1) == 1 ;
184 	bool autoIf           = styler.GetPropertyInt("lexer.tex.auto.if",           1) == 1 ;
185 	int  defaultInterface = styler.GetPropertyInt("lexer.tex.interface.default", 1) ;
186 
187 	char key[100] ;
188 	int  k ;
189 	bool newifDone = false ;
190 	bool inComment = false ;
191 
192 	int currentInterface = CheckTeXInterface(startPos,length,styler,defaultInterface) ;
193 
194     if (currentInterface == 0) {
195         useKeywords = false ;
196         currentInterface = 1 ;
197     }
198 
199     WordList &keywords = *keywordlists[currentInterface-1] ;
200 
201 	StyleContext sc(startPos, length, SCE_TEX_TEXT, styler);
202 
203 	bool going = sc.More() ; // needed because of a fuzzy end of file state
204 
205 	for (; going; sc.Forward()) {
206 
207 		if (! sc.More()) { going = false ; } // we need to go one behind the end of text
208 
209 		if (inComment) {
210 			if (sc.atLineEnd) {
211 				sc.SetState(SCE_TEX_TEXT) ;
212 				newifDone = false ;
213 				inComment = false ;
214 			}
215 		} else {
216 			if (! isTeXfive(sc.ch)) {
217 				if (sc.state == SCE_TEX_COMMAND) {
218 					if (sc.LengthCurrent() == 1) { // \<noncstoken>
219 						if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) {
220 							sc.Forward(2) ; // \^^ and \^^<token>
221 						}
222 						sc.ForwardSetState(SCE_TEX_TEXT) ;
223 					} else {
224 						sc.GetCurrent(key, sizeof(key)-1) ;
225 						k = static_cast<int>(strlen(key)) ;
226 						memmove(key,key+1,k) ; // shift left over escape token
227 						key[k] = '\0' ;
228 						k-- ;
229 						if (! keywords || ! useKeywords) {
230 							sc.SetState(SCE_TEX_COMMAND) ;
231 							newifDone = false ;
232 						} else if (k == 1) { //\<cstoken>
233 							sc.SetState(SCE_TEX_COMMAND) ;
234 							newifDone = false ;
235 						} else if (keywords.InList(key)) {
236     						sc.SetState(SCE_TEX_COMMAND) ;
237 							newifDone = autoIf && (strcmp(key,"newif") == 0) ;
238 						} else if (autoIf && ! newifDone && (key[0] == 'i') && (key[1] == 'f') && keywords.InList("if")) {
239 	    					sc.SetState(SCE_TEX_COMMAND) ;
240 						} else {
241 							sc.ChangeState(SCE_TEX_TEXT) ;
242 							sc.SetState(SCE_TEX_TEXT) ;
243 							newifDone = false ;
244 						}
245 					}
246 				}
247 				if (isTeXzero(sc.ch)) {
248 					sc.SetState(SCE_TEX_SYMBOL);
249 
250 					if (!endOfLine(styler,sc.currentPos + 1))
251 						sc.ForwardSetState(SCE_TEX_DEFAULT) ;
252 
253 					inComment = ! processComment ;
254 					newifDone = false ;
255 				} else if (isTeXseven(sc.ch) && isTeXseven(sc.chNext)) {
256 					sc.SetState(SCE_TEX_TEXT) ;
257 					sc.ForwardSetState(SCE_TEX_TEXT) ;
258 				} else if (isTeXone(sc.ch)) {
259 					sc.SetState(SCE_TEX_SPECIAL) ;
260 					newifDone = false ;
261 				} else if (isTeXtwo(sc.ch)) {
262 					sc.SetState(SCE_TEX_GROUP) ;
263 					newifDone = false ;
264 				} else if (isTeXthree(sc.ch)) {
265 					sc.SetState(SCE_TEX_SYMBOL) ;
266 					newifDone = false ;
267 				} else if (isTeXfour(sc.ch)) {
268 					sc.SetState(SCE_TEX_COMMAND) ;
269 				} else if (isTeXsix(sc.ch)) {
270 					sc.SetState(SCE_TEX_TEXT) ;
271 				} else if (sc.atLineEnd) {
272 					sc.SetState(SCE_TEX_TEXT) ;
273 					newifDone = false ;
274 					inComment = false ;
275 				} else {
276 					sc.SetState(SCE_TEX_TEXT) ;
277 				}
278 			} else if (sc.state != SCE_TEX_COMMAND) {
279 				sc.SetState(SCE_TEX_TEXT) ;
280 			}
281 		}
282 	}
283 	sc.ChangeState(SCE_TEX_TEXT) ;
284 	sc.Complete();
285 
286 }
287 
288 
// True for the decimal digits '0' through '9'.
static inline bool isNumber(int ch) {
	return (ch >= '0') && (ch <= '9');
}
295 
// True for the ASCII letters a-z and A-Z only.
static inline bool isWordChar(int ch) {
	const bool lowerCase = (ch >= 'a') && (ch <= 'z');
	const bool upperCase = (ch >= 'A') && (ch <= 'Z');
	return lowerCase || upperCase;
}
299 
ParseTeXCommand(unsigned int pos,Accessor & styler,char * command)300 static int ParseTeXCommand(unsigned int pos, Accessor &styler, char *command)
301 {
302   int length=0;
303   char ch=styler.SafeGetCharAt(pos+1);
304 
305   if(ch==',' || ch==':' || ch==';' || ch=='%'){
306       command[0]=ch;
307       command[1]=0;
308 	  return 1;
309   }
310 
311   // find end
312      while(isWordChar(ch) && !isNumber(ch) && ch!='_' && ch!='.' && length<100){
313           command[length]=ch;
314           length++;
315           ch=styler.SafeGetCharAt(pos+length+1);
316      }
317 
318   command[length]='\0';
319   if(!length) return 0;
320   return length+1;
321 }
322 
// Classifies a command name that takes part in an open/close pair.
//
// s - NUL-terminated command name without the escape character.
//
// Returns 1 for a name that opens a fold, -1 for one that closes a fold and
// 0 for anything else, including names starting with a digit or a '.'.
static int classifyFoldPointTeXPaired(const char* s) {
	// isdigit() is only defined for values representable as unsigned char
	// (or EOF), so a plain char must not be passed to it directly.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.'))
		return 0;

	int lev = 0;

	// openers: exact names, plus anything starting with start/Start/if
	if (strcmp(s, "begin") == 0 ||
		strcmp(s, "FoldStart") == 0 ||
		strcmp(s, "abstract") == 0 ||
		strcmp(s, "unprotect") == 0 ||
		strcmp(s, "title") == 0 ||
		strncmp(s, "start", 5) == 0 ||
		strncmp(s, "Start", 5) == 0 ||
		strcmp(s, "documentclass") == 0 ||
		strncmp(s, "if", 2) == 0) {
		lev = 1;
	}

	// closers: exact names, plus anything starting with stop/Stop
	if (strcmp(s, "end") == 0 ||
		strcmp(s, "FoldStop") == 0 ||
		strcmp(s, "maketitle") == 0 ||
		strcmp(s, "protect") == 0 ||
		strncmp(s, "stop", 4) == 0 ||
		strncmp(s, "Stop", 4) == 0 ||
		strcmp(s, "fi") == 0) {
		lev = -1;
	}

	return lev;
}
341 
// Classifies a command name that opens a fold without a matching closing
// command.
//
// s - NUL-terminated command name without the escape character.
//
// Returns 1 when s is exactly one of the listed names, otherwise 0; names
// starting with a digit or a '.' always give 0.
static int classifyFoldPointTeXUnpaired(const char* s) {
	static const char * const openers[] = {
		"part", "chapter", "section", "subsection", "subsubsection",
		"CJKfamily", "appendix",
		"Topic", "topic", "subject", "subsubject",
		"def", "gdef", "edef", "xdef",
		"framed", "frame",
		"foilhead", "overlays", "slide",
	};

	// isdigit() is only defined for values representable as unsigned char
	// (or EOF), so a plain char must not be passed to it directly.
	const unsigned char first = static_cast<unsigned char>(s[0]);
	if (isdigit(first) || (first == '.'))
		return 0;

	for (size_t n = 0; n < sizeof(openers) / sizeof(openers[0]); n++) {
		if (strcmp(s, openers[n]) == 0)
			return 1;
	}
	return 0;
}
364 
IsTeXCommentLine(int line,Accessor & styler)365 static bool IsTeXCommentLine(int line, Accessor &styler) {
366 	int pos = styler.LineStart(line);
367 	int eol_pos = styler.LineStart(line + 1) - 1;
368 
369 	int startpos = pos;
370 
371 	while (startpos<eol_pos){
372 		char ch = styler[startpos];
373 		if (ch!='%' && ch!=' ') return false;
374 		else if (ch=='%') return true;
375 		startpos++;
376 	}
377 
378 	return false;
379 }
380 
// FoldTexDoc: borrowed from VisualTeX with modifications
382 
FoldTexDoc(unsigned int startPos,int length,int,WordList * [],Accessor & styler)383 static void FoldTexDoc(unsigned int startPos, int length, int, WordList *[], Accessor &styler)
384 {
385 	bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
386 	unsigned int endPos = startPos+length;
387 	int visibleChars=0;
388 	int lineCurrent=styler.GetLine(startPos);
389 	int levelPrev=styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
390 	int levelCurrent=levelPrev;
391 	char chNext=styler[startPos];
392 	char buffer[100]="";
393 
394 	for (unsigned int i=startPos; i < endPos; i++) {
395 		char ch=chNext;
396 		chNext=styler.SafeGetCharAt(i+1);
397 		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
398 
399         if(ch=='\\') {
400             ParseTeXCommand(i, styler, buffer);
401 			levelCurrent += classifyFoldPointTeXPaired(buffer)+classifyFoldPointTeXUnpaired(buffer);
402 		}
403 
404 		if (levelCurrent > SC_FOLDLEVELBASE && ((ch == '\r' || ch=='\n') && (chNext == '\\'))) {
405             ParseTeXCommand(i+1, styler, buffer);
406 			levelCurrent -= classifyFoldPointTeXUnpaired(buffer);
407 		}
408 
409 	char chNext2;
410 	char chNext3;
411 	char chNext4;
412 	char chNext5;
413 	chNext2=styler.SafeGetCharAt(i+2);
414 	chNext3=styler.SafeGetCharAt(i+3);
415 	chNext4=styler.SafeGetCharAt(i+4);
416 	chNext5=styler.SafeGetCharAt(i+5);
417 
418 	bool atEOfold = (ch == '%') &&
419 			(chNext == '%') && (chNext2=='}') &&
420 				(chNext3=='}')&& (chNext4=='-')&& (chNext5=='-');
421 
422 	bool atBOfold = (ch == '%') &&
423 			(chNext == '%') && (chNext2=='-') &&
424 				(chNext3=='-')&& (chNext4=='{')&& (chNext5=='{');
425 
426 	if(atBOfold){
427 		levelCurrent+=1;
428 	}
429 
430 	if(atEOfold){
431 		levelCurrent-=1;
432 	}
433 
434 	if(ch=='\\' && chNext=='['){
435 		levelCurrent+=1;
436 	}
437 
438 	if(ch=='\\' && chNext==']'){
439 		levelCurrent-=1;
440 	}
441 
442 	bool foldComment = styler.GetPropertyInt("fold.comment") != 0;
443 
444 	if (foldComment && atEOL && IsTeXCommentLine(lineCurrent, styler))
445         {
446             if (lineCurrent==0 && IsTeXCommentLine(lineCurrent + 1, styler)
447 				)
448                 levelCurrent++;
449             else if (lineCurrent!=0 && !IsTeXCommentLine(lineCurrent - 1, styler)
450                && IsTeXCommentLine(lineCurrent + 1, styler)
451 				)
452                 levelCurrent++;
453             else if (lineCurrent!=0 && IsTeXCommentLine(lineCurrent - 1, styler) &&
454                      !IsTeXCommentLine(lineCurrent+1, styler))
455                 levelCurrent--;
456         }
457 
458 //---------------------------------------------------------------------------------------------
459 
460 		if (atEOL) {
461 			int lev = levelPrev;
462 			if (visibleChars == 0 && foldCompact)
463 				lev |= SC_FOLDLEVELWHITEFLAG;
464 			if ((levelCurrent > levelPrev) && (visibleChars > 0))
465 				lev |= SC_FOLDLEVELHEADERFLAG;
466 			if (lev != styler.LevelAt(lineCurrent)) {
467 				styler.SetLevel(lineCurrent, lev);
468 			}
469 			lineCurrent++;
470 			levelPrev = levelCurrent;
471 			visibleChars = 0;
472 		}
473 
474 		if (!isspacechar(ch))
475 			visibleChars++;
476 	}
477 
478 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
479 	int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
480 	styler.SetLevel(lineCurrent, levelPrev | flagsNext);
481 }
482 
483 
484 
485 
// Descriptions of the keyword lists, in list order; ColouriseTeXDoc uses
// list (interface number - 1). The array ends with a null entry.
static const char * const texWordListDesc[] = {
	"TeX, eTeX, pdfTeX, Omega",	// list 0
	"ConTeXt Dutch",		// list 1
	"ConTeXt English",		// list 2
	"ConTeXt German",		// list 3
	"ConTeXt Czech",		// list 4
	"ConTeXt Italian",		// list 5
	"ConTeXt Romanian",		// list 6
	0
} ;
496 
497 LexerModule lmTeX(SCLEX_TEX,   ColouriseTeXDoc, "tex", FoldTexDoc, texWordListDesc);
498