1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3  ** Lexer for HTML.
4  **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <ctype.h>
11 #include <stdio.h>
12 #include <stdarg.h>
13 
14 #include "Platform.h"
15 
16 #include "PropSet.h"
17 #include "Accessor.h"
18 #include "StyleContext.h"
19 #include "KeyWords.h"
20 #include "Scintilla.h"
21 #include "SciLexer.h"
22 
// Distance between each client-side script style range (SCE_HJ_*, SCE_HB_*,
// SCE_HP_*) and its parallel "A" range (SCE_HJA_*, SCE_HBA_*, SCE_HPA_*).
// statePrintForState() adds these offsets and stateForPrintState() removes them.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)

// Language of the embedded script / markup section currently being lexed.
enum script_type { eScriptNone = 0, eScriptJS, eScriptVBS, eScriptPython, eScriptPHP, eScriptXML, eScriptSGML, eScriptSGMLblock };
// Kind of section being lexed: plain HTML, a script, a pre-processor section
// (entered on "<?" or "<%"), or a pre-processor section nested inside a script.
enum script_mode { eHtml = 0, eNonHtmlScript, eNonHtmlPreProc, eNonHtmlScriptPreProc };
29 
// Returns true when ch is an ASCII letter, digit, '.' or '_'.
// Anything outside 0..0x7F is rejected up front: a negative value (for
// example a high-bit byte held in a signed char) must never reach isalnum(),
// whose behaviour is undefined for arguments that are neither EOF nor
// representable as unsigned char.
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
33 
// Returns true when ch is an ASCII letter, digit or '_'.
// Values outside 0..0x7F (including negative ones coming from a signed char)
// are rejected before isalnum() is called, because isalnum() is undefined for
// arguments that are neither EOF nor representable as unsigned char.
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
37 
// Maps 'A'..'Z' onto 'a'..'z'; every other value is returned untouched.
static inline int MakeLowerCase(int ch) {
	const bool isUpper = (ch >= 'A') && (ch <= 'Z');
	return isUpper ? ch + ('a' - 'A') : ch;
}
44 
GetTextSegment(Accessor & styler,unsigned int start,unsigned int end,char * s,size_t len)45 static void GetTextSegment(Accessor &styler, unsigned int start, unsigned int end, char *s, size_t len) {
46 	size_t i = 0;
47 	for (; (i < end - start + 1) && (i < len-1); i++) {
48 		s[i] = static_cast<char>(MakeLowerCase(styler[start + i]));
49 	}
50 	s[i] = '\0';
51 }
52 
segIsScriptingIndicator(Accessor & styler,unsigned int start,unsigned int end,script_type prevValue)53 static script_type segIsScriptingIndicator(Accessor &styler, unsigned int start, unsigned int end, script_type prevValue) {
54 	char s[100];
55 	GetTextSegment(styler, start, end, s, sizeof(s));
56 	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
57 	if (strstr(s, "src"))	// External script
58 		return eScriptNone;
59 	if (strstr(s, "vbs"))
60 		return eScriptVBS;
61 	if (strstr(s, "pyth"))
62 		return eScriptPython;
63 	if (strstr(s, "javas"))
64 		return eScriptJS;
65 	if (strstr(s, "jscr"))
66 		return eScriptJS;
67 	if (strstr(s, "php"))
68 		return eScriptPHP;
69 	if (strstr(s, "xml"))
70 		return eScriptXML;
71 
72 	return prevValue;
73 }
74 
PrintScriptingIndicatorOffset(Accessor & styler,unsigned int start,unsigned int end)75 static int PrintScriptingIndicatorOffset(Accessor &styler, unsigned int start, unsigned int end) {
76 	int iResult = 0;
77 	char s[100];
78 	GetTextSegment(styler, start, end, s, sizeof(s));
79 	if (0 == strncmp(s, "php", 3)) {
80 		iResult = 3;
81 	}
82 
83 	return iResult;
84 }
85 
ScriptOfState(int state)86 static script_type ScriptOfState(int state) {
87 	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
88 		return eScriptPython;
89 	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
90 		return eScriptVBS;
91 	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
92 		return eScriptJS;
93 	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
94 		return eScriptPHP;
95 	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
96 		return eScriptSGML;
97 	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
98 		return eScriptSGMLblock;
99 	} else {
100 		return eScriptNone;
101 	}
102 }
103 
statePrintForState(int state,script_mode inScriptType)104 static int statePrintForState(int state, script_mode inScriptType) {
105 	int StateToPrint;
106 
107 	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
108 		StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
109 	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
110 		StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
111 	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
112 		StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
113 	} else {
114 		StateToPrint = state;
115 	}
116 
117 	return StateToPrint;
118 }
119 
stateForPrintState(int StateToPrint)120 static int stateForPrintState(int StateToPrint) {
121 	int state;
122 
123 	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
124 		state = StateToPrint - SCE_HA_PYTHON;
125 	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
126 		state = StateToPrint - SCE_HA_VBS;
127 	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
128 		state = StateToPrint - SCE_HA_JS;
129 	} else {
130 		state = StateToPrint;
131 	}
132 
133 	return state;
134 }
135 
IsNumber(unsigned int start,Accessor & styler)136 static inline bool IsNumber(unsigned int start, Accessor &styler) {
137 	return IsADigit(styler[start]) || (styler[start] == '.') ||
138 	       (styler[start] == '-') || (styler[start] == '#');
139 }
140 
isStringState(int state)141 static inline bool isStringState(int state) {
142 	bool bResult;
143 
144 	switch (state) {
145 	case SCE_HJ_DOUBLESTRING:
146 	case SCE_HJ_SINGLESTRING:
147 	case SCE_HJA_DOUBLESTRING:
148 	case SCE_HJA_SINGLESTRING:
149 	case SCE_HB_STRING:
150 	case SCE_HBA_STRING:
151 	case SCE_HP_STRING:
152 	case SCE_HP_CHARACTER:
153 	case SCE_HP_TRIPLE:
154 	case SCE_HP_TRIPLEDOUBLE:
155 	case SCE_HPA_STRING:
156 	case SCE_HPA_CHARACTER:
157 	case SCE_HPA_TRIPLE:
158 	case SCE_HPA_TRIPLEDOUBLE:
159 	case SCE_HPHP_HSTRING:
160 	case SCE_HPHP_SIMPLESTRING:
161 	case SCE_HPHP_HSTRING_VARIABLE:
162 	case SCE_HPHP_COMPLEX_VARIABLE:
163 		bResult = true;
164 		break;
165 	default :
166 		bResult = false;
167 		break;
168 	}
169 	return bResult;
170 }
171 
stateAllowsTermination(int state)172 static inline bool stateAllowsTermination(int state) {
173 	bool allowTermination = !isStringState(state);
174 	if (allowTermination) {
175 		switch (state) {
176 		case SCE_HPHP_COMMENT:
177 		case SCE_HP_COMMENTLINE:
178 		case SCE_HPA_COMMENTLINE:
179 			allowTermination = false;
180 		}
181 	}
182 	return allowTermination;
183 }
184 
185 // not really well done, since it's only comments that should lex the %> and <%
isCommentASPState(int state)186 static inline bool isCommentASPState(int state) {
187 	bool bResult;
188 
189 	switch (state) {
190 	case SCE_HJ_COMMENT:
191 	case SCE_HJ_COMMENTLINE:
192 	case SCE_HJ_COMMENTDOC:
193 	case SCE_HB_COMMENTLINE:
194 	case SCE_HP_COMMENTLINE:
195 	case SCE_HPHP_COMMENT:
196 	case SCE_HPHP_COMMENTLINE:
197 		bResult = true;
198 		break;
199 	default :
200 		bResult = false;
201 		break;
202 	}
203 	return bResult;
204 }
205 
classifyAttribHTML(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler)206 static void classifyAttribHTML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
207 	bool wordIsNumber = IsNumber(start, styler);
208 	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
209 	if (wordIsNumber) {
210 		chAttr = SCE_H_NUMBER;
211 	} else {
212 		char s[100];
213 		GetTextSegment(styler, start, end, s, sizeof(s));
214 		if (keywords.InList(s))
215 			chAttr = SCE_H_ATTRIBUTE;
216 	}
217 	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
218 		// No keywords -> all are known
219 		chAttr = SCE_H_ATTRIBUTE;
220 	styler.ColourTo(end, chAttr);
221 }
222 
classifyTagHTML(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler,bool & tagDontFold,bool caseSensitive)223 static int classifyTagHTML(unsigned int start, unsigned int end,
224                            WordList &keywords, Accessor &styler, bool &tagDontFold,
225 			   bool caseSensitive) {
226 	char s[30 + 2];
227 	// Copy after the '<'
228 	unsigned int i = 0;
229 	for (unsigned int cPos = start; cPos <= end && i < 30; cPos++) {
230 		char ch = styler[cPos];
231 		if ((ch != '<') && (ch != '/')) {
232 			s[i++] = caseSensitive ? ch : static_cast<char>(MakeLowerCase(ch));
233 		}
234 	}
235 
236 	//The following is only a quick hack, to see if this whole thing would work
237 	//we first need the tagname with a trailing space...
238 	s[i] = ' ';
239 	s[i+1] = '\0';
240 
241 	//...to find it in the list of no-container-tags
242 	// (There are many more. We will need a keywordlist in the property file for this)
243 	tagDontFold = (NULL != strstr("meta link img area br hr input ",s));
244 
245 	//now we can remove the trailing space
246 	s[i] = '\0';
247 
248 	bool isScript = false;
249 	char chAttr = SCE_H_TAGUNKNOWN;
250 	if (s[0] == '!') {
251 		chAttr = SCE_H_SGML_DEFAULT;
252 	} else if (s[0] == '/') {	// Closing tag
253 		if (keywords.InList(s + 1))
254 			chAttr = SCE_H_TAG;
255 	} else {
256 		if (keywords.InList(s)) {
257 			chAttr = SCE_H_TAG;
258 			isScript = 0 == strcmp(s, "script");
259 		}
260 	}
261 	if ((chAttr == SCE_H_TAGUNKNOWN) && !keywords) {
262 		// No keywords -> all are known
263 		chAttr = SCE_H_TAG;
264 		isScript = 0 == strcmp(s, "script");
265 	}
266 	styler.ColourTo(end, chAttr);
267 	return isScript ? SCE_H_SCRIPT : chAttr;
268 }
269 
classifyWordHTJS(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler,script_mode inScriptType)270 static void classifyWordHTJS(unsigned int start, unsigned int end,
271                              WordList &keywords, Accessor &styler, script_mode inScriptType) {
272 	char chAttr = SCE_HJ_WORD;
273 	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
274 	if (wordIsNumber)
275 		chAttr = SCE_HJ_NUMBER;
276 	else {
277 		char s[30 + 1];
278 		unsigned int i = 0;
279 		for (; i < end - start + 1 && i < 30; i++) {
280 			s[i] = styler[start + i];
281 		}
282 		s[i] = '\0';
283 		if (keywords.InList(s))
284 			chAttr = SCE_HJ_KEYWORD;
285 	}
286 	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
287 }
288 
classifyWordHTVB(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler,script_mode inScriptType)289 static int classifyWordHTVB(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, script_mode inScriptType) {
290 	char chAttr = SCE_HB_IDENTIFIER;
291 	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
292 	if (wordIsNumber)
293 		chAttr = SCE_HB_NUMBER;
294 	else {
295 		char s[100];
296 		GetTextSegment(styler, start, end, s, sizeof(s));
297 		if (keywords.InList(s)) {
298 			chAttr = SCE_HB_WORD;
299 			if (strcmp(s, "rem") == 0)
300 				chAttr = SCE_HB_COMMENTLINE;
301 		}
302 	}
303 	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
304 	if (chAttr == SCE_HB_COMMENTLINE)
305 		return SCE_HB_COMMENTLINE;
306 	else
307 		return SCE_HB_DEFAULT;
308 }
309 
classifyWordHTPy(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler,char * prevWord,script_mode inScriptType)310 static void classifyWordHTPy(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler, char *prevWord, script_mode inScriptType) {
311 	bool wordIsNumber = IsADigit(styler[start]);
312 	char s[30 + 1];
313 	unsigned int i = 0;
314 	for (; i < end - start + 1 && i < 30; i++) {
315 		s[i] = styler[start + i];
316 	}
317 	s[i] = '\0';
318 	char chAttr = SCE_HP_IDENTIFIER;
319 	if (0 == strcmp(prevWord, "class"))
320 		chAttr = SCE_HP_CLASSNAME;
321 	else if (0 == strcmp(prevWord, "def"))
322 		chAttr = SCE_HP_DEFNAME;
323 	else if (wordIsNumber)
324 		chAttr = SCE_HP_NUMBER;
325 	else if (keywords.InList(s))
326 		chAttr = SCE_HP_WORD;
327 	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
328 	strcpy(prevWord, s);
329 }
330 
331 // Update the word colour to default or keyword
332 // Called when in a PHP word
classifyWordHTPHP(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler)333 static void classifyWordHTPHP(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
334 	char chAttr = SCE_HPHP_DEFAULT;
335 	bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
336 	if (wordIsNumber)
337 		chAttr = SCE_HPHP_NUMBER;
338 	else {
339 		char s[100];
340 		GetTextSegment(styler, start, end, s, sizeof(s));
341 		if (keywords.InList(s))
342 			chAttr = SCE_HPHP_WORD;
343 	}
344 	styler.ColourTo(end, chAttr);
345 }
346 
isWordHSGML(unsigned int start,unsigned int end,WordList & keywords,Accessor & styler)347 static bool isWordHSGML(unsigned int start, unsigned int end, WordList &keywords, Accessor &styler) {
348 	char s[30 + 1];
349 	unsigned int i = 0;
350 	for (; i < end - start + 1 && i < 30; i++) {
351 		s[i] = styler[start + i];
352 	}
353 	s[i] = '\0';
354 	return keywords.InList(s);
355 }
356 
isWordCdata(unsigned int start,unsigned int end,Accessor & styler)357 static bool isWordCdata(unsigned int start, unsigned int end, Accessor &styler) {
358 	char s[30 + 1];
359 	unsigned int i = 0;
360 	for (; i < end - start + 1 && i < 30; i++) {
361 		s[i] = styler[start + i];
362 	}
363 	s[i] = '\0';
364 	return (0 == strcmp(s, "[CDATA["));
365 }
366 
367 // Return the first state to reach when entering a scripting language
StateForScript(script_type scriptLanguage)368 static int StateForScript(script_type scriptLanguage) {
369 	int Result;
370 	switch (scriptLanguage) {
371 	case eScriptVBS:
372 		Result = SCE_HB_START;
373 		break;
374 	case eScriptPython:
375 		Result = SCE_HP_START;
376 		break;
377 	case eScriptPHP:
378 		Result = SCE_HPHP_DEFAULT;
379 		break;
380 	case eScriptXML:
381 		Result = SCE_H_TAGUNKNOWN;
382 		break;
383 	case eScriptSGML:
384 		Result = SCE_H_SGML_DEFAULT;
385 		break;
386 	default :
387 		Result = SCE_HJ_START;
388 		break;
389 	}
390 	return Result;
391 }
392 
// True when ch may be part of an HTML word: any non-ASCII byte, an ASCII
// letter or digit, or one of . - _ : ! #
static inline bool ishtmlwordchar(char ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.':
	case '-':
	case '_':
	case ':':
	case '!':
	case '#':
		return true;
	default:
		return false;
	}
}
397 
// True when ch may be part of an SGML word: any non-ASCII byte, an ASCII
// letter or digit, or one of . _ : ! # [
static inline bool issgmlwordchar(char ch) {
	if (!isascii(ch))
		return true;
	if (isalnum(ch))
		return true;
	switch (ch) {
	case '.':
	case '_':
	case ':':
	case '!':
	case '#':
	case '[':
		return true;
	default:
		return false;
	}
}
402 
// True when ch can begin a PHP word: an ASCII letter, '_', or any byte with
// value 0x7f or above.
static inline bool IsPhpWordStart(const unsigned char ch) {
	if (ch >= 0x7f)
		return true;
	return isascii(ch) && (isalpha(ch) || ch == '_');
}
406 
IsPhpWordChar(char ch)407 static inline bool IsPhpWordChar(char ch) {
408 	return IsADigit(ch) || IsPhpWordStart(ch);
409 }
410 
InTagState(int state)411 static bool InTagState(int state) {
412 	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
413 	       state == SCE_H_SCRIPT ||
414 	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
415 	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
416 	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
417 }
418 
IsCommentState(const int state)419 static bool IsCommentState(const int state) {
420 	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
421 }
422 
IsScriptCommentState(const int state)423 static bool IsScriptCommentState(const int state) {
424 	return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
425 		   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
426 }
427 
// True when ch is a carriage return or a line feed.
static bool isLineEnd(char ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
431 
// True when ch is '(', '=' or ','.
static bool isOKBeforeRE(char ch) {
	switch (ch) {
	case '(':
	case '=':
	case ',':
		return true;
	default:
		return false;
	}
}
435 
isPHPStringState(int state)436 static bool isPHPStringState(int state) {
437 	return
438 	    (state == SCE_HPHP_HSTRING) ||
439 	    (state == SCE_HPHP_SIMPLESTRING) ||
440 	    (state == SCE_HPHP_HSTRING_VARIABLE) ||
441 	    (state == SCE_HPHP_COMPLEX_VARIABLE);
442 }
443 
FindPhpStringDelimiter(char * phpStringDelimiter,const int phpStringDelimiterSize,int i,const int lengthDoc,Accessor & styler)444 static int FindPhpStringDelimiter(char *phpStringDelimiter, const int phpStringDelimiterSize, int i, const int lengthDoc, Accessor &styler) {
445 	int j;
446 	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
447 		i++;
448 	phpStringDelimiter[0] = '\n';
449 	for (j = i; j < lengthDoc && styler[j] != '\n' && styler[j] != '\r'; j++) {
450 		if (j - i < phpStringDelimiterSize - 2)
451 			phpStringDelimiter[j-i+1] = styler[j];
452 		else
453 			i++;
454 	}
455 	phpStringDelimiter[j-i+1] = '\0';
456 	return j;
457 }
458 
ColouriseHyperTextDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)459 static void ColouriseHyperTextDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
460                                   Accessor &styler) {
461 	WordList &keywords = *keywordlists[0];
462 	WordList &keywords2 = *keywordlists[1];
463 	WordList &keywords3 = *keywordlists[2];
464 	WordList &keywords4 = *keywordlists[3];
465 	WordList &keywords5 = *keywordlists[4];
466 	WordList &keywords6 = *keywordlists[5]; // SGML (DTD) keywords
467 
468 	// Lexer for HTML requires more lexical states (7 bits worth) than most lexers
469 	styler.StartAt(startPos, STYLE_MAX);
470 	char prevWord[200];
471 	prevWord[0] = '\0';
472 	char phpStringDelimiter[200]; // PHP is not limited in length, we are
473 	phpStringDelimiter[0] = '\0';
474 	int StateToPrint = initStyle;
475 	int state = stateForPrintState(StateToPrint);
476 
477 	// If inside a tag, it may be a script tag, so reread from the start to ensure any language tags are seen
478 	if (InTagState(state)) {
479 		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
480 			startPos--;
481 			length++;
482 		}
483 		state = SCE_H_DEFAULT;
484 	}
485 	// String can be heredoc, must find a delimiter first
486 	while (startPos > 0 && isPHPStringState(state) && state != SCE_HPHP_SIMPLESTRING) {
487 		startPos--;
488 		length++;
489 		state = styler.StyleAt(startPos);
490 	}
491 	styler.StartAt(startPos, STYLE_MAX);
492 
493 	int lineCurrent = styler.GetLine(startPos);
494 	int lineState;
495 	if (lineCurrent > 0) {
496 		lineState = styler.GetLineState(lineCurrent);
497 	} else {
498 		// Default client and ASP scripting language is JavaScript
499 		lineState = eScriptJS << 8;
500 		lineState |= styler.GetPropertyInt("asp.default.language", eScriptJS) << 4;
501 	}
502 	script_mode inScriptType = script_mode((lineState >> 0) & 0x03); // 2 bits of scripting mode
503 	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
504 	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
505 	bool tagDontFold = false; //some HTML tags should not be folded
506 	script_type aspScript = script_type((lineState >> 4) & 0x0F); // 4 bits of script name
507 	script_type clientScript = script_type((lineState >> 8) & 0x0F); // 4 bits of script name
508 	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
509 
510 	script_type scriptLanguage = ScriptOfState(state);
511 
512 	const bool foldHTML = styler.GetPropertyInt("fold.html", 0) != 0;
513 	const bool fold = foldHTML && styler.GetPropertyInt("fold", 0);
514 	const bool foldHTMLPreprocessor = foldHTML && styler.GetPropertyInt("fold.html.preprocessor", 1);
515 	const bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
516 	const bool caseSensitive = styler.GetPropertyInt("html.tags.case.sensitive", 0) != 0;
517 
518 	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
519 	int levelCurrent = levelPrev;
520 	int visibleChars = 0;
521 
522 	char chPrev = ' ';
523 	char ch = ' ';
524 	char chPrevNonWhite = ' ';
525 	// look back to set chPrevNonWhite properly for better regex colouring
526 	if (scriptLanguage == eScriptJS && startPos > 0) {
527 		int back = startPos;
528 		int style = 0;
529 		while (--back) {
530 			style = styler.StyleAt(back);
531 			if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
532 				// includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
533 				break;
534 		}
535 		if (style == SCE_HJ_SYMBOLS) {
536 			chPrevNonWhite = styler.SafeGetCharAt(back);
537 		}
538 	}
539 
540 	styler.StartSegment(startPos);
541 	const int lengthDoc = startPos + length;
542 	for (int i = startPos; i < lengthDoc; i++) {
543 		const char chPrev2 = chPrev;
544 		chPrev = ch;
545 		if (!isspacechar(ch) && state != SCE_HJ_COMMENT &&
546 			state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
547 			chPrevNonWhite = ch;
548 		ch = styler[i];
549 		char chNext = styler.SafeGetCharAt(i + 1);
550 		const char chNext2 = styler.SafeGetCharAt(i + 2);
551 
552 		// Handle DBCS codepages
553 		if (styler.IsLeadByte(ch)) {
554 			chPrev = ' ';
555 			i += 1;
556 			continue;
557 		}
558 
559 		if ((!isspacechar(ch) || !foldCompact) && fold)
560 			visibleChars++;
561 
562 		// decide what is the current state to print (depending of the script tag)
563 		StateToPrint = statePrintForState(state, inScriptType);
564 
565 		// handle script folding
566 		if (fold) {
567 			switch (scriptLanguage) {
568 			case eScriptJS:
569 			case eScriptPHP:
570 				//not currently supported				case eScriptVBS:
571 
572 				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
573 				//Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
574 				//if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
575 					if ((ch == '{') || (ch == '}')) {
576 						levelCurrent += (ch == '{') ? 1 : -1;
577 					}
578 				}
579 				break;
580 			case eScriptPython:
581 				if (state != SCE_HP_COMMENTLINE) {
582 					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
583 						levelCurrent++;
584 					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
585 						// check if the number of tabs is lower than the level
586 						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
587 						for (int j = 0; Findlevel > 0; j++) {
588 							char chTmp = styler.SafeGetCharAt(i + j + 1);
589 							if (chTmp == '\t') {
590 								Findlevel -= 8;
591 							} else if (chTmp == ' ') {
592 								Findlevel--;
593 							} else {
594 								break;
595 							}
596 						}
597 
598 						if (Findlevel > 0) {
599 							levelCurrent -= Findlevel / 8;
600 							if (Findlevel % 8)
601 								levelCurrent--;
602 						}
603 					}
604 				}
605 				break;
606 			default:
607 				break;
608 			}
609 		}
610 
611 		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
612 			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
613 			// Avoid triggering two times on Dos/Win
614 			// New line -> record any line state onto /next/ line
615 			if (fold) {
616 				int lev = levelPrev;
617 				if (visibleChars == 0)
618 					lev |= SC_FOLDLEVELWHITEFLAG;
619 				if ((levelCurrent > levelPrev) && (visibleChars > 0))
620 					lev |= SC_FOLDLEVELHEADERFLAG;
621 
622 				styler.SetLevel(lineCurrent, lev);
623 				visibleChars = 0;
624 				levelPrev = levelCurrent;
625 			}
626 			lineCurrent++;
627 			styler.SetLineState(lineCurrent,
628 			                    ((inScriptType & 0x03) << 0) |
629 			                    ((tagOpened & 0x01) << 2) |
630 			                    ((tagClosing & 0x01) << 3) |
631 			                    ((aspScript & 0x0F) << 4) |
632 			                    ((clientScript & 0x0F) << 8) |
633 			                    ((beforePreProc & 0xFF) << 12));
634 		}
635 
636 		// generic end of script processing
637 		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
638 			// Check if it's the end of the script tag (or any other HTML tag)
639 			switch (state) {
640 				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
641 			case SCE_H_DOUBLESTRING:
642 			case SCE_H_SINGLESTRING:
643 			case SCE_HJ_COMMENT:
644 			case SCE_HJ_COMMENTDOC:
645 			//case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
646 			// the end of script marker from some JS interpreters.
647 			case SCE_HJ_DOUBLESTRING:
648 			case SCE_HJ_SINGLESTRING:
649 			case SCE_HJ_REGEX:
650 			case SCE_HB_STRING:
651 			case SCE_HP_STRING:
652 			case SCE_HP_TRIPLE:
653 			case SCE_HP_TRIPLEDOUBLE:
654 				break;
655 			default :
656 				// check if the closing tag is a script tag
657 				if (state == SCE_HJ_COMMENTLINE) {
658 					char tag[7]; // room for the <script> tag
659 					char chr;	// current char
660 					int j=0;
661 					chr = styler.SafeGetCharAt(i+2);
662 					while (j < 6 && !isspacechar(chr)) {
663 						tag[j++] = static_cast<char>(MakeLowerCase(chr));
664 						chr = styler.SafeGetCharAt(i+2+j);
665 					}
666 					tag[j] = '\0';
667 					if (strcmp(tag, "script") != 0) break;
668 				}
669 				// closing tag of the script (it's a closing HTML tag anyway)
670 				styler.ColourTo(i - 1, StateToPrint);
671 				state = SCE_H_TAGUNKNOWN;
672 				inScriptType = eHtml;
673 				scriptLanguage = eScriptNone;
674 				clientScript = eScriptJS;
675 				i += 2;
676 				visibleChars += 2;
677 				tagClosing = true;
678 				continue;
679 			}
680 		}
681 
682 		/////////////////////////////////////
683 		// handle the start of PHP pre-processor = Non-HTML
684 		else if ((state != SCE_H_ASPAT) &&
685 		         !isPHPStringState(state) &&
686 		         (state != SCE_HPHP_COMMENT) &&
687 		         (ch == '<') &&
688 		         (chNext == '?') &&
689 				 !IsScriptCommentState(state) ) {
690 			scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 10, eScriptPHP);
691 			if (scriptLanguage != eScriptPHP && isStringState(state)) continue;
692 			styler.ColourTo(i - 1, StateToPrint);
693 			beforePreProc = state;
694 			i++;
695 			visibleChars++;
696 			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 10);
697 			if (scriptLanguage == eScriptXML)
698 				styler.ColourTo(i, SCE_H_XMLSTART);
699 			else
700 				styler.ColourTo(i, SCE_H_QUESTION);
701 			state = StateForScript(scriptLanguage);
702 			if (inScriptType == eNonHtmlScript)
703 				inScriptType = eNonHtmlScriptPreProc;
704 			else
705 				inScriptType = eNonHtmlPreProc;
706 			// Fold whole script, but not if the XML first tag (all XML-like tags in this case)
707 			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
708 				levelCurrent++;
709 			}
710 			// should be better
711 			ch = styler.SafeGetCharAt(i);
712 			continue;
713 		}
714 
715 		// handle the start of ASP pre-processor = Non-HTML
716 		else if (!isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
717 			styler.ColourTo(i - 1, StateToPrint);
718 			beforePreProc = state;
719 			if (inScriptType == eNonHtmlScript)
720 				inScriptType = eNonHtmlScriptPreProc;
721 			else
722 				inScriptType = eNonHtmlPreProc;
723 
724 			if (chNext2 == '@') {
725 				i += 2; // place as if it was the second next char treated
726 				visibleChars += 2;
727 				state = SCE_H_ASPAT;
728 			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
729 				styler.ColourTo(i + 3, SCE_H_ASP);
730 				state = SCE_H_XCCOMMENT;
731 				scriptLanguage = eScriptVBS;
732 				continue;
733 			} else {
734 				if (chNext2 == '=') {
735 					i += 2; // place as if it was the second next char treated
736 					visibleChars += 2;
737 				} else {
738 					i++; // place as if it was the next char treated
739 					visibleChars++;
740 				}
741 
742 				state = StateForScript(aspScript);
743 			}
744 			scriptLanguage = eScriptVBS;
745 			styler.ColourTo(i, SCE_H_ASP);
746 			// fold whole script
747 			if (foldHTMLPreprocessor)
748 				levelCurrent++;
749 			// should be better
750 			ch = styler.SafeGetCharAt(i);
751 			continue;
752 		}
753 
754 		/////////////////////////////////////
755 		// handle the start of SGML language (DTD)
756 		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
757 				 (chPrev == '<') &&
758 				 (ch == '!') &&
759 				 (StateToPrint != SCE_H_CDATA) &&
760 				 (!IsCommentState(StateToPrint)) &&
761 				 (!IsScriptCommentState(StateToPrint)) ) {
762 			beforePreProc = state;
763 			styler.ColourTo(i - 2, StateToPrint);
764 			if ((chNext == '-') && (chNext2 == '-')) {
765 				state = SCE_H_COMMENT; // wait for a pending command
766 				styler.ColourTo(i + 2, SCE_H_COMMENT);
767 				i += 2; // follow styling after the --
768 			} else if (isWordCdata(i + 1, i + 7, styler)) {
769 				state = SCE_H_CDATA;
770 			} else {
771 				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
772 				scriptLanguage = eScriptSGML;
773 				state = SCE_H_SGML_COMMAND; // wait for a pending command
774 			}
775 			// fold whole tag (-- when closing the tag)
776 			if (foldHTMLPreprocessor)
777 				levelCurrent++;
778 			continue;
779 		}
780 
781 		// handle the end of a pre-processor = Non-HTML
782 		else if ((
783 		             ((inScriptType == eNonHtmlPreProc)
784 		              || (inScriptType == eNonHtmlScriptPreProc)) && (
785 		                 ((scriptLanguage != eScriptNone) && stateAllowsTermination(state) && ((ch == '%') || (ch == '?')))
786 		             ) && (chNext == '>')) ||
787 		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
788 			if (state == SCE_H_ASPAT) {
789 				aspScript = segIsScriptingIndicator(styler,
790 				                                    styler.GetStartSegment(), i - 1, aspScript);
791 			}
792 			// Bounce out of any ASP mode
793 			switch (state) {
794 			case SCE_HJ_WORD:
795 				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
796 				break;
797 			case SCE_HB_WORD:
798 				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
799 				break;
800 			case SCE_HP_WORD:
801 				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
802 				break;
803 			case SCE_HPHP_WORD:
804 				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
805 				break;
806 			case SCE_H_XCCOMMENT:
807 				styler.ColourTo(i - 1, state);
808 				break;
809 			default :
810 				styler.ColourTo(i - 1, StateToPrint);
811 				break;
812 			}
813 			if (scriptLanguage != eScriptSGML) {
814 				i++;
815 				visibleChars++;
816 			}
817 			if (ch == '%')
818 				styler.ColourTo(i, SCE_H_ASP);
819 			else if (scriptLanguage == eScriptXML)
820 				styler.ColourTo(i, SCE_H_XMLEND);
821 			else if (scriptLanguage == eScriptSGML)
822 				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
823 			else
824 				styler.ColourTo(i, SCE_H_QUESTION);
825 			state = beforePreProc;
826 			if (inScriptType == eNonHtmlScriptPreProc)
827 				inScriptType = eNonHtmlScript;
828 			else
829 				inScriptType = eHtml;
830 			// Unfold all scripting languages, except for XML tag
831 			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
832 				levelCurrent--;
833 			}
834 			scriptLanguage = eScriptNone;
835 			continue;
836 		}
837 		/////////////////////////////////////
838 
839 		switch (state) {
840 		case SCE_H_DEFAULT:
841 			if (ch == '<') {
842 				// in HTML, fold on tag open and unfold on tag close
843 				tagOpened = true;
844 				tagClosing = (chNext == '/');
845 				styler.ColourTo(i - 1, StateToPrint);
846 				if (chNext != '!')
847 					state = SCE_H_TAGUNKNOWN;
848 			} else if (ch == '&') {
849 				styler.ColourTo(i - 1, SCE_H_DEFAULT);
850 				state = SCE_H_ENTITY;
851 			}
852 			break;
853 		case SCE_H_SGML_DEFAULT:
854 		case SCE_H_SGML_BLOCK_DEFAULT:
855 //			if (scriptLanguage == eScriptSGMLblock)
856 //				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
857 
858 			if (ch == '\"') {
859 				styler.ColourTo(i - 1, StateToPrint);
860 				state = SCE_H_SGML_DOUBLESTRING;
861 			} else if (ch == '\'') {
862 				styler.ColourTo(i - 1, StateToPrint);
863 				state = SCE_H_SGML_SIMPLESTRING;
864 			} else if ((ch == '-') && (chPrev == '-')) {
865 				styler.ColourTo(i - 2, StateToPrint);
866 				state = SCE_H_SGML_COMMENT;
867 			} else if (isascii(ch) && isalpha(ch) && (chPrev == '%')) {
868 				styler.ColourTo(i - 2, StateToPrint);
869 				state = SCE_H_SGML_ENTITY;
870 			} else if (ch == '#') {
871 				styler.ColourTo(i - 1, StateToPrint);
872 				state = SCE_H_SGML_SPECIAL;
873 			} else if (ch == '[') {
874 				styler.ColourTo(i - 1, StateToPrint);
875 				scriptLanguage = eScriptSGMLblock;
876 				state = SCE_H_SGML_BLOCK_DEFAULT;
877 			} else if (ch == ']') {
878 				if (scriptLanguage == eScriptSGMLblock) {
879 					styler.ColourTo(i, StateToPrint);
880 					scriptLanguage = eScriptSGML;
881 				} else {
882 					styler.ColourTo(i - 1, StateToPrint);
883 					styler.ColourTo(i, SCE_H_SGML_ERROR);
884 				}
885 				state = SCE_H_SGML_DEFAULT;
886 			} else if (scriptLanguage == eScriptSGMLblock) {
887 				if ((ch == '!') && (chPrev == '<')) {
888 					styler.ColourTo(i - 2, StateToPrint);
889 					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
890 					state = SCE_H_SGML_COMMAND;
891 				} else if (ch == '>') {
892 					styler.ColourTo(i - 1, StateToPrint);
893 					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
894 				}
895 			}
896 			break;
897 		case SCE_H_SGML_COMMAND:
898 			if ((ch == '-') && (chPrev == '-')) {
899 				styler.ColourTo(i - 2, StateToPrint);
900 				state = SCE_H_SGML_COMMENT;
901 			} else if (!issgmlwordchar(ch)) {
902 				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
903 					styler.ColourTo(i - 1, StateToPrint);
904 					state = SCE_H_SGML_1ST_PARAM;
905 				} else {
906 					state = SCE_H_SGML_ERROR;
907 				}
908 			}
909 			break;
910 		case SCE_H_SGML_1ST_PARAM:
911 			// wait for the beginning of the word
912 			if ((ch == '-') && (chPrev == '-')) {
913 				if (scriptLanguage == eScriptSGMLblock) {
914 					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
915 				} else {
916 					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
917 				}
918 				state = SCE_H_SGML_1ST_PARAM_COMMENT;
919 			} else if (issgmlwordchar(ch)) {
920 				if (scriptLanguage == eScriptSGMLblock) {
921 					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
922 				} else {
923 					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
924 				}
925 				// find the length of the word
926 				int size = 1;
927 				while (ishtmlwordchar(styler.SafeGetCharAt(i + size)))
928 					size++;
929 				styler.ColourTo(i + size - 1, StateToPrint);
930 				i += size - 1;
931 				visibleChars += size - 1;
932 				ch = styler.SafeGetCharAt(i);
933 				if (scriptLanguage == eScriptSGMLblock) {
934 					state = SCE_H_SGML_BLOCK_DEFAULT;
935 				} else {
936 					state = SCE_H_SGML_DEFAULT;
937 				}
938 				continue;
939 			}
940 			break;
941 		case SCE_H_SGML_ERROR:
942 			if ((ch == '-') && (chPrev == '-')) {
943 				styler.ColourTo(i - 2, StateToPrint);
944 				state = SCE_H_SGML_COMMENT;
945 			}
946 		case SCE_H_SGML_DOUBLESTRING:
947 			if (ch == '\"') {
948 				styler.ColourTo(i, StateToPrint);
949 				state = SCE_H_SGML_DEFAULT;
950 			}
951 			break;
952 		case SCE_H_SGML_SIMPLESTRING:
953 			if (ch == '\'') {
954 				styler.ColourTo(i, StateToPrint);
955 				state = SCE_H_SGML_DEFAULT;
956 			}
957 			break;
958 		case SCE_H_SGML_COMMENT:
959 			if ((ch == '-') && (chPrev == '-')) {
960 				styler.ColourTo(i, StateToPrint);
961 				state = SCE_H_SGML_DEFAULT;
962 			}
963 			break;
964 		case SCE_H_CDATA:
965 			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
966 				styler.ColourTo(i, StateToPrint);
967 				state = SCE_H_DEFAULT;
968 				levelCurrent--;
969 			}
970 			break;
971 		case SCE_H_COMMENT:
972 			if ((chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
973 				styler.ColourTo(i, StateToPrint);
974 				state = SCE_H_DEFAULT;
975 				levelCurrent--;
976 			}
977 			break;
978 		case SCE_H_SGML_1ST_PARAM_COMMENT:
979 			if ((ch == '-') && (chPrev == '-')) {
980 				styler.ColourTo(i, SCE_H_SGML_COMMENT);
981 				state = SCE_H_SGML_1ST_PARAM;
982 			}
983 			break;
984 		case SCE_H_SGML_SPECIAL:
985 			if (!(isascii(ch) && isupper(ch))) {
986 				styler.ColourTo(i - 1, StateToPrint);
987 				if (isalnum(ch)) {
988 					state = SCE_H_SGML_ERROR;
989 				} else {
990 					state = SCE_H_SGML_DEFAULT;
991 				}
992 			}
993 			break;
994 		case SCE_H_SGML_ENTITY:
995 			if (ch == ';') {
996 				styler.ColourTo(i, StateToPrint);
997 				state = SCE_H_SGML_DEFAULT;
998 			} else if (!(isascii(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
999 				styler.ColourTo(i, SCE_H_SGML_ERROR);
1000 				state = SCE_H_SGML_DEFAULT;
1001 			}
1002 			break;
1003 		case SCE_H_ENTITY:
1004 			if (ch == ';') {
1005 				styler.ColourTo(i, StateToPrint);
1006 				state = SCE_H_DEFAULT;
1007 			}
1008 			if (ch != '#' && !(isascii(ch) && isalnum(ch))	// Should check that '#' follows '&', but it is unlikely anyway...
1009 				&& ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1010 				styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1011 				state = SCE_H_DEFAULT;
1012 			}
1013 			break;
1014 		case SCE_H_TAGUNKNOWN:
1015 			if (!ishtmlwordchar(ch) && !((ch == '/') && (chPrev == '<')) && ch != '[') {
1016 				int eClass = classifyTagHTML(styler.GetStartSegment(),
1017 					i - 1, keywords, styler, tagDontFold, caseSensitive);
1018 				if (eClass == SCE_H_SCRIPT) {
1019 					if (!tagClosing) {
1020 						inScriptType = eNonHtmlScript;
1021 						scriptLanguage = clientScript;
1022 						eClass = SCE_H_TAG;
1023 					} else {
1024 						scriptLanguage = eScriptNone;
1025 						eClass = SCE_H_TAG;
1026 					}
1027 				}
1028 				if (ch == '>') {
1029 					styler.ColourTo(i, eClass);
1030 					if (inScriptType == eNonHtmlScript) {
1031 						state = StateForScript(scriptLanguage);
1032 					} else {
1033 						state = SCE_H_DEFAULT;
1034 					}
1035 					tagOpened = false;
1036 					if (!tagDontFold){
1037 						if (tagClosing) {
1038 							levelCurrent--;
1039 						} else {
1040 							levelCurrent++;
1041 						}
1042 					}
1043 					tagClosing = false;
1044 				} else if (ch == '/' && chNext == '>') {
1045 					if (eClass == SCE_H_TAGUNKNOWN) {
1046 						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1047 					} else {
1048 						styler.ColourTo(i - 1, StateToPrint);
1049 						styler.ColourTo(i + 1, SCE_H_TAGEND);
1050 					}
1051 					i++;
1052 					ch = chNext;
1053 					state = SCE_H_DEFAULT;
1054 					tagOpened = false;
1055 				} else {
1056 					if (eClass != SCE_H_TAGUNKNOWN) {
1057 						if (eClass == SCE_H_SGML_DEFAULT) {
1058 							state = SCE_H_SGML_DEFAULT;
1059 						} else {
1060 							state = SCE_H_OTHER;
1061 						}
1062 					}
1063 				}
1064 			}
1065 			break;
1066 		case SCE_H_ATTRIBUTE:
1067 			if (!ishtmlwordchar(ch) && ch != '/' && ch != '-') {
1068 				if (inScriptType == eNonHtmlScript) {
1069 					int scriptLanguagePrev = scriptLanguage;
1070 					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1071 					scriptLanguage = clientScript;
1072 					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1073 						inScriptType = eHtml;
1074 				}
1075 				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1076 				if (ch == '>') {
1077 					styler.ColourTo(i, SCE_H_TAG);
1078 					if (inScriptType == eNonHtmlScript) {
1079 						state = StateForScript(scriptLanguage);
1080 					} else {
1081 						state = SCE_H_DEFAULT;
1082 					}
1083 					tagOpened = false;
1084 					if (!tagDontFold){
1085 						if (tagClosing){
1086 							levelCurrent--;
1087 						} else {
1088 							levelCurrent++;
1089 						}
1090 					}
1091 					tagClosing = false;
1092 				} else if (ch == '=') {
1093 					styler.ColourTo(i, SCE_H_OTHER);
1094 					state = SCE_H_VALUE;
1095 				} else {
1096 					state = SCE_H_OTHER;
1097 				}
1098 			}
1099 			break;
1100 		case SCE_H_OTHER:
1101 			if (ch == '>') {
1102 				styler.ColourTo(i - 1, StateToPrint);
1103 				styler.ColourTo(i, SCE_H_TAG);
1104 				if (inScriptType == eNonHtmlScript) {
1105 					state = StateForScript(scriptLanguage);
1106 				} else {
1107 					state = SCE_H_DEFAULT;
1108 				}
1109 				tagOpened = false;
1110 				if (!tagDontFold){
1111 					if (tagClosing){
1112 						levelCurrent--;
1113 					} else {
1114 						levelCurrent++;
1115 					}
1116 				}
1117 				tagClosing = false;
1118 			} else if (ch == '\"') {
1119 				styler.ColourTo(i - 1, StateToPrint);
1120 				state = SCE_H_DOUBLESTRING;
1121 			} else if (ch == '\'') {
1122 				styler.ColourTo(i - 1, StateToPrint);
1123 				state = SCE_H_SINGLESTRING;
1124 			} else if (ch == '=') {
1125 				styler.ColourTo(i, StateToPrint);
1126 				state = SCE_H_VALUE;
1127 			} else if (ch == '/' && chNext == '>') {
1128 				styler.ColourTo(i - 1, StateToPrint);
1129 				styler.ColourTo(i + 1, SCE_H_TAGEND);
1130 				i++;
1131 				ch = chNext;
1132 				state = SCE_H_DEFAULT;
1133 				tagOpened = false;
1134 			} else if (ch == '?' && chNext == '>') {
1135 				styler.ColourTo(i - 1, StateToPrint);
1136 				styler.ColourTo(i + 1, SCE_H_XMLEND);
1137 				i++;
1138 				ch = chNext;
1139 				state = SCE_H_DEFAULT;
1140 			} else if (ishtmlwordchar(ch)) {
1141 				styler.ColourTo(i - 1, StateToPrint);
1142 				state = SCE_H_ATTRIBUTE;
1143 			}
1144 			break;
1145 		case SCE_H_DOUBLESTRING:
1146 			if (ch == '\"') {
1147 				if (inScriptType == eNonHtmlScript) {
1148 					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1149 				}
1150 				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1151 				state = SCE_H_OTHER;
1152 			}
1153 			break;
1154 		case SCE_H_SINGLESTRING:
1155 			if (ch == '\'') {
1156 				if (inScriptType == eNonHtmlScript) {
1157 					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1158 				}
1159 				styler.ColourTo(i, SCE_H_SINGLESTRING);
1160 				state = SCE_H_OTHER;
1161 			}
1162 			break;
1163 		case SCE_H_VALUE:
1164 			if (!ishtmlwordchar(ch)) {
1165 				if (ch == '\"' && chPrev == '=') {
1166 					// Should really test for being first character
1167 					state = SCE_H_DOUBLESTRING;
1168 				} else if (ch == '\'' && chPrev == '=') {
1169 					state = SCE_H_SINGLESTRING;
1170 				} else {
1171 					if (IsNumber(styler.GetStartSegment(), styler)) {
1172 						styler.ColourTo(i - 1, SCE_H_NUMBER);
1173 					} else {
1174 						styler.ColourTo(i - 1, StateToPrint);
1175 					}
1176 					if (ch == '>') {
1177 						styler.ColourTo(i, SCE_H_TAG);
1178 						if (inScriptType == eNonHtmlScript) {
1179 							state = StateForScript(scriptLanguage);
1180 						} else {
1181 							state = SCE_H_DEFAULT;
1182 						}
1183 						tagOpened = false;
1184 						if (!tagDontFold){
1185 							if (tagClosing){
1186 								levelCurrent--;
1187 							} else {
1188 								levelCurrent++;
1189 							}
1190 						}
1191 						tagClosing = false;
1192 					} else {
1193 						state = SCE_H_OTHER;
1194 					}
1195 				}
1196 			}
1197 			break;
1198 		case SCE_HJ_DEFAULT:
1199 		case SCE_HJ_START:
1200 		case SCE_HJ_SYMBOLS:
1201 			if (iswordstart(ch)) {
1202 				styler.ColourTo(i - 1, StateToPrint);
1203 				state = SCE_HJ_WORD;
1204 			} else if (ch == '/' && chNext == '*') {
1205 				styler.ColourTo(i - 1, StateToPrint);
1206 				if (chNext2 == '*')
1207 					state = SCE_HJ_COMMENTDOC;
1208 				else
1209 					state = SCE_HJ_COMMENT;
1210 			} else if (ch == '/' && chNext == '/') {
1211 				styler.ColourTo(i - 1, StateToPrint);
1212 				state = SCE_HJ_COMMENTLINE;
1213 			} else if (ch == '/' && isOKBeforeRE(chPrevNonWhite)) {
1214 				styler.ColourTo(i - 1, StateToPrint);
1215 				state = SCE_HJ_REGEX;
1216 			} else if (ch == '\"') {
1217 				styler.ColourTo(i - 1, StateToPrint);
1218 				state = SCE_HJ_DOUBLESTRING;
1219 			} else if (ch == '\'') {
1220 				styler.ColourTo(i - 1, StateToPrint);
1221 				state = SCE_HJ_SINGLESTRING;
1222 			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1223 			           styler.SafeGetCharAt(i + 3) == '-') {
1224 				styler.ColourTo(i - 1, StateToPrint);
1225 				state = SCE_HJ_COMMENTLINE;
1226 			} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1227 				styler.ColourTo(i - 1, StateToPrint);
1228 				state = SCE_HJ_COMMENTLINE;
1229 				i += 2;
1230 			} else if (isoperator(ch)) {
1231 				styler.ColourTo(i - 1, StateToPrint);
1232 				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1233 				state = SCE_HJ_DEFAULT;
1234 			} else if ((ch == ' ') || (ch == '\t')) {
1235 				if (state == SCE_HJ_START) {
1236 					styler.ColourTo(i - 1, StateToPrint);
1237 					state = SCE_HJ_DEFAULT;
1238 				}
1239 			}
1240 			break;
1241 		case SCE_HJ_WORD:
1242 			if (!iswordchar(ch)) {
1243 				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1244 				//styler.ColourTo(i - 1, eHTJSKeyword);
1245 				state = SCE_HJ_DEFAULT;
1246 				if (ch == '/' && chNext == '*') {
1247 					if (chNext2 == '*')
1248 						state = SCE_HJ_COMMENTDOC;
1249 					else
1250 						state = SCE_HJ_COMMENT;
1251 				} else if (ch == '/' && chNext == '/') {
1252 					state = SCE_HJ_COMMENTLINE;
1253 				} else if (ch == '\"') {
1254 					state = SCE_HJ_DOUBLESTRING;
1255 				} else if (ch == '\'') {
1256 					state = SCE_HJ_SINGLESTRING;
1257 				} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1258 					styler.ColourTo(i - 1, StateToPrint);
1259 					state = SCE_HJ_COMMENTLINE;
1260 					i += 2;
1261 				} else if (isoperator(ch)) {
1262 					styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1263 					state = SCE_HJ_DEFAULT;
1264 				}
1265 			}
1266 			break;
1267 		case SCE_HJ_COMMENT:
1268 		case SCE_HJ_COMMENTDOC:
1269 			if (ch == '/' && chPrev == '*') {
1270 				styler.ColourTo(i, StateToPrint);
1271 				state = SCE_HJ_DEFAULT;
1272 				ch = ' ';
1273 			}
1274 			break;
1275 		case SCE_HJ_COMMENTLINE:
1276 			if (ch == '\r' || ch == '\n') {
1277 				styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1278 				state = SCE_HJ_DEFAULT;
1279 				ch = ' ';
1280 			}
1281 			break;
1282 		case SCE_HJ_DOUBLESTRING:
1283 			if (ch == '\\') {
1284 				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1285 					i++;
1286 				}
1287 			} else if (ch == '\"') {
1288 				styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1289 				state = SCE_HJ_DEFAULT;
1290 			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1291 				styler.ColourTo(i - 1, StateToPrint);
1292 				state = SCE_HJ_COMMENTLINE;
1293 				i += 2;
1294 			} else if (isLineEnd(ch)) {
1295 				styler.ColourTo(i - 1, StateToPrint);
1296 				state = SCE_HJ_STRINGEOL;
1297 			}
1298 			break;
1299 		case SCE_HJ_SINGLESTRING:
1300 			if (ch == '\\') {
1301 				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1302 					i++;
1303 				}
1304 			} else if (ch == '\'') {
1305 				styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
1306 				state = SCE_HJ_DEFAULT;
1307 			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1308 				styler.ColourTo(i - 1, StateToPrint);
1309 				state = SCE_HJ_COMMENTLINE;
1310 				i += 2;
1311 			} else if (isLineEnd(ch)) {
1312 				styler.ColourTo(i - 1, StateToPrint);
1313 				state = SCE_HJ_STRINGEOL;
1314 			}
1315 			break;
1316 		case SCE_HJ_STRINGEOL:
1317 			if (!isLineEnd(ch)) {
1318 				styler.ColourTo(i - 1, StateToPrint);
1319 				state = SCE_HJ_DEFAULT;
1320 			} else if (!isLineEnd(chNext)) {
1321 				styler.ColourTo(i, StateToPrint);
1322 				state = SCE_HJ_DEFAULT;
1323 			}
1324 			break;
1325 		case SCE_HJ_REGEX:
1326 			if (ch == '\r' || ch == '\n' || ch == '/') {
1327 				if (ch == '/') {
1328 					while (isascii(chNext) && islower(chNext)) {   // gobble regex flags
1329 						i++;
1330 						ch = chNext;
1331 						chNext = styler.SafeGetCharAt(i + 1);
1332 					}
1333 				}
1334 				styler.ColourTo(i, StateToPrint);
1335 				state = SCE_HJ_DEFAULT;
1336 			} else if (ch == '\\') {
1337 				// Gobble up the quoted character
1338 				if (chNext == '\\' || chNext == '/') {
1339 					i++;
1340 					ch = chNext;
1341 					chNext = styler.SafeGetCharAt(i + 1);
1342 				}
1343 			}
1344 			break;
1345 		case SCE_HB_DEFAULT:
1346 		case SCE_HB_START:
1347 			if (iswordstart(ch)) {
1348 				styler.ColourTo(i - 1, StateToPrint);
1349 				state = SCE_HB_WORD;
1350 			} else if (ch == '\'') {
1351 				styler.ColourTo(i - 1, StateToPrint);
1352 				state = SCE_HB_COMMENTLINE;
1353 			} else if (ch == '\"') {
1354 				styler.ColourTo(i - 1, StateToPrint);
1355 				state = SCE_HB_STRING;
1356 			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1357 			           styler.SafeGetCharAt(i + 3) == '-') {
1358 				styler.ColourTo(i - 1, StateToPrint);
1359 				state = SCE_HB_COMMENTLINE;
1360 			} else if (isoperator(ch)) {
1361 				styler.ColourTo(i - 1, StateToPrint);
1362 				styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1363 				state = SCE_HB_DEFAULT;
1364 			} else if ((ch == ' ') || (ch == '\t')) {
1365 				if (state == SCE_HB_START) {
1366 					styler.ColourTo(i - 1, StateToPrint);
1367 					state = SCE_HB_DEFAULT;
1368 				}
1369 			}
1370 			break;
1371 		case SCE_HB_WORD:
1372 			if (!iswordchar(ch)) {
1373 				state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1374 				if (state == SCE_HB_DEFAULT) {
1375 					if (ch == '\"') {
1376 						state = SCE_HB_STRING;
1377 					} else if (ch == '\'') {
1378 						state = SCE_HB_COMMENTLINE;
1379 					} else if (isoperator(ch)) {
1380 						styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
1381 						state = SCE_HB_DEFAULT;
1382 					}
1383 				}
1384 			}
1385 			break;
1386 		case SCE_HB_STRING:
1387 			if (ch == '\"') {
1388 				styler.ColourTo(i, StateToPrint);
1389 				state = SCE_HB_DEFAULT;
1390 			} else if (ch == '\r' || ch == '\n') {
1391 				styler.ColourTo(i - 1, StateToPrint);
1392 				state = SCE_HB_STRINGEOL;
1393 			}
1394 			break;
1395 		case SCE_HB_COMMENTLINE:
1396 			if (ch == '\r' || ch == '\n') {
1397 				styler.ColourTo(i - 1, StateToPrint);
1398 				state = SCE_HB_DEFAULT;
1399 			}
1400 			break;
1401 		case SCE_HB_STRINGEOL:
1402 			if (!isLineEnd(ch)) {
1403 				styler.ColourTo(i - 1, StateToPrint);
1404 				state = SCE_HB_DEFAULT;
1405 			} else if (!isLineEnd(chNext)) {
1406 				styler.ColourTo(i, StateToPrint);
1407 				state = SCE_HB_DEFAULT;
1408 			}
1409 			break;
1410 		case SCE_HP_DEFAULT:
1411 		case SCE_HP_START:
1412 			if (iswordstart(ch)) {
1413 				styler.ColourTo(i - 1, StateToPrint);
1414 				state = SCE_HP_WORD;
1415 			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1416 			           styler.SafeGetCharAt(i + 3) == '-') {
1417 				styler.ColourTo(i - 1, StateToPrint);
1418 				state = SCE_HP_COMMENTLINE;
1419 			} else if (ch == '#') {
1420 				styler.ColourTo(i - 1, StateToPrint);
1421 				state = SCE_HP_COMMENTLINE;
1422 			} else if (ch == '\"') {
1423 				styler.ColourTo(i - 1, StateToPrint);
1424 				if (chNext == '\"' && chNext2 == '\"') {
1425 					i += 2;
1426 					state = SCE_HP_TRIPLEDOUBLE;
1427 					ch = ' ';
1428 					chPrev = ' ';
1429 					chNext = styler.SafeGetCharAt(i + 1);
1430 				} else {
1431 					//					state = statePrintForState(SCE_HP_STRING,inScriptType);
1432 					state = SCE_HP_STRING;
1433 				}
1434 			} else if (ch == '\'') {
1435 				styler.ColourTo(i - 1, StateToPrint);
1436 				if (chNext == '\'' && chNext2 == '\'') {
1437 					i += 2;
1438 					state = SCE_HP_TRIPLE;
1439 					ch = ' ';
1440 					chPrev = ' ';
1441 					chNext = styler.SafeGetCharAt(i + 1);
1442 				} else {
1443 					state = SCE_HP_CHARACTER;
1444 				}
1445 			} else if (isoperator(ch)) {
1446 				styler.ColourTo(i - 1, StateToPrint);
1447 				styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1448 			} else if ((ch == ' ') || (ch == '\t')) {
1449 				if (state == SCE_HP_START) {
1450 					styler.ColourTo(i - 1, StateToPrint);
1451 					state = SCE_HP_DEFAULT;
1452 				}
1453 			}
1454 			break;
1455 		case SCE_HP_WORD:
1456 			if (!iswordchar(ch)) {
1457 				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType);
1458 				state = SCE_HP_DEFAULT;
1459 				if (ch == '#') {
1460 					state = SCE_HP_COMMENTLINE;
1461 				} else if (ch == '\"') {
1462 					if (chNext == '\"' && chNext2 == '\"') {
1463 						i += 2;
1464 						state = SCE_HP_TRIPLEDOUBLE;
1465 						ch = ' ';
1466 						chPrev = ' ';
1467 						chNext = styler.SafeGetCharAt(i + 1);
1468 					} else {
1469 						state = SCE_HP_STRING;
1470 					}
1471 				} else if (ch == '\'') {
1472 					if (chNext == '\'' && chNext2 == '\'') {
1473 						i += 2;
1474 						state = SCE_HP_TRIPLE;
1475 						ch = ' ';
1476 						chPrev = ' ';
1477 						chNext = styler.SafeGetCharAt(i + 1);
1478 					} else {
1479 						state = SCE_HP_CHARACTER;
1480 					}
1481 				} else if (isoperator(ch)) {
1482 					styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
1483 				}
1484 			}
1485 			break;
1486 		case SCE_HP_COMMENTLINE:
1487 			if (ch == '\r' || ch == '\n') {
1488 				styler.ColourTo(i - 1, StateToPrint);
1489 				state = SCE_HP_DEFAULT;
1490 			}
1491 			break;
1492 		case SCE_HP_STRING:
1493 			if (ch == '\\') {
1494 				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1495 					i++;
1496 					ch = chNext;
1497 					chNext = styler.SafeGetCharAt(i + 1);
1498 				}
1499 			} else if (ch == '\"') {
1500 				styler.ColourTo(i, StateToPrint);
1501 				state = SCE_HP_DEFAULT;
1502 			}
1503 			break;
1504 		case SCE_HP_CHARACTER:
1505 			if (ch == '\\') {
1506 				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1507 					i++;
1508 					ch = chNext;
1509 					chNext = styler.SafeGetCharAt(i + 1);
1510 				}
1511 			} else if (ch == '\'') {
1512 				styler.ColourTo(i, StateToPrint);
1513 				state = SCE_HP_DEFAULT;
1514 			}
1515 			break;
1516 		case SCE_HP_TRIPLE:
1517 			if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
1518 				styler.ColourTo(i, StateToPrint);
1519 				state = SCE_HP_DEFAULT;
1520 			}
1521 			break;
1522 		case SCE_HP_TRIPLEDOUBLE:
1523 			if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
1524 				styler.ColourTo(i, StateToPrint);
1525 				state = SCE_HP_DEFAULT;
1526 			}
1527 			break;
1528 			///////////// start - PHP state handling
1529 		case SCE_HPHP_WORD:
1530 			if (!iswordchar(ch)) {
1531 				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1532 				if (ch == '/' && chNext == '*') {
1533 					i++;
1534 					state = SCE_HPHP_COMMENT;
1535 				} else if (ch == '/' && chNext == '/') {
1536 					i++;
1537 					state = SCE_HPHP_COMMENTLINE;
1538 				} else if (ch == '#') {
1539 					state = SCE_HPHP_COMMENTLINE;
1540 				} else if (ch == '\"') {
1541 					state = SCE_HPHP_HSTRING;
1542 					strcpy(phpStringDelimiter, "\"");
1543 				} else if (styler.Match(i, "<<<")) {
1544 					state = SCE_HPHP_HSTRING;
1545 					i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler);
1546 				} else if (ch == '\'') {
1547 					state = SCE_HPHP_SIMPLESTRING;
1548 				} else if (ch == '$' && IsPhpWordStart(chNext)) {
1549 					state = SCE_HPHP_VARIABLE;
1550 				} else if (isoperator(ch)) {
1551 					state = SCE_HPHP_OPERATOR;
1552 				} else {
1553 					state = SCE_HPHP_DEFAULT;
1554 				}
1555 			}
1556 			break;
1557 		case SCE_HPHP_NUMBER:
1558 			// recognize bases 8,10 or 16 integers OR floating-point numbers
1559 			if (!IsADigit(ch)
1560 				&& strchr(".xXabcdefABCDEF", ch) == NULL
1561 				&& ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
1562 				styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
1563 				if (isoperator(ch))
1564 					state = SCE_HPHP_OPERATOR;
1565 				else
1566 					state = SCE_HPHP_DEFAULT;
1567 			}
1568 			break;
1569 		case SCE_HPHP_VARIABLE:
1570 			if (!IsPhpWordChar(ch)) {
1571 				styler.ColourTo(i - 1, SCE_HPHP_VARIABLE);
1572 				if (isoperator(ch))
1573 					state = SCE_HPHP_OPERATOR;
1574 				else
1575 					state = SCE_HPHP_DEFAULT;
1576 			}
1577 			break;
1578 		case SCE_HPHP_COMMENT:
1579 			if (ch == '/' && chPrev == '*') {
1580 				styler.ColourTo(i, StateToPrint);
1581 				state = SCE_HPHP_DEFAULT;
1582 			}
1583 			break;
1584 		case SCE_HPHP_COMMENTLINE:
1585 			if (ch == '\r' || ch == '\n') {
1586 				styler.ColourTo(i - 1, StateToPrint);
1587 				state = SCE_HPHP_DEFAULT;
1588 			}
1589 			break;
1590 		case SCE_HPHP_HSTRING:
1591 			if (ch == '\\' && (phpStringDelimiter[0] == '\"' || chNext == '$' || chNext == '{')) {
1592 				// skip the next char
1593 				i++;
1594 			} else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
1595 				&& IsPhpWordStart(chNext2)) {
1596 				styler.ColourTo(i - 1, StateToPrint);
1597 				state = SCE_HPHP_COMPLEX_VARIABLE;
1598 			} else if (ch == '$' && IsPhpWordStart(chNext)) {
1599 				styler.ColourTo(i - 1, StateToPrint);
1600 				state = SCE_HPHP_HSTRING_VARIABLE;
1601 			} else if (styler.Match(i, phpStringDelimiter)) {
1602 				if (strlen(phpStringDelimiter) > 1)
1603 					i += strlen(phpStringDelimiter) - 1;
1604 				styler.ColourTo(i, StateToPrint);
1605 				state = SCE_HPHP_DEFAULT;
1606 			}
1607 			break;
1608 		case SCE_HPHP_SIMPLESTRING:
1609 			if (ch == '\\') {
1610 				// skip the next char
1611 				i++;
1612 			} else if (ch == '\'') {
1613 				styler.ColourTo(i, StateToPrint);
1614 				state = SCE_HPHP_DEFAULT;
1615 			}
1616 			break;
1617 		case SCE_HPHP_HSTRING_VARIABLE:
1618 			if (!IsPhpWordChar(ch)) {
1619 				styler.ColourTo(i - 1, StateToPrint);
1620 				i--; // strange but it works
1621 				state = SCE_HPHP_HSTRING;
1622 			}
1623 			break;
1624 		case SCE_HPHP_COMPLEX_VARIABLE:
1625 			if (ch == '}') {
1626 				styler.ColourTo(i, StateToPrint);
1627 				state = SCE_HPHP_HSTRING;
1628 			}
1629 			break;
1630 		case SCE_HPHP_OPERATOR:
1631 		case SCE_HPHP_DEFAULT:
1632 			styler.ColourTo(i - 1, StateToPrint);
1633 			if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
1634 				state = SCE_HPHP_NUMBER;
1635 			} else if (iswordstart(ch)) {
1636 				state = SCE_HPHP_WORD;
1637 			} else if (ch == '/' && chNext == '*') {
1638 				i++;
1639 				state = SCE_HPHP_COMMENT;
1640 			} else if (ch == '/' && chNext == '/') {
1641 				i++;
1642 				state = SCE_HPHP_COMMENTLINE;
1643 			} else if (ch == '#') {
1644 				state = SCE_HPHP_COMMENTLINE;
1645 			} else if (ch == '\"') {
1646 				state = SCE_HPHP_HSTRING;
1647 				strcpy(phpStringDelimiter, "\"");
1648 			} else if (styler.Match(i, "<<<")) {
1649 				state = SCE_HPHP_HSTRING;
1650 				i = FindPhpStringDelimiter(phpStringDelimiter, sizeof(phpStringDelimiter), i + 3, lengthDoc, styler);
1651 			} else if (ch == '\'') {
1652 				state = SCE_HPHP_SIMPLESTRING;
1653 			} else if (ch == '$' && IsPhpWordStart(chNext)) {
1654 				state = SCE_HPHP_VARIABLE;
1655 			} else if (isoperator(ch)) {
1656 				state = SCE_HPHP_OPERATOR;
1657 			} else if ((state == SCE_HPHP_OPERATOR) && (isspacechar(ch))) {
1658 				state = SCE_HPHP_DEFAULT;
1659 			}
1660 			break;
1661 			///////////// end - PHP state handling
1662 		}
1663 
1664 		// Some of the above terminated their lexeme but since the same character starts
1665 		// the same class again, only reenter if non empty segment.
1666 
1667 		bool nonEmptySegment = i >= static_cast<int>(styler.GetStartSegment());
1668 		if (state == SCE_HB_DEFAULT) {    // One of the above succeeded
1669 			if ((ch == '\"') && (nonEmptySegment)) {
1670 				state = SCE_HB_STRING;
1671 			} else if (ch == '\'') {
1672 				state = SCE_HB_COMMENTLINE;
1673 			} else if (iswordstart(ch)) {
1674 				state = SCE_HB_WORD;
1675 			} else if (isoperator(ch)) {
1676 				styler.ColourTo(i, SCE_HB_DEFAULT);
1677 			}
1678 		} else if (state == SCE_HBA_DEFAULT) {    // One of the above succeeded
1679 			if ((ch == '\"') && (nonEmptySegment)) {
1680 				state = SCE_HBA_STRING;
1681 			} else if (ch == '\'') {
1682 				state = SCE_HBA_COMMENTLINE;
1683 			} else if (iswordstart(ch)) {
1684 				state = SCE_HBA_WORD;
1685 			} else if (isoperator(ch)) {
1686 				styler.ColourTo(i, SCE_HBA_DEFAULT);
1687 			}
1688 		} else if (state == SCE_HJ_DEFAULT) {    // One of the above succeeded
1689 			if (ch == '/' && chNext == '*') {
1690 				if (styler.SafeGetCharAt(i + 2) == '*')
1691 					state = SCE_HJ_COMMENTDOC;
1692 				else
1693 					state = SCE_HJ_COMMENT;
1694 			} else if (ch == '/' && chNext == '/') {
1695 				state = SCE_HJ_COMMENTLINE;
1696 			} else if ((ch == '\"') && (nonEmptySegment)) {
1697 				state = SCE_HJ_DOUBLESTRING;
1698 			} else if ((ch == '\'') && (nonEmptySegment)) {
1699 				state = SCE_HJ_SINGLESTRING;
1700 			} else if (iswordstart(ch)) {
1701 				state = SCE_HJ_WORD;
1702 			} else if (isoperator(ch)) {
1703 				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1704 			}
1705 		}
1706 	}
1707 
1708 	StateToPrint = statePrintForState(state, inScriptType);
1709 		styler.ColourTo(lengthDoc - 1, StateToPrint);
1710 
1711 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
1712 	if (fold) {
1713 		int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
1714 		styler.SetLevel(lineCurrent, levelPrev | flagsNext);
1715 	}
1716 }
1717 
isASPScript(int state)1718 static bool isASPScript(int state) {
1719 	return
1720 		(state >= SCE_HJA_START && state <= SCE_HJA_REGEX) ||
1721 		(state >= SCE_HBA_START && state <= SCE_HBA_STRINGEOL) ||
1722 		(state >= SCE_HPA_DEFAULT && state <= SCE_HPA_IDENTIFIER);
1723 }
1724 
ColouriseHBAPiece(StyleContext & sc,WordList * keywordlists[])1725 static void ColouriseHBAPiece(StyleContext &sc, WordList *keywordlists[]) {
1726 	WordList &keywordsVBS = *keywordlists[2];
1727 	if (sc.state == SCE_HBA_WORD) {
1728 		if (!IsAWordChar(sc.ch)) {
1729 			char s[100];
1730 			sc.GetCurrentLowered(s, sizeof(s));
1731 			if (keywordsVBS.InList(s)) {
1732 				if (strcmp(s, "rem") == 0) {
1733 					sc.ChangeState(SCE_HBA_COMMENTLINE);
1734 					if (sc.atLineEnd) {
1735 						sc.SetState(SCE_HBA_DEFAULT);
1736 					}
1737 				} else {
1738 					sc.SetState(SCE_HBA_DEFAULT);
1739 				}
1740 			} else {
1741 				sc.ChangeState(SCE_HBA_IDENTIFIER);
1742 				sc.SetState(SCE_HBA_DEFAULT);
1743 			}
1744 		}
1745 	} else if (sc.state == SCE_HBA_NUMBER) {
1746 		if (!IsAWordChar(sc.ch)) {
1747 			sc.SetState(SCE_HBA_DEFAULT);
1748 		}
1749 	} else if (sc.state == SCE_HBA_STRING) {
1750 		if (sc.ch == '\"') {
1751 			sc.ForwardSetState(SCE_HBA_DEFAULT);
1752 		} else if (sc.ch == '\r' || sc.ch == '\n') {
1753 			sc.ChangeState(SCE_HBA_STRINGEOL);
1754 			sc.ForwardSetState(SCE_HBA_DEFAULT);
1755 		}
1756 	} else if (sc.state == SCE_HBA_COMMENTLINE) {
1757 		if (sc.ch == '\r' || sc.ch == '\n') {
1758 			sc.SetState(SCE_HBA_DEFAULT);
1759 		}
1760 	}
1761 
1762 	if (sc.state == SCE_HBA_DEFAULT) {
1763 		if (IsADigit(sc.ch) || (sc.ch == '.' && IsADigit(sc.chNext))) {
1764 			sc.SetState(SCE_HBA_NUMBER);
1765 		} else if (IsAWordStart(sc.ch)) {
1766 			sc.SetState(SCE_HBA_WORD);
1767 		} else if (sc.ch == '\'') {
1768 			sc.SetState(SCE_HBA_COMMENTLINE);
1769 		} else if (sc.ch == '\"') {
1770 			sc.SetState(SCE_HBA_STRING);
1771 		}
1772 	}
1773 }
1774 
ColouriseHTMLPiece(StyleContext & sc,WordList * keywordlists[])1775 static void ColouriseHTMLPiece(StyleContext &sc, WordList *keywordlists[]) {
1776 	WordList &keywordsTags = *keywordlists[0];
1777 	if (sc.state == SCE_H_COMMENT) {
1778 		if (sc.Match("-->")) {
1779 			sc.Forward();
1780 			sc.Forward();
1781 			sc.ForwardSetState(SCE_H_DEFAULT);
1782 		}
1783 	} else if (sc.state == SCE_H_ENTITY) {
1784 		if (sc.ch == ';') {
1785 			sc.ForwardSetState(SCE_H_DEFAULT);
1786 		} else if (sc.ch != '#' && (sc.ch < 0x80) && !isalnum(sc.ch)	// Should check that '#' follows '&', but it is unlikely anyway...
1787 			&& sc.ch != '.' && sc.ch != '-' && sc.ch != '_' && sc.ch != ':') { // valid in XML
1788 			sc.ChangeState(SCE_H_TAGUNKNOWN);
1789 			sc.SetState(SCE_H_DEFAULT);
1790 		}
1791 	} else if (sc.state == SCE_H_TAGUNKNOWN) {
1792 		if (!ishtmlwordchar(static_cast<char>(sc.ch)) && !((sc.ch == '/') && (sc.chPrev == '<')) && sc.ch != '[') {
1793 			char s[100];
1794 			sc.GetCurrentLowered(s, sizeof(s));
1795 			if (s[1] == '/') {
1796 				if (keywordsTags.InList(s + 2)) {
1797 					sc.ChangeState(SCE_H_TAG);
1798 				}
1799 			} else {
1800 				if (keywordsTags.InList(s + 1)) {
1801 					sc.ChangeState(SCE_H_TAG);
1802 				}
1803 			}
1804 			if (sc.ch == '>') {
1805 				sc.ForwardSetState(SCE_H_DEFAULT);
1806 			} else if (sc.Match('/', '>')) {
1807 				sc.SetState(SCE_H_TAGEND);
1808 				sc.Forward();
1809 				sc.ForwardSetState(SCE_H_DEFAULT);
1810 			} else {
1811 				sc.SetState(SCE_H_OTHER);
1812 			}
1813 		}
1814 	} else if (sc.state == SCE_H_ATTRIBUTE) {
1815 		if (!ishtmlwordchar(static_cast<char>(sc.ch))) {
1816 			char s[100];
1817 			sc.GetCurrentLowered(s, sizeof(s));
1818 			if (!keywordsTags.InList(s)) {
1819 				sc.ChangeState(SCE_H_ATTRIBUTEUNKNOWN);
1820 			}
1821 			sc.SetState(SCE_H_OTHER);
1822 		}
1823 	} else if (sc.state == SCE_H_OTHER) {
1824 		if (sc.ch == '>') {
1825 			sc.SetState(SCE_H_TAG);
1826 			sc.ForwardSetState(SCE_H_DEFAULT);
1827 		} else if (sc.Match('/', '>')) {
1828 			sc.SetState(SCE_H_TAG);
1829 			sc.Forward();
1830 			sc.ForwardSetState(SCE_H_DEFAULT);
1831 		} else if (sc.chPrev == '=') {
1832 			sc.SetState(SCE_H_VALUE);
1833 		}
1834 	} else if (sc.state == SCE_H_DOUBLESTRING) {
1835 		if (sc.ch == '\"') {
1836 			sc.ForwardSetState(SCE_H_OTHER);
1837 		}
1838 	} else if (sc.state == SCE_H_SINGLESTRING) {
1839 		if (sc.ch == '\'') {
1840 			sc.ForwardSetState(SCE_H_OTHER);
1841 		}
1842 	} else if (sc.state == SCE_H_NUMBER) {
1843 		if (!IsADigit(sc.ch)) {
1844 			sc.SetState(SCE_H_OTHER);
1845 		}
1846 	}
1847 
1848 	if (sc.state == SCE_H_DEFAULT) {
1849 		if (sc.ch == '<') {
1850 			if (sc.Match("<!--"))
1851 				sc.SetState(SCE_H_COMMENT);
1852 			else
1853 				sc.SetState(SCE_H_TAGUNKNOWN);
1854 		} else if (sc.ch == '&') {
1855 			sc.SetState(SCE_H_ENTITY);
1856 		}
1857 	} else if ((sc.state == SCE_H_OTHER) || (sc.state == SCE_H_VALUE)) {
1858 		if (sc.ch == '\"' && sc.chPrev == '=') {
1859 			sc.SetState(SCE_H_DOUBLESTRING);
1860 		} else if (sc.ch == '\'' && sc.chPrev == '=') {
1861 			sc.SetState(SCE_H_SINGLESTRING);
1862 		} else if (IsADigit(sc.ch)) {
1863 			sc.SetState(SCE_H_NUMBER);
1864 		} else if (sc.ch == '>') {
1865 			sc.SetState(SCE_H_TAG);
1866 			sc.ForwardSetState(SCE_H_DEFAULT);
1867 		} else if (ishtmlwordchar(static_cast<char>(sc.ch))) {
1868 			sc.SetState(SCE_H_ATTRIBUTE);
1869 		}
1870 	}
1871 }
1872 
ColouriseASPPiece(StyleContext & sc,WordList * keywordlists[])1873 static void ColouriseASPPiece(StyleContext &sc, WordList *keywordlists[]) {
1874 	// Possibly exit current state to either SCE_H_DEFAULT or SCE_HBA_DEFAULT
1875 	if ((sc.state == SCE_H_ASPAT || isASPScript(sc.state)) && sc.Match('%', '>')) {
1876 		sc.SetState(SCE_H_ASP);
1877 		sc.Forward();
1878 		sc.ForwardSetState(SCE_H_DEFAULT);
1879 	}
1880 
1881 	// Handle some ASP script
1882 	if (sc.state >= SCE_HBA_START && sc.state <= SCE_HBA_STRINGEOL) {
1883 		ColouriseHBAPiece(sc, keywordlists);
1884 	} else if (sc.state >= SCE_H_DEFAULT && sc.state <= SCE_H_SGML_BLOCK_DEFAULT) {
1885 		ColouriseHTMLPiece(sc, keywordlists);
1886 	}
1887 
1888 	// Enter new sc.state
1889 	if ((sc.state == SCE_H_DEFAULT) || (sc.state == SCE_H_TAGUNKNOWN)) {
1890 		if (sc.Match('<', '%')) {
1891 			if (sc.state == SCE_H_TAGUNKNOWN)
1892 				sc.ChangeState(SCE_H_ASP);
1893 			else
1894 				sc.SetState(SCE_H_ASP);
1895 			sc.Forward();
1896 			sc.Forward();
1897 			if (sc.ch == '@') {
1898 				sc.ForwardSetState(SCE_H_ASPAT);
1899 			} else {
1900 				if (sc.ch == '=') {
1901 					sc.Forward();
1902 				}
1903 				sc.SetState(SCE_HBA_DEFAULT);
1904 			}
1905 		}
1906 	}
1907 }
1908 
ColouriseASPDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)1909 static void ColouriseASPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
1910                                   Accessor &styler) {
1911 	// Lexer for HTML requires more lexical states (7 bits worth) than most lexers
1912 	StyleContext sc(startPos, length, initStyle, styler, 0x7f);
1913 	for (; sc.More(); sc.Forward()) {
1914 		ColouriseASPPiece(sc, keywordlists);
1915 	}
1916 	sc.Complete();
1917 }
1918 
ColourisePHPPiece(StyleContext & sc,WordList * keywordlists[])1919 static void ColourisePHPPiece(StyleContext &sc, WordList *keywordlists[]) {
1920 	// Possibly exit current state to either SCE_H_DEFAULT or SCE_HBA_DEFAULT
1921 	if (sc.state >= SCE_HPHP_DEFAULT && sc.state <= SCE_HPHP_OPERATOR) {
1922 		if (!isPHPStringState(sc.state) &&
1923 			(sc.state != SCE_HPHP_COMMENT) &&
1924 			(sc.Match('?', '>'))) {
1925 			sc.SetState(SCE_H_QUESTION);
1926 			sc.Forward();
1927 			sc.ForwardSetState(SCE_H_DEFAULT);
1928 		}
1929 	}
1930 
1931 	if (sc.state >= SCE_H_DEFAULT && sc.state <= SCE_H_SGML_BLOCK_DEFAULT) {
1932 		ColouriseHTMLPiece(sc, keywordlists);
1933 	}
1934 
1935 	// Handle some PHP script
1936 	if (sc.state == SCE_HPHP_WORD) {
1937 		if (!IsPhpWordChar(static_cast<char>(sc.ch))) {
1938 			sc.SetState(SCE_HPHP_DEFAULT);
1939 		}
1940 	} else if (sc.state == SCE_HPHP_COMMENTLINE) {
1941 		if (sc.ch == '\r' || sc.ch == '\n') {
1942 			sc.SetState(SCE_HPHP_DEFAULT);
1943 		}
1944 	} else if (sc.state == SCE_HPHP_COMMENT) {
1945 		if (sc.Match('*', '/')) {
1946 			sc.Forward();
1947 			sc.Forward();
1948 			sc.SetState(SCE_HPHP_DEFAULT);
1949 		}
1950 	} else if (sc.state == SCE_HPHP_HSTRING) {
1951 		if (sc.ch == '\"') {
1952 			sc.ForwardSetState(SCE_HPHP_DEFAULT);
1953 		}
1954 	} else if (sc.state == SCE_HPHP_SIMPLESTRING) {
1955 		if (sc.ch == '\'') {
1956 			sc.ForwardSetState(SCE_HPHP_DEFAULT);
1957 		}
1958 	} else if (sc.state == SCE_HPHP_VARIABLE) {
1959 		if (!IsPhpWordChar(static_cast<char>(sc.ch))) {
1960 			sc.SetState(SCE_HPHP_DEFAULT);
1961 		}
1962 	} else if (sc.state == SCE_HPHP_OPERATOR) {
1963 		sc.SetState(SCE_HPHP_DEFAULT);
1964 	}
1965 
1966 	// Enter new sc.state
1967 	if ((sc.state == SCE_H_DEFAULT) || (sc.state == SCE_H_TAGUNKNOWN)) {
1968 		if (sc.Match("<?php")) {
1969 			sc.SetState(SCE_H_QUESTION);
1970 			sc.Forward();
1971 			sc.Forward();
1972 			sc.Forward();
1973 			sc.Forward();
1974 			sc.Forward();
1975 			sc.SetState(SCE_HPHP_DEFAULT);
1976 		}
1977 	}
1978 	if (sc.state == SCE_HPHP_DEFAULT) {
1979 		if (IsPhpWordStart(static_cast<char>(sc.ch))) {
1980 			sc.SetState(SCE_HPHP_WORD);
1981 		} else if (sc.ch == '#') {
1982 			sc.SetState(SCE_HPHP_COMMENTLINE);
1983 		} else if (sc.Match("<!--")) {
1984 			sc.SetState(SCE_HPHP_COMMENTLINE);
1985 		} else if (sc.Match('/', '/')) {
1986 			sc.SetState(SCE_HPHP_COMMENTLINE);
1987 		} else if (sc.Match('/', '*')) {
1988 			sc.SetState(SCE_HPHP_COMMENT);
1989 		} else if (sc.ch == '\"') {
1990 			sc.SetState(SCE_HPHP_HSTRING);
1991 		} else if (sc.ch == '\'') {
1992 			sc.SetState(SCE_HPHP_SIMPLESTRING);
1993 		} else if (sc.ch == '$' && IsPhpWordStart(static_cast<char>(sc.chNext))) {
1994 			sc.SetState(SCE_HPHP_VARIABLE);
1995 		} else if (isoperator(static_cast<char>(sc.ch))) {
1996 			sc.SetState(SCE_HPHP_OPERATOR);
1997 		}
1998 	}
1999 }
2000 
ColourisePHPDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)2001 static void ColourisePHPDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2002                                   Accessor &styler) {
2003 	// Lexer for HTML requires more lexical states (7 bits worth) than most lexers
2004 	StyleContext sc(startPos, length, initStyle, styler, 0x7f);
2005 	for (; sc.More(); sc.Forward()) {
2006 		ColourisePHPPiece(sc, keywordlists);
2007 	}
2008 	sc.Complete();
2009 }
2010 
ColourisePHPScriptDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)2011 static void ColourisePHPScriptDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
2012                                                Accessor &styler) {
2013 	if(startPos == 0) initStyle = SCE_HPHP_DEFAULT;
2014 		ColouriseHyperTextDoc(startPos,length,initStyle,keywordlists,styler);
2015 }
2016 
// Descriptions of the keyword lists used by the hypertext, xml, asp and php
// lexer modules registered below; the list is terminated by a 0 entry.
// The position of each entry matches the keywordlists index read by the
// lexer code: ColouriseHTMLPiece takes its tag/attribute names from
// keywordlists[0] and ColouriseHBAPiece takes its VBScript keywords from
// keywordlists[2].
static const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",
	"JavaScript keywords",
	"VBScript keywords",
	"Python keywords",
	"PHP keywords",
	"SGML and DTD keywords",
	0,
};
2026 
// Descriptions of the keyword lists for the phpscript lexer module; the list
// is terminated by a 0 entry. It has the same number of entries as
// htmlWordListDesc, with "PHP keywords" at the same position and every other
// entry left as an empty description.
static const char * const phpscriptWordListDesc[] = {
	"", //Unused
	"", //Unused
	"", //Unused
	"", //Unused
	"PHP keywords",
	"", //Unused
	0,
};
2036 
// Lexer registrations: each LexerModule associates a SCLEX_* identifier and a
// language name with the function that colourises it and with the
// descriptions of its keyword lists.
// NOTE(review): the fourth argument (0) is presumably the fold function and
// the trailing 7 presumably the number of style bits - confirm against the
// LexerModule constructor.
LexerModule lmHTML(SCLEX_HTML, ColouriseHyperTextDoc, "hypertext", 0, htmlWordListDesc, 7);
LexerModule lmXML(SCLEX_XML, ColouriseHyperTextDoc, "xml", 0, htmlWordListDesc, 7);
// SCLEX_ASP and SCLEX_PHP should not be used in new code: use SCLEX_HTML instead.
LexerModule lmASP(SCLEX_ASP, ColouriseASPDoc, "asp", 0, htmlWordListDesc, 7);
LexerModule lmPHP(SCLEX_PHP, ColourisePHPDoc, "php", 0, htmlWordListDesc, 7);
LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, ColourisePHPScriptDoc, "phpscript", 0, phpscriptWordListDesc, 7);
2043