1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3  ** Lexer for HTML.
4  **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
14 #include <string>
15 #include <map>
16 #include <set>
17 
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 #include "StringCopy.h"
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 #include "OptionSet.h"
29 #include "DefaultLexer.h"
30 
31 using namespace Scintilla;
32 
33 namespace {
34 
// Distance from each client-side script style range to the matching
// server-side (ASP) style range. statePrintForState() adds these offsets and
// stateForPrintState() subtracts them again.
#define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
#define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
#define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
38 
// Kind of embedded content; StateForScript() maps these to an initial style
// and ScriptOfState() maps a style back to one of them.
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock,
	eScriptComment
};

// Context a script is found in. statePrintForState() leaves script styles
// unshifted only for eNonHtmlScript; every other mode gets the ASP offset.
enum script_mode {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
41 
// Return true when ch may appear inside a word: an ASCII letter or digit,
// '.' or '_'.
// Values outside 0..0x7F are rejected before isalnum() is reached. Callers
// pass plain char values, which are negative for bytes >= 0x80 where char is
// signed, and isalnum() is undefined for negative arguments other than EOF.
inline bool IsAWordChar(const int ch) {
	if ((ch < 0) || (ch >= 0x80))
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
45 
// Return true when ch may begin a word: an ASCII letter or digit, or '_'.
// Values outside 0..0x7F are rejected before isalnum() is reached. Callers
// pass plain char values, which are negative for bytes >= 0x80 where char is
// signed, and isalnum() is undefined for negative arguments other than EOF.
inline bool IsAWordStart(const int ch) {
	if ((ch < 0) || (ch >= 0x80))
		return false;
	return isalnum(ch) || ch == '_';
}
49 
IsOperator(int ch)50 inline bool IsOperator(int ch) {
51 	if (IsASCII(ch) && isalnum(ch))
52 		return false;
53 	// '.' left out as it is used to make up numbers
54 	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
55 	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
56 	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
57 	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
58 	        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
59 	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
60 		return true;
61 	return false;
62 }
63 
GetTextSegment(Accessor & styler,Sci_PositionU start,Sci_PositionU end,char * s,size_t len)64 void GetTextSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end, char *s, size_t len) {
65 	Sci_PositionU i = 0;
66 	for (; (i < end - start + 1) && (i < len-1); i++) {
67 		s[i] = MakeLowerCase(styler[start + i]);
68 	}
69 	s[i] = '\0';
70 }
71 
GetStringSegment(Accessor & styler,Sci_PositionU start,Sci_PositionU end)72 std::string GetStringSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
73 	std::string s;
74 	Sci_PositionU i = 0;
75 	for (; (i < end - start + 1); i++) {
76 		s.push_back(MakeLowerCase(styler[start + i]));
77 	}
78 	return s;
79 }
80 
GetNextWord(Accessor & styler,Sci_PositionU start)81 std::string GetNextWord(Accessor &styler, Sci_PositionU start) {
82 	std::string ret;
83 	Sci_PositionU i = 0;
84 	for (; i < 200; i++) {	// Put an upper limit to bound time taken for unexpected text.
85 		const char ch = styler.SafeGetCharAt(start + i);
86 		if ((i == 0) && !IsAWordStart(ch))
87 			break;
88 		if ((i > 0) && !IsAWordChar(ch))
89 			break;
90 		ret.push_back(ch);
91 	}
92 	return ret;
93 }
94 
segIsScriptingIndicator(Accessor & styler,Sci_PositionU start,Sci_PositionU end,script_type prevValue)95 script_type segIsScriptingIndicator(Accessor &styler, Sci_PositionU start, Sci_PositionU end, script_type prevValue) {
96 	char s[100];
97 	GetTextSegment(styler, start, end, s, sizeof(s));
98 	//Platform::DebugPrintf("Scripting indicator [%s]\n", s);
99 	if (strstr(s, "src"))	// External script
100 		return eScriptNone;
101 	if (strstr(s, "vbs"))
102 		return eScriptVBS;
103 	if (strstr(s, "pyth"))
104 		return eScriptPython;
105 	if (strstr(s, "javas"))
106 		return eScriptJS;
107 	if (strstr(s, "jscr"))
108 		return eScriptJS;
109 	if (strstr(s, "php"))
110 		return eScriptPHP;
111 	if (strstr(s, "xml")) {
112 		const char *xml = strstr(s, "xml");
113 		for (const char *t=s; t<xml; t++) {
114 			if (!IsASpace(*t)) {
115 				return prevValue;
116 			}
117 		}
118 		return eScriptXML;
119 	}
120 
121 	return prevValue;
122 }
123 
PrintScriptingIndicatorOffset(Accessor & styler,Sci_PositionU start,Sci_PositionU end)124 int PrintScriptingIndicatorOffset(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
125 	int iResult = 0;
126 	std::string s = GetStringSegment(styler, start, end);
127 	if (0 == strncmp(s.c_str(), "php", 3)) {
128 		iResult = 3;
129 	}
130 	return iResult;
131 }
132 
ScriptOfState(int state)133 script_type ScriptOfState(int state) {
134 	if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
135 		return eScriptPython;
136 	} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
137 		return eScriptVBS;
138 	} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
139 		return eScriptJS;
140 	} else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
141 		return eScriptPHP;
142 	} else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
143 		return eScriptSGML;
144 	} else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
145 		return eScriptSGMLblock;
146 	} else {
147 		return eScriptNone;
148 	}
149 }
150 
statePrintForState(int state,script_mode inScriptType)151 int statePrintForState(int state, script_mode inScriptType) {
152 	int StateToPrint = state;
153 
154 	if (state >= SCE_HJ_START) {
155 		if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
156 			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
157 		} else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
158 			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
159 		} else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
160 			StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
161 		}
162 	}
163 
164 	return StateToPrint;
165 }
166 
stateForPrintState(int StateToPrint)167 int stateForPrintState(int StateToPrint) {
168 	int state;
169 
170 	if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
171 		state = StateToPrint - SCE_HA_PYTHON;
172 	} else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
173 		state = StateToPrint - SCE_HA_VBS;
174 	} else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
175 		state = StateToPrint - SCE_HA_JS;
176 	} else {
177 		state = StateToPrint;
178 	}
179 
180 	return state;
181 }
182 
IsNumber(Sci_PositionU start,Accessor & styler)183 inline bool IsNumber(Sci_PositionU start, Accessor &styler) {
184 	return IsADigit(styler[start]) || (styler[start] == '.') ||
185 	       (styler[start] == '-') || (styler[start] == '#');
186 }
187 
isStringState(int state)188 inline bool isStringState(int state) {
189 	bool bResult;
190 
191 	switch (state) {
192 	case SCE_HJ_DOUBLESTRING:
193 	case SCE_HJ_SINGLESTRING:
194 	case SCE_HJA_DOUBLESTRING:
195 	case SCE_HJA_SINGLESTRING:
196 	case SCE_HB_STRING:
197 	case SCE_HBA_STRING:
198 	case SCE_HP_STRING:
199 	case SCE_HP_CHARACTER:
200 	case SCE_HP_TRIPLE:
201 	case SCE_HP_TRIPLEDOUBLE:
202 	case SCE_HPA_STRING:
203 	case SCE_HPA_CHARACTER:
204 	case SCE_HPA_TRIPLE:
205 	case SCE_HPA_TRIPLEDOUBLE:
206 	case SCE_HPHP_HSTRING:
207 	case SCE_HPHP_SIMPLESTRING:
208 	case SCE_HPHP_HSTRING_VARIABLE:
209 	case SCE_HPHP_COMPLEX_VARIABLE:
210 		bResult = true;
211 		break;
212 	default :
213 		bResult = false;
214 		break;
215 	}
216 	return bResult;
217 }
218 
stateAllowsTermination(int state)219 inline bool stateAllowsTermination(int state) {
220 	bool allowTermination = !isStringState(state);
221 	if (allowTermination) {
222 		switch (state) {
223 		case SCE_HB_COMMENTLINE:
224 		case SCE_HPHP_COMMENT:
225 		case SCE_HP_COMMENTLINE:
226 		case SCE_HPA_COMMENTLINE:
227 			allowTermination = false;
228 		}
229 	}
230 	return allowTermination;
231 }
232 
233 // not really well done, since it's only comments that should lex the %> and <%
isCommentASPState(int state)234 inline bool isCommentASPState(int state) {
235 	bool bResult;
236 
237 	switch (state) {
238 	case SCE_HJ_COMMENT:
239 	case SCE_HJ_COMMENTLINE:
240 	case SCE_HJ_COMMENTDOC:
241 	case SCE_HB_COMMENTLINE:
242 	case SCE_HP_COMMENTLINE:
243 	case SCE_HPHP_COMMENT:
244 	case SCE_HPHP_COMMENTLINE:
245 		bResult = true;
246 		break;
247 	default :
248 		bResult = false;
249 		break;
250 	}
251 	return bResult;
252 }
253 
classifyAttribHTML(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler)254 void classifyAttribHTML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
255 	const bool wordIsNumber = IsNumber(start, styler);
256 	char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
257 	if (wordIsNumber) {
258 		chAttr = SCE_H_NUMBER;
259 	} else {
260 		std::string s = GetStringSegment(styler, start, end);
261 		if (keywords.InList(s.c_str()))
262 			chAttr = SCE_H_ATTRIBUTE;
263 	}
264 	if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
265 		// No keywords -> all are known
266 		chAttr = SCE_H_ATTRIBUTE;
267 	styler.ColourTo(end, chAttr);
268 }
269 
classifyTagHTML(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler,bool & tagDontFold,bool caseSensitive,bool isXml,bool allowScripts,const std::set<std::string> & nonFoldingTags)270 int classifyTagHTML(Sci_PositionU start, Sci_PositionU end,
271                            const WordList &keywords, Accessor &styler, bool &tagDontFold,
272                     bool caseSensitive, bool isXml, bool allowScripts,
273                     const std::set<std::string> &nonFoldingTags) {
274 	std::string tag;
275 	// Copy after the '<'
276 	for (Sci_PositionU cPos = start; cPos <= end; cPos++) {
277 		const char ch = styler[cPos];
278 		if ((ch != '<') && (ch != '/')) {
279 			tag.push_back(caseSensitive ? ch : MakeLowerCase(ch));
280 		}
281 	}
282 	// if the current language is XML, I can fold any tag
283 	// if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
284 	//...to find it in the list of no-container-tags
285 	tagDontFold = (!isXml) && (nonFoldingTags.count(tag) > 0);
286 	// No keywords -> all are known
287 	char chAttr = SCE_H_TAGUNKNOWN;
288 	if (!tag.empty() && (tag[0] == '!')) {
289 		chAttr = SCE_H_SGML_DEFAULT;
290 	} else if (!keywords || keywords.InList(tag.c_str())) {
291 		chAttr = SCE_H_TAG;
292 	}
293 	styler.ColourTo(end, chAttr);
294 	if (chAttr == SCE_H_TAG) {
295 		if (allowScripts && (tag == "script")) {
296 			// check to see if this is a self-closing tag by sniffing ahead
297 			bool isSelfClose = false;
298 			for (Sci_PositionU cPos = end; cPos <= end + 200; cPos++) {
299 				const char ch = styler.SafeGetCharAt(cPos, '\0');
300 				if (ch == '\0' || ch == '>')
301 					break;
302 				else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
303 					isSelfClose = true;
304 					break;
305 				}
306 			}
307 
308 			// do not enter a script state if the tag self-closed
309 			if (!isSelfClose)
310 				chAttr = SCE_H_SCRIPT;
311 		} else if (!isXml && (tag == "comment")) {
312 			chAttr = SCE_H_COMMENT;
313 		}
314 	}
315 	return chAttr;
316 }
317 
classifyWordHTJS(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler,script_mode inScriptType)318 void classifyWordHTJS(Sci_PositionU start, Sci_PositionU end,
319                              const WordList &keywords, Accessor &styler, script_mode inScriptType) {
320 	char s[30 + 1];
321 	Sci_PositionU i = 0;
322 	for (; i < end - start + 1 && i < 30; i++) {
323 		s[i] = styler[start + i];
324 	}
325 	s[i] = '\0';
326 
327 	char chAttr = SCE_HJ_WORD;
328 	const bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
329 	if (wordIsNumber) {
330 		chAttr = SCE_HJ_NUMBER;
331 	} else if (keywords.InList(s)) {
332 		chAttr = SCE_HJ_KEYWORD;
333 	}
334 	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
335 }
336 
classifyWordHTVB(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler,script_mode inScriptType)337 int classifyWordHTVB(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, script_mode inScriptType) {
338 	char chAttr = SCE_HB_IDENTIFIER;
339 	const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
340 	if (wordIsNumber) {
341 		chAttr = SCE_HB_NUMBER;
342 	} else {
343 		std::string s = GetStringSegment(styler, start, end);
344 		if (keywords.InList(s.c_str())) {
345 			chAttr = SCE_HB_WORD;
346 			if (s == "rem")
347 				chAttr = SCE_HB_COMMENTLINE;
348 		}
349 	}
350 	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
351 	if (chAttr == SCE_HB_COMMENTLINE)
352 		return SCE_HB_COMMENTLINE;
353 	else
354 		return SCE_HB_DEFAULT;
355 }
356 
classifyWordHTPy(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler,std::string & prevWord,script_mode inScriptType,bool isMako)357 void classifyWordHTPy(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, std::string &prevWord, script_mode inScriptType, bool isMako) {
358 	const bool wordIsNumber = IsADigit(styler[start]);
359 	std::string s;
360 	for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
361 		s.push_back(styler[start + i]);
362 	}
363 	char chAttr = SCE_HP_IDENTIFIER;
364 	if (prevWord == "class")
365 		chAttr = SCE_HP_CLASSNAME;
366 	else if (prevWord == "def")
367 		chAttr = SCE_HP_DEFNAME;
368 	else if (wordIsNumber)
369 		chAttr = SCE_HP_NUMBER;
370 	else if (keywords.InList(s.c_str()))
371 		chAttr = SCE_HP_WORD;
372 	else if (isMako && (s == "block"))
373 		chAttr = SCE_HP_WORD;
374 	styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
375 	prevWord = s;
376 }
377 
378 // Update the word colour to default or keyword
379 // Called when in a PHP word
classifyWordHTPHP(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler)380 void classifyWordHTPHP(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
381 	char chAttr = SCE_HPHP_DEFAULT;
382 	const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
383 	if (wordIsNumber) {
384 		chAttr = SCE_HPHP_NUMBER;
385 	} else {
386 		std::string s = GetStringSegment(styler, start, end);
387 		if (keywords.InList(s.c_str()))
388 			chAttr = SCE_HPHP_WORD;
389 	}
390 	styler.ColourTo(end, chAttr);
391 }
392 
isWordHSGML(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler)393 bool isWordHSGML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
394 	std::string s;
395 	for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
396 		s.push_back(styler[start + i]);
397 	}
398 	return keywords.InList(s.c_str());
399 }
400 
isWordCdata(Sci_PositionU start,Sci_PositionU end,Accessor & styler)401 bool isWordCdata(Sci_PositionU start, Sci_PositionU end, Accessor &styler) {
402 	std::string s;
403 	for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
404 		s.push_back(styler[start + i]);
405 	}
406 	return s == "[CDATA[";
407 }
408 
409 // Return the first state to reach when entering a scripting language
StateForScript(script_type scriptLanguage)410 int StateForScript(script_type scriptLanguage) {
411 	int Result;
412 	switch (scriptLanguage) {
413 	case eScriptVBS:
414 		Result = SCE_HB_START;
415 		break;
416 	case eScriptPython:
417 		Result = SCE_HP_START;
418 		break;
419 	case eScriptPHP:
420 		Result = SCE_HPHP_DEFAULT;
421 		break;
422 	case eScriptXML:
423 		Result = SCE_H_TAGUNKNOWN;
424 		break;
425 	case eScriptSGML:
426 		Result = SCE_H_SGML_DEFAULT;
427 		break;
428 	case eScriptComment:
429 		Result = SCE_H_COMMENT;
430 		break;
431 	default :
432 		Result = SCE_HJ_START;
433 		break;
434 	}
435 	return Result;
436 }
437 
issgmlwordchar(int ch)438 inline bool issgmlwordchar(int ch) {
439 	return !IsASCII(ch) ||
440 		(isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
441 }
442 
IsPhpWordStart(int ch)443 inline bool IsPhpWordStart(int ch) {
444 	return (IsASCII(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
445 }
446 
IsPhpWordChar(int ch)447 inline bool IsPhpWordChar(int ch) {
448 	return IsADigit(ch) || IsPhpWordStart(ch);
449 }
450 
InTagState(int state)451 bool InTagState(int state) {
452 	return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
453 	       state == SCE_H_SCRIPT ||
454 	       state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
455 	       state == SCE_H_NUMBER || state == SCE_H_OTHER ||
456 	       state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
457 }
458 
IsCommentState(const int state)459 bool IsCommentState(const int state) {
460 	return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
461 }
462 
IsScriptCommentState(const int state)463 bool IsScriptCommentState(const int state) {
464 	return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
465 		   state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
466 }
467 
// Return true when ch is a carriage return or a line feed.
bool isLineEnd(int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
471 
isMakoBlockEnd(const int ch,const int chNext,const std::string & blockType)472 bool isMakoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
473 	if (blockType.empty()) {
474 		return ((ch == '%') && (chNext == '>'));
475 	} else if ((blockType == "inherit") ||
476 			   (blockType == "namespace") ||
477 			   (blockType == "include") ||
478 			   (blockType == "page")) {
479 		return ((ch == '/') && (chNext == '>'));
480 	} else if (blockType == "%") {
481 		if (ch == '/' && isLineEnd(chNext))
482 			return true;
483 		else
484 			return isLineEnd(ch);
485 	} else if (blockType == "{") {
486 		return ch == '}';
487 	} else {
488 		return (ch == '>');
489 	}
490 }
491 
// Return true when ch / chNext end the Django block whose opener is described
// by blockType: "%" blocks end at "%}" and "{" blocks end at "}}". Any other
// blockType, including the empty string, never ends.
bool isDjangoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
	// Both recognised terminators finish with '}'.
	if (chNext != '}')
		return false;
	if (blockType == "%")
		return ch == '%';
	if (blockType == "{")
		return ch == '}';
	return false;
}
503 
isPHPStringState(int state)504 bool isPHPStringState(int state) {
505 	return
506 	    (state == SCE_HPHP_HSTRING) ||
507 	    (state == SCE_HPHP_SIMPLESTRING) ||
508 	    (state == SCE_HPHP_HSTRING_VARIABLE) ||
509 	    (state == SCE_HPHP_COMPLEX_VARIABLE);
510 }
511 
// Read the PHP string delimiter identifier that starts at or after position i
// on the current line, skipping leading spaces and tabs.
// phpStringDelimiter - receives the identifier; cleared on failure
// isSimpleString     - set to true when the identifier is opened by a single
//                      quote; it is never reset to false here
// Returns the position of the last character consumed, or i - 1 (as passed in)
// when no valid delimiter is found.
Sci_Position FindPhpStringDelimiter(std::string &phpStringDelimiter, Sci_Position i, const Sci_Position lengthDoc, Accessor &styler, bool &isSimpleString) {
	const Sci_Position beginning = i - 1;

	// Skip blanks in front of the identifier.
	for (; i < lengthDoc; i++) {
		const char blank = styler[i];
		if ((blank != ' ') && (blank != '\t'))
			break;
	}

	char first = styler.SafeGetCharAt(i);
	const char second = styler.SafeGetCharAt(i + 1);
	phpStringDelimiter.clear();
	if (!IsPhpWordStart(first)) {
		// The only other accepted opening is a single quote followed by a word start.
		if ((first != '\'') || !IsPhpWordStart(second))
			return beginning;
		i++;
		first = second;
		isSimpleString = true;
	}
	phpStringDelimiter.push_back(first);
	i++;

	bool closingQuoteSeen = false;
	Sci_Position j = i;
	for (; j < lengthDoc; j++) {
		const char ch = styler[j];
		if (isLineEnd(ch))
			break;
		if (!IsPhpWordChar(ch)) {
			// A quoted identifier may only be closed by a quote at the end of the line.
			const bool closesQuoted = isSimpleString && (ch == '\'') &&
				isLineEnd(styler.SafeGetCharAt(j + 1));
			if (!closesQuoted) {
				phpStringDelimiter.clear();
				return beginning;
			}
			closingQuoteSeen = true;
			j++;
			break;
		}
		phpStringDelimiter.push_back(ch);
	}

	// A quoted identifier without its closing quote is not a delimiter.
	if (isSimpleString && !closingQuoteSeen) {
		phpStringDelimiter.clear();
		return beginning;
	}
	return j - 1;
}
552 
553 // Options used for LexerHTML
554 struct OptionsHTML {
555 	int aspDefaultLanguage = eScriptJS;
556 	bool caseSensitive = false;
557 	bool allowScripts = true;
558 	bool isMako = false;
559 	bool isDjango = false;
560 	bool fold = false;
561 	bool foldHTML = false;
562 	bool foldHTMLPreprocessor = true;
563 	bool foldCompact = true;
564 	bool foldComment = false;
565 	bool foldHeredoc = false;
OptionsHTML__anondaceee610111::OptionsHTML566 	OptionsHTML() noexcept {
567 	}
568 };
569 
// Descriptions of the keyword lists, one per list index, ended by a null
// pointer. Passed to DefineWordListSets() when the lexer is not a PHP script lexer.
const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",	// 0
	"JavaScript keywords",		// 1
	"VBScript keywords",		// 2
	"Python keywords",		// 3
	"PHP keywords",			// 4
	"SGML and DTD keywords",	// 5
	nullptr,
};
579 
// Keyword list descriptions used when the lexer is a PHP script lexer: only
// index 4 carries a description, the other slots are empty strings. Ended by
// a null pointer.
const char * const phpscriptWordListDesc[] = {
	"",			// 0 unused
	"",			// 1 unused
	"",			// 2 unused
	"",			// 3 unused
	"PHP keywords",		// 4
	"",			// 5 unused
	nullptr,
};
589 
590 struct OptionSetHTML : public OptionSet<OptionsHTML> {
OptionSetHTML__anondaceee610111::OptionSetHTML591 	OptionSetHTML(bool isPHPScript_) {
592 
593 		DefineProperty("asp.default.language", &OptionsHTML::aspDefaultLanguage,
594 			"Script in ASP code is initially assumed to be in JavaScript. "
595 			"To change this to VBScript set asp.default.language to 2. Python is 3.");
596 
597 		DefineProperty("html.tags.case.sensitive", &OptionsHTML::caseSensitive,
598 			"For XML and HTML, setting this property to 1 will make tags match in a case "
599 			"sensitive way which is the expected behaviour for XML and XHTML.");
600 
601 		DefineProperty("lexer.xml.allow.scripts", &OptionsHTML::allowScripts,
602 			"Set to 0 to disable scripts in XML.");
603 
604 		DefineProperty("lexer.html.mako", &OptionsHTML::isMako,
605 			"Set to 1 to enable the mako template language.");
606 
607 		DefineProperty("lexer.html.django", &OptionsHTML::isDjango,
608 			"Set to 1 to enable the django template language.");
609 
610 		DefineProperty("fold", &OptionsHTML::fold);
611 
612 		DefineProperty("fold.html", &OptionsHTML::foldHTML,
613 			"Folding is turned on or off for HTML and XML files with this option. "
614 			"The fold option must also be on for folding to occur.");
615 
616 		DefineProperty("fold.html.preprocessor", &OptionsHTML::foldHTMLPreprocessor,
617 			"Folding is turned on or off for scripts embedded in HTML files with this option. "
618 			"The default is on.");
619 
620 		DefineProperty("fold.compact", &OptionsHTML::foldCompact);
621 
622 		DefineProperty("fold.hypertext.comment", &OptionsHTML::foldComment,
623 			"Allow folding for comments in scripts embedded in HTML. "
624 			"The default is off.");
625 
626 		DefineProperty("fold.hypertext.heredoc", &OptionsHTML::foldHeredoc,
627 			"Allow folding for heredocs in scripts embedded in HTML. "
628 			"The default is off.");
629 
630 		DefineWordListSets(isPHPScript_ ? phpscriptWordListDesc : htmlWordListDesc);
631 	}
632 };
633 
634 LexicalClass lexicalClassesHTML[] = {
635 	// Lexer HTML SCLEX_HTML SCE_H_ SCE_HJ_ SCE_HJA_ SCE_HB_ SCE_HBA_ SCE_HP_ SCE_HPHP_ SCE_HPA_:
636 	0, "SCE_H_DEFAULT", "default", "Text",
637 	1, "SCE_H_TAG", "tag", "Tags",
638 	2, "SCE_H_ERRORTAGUNKNOWN", "error tag", "Unknown Tags",
639 	3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
640 	4, "SCE_H_ATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
641 	5, "SCE_H_NUMBER", "literal numeric", "Numbers",
642 	6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
643 	7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
644 	8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
645 	9, "SCE_H_COMMENT", "comment", "Comment",
646 	10, "SCE_H_ENTITY", "literal", "Entities",
647 	11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
648 	12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
649 	13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
650 	14, "SCE_H_SCRIPT", "error", "Internal state which should never be visible",
651 	15, "SCE_H_ASP", "preprocessor", "ASP <% ... %>",
652 	16, "SCE_H_ASPAT", "preprocessor", "ASP <% ... %>",
653 	17, "SCE_H_CDATA", "literal", "CDATA",
654 	18, "SCE_H_QUESTION", "preprocessor", "PHP",
655 	19, "SCE_H_VALUE", "literal string", "Unquoted values",
656 	20, "SCE_H_XCCOMMENT", "comment", "JSP Comment <%-- ... --%>",
657 	21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
658 	22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
659 	23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
660 	24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
661 	25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
662 	26, "SCE_H_SGML_ERROR", "error", "SGML error",
663 	27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
664 	28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
665 	29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
666 	30, "SCE_H_SGML_1ST_PARAM_COMMENT", "error comment", "SGML first parameter - lexer internal. It is an error if any text is in this style.",
667 	31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
668 	32, "", "predefined", "",
669 	33, "", "predefined", "",
670 	34, "", "predefined", "",
671 	35, "", "predefined", "",
672 	36, "", "predefined", "",
673 	37, "", "predefined", "",
674 	38, "", "predefined", "",
675 	39, "", "predefined", "",
676 	40, "SCE_HJ_START", "client javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
677 	41, "SCE_HJ_DEFAULT", "client javascript default", "JS Default",
678 	42, "SCE_HJ_COMMENT", "client javascript comment", "JS Comment",
679 	43, "SCE_HJ_COMMENTLINE", "client javascript comment line", "JS Line Comment",
680 	44, "SCE_HJ_COMMENTDOC", "client javascript comment documentation", "JS Doc comment",
681 	45, "SCE_HJ_NUMBER", "client javascript literal numeric", "JS Number",
682 	46, "SCE_HJ_WORD", "client javascript identifier", "JS Word",
683 	47, "SCE_HJ_KEYWORD", "client javascript keyword", "JS Keyword",
684 	48, "SCE_HJ_DOUBLESTRING", "client javascript literal string", "JS Double quoted string",
685 	49, "SCE_HJ_SINGLESTRING", "client javascript literal string", "JS Single quoted string",
686 	50, "SCE_HJ_SYMBOLS", "client javascript operator", "JS Symbols",
687 	51, "SCE_HJ_STRINGEOL", "client javascript error literal string", "JavaScript EOL",
688 	52, "SCE_HJ_REGEX", "client javascript literal regex", "JavaScript RegEx",
689 	53, "", "unused", "",
690 	54, "", "unused", "",
691 	55, "SCE_HJA_START", "server javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
692 	56, "SCE_HJA_DEFAULT", "server javascript default", "JS Default",
693 	57, "SCE_HJA_COMMENT", "server javascript comment", "JS Comment",
694 	58, "SCE_HJA_COMMENTLINE", "server javascript comment line", "JS Line Comment",
695 	59, "SCE_HJA_COMMENTDOC", "server javascript comment documentation", "JS Doc comment",
696 	60, "SCE_HJA_NUMBER", "server javascript literal numeric", "JS Number",
697 	61, "SCE_HJA_WORD", "server javascript identifier", "JS Word",
698 	62, "SCE_HJA_KEYWORD", "server javascript keyword", "JS Keyword",
699 	63, "SCE_HJA_DOUBLESTRING", "server javascript literal string", "JS Double quoted string",
700 	64, "SCE_HJA_SINGLESTRING", "server javascript literal string", "JS Single quoted string",
701 	65, "SCE_HJA_SYMBOLS", "server javascript operator", "JS Symbols",
702 	66, "SCE_HJA_STRINGEOL", "server javascript error literal string", "JavaScript EOL",
703 	67, "SCE_HJA_REGEX", "server javascript literal regex", "JavaScript RegEx",
704 	68, "", "unused", "",
705 	69, "", "unused", "",
706 	70, "SCE_HB_START", "client basic default", "Start",
707 	71, "SCE_HB_DEFAULT", "client basic default", "Default",
708 	72, "SCE_HB_COMMENTLINE", "client basic comment line", "Comment",
709 	73, "SCE_HB_NUMBER", "client basic literal numeric", "Number",
710 	74, "SCE_HB_WORD", "client basic keyword", "KeyWord",
711 	75, "SCE_HB_STRING", "client basic literal string", "String",
712 	76, "SCE_HB_IDENTIFIER", "client basic identifier", "Identifier",
713 	77, "SCE_HB_STRINGEOL", "client basic literal string", "Unterminated string",
714 	78, "", "unused", "",
715 	79, "", "unused", "",
716 	80, "SCE_HBA_START", "server basic default", "Start",
717 	81, "SCE_HBA_DEFAULT", "server basic default", "Default",
718 	82, "SCE_HBA_COMMENTLINE", "server basic comment line", "Comment",
719 	83, "SCE_HBA_NUMBER", "server basic literal numeric", "Number",
720 	84, "SCE_HBA_WORD", "server basic keyword", "KeyWord",
721 	85, "SCE_HBA_STRING", "server basic literal string", "String",
722 	86, "SCE_HBA_IDENTIFIER", "server basic identifier", "Identifier",
723 	87, "SCE_HBA_STRINGEOL", "server basic literal string", "Unterminated string",
724 	88, "", "unused", "",
725 	89, "", "unused", "",
726 	90, "SCE_HP_START", "client python default", "Embedded Python",
727 	91, "SCE_HP_DEFAULT", "client python default", "Embedded Python",
728 	92, "SCE_HP_COMMENTLINE", "client python comment line", "Comment",
729 	93, "SCE_HP_NUMBER", "client python literal numeric", "Number",
730 	94, "SCE_HP_STRING", "client python literal string", "String",
731 	95, "SCE_HP_CHARACTER", "client python literal string character", "Single quoted string",
732 	96, "SCE_HP_WORD", "client python keyword", "Keyword",
733 	97, "SCE_HP_TRIPLE", "client python literal string", "Triple quotes",
734 	98, "SCE_HP_TRIPLEDOUBLE", "client python literal string", "Triple double quotes",
735 	99, "SCE_HP_CLASSNAME", "client python identifier", "Class name definition",
736 	100, "SCE_HP_DEFNAME", "client python identifier", "Function or method name definition",
737 	101, "SCE_HP_OPERATOR", "client python operator", "Operators",
738 	102, "SCE_HP_IDENTIFIER", "client python identifier", "Identifiers",
739 	103, "", "unused", "",
740 	104, "SCE_HPHP_COMPLEX_VARIABLE", "server php identifier", "PHP complex variable",
741 	105, "SCE_HPA_START", "server python default", "ASP Python",
742 	106, "SCE_HPA_DEFAULT", "server python default", "ASP Python",
743 	107, "SCE_HPA_COMMENTLINE", "server python comment line", "Comment",
744 	108, "SCE_HPA_NUMBER", "server python literal numeric", "Number",
745 	109, "SCE_HPA_STRING", "server python literal string", "String",
746 	110, "SCE_HPA_CHARACTER", "server python literal string character", "Single quoted string",
747 	111, "SCE_HPA_WORD", "server python keyword", "Keyword",
748 	112, "SCE_HPA_TRIPLE", "server python literal string", "Triple quotes",
749 	113, "SCE_HPA_TRIPLEDOUBLE", "server python literal string", "Triple double quotes",
750 	114, "SCE_HPA_CLASSNAME", "server python identifier", "Class name definition",
751 	115, "SCE_HPA_DEFNAME", "server python identifier", "Function or method name definition",
752 	116, "SCE_HPA_OPERATOR", "server python operator", "Operators",
753 	117, "SCE_HPA_IDENTIFIER", "server python identifier", "Identifiers",
754 	118, "SCE_HPHP_DEFAULT", "server php default", "Default",
755 	119, "SCE_HPHP_HSTRING", "server php literal string", "Double quoted String",
756 	120, "SCE_HPHP_SIMPLESTRING", "server php literal string", "Single quoted string",
757 	121, "SCE_HPHP_WORD", "server php keyword", "Keyword",
758 	122, "SCE_HPHP_NUMBER", "server php literal numeric", "Number",
759 	123, "SCE_HPHP_VARIABLE", "server php identifier", "Variable",
760 	124, "SCE_HPHP_COMMENT", "server php comment", "Comment",
761 	125, "SCE_HPHP_COMMENTLINE", "server php comment line", "One line comment",
762 	126, "SCE_HPHP_HSTRING_VARIABLE", "server php literal string identifier", "PHP variable in double quoted string",
763 	127, "SCE_HPHP_OPERATOR", "server php operator", "PHP operator",
764 };
765 
766 LexicalClass lexicalClassesXML[] = {
767 	// Lexer.Secondary XML SCLEX_XML SCE_H_:
768 	0, "SCE_H_DEFAULT", "default", "Default",
769 	1, "SCE_H_TAG", "tag", "Tags",
770 	2, "SCE_H_TAGUNKNOWN", "error tag", "Unknown Tags",
771 	3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
772 	4, "SCE_H_ERRORATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
773 	5, "SCE_H_NUMBER", "literal numeric", "Numbers",
774 	6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
775 	7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
776 	8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
777 	9, "SCE_H_COMMENT", "comment", "Comment",
778 	10, "SCE_H_ENTITY", "literal", "Entities",
779 	11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
780 	12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
781 	13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
782 	14, "", "unused", "",
783 	15, "", "unused", "",
784 	16, "", "unused", "",
785 	17, "SCE_H_CDATA", "literal", "CDATA",
786 	18, "SCE_H_QUESTION", "preprocessor", "Question",
787 	19, "SCE_H_VALUE", "literal string", "Unquoted Value",
788 	20, "", "unused", "",
789 	21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
790 	22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
791 	23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
792 	24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
793 	25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
794 	26, "SCE_H_SGML_ERROR", "error", "SGML error",
795 	27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
796 	28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
797 	29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
798 	30, "", "unused", "",
799 	31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
800 };
801 
// Names of HTML tags that should not be folded.
// Kept as a plain array so that std::begin/std::end can be applied to it.
const char *tagsThatDoNotFold[] = {
	"area", "base", "basefont", "br",
	"col", "command", "embed", "frame",
	"hr", "img", "input", "isindex",
	"keygen", "link", "meta", "param",
	"source", "track", "wbr"
};
823 
824 }
825 class LexerHTML : public DefaultLexer {
826 	bool isXml;
827 	bool isPHPScript;
828 	WordList keywords;
829 	WordList keywords2;
830 	WordList keywords3;
831 	WordList keywords4;
832 	WordList keywords5;
833 	WordList keywords6; // SGML (DTD) keywords
834 	OptionsHTML options;
835 	OptionSetHTML osHTML;
836 	std::set<std::string> nonFoldingTags;
837 public:
LexerHTML(bool isXml_,bool isPHPScript_)838 	explicit LexerHTML(bool isXml_, bool isPHPScript_) :
839 		DefaultLexer(isXml_ ? lexicalClassesHTML : lexicalClassesXML,
840 			isXml_ ? ELEMENTS(lexicalClassesHTML) : ELEMENTS(lexicalClassesXML)),
841 		isXml(isXml_),
842 		isPHPScript(isPHPScript_),
843 		osHTML(isPHPScript_),
844 		nonFoldingTags(std::begin(tagsThatDoNotFold), std::end(tagsThatDoNotFold)) {
845 	}
~LexerHTML()846 	~LexerHTML() override {
847 	}
Release()848 	void SCI_METHOD Release() override {
849 		delete this;
850 	}
PropertyNames()851 	const char *SCI_METHOD PropertyNames() override {
852 		return osHTML.PropertyNames();
853 	}
PropertyType(const char * name)854 	int SCI_METHOD PropertyType(const char *name) override {
855 		return osHTML.PropertyType(name);
856 	}
DescribeProperty(const char * name)857 	const char *SCI_METHOD DescribeProperty(const char *name) override {
858 		return osHTML.DescribeProperty(name);
859 	}
860 	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
DescribeWordListSets()861 	const char *SCI_METHOD DescribeWordListSets() override {
862 		return osHTML.DescribeWordListSets();
863 	}
864 	Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
865 	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
866 	// No Fold as all folding performs in Lex.
867 
LexerFactoryHTML()868 	static ILexer *LexerFactoryHTML() {
869 		return new LexerHTML(false, false);
870 	}
LexerFactoryXML()871 	static ILexer *LexerFactoryXML() {
872 		return new LexerHTML(true, false);
873 	}
LexerFactoryPHPScript()874 	static ILexer *LexerFactoryPHPScript() {
875 		return new LexerHTML(false, true);
876 	}
877 };
878 
PropertySet(const char * key,const char * val)879 Sci_Position SCI_METHOD LexerHTML::PropertySet(const char *key, const char *val) {
880 	if (osHTML.PropertySet(&options, key, val)) {
881 		return 0;
882 	}
883 	return -1;
884 }
885 
WordListSet(int n,const char * wl)886 Sci_Position SCI_METHOD LexerHTML::WordListSet(int n, const char *wl) {
887 	WordList *wordListN = 0;
888 	switch (n) {
889 	case 0:
890 		wordListN = &keywords;
891 		break;
892 	case 1:
893 		wordListN = &keywords2;
894 		break;
895 	case 2:
896 		wordListN = &keywords3;
897 		break;
898 	case 3:
899 		wordListN = &keywords4;
900 		break;
901 	case 4:
902 		wordListN = &keywords5;
903 		break;
904 	case 5:
905 		wordListN = &keywords6;
906 		break;
907 	}
908 	Sci_Position firstModification = -1;
909 	if (wordListN) {
910 		WordList wlNew;
911 		wlNew.Set(wl);
912 		if (*wordListN != wlNew) {
913 			wordListN->Set(wl);
914 			firstModification = 0;
915 		}
916 	}
917 	return firstModification;
918 }
919 
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)920 void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
921 	Accessor styler(pAccess, nullptr);
922 	if (isPHPScript && (startPos == 0)) {
923 		initStyle = SCE_HPHP_DEFAULT;
924 	}
925 	styler.StartAt(startPos);
926 	std::string prevWord;
927 	std::string phpStringDelimiter;
928 	int StateToPrint = initStyle;
929 	int state = stateForPrintState(StateToPrint);
930 	std::string makoBlockType;
931 	int makoComment = 0;
932 	std::string djangoBlockType;
933 	// If inside a tag, it may be a script tag, so reread from the start of line starting tag to ensure any language tags are seen
934 	if (InTagState(state)) {
935 		while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
936 			const Sci_Position backLineStart = styler.LineStart(styler.GetLine(startPos-1));
937 			length += startPos - backLineStart;
938 			startPos = backLineStart;
939 		}
940 		state = SCE_H_DEFAULT;
941 	}
942 	// String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
943 	if (isPHPStringState(state)) {
944 		while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
945 			startPos--;
946 			length++;
947 			state = styler.StyleAt(startPos);
948 		}
949 		if (startPos == 0)
950 			state = SCE_H_DEFAULT;
951 	}
952 	styler.StartAt(startPos);
953 
954 	/* Nothing handles getting out of these, so we need not start in any of them.
955 	 * As we're at line start and they can't span lines, we'll re-detect them anyway */
956 	switch (state) {
957 		case SCE_H_QUESTION:
958 		case SCE_H_XMLSTART:
959 		case SCE_H_XMLEND:
960 		case SCE_H_ASP:
961 			state = SCE_H_DEFAULT;
962 			break;
963 	}
964 
965 	Sci_Position lineCurrent = styler.GetLine(startPos);
966 	int lineState;
967 	if (lineCurrent > 0) {
968 		lineState = styler.GetLineState(lineCurrent-1);
969 	} else {
970 		// Default client and ASP scripting language is JavaScript
971 		lineState = eScriptJS << 8;
972 		lineState |= options.aspDefaultLanguage << 4;
973 	}
974 	script_mode inScriptType = static_cast<script_mode>((lineState >> 0) & 0x03); // 2 bits of scripting mode
975 
976 	bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
977 	bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
978 	bool tagDontFold = false; //some HTML tags should not be folded
979 	script_type aspScript = static_cast<script_type>((lineState >> 4) & 0x0F); // 4 bits of script name
980 	script_type clientScript = static_cast<script_type>((lineState >> 8) & 0x0F); // 4 bits of script name
981 	int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
982 
983 	script_type scriptLanguage = ScriptOfState(state);
984 	// If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
985 	if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
986 		scriptLanguage = eScriptComment;
987 	}
988 	script_type beforeLanguage = ScriptOfState(beforePreProc);
989 	const bool foldHTML = options.foldHTML;
990 	const bool fold = foldHTML && options.fold;
991 	const bool foldHTMLPreprocessor = foldHTML && options.foldHTMLPreprocessor;
992 	const bool foldCompact = options.foldCompact;
993 	const bool foldComment = fold && options.foldComment;
994 	const bool foldHeredoc = fold && options.foldHeredoc;
995 	const bool caseSensitive = options.caseSensitive;
996 	const bool allowScripts = options.allowScripts;
997 	const bool isMako = options.isMako;
998 	const bool isDjango = options.isDjango;
999 	const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
1000 	const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
1001 	const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
1002 	// TODO: also handle + and - (except if they're part of ++ or --) and return keywords
1003 	const CharacterSet setOKBeforeJSRE(CharacterSet::setNone, "([{=,:;!%^&*|?~");
1004 
1005 	int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
1006 	int levelCurrent = levelPrev;
1007 	int visibleChars = 0;
1008 	int lineStartVisibleChars = 0;
1009 
1010 	int chPrev = ' ';
1011 	int ch = ' ';
1012 	int chPrevNonWhite = ' ';
1013 	// look back to set chPrevNonWhite properly for better regex colouring
1014 	if (scriptLanguage == eScriptJS && startPos > 0) {
1015 		Sci_Position back = startPos;
1016 		int style = 0;
1017 		while (--back) {
1018 			style = styler.StyleAt(back);
1019 			if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
1020 				// includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
1021 				break;
1022 		}
1023 		if (style == SCE_HJ_SYMBOLS) {
1024 			chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
1025 		}
1026 	}
1027 
1028 	styler.StartSegment(startPos);
1029 	const Sci_Position lengthDoc = startPos + length;
1030 	for (Sci_Position i = startPos; i < lengthDoc; i++) {
1031 		const int chPrev2 = chPrev;
1032 		chPrev = ch;
1033 		if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
1034 			state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
1035 			chPrevNonWhite = ch;
1036 		ch = static_cast<unsigned char>(styler[i]);
1037 		int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1038 		const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
1039 
1040 		// Handle DBCS codepages
1041 		if (styler.IsLeadByte(static_cast<char>(ch))) {
1042 			chPrev = ' ';
1043 			i += 1;
1044 			continue;
1045 		}
1046 
1047 		if ((!IsASpace(ch) || !foldCompact) && fold)
1048 			visibleChars++;
1049 		if (!IsASpace(ch))
1050 			lineStartVisibleChars++;
1051 
1052 		// decide what is the current state to print (depending of the script tag)
1053 		StateToPrint = statePrintForState(state, inScriptType);
1054 
1055 		// handle script folding
1056 		if (fold) {
1057 			switch (scriptLanguage) {
1058 			case eScriptJS:
1059 			case eScriptPHP:
1060 				//not currently supported				case eScriptVBS:
1061 
1062 				if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
1063 				//Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
1064 				//if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
1065 					if (ch == '#') {
1066 						Sci_Position j = i + 1;
1067 						while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1068 							j++;
1069 						}
1070 						if (styler.Match(j, "region") || styler.Match(j, "if")) {
1071 							levelCurrent++;
1072 						} else if (styler.Match(j, "end")) {
1073 							levelCurrent--;
1074 						}
1075 					} else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
1076 						levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
1077 					}
1078 				} else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
1079 					levelCurrent--;
1080 				}
1081 				break;
1082 			case eScriptPython:
1083 				if (state != SCE_HP_COMMENTLINE && !isMako) {
1084 					if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
1085 						levelCurrent++;
1086 					} else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
1087 						// check if the number of tabs is lower than the level
1088 						int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
1089 						for (Sci_Position j = 0; Findlevel > 0; j++) {
1090 							const char chTmp = styler.SafeGetCharAt(i + j + 1);
1091 							if (chTmp == '\t') {
1092 								Findlevel -= 8;
1093 							} else if (chTmp == ' ') {
1094 								Findlevel--;
1095 							} else {
1096 								break;
1097 							}
1098 						}
1099 
1100 						if (Findlevel > 0) {
1101 							levelCurrent -= Findlevel / 8;
1102 							if (Findlevel % 8)
1103 								levelCurrent--;
1104 						}
1105 					}
1106 				}
1107 				break;
1108 			default:
1109 				break;
1110 			}
1111 		}
1112 
1113 		if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
1114 			// Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
1115 			// Avoid triggering two times on Dos/Win
1116 			// New line -> record any line state onto /next/ line
1117 			if (fold) {
1118 				int lev = levelPrev;
1119 				if (visibleChars == 0)
1120 					lev |= SC_FOLDLEVELWHITEFLAG;
1121 				if ((levelCurrent > levelPrev) && (visibleChars > 0))
1122 					lev |= SC_FOLDLEVELHEADERFLAG;
1123 
1124 				styler.SetLevel(lineCurrent, lev);
1125 				visibleChars = 0;
1126 				levelPrev = levelCurrent;
1127 			}
1128 			styler.SetLineState(lineCurrent,
1129 			                    ((inScriptType & 0x03) << 0) |
1130 			                    ((tagOpened ? 1 : 0) << 2) |
1131 			                    ((tagClosing ? 1 : 0) << 3) |
1132 			                    ((aspScript & 0x0F) << 4) |
1133 			                    ((clientScript & 0x0F) << 8) |
1134 			                    ((beforePreProc & 0xFF) << 12));
1135 			lineCurrent++;
1136 			lineStartVisibleChars = 0;
1137 		}
1138 
1139 		// handle start of Mako comment line
1140 		if (isMako && ch == '#' && chNext == '#') {
1141 			makoComment = 1;
1142 			state = SCE_HP_COMMENTLINE;
1143 		}
1144 
1145 		// handle end of Mako comment line
1146 		else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
1147 			makoComment = 0;
1148 			styler.ColourTo(i - 1, StateToPrint);
1149 			if (scriptLanguage == eScriptPython) {
1150 				state = SCE_HP_DEFAULT;
1151 			} else {
1152 				state = SCE_H_DEFAULT;
1153 			}
1154 		}
1155 		// Allow falling through to mako handling code if newline is going to end a block
1156 		if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
1157 			(!isMako || (makoBlockType != "%"))) {
1158 		}
1159 		// Ignore everything in mako comment until the line ends
1160 		else if (isMako && makoComment) {
1161 		}
1162 
1163 		// generic end of script processing
1164 		else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
1165 			// Check if it's the end of the script tag (or any other HTML tag)
1166 			switch (state) {
1167 				// in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
1168 			case SCE_H_DOUBLESTRING:
1169 			case SCE_H_SINGLESTRING:
1170 			case SCE_HJ_COMMENT:
1171 			case SCE_HJ_COMMENTDOC:
1172 			//case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
1173 			// the end of script marker from some JS interpreters.
1174 			case SCE_HB_COMMENTLINE:
1175 			case SCE_HBA_COMMENTLINE:
1176 			case SCE_HJ_DOUBLESTRING:
1177 			case SCE_HJ_SINGLESTRING:
1178 			case SCE_HJ_REGEX:
1179 			case SCE_HB_STRING:
1180 			case SCE_HBA_STRING:
1181 			case SCE_HP_STRING:
1182 			case SCE_HP_TRIPLE:
1183 			case SCE_HP_TRIPLEDOUBLE:
1184 			case SCE_HPHP_HSTRING:
1185 			case SCE_HPHP_SIMPLESTRING:
1186 			case SCE_HPHP_COMMENT:
1187 			case SCE_HPHP_COMMENTLINE:
1188 				break;
1189 			default :
1190 				// check if the closing tag is a script tag
1191 				if (const char *tag =
1192 						state == SCE_HJ_COMMENTLINE || isXml ? "script" :
1193 						state == SCE_H_COMMENT ? "comment" : 0) {
1194 					Sci_Position j = i + 2;
1195 					int chr;
1196 					do {
1197 						chr = static_cast<int>(*tag++);
1198 					} while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
1199 					if (chr != 0) break;
1200 				}
1201 				// closing tag of the script (it's a closing HTML tag anyway)
1202 				styler.ColourTo(i - 1, StateToPrint);
1203 				state = SCE_H_TAGUNKNOWN;
1204 				inScriptType = eHtml;
1205 				scriptLanguage = eScriptNone;
1206 				clientScript = eScriptJS;
1207 				i += 2;
1208 				visibleChars += 2;
1209 				tagClosing = true;
1210 				continue;
1211 			}
1212 		}
1213 
1214 		/////////////////////////////////////
1215 		// handle the start of PHP pre-processor = Non-HTML
1216 		else if ((state != SCE_H_ASPAT) &&
1217 		         !isStringState(state) &&
1218 		         (state != SCE_HPHP_COMMENT) &&
1219 		         (state != SCE_HPHP_COMMENTLINE) &&
1220 		         (ch == '<') &&
1221 		         (chNext == '?') &&
1222 				 !IsScriptCommentState(state)) {
1223  			beforeLanguage = scriptLanguage;
1224 			scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
1225 			if ((scriptLanguage != eScriptPHP) && (isStringState(state) || (state==SCE_H_COMMENT))) continue;
1226 			styler.ColourTo(i - 1, StateToPrint);
1227 			beforePreProc = state;
1228 			i++;
1229 			visibleChars++;
1230 			i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
1231 			if (scriptLanguage == eScriptXML)
1232 				styler.ColourTo(i, SCE_H_XMLSTART);
1233 			else
1234 				styler.ColourTo(i, SCE_H_QUESTION);
1235 			state = StateForScript(scriptLanguage);
1236 			if (inScriptType == eNonHtmlScript)
1237 				inScriptType = eNonHtmlScriptPreProc;
1238 			else
1239 				inScriptType = eNonHtmlPreProc;
1240 			// Fold whole script, but not if the XML first tag (all XML-like tags in this case)
1241 			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1242 				levelCurrent++;
1243 			}
1244 			// should be better
1245 			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1246 			continue;
1247 		}
1248 
1249 		// handle the start Mako template Python code
1250 		else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
1251 															 (lineStartVisibleChars == 1 && ch == '%') ||
1252 															 (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
1253 															 (ch == '$' && chNext == '{') ||
1254 															 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
1255 			if (ch == '%' || ch == '/')
1256 				makoBlockType = "%";
1257 			else if (ch == '$')
1258 				makoBlockType = "{";
1259 			else if (chNext == '/')
1260 				makoBlockType = GetNextWord(styler, i+3);
1261 			else
1262 				makoBlockType = GetNextWord(styler, i+2);
1263 			styler.ColourTo(i - 1, StateToPrint);
1264 			beforePreProc = state;
1265 			if (inScriptType == eNonHtmlScript)
1266 				inScriptType = eNonHtmlScriptPreProc;
1267 			else
1268 				inScriptType = eNonHtmlPreProc;
1269 
1270 			if (chNext == '/') {
1271 				i += 2;
1272 				visibleChars += 2;
1273 			} else if (ch != '%') {
1274 				i++;
1275 				visibleChars++;
1276 			}
1277 			state = SCE_HP_START;
1278 			scriptLanguage = eScriptPython;
1279 			styler.ColourTo(i, SCE_H_ASP);
1280 			if (ch != '%' && ch != '$' && ch != '/') {
1281 				i += makoBlockType.length();
1282 				visibleChars += static_cast<int>(makoBlockType.length());
1283 				if (keywords4.InList(makoBlockType.c_str()))
1284 					styler.ColourTo(i, SCE_HP_WORD);
1285 				else
1286 					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1287 			}
1288 
1289 			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1290 			continue;
1291 		}
1292 
1293 		// handle the start/end of Django comment
1294 		else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
1295 			styler.ColourTo(i - 1, StateToPrint);
1296 			beforePreProc = state;
1297 			beforeLanguage = scriptLanguage;
1298 			if (inScriptType == eNonHtmlScript)
1299 				inScriptType = eNonHtmlScriptPreProc;
1300 			else
1301 				inScriptType = eNonHtmlPreProc;
1302 			i += 1;
1303 			visibleChars += 1;
1304 			scriptLanguage = eScriptComment;
1305 			state = SCE_H_COMMENT;
1306 			styler.ColourTo(i, SCE_H_ASP);
1307 			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1308 			continue;
1309 		} else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
1310 			styler.ColourTo(i - 1, StateToPrint);
1311 			i += 1;
1312 			visibleChars += 1;
1313 			styler.ColourTo(i, SCE_H_ASP);
1314 			state = beforePreProc;
1315 			if (inScriptType == eNonHtmlScriptPreProc)
1316 				inScriptType = eNonHtmlScript;
1317 			else
1318 				inScriptType = eHtml;
1319 			scriptLanguage = beforeLanguage;
1320 			continue;
1321 		}
1322 
1323 		// handle the start Django template code
1324 		else if (isDjango && scriptLanguage != eScriptPython && scriptLanguage != eScriptComment && (ch == '{' && (chNext == '%' ||  chNext == '{'))) {
1325 			if (chNext == '%')
1326 				djangoBlockType = "%";
1327 			else
1328 				djangoBlockType = "{";
1329 			styler.ColourTo(i - 1, StateToPrint);
1330 			beforePreProc = state;
1331 			if (inScriptType == eNonHtmlScript)
1332 				inScriptType = eNonHtmlScriptPreProc;
1333 			else
1334 				inScriptType = eNonHtmlPreProc;
1335 
1336 			i += 1;
1337 			visibleChars += 1;
1338 			state = SCE_HP_START;
1339 			beforeLanguage = scriptLanguage;
1340 			scriptLanguage = eScriptPython;
1341 			styler.ColourTo(i, SCE_H_ASP);
1342 
1343 			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1344 			continue;
1345 		}
1346 
1347 		// handle the start of ASP pre-processor = Non-HTML
1348 		else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1349 			styler.ColourTo(i - 1, StateToPrint);
1350 			beforePreProc = state;
1351 			if (inScriptType == eNonHtmlScript)
1352 				inScriptType = eNonHtmlScriptPreProc;
1353 			else
1354 				inScriptType = eNonHtmlPreProc;
1355 
1356 			if (chNext2 == '@') {
1357 				i += 2; // place as if it was the second next char treated
1358 				visibleChars += 2;
1359 				state = SCE_H_ASPAT;
1360 			} else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1361 				styler.ColourTo(i + 3, SCE_H_ASP);
1362 				state = SCE_H_XCCOMMENT;
1363 				scriptLanguage = eScriptVBS;
1364 				continue;
1365 			} else {
1366 				if (chNext2 == '=') {
1367 					i += 2; // place as if it was the second next char treated
1368 					visibleChars += 2;
1369 				} else {
1370 					i++; // place as if it was the next char treated
1371 					visibleChars++;
1372 				}
1373 
1374 				state = StateForScript(aspScript);
1375 			}
1376 			scriptLanguage = eScriptVBS;
1377 			styler.ColourTo(i, SCE_H_ASP);
1378 			// fold whole script
1379 			if (foldHTMLPreprocessor)
1380 				levelCurrent++;
1381 			// should be better
1382 			ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1383 			continue;
1384 		}
1385 
1386 		/////////////////////////////////////
1387 		// handle the start of SGML language (DTD)
1388 		else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1389 				 (chPrev == '<') &&
1390 				 (ch == '!') &&
1391 				 (StateToPrint != SCE_H_CDATA) &&
1392 				 (!IsCommentState(StateToPrint)) &&
1393 				 (!IsScriptCommentState(StateToPrint))) {
1394 			beforePreProc = state;
1395 			styler.ColourTo(i - 2, StateToPrint);
1396 			if ((chNext == '-') && (chNext2 == '-')) {
1397 				state = SCE_H_COMMENT; // wait for a pending command
1398 				styler.ColourTo(i + 2, SCE_H_COMMENT);
1399 				i += 2; // follow styling after the --
1400 			} else if (isWordCdata(i + 1, i + 7, styler)) {
1401 				state = SCE_H_CDATA;
1402 			} else {
1403 				styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1404 				scriptLanguage = eScriptSGML;
1405 				state = SCE_H_SGML_COMMAND; // wait for a pending command
1406 			}
1407 			// fold whole tag (-- when closing the tag)
1408 			if (foldHTMLPreprocessor || state == SCE_H_COMMENT || state == SCE_H_CDATA)
1409 				levelCurrent++;
1410 			continue;
1411 		}
1412 
1413 		// handle the end of Mako Python code
1414 		else if (isMako &&
1415 			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1416 				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1417 				 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1418 			if (state == SCE_H_ASPAT) {
1419 				aspScript = segIsScriptingIndicator(styler,
1420 				                                    styler.GetStartSegment(), i - 1, aspScript);
1421 			}
1422 			if (state == SCE_HP_WORD) {
1423 				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1424 			} else {
1425 				styler.ColourTo(i - 1, StateToPrint);
1426 			}
1427 			if ((makoBlockType != "%") && (makoBlockType != "{") && ch != '>') {
1428 				i++;
1429 				visibleChars++;
1430 		    }
1431 			else if ((makoBlockType == "%") && ch == '/') {
1432 				i++;
1433 				visibleChars++;
1434 			}
1435 			if ((makoBlockType != "%") || ch == '/') {
1436 				styler.ColourTo(i, SCE_H_ASP);
1437 			}
1438 			state = beforePreProc;
1439 			if (inScriptType == eNonHtmlScriptPreProc)
1440 				inScriptType = eNonHtmlScript;
1441 			else
1442 				inScriptType = eHtml;
1443 			scriptLanguage = eScriptNone;
1444 			continue;
1445 		}
1446 
1447 		// handle the end of Django template code
1448 		else if (isDjango &&
1449 			     ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1450 				 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1451 				 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1452 			if (state == SCE_H_ASPAT) {
1453 				aspScript = segIsScriptingIndicator(styler,
1454 				                                    styler.GetStartSegment(), i - 1, aspScript);
1455 			}
1456 			if (state == SCE_HP_WORD) {
1457 				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1458 			} else {
1459 				styler.ColourTo(i - 1, StateToPrint);
1460 			}
1461 			i += 1;
1462 			visibleChars += 1;
1463 			styler.ColourTo(i, SCE_H_ASP);
1464 			state = beforePreProc;
1465 			if (inScriptType == eNonHtmlScriptPreProc)
1466 				inScriptType = eNonHtmlScript;
1467 			else
1468 				inScriptType = eHtml;
1469 			scriptLanguage = beforeLanguage;
1470 			continue;
1471 		}
1472 
1473 		// handle the end of a pre-processor = Non-HTML
1474 		else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1475 				  (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1476 				  (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1477 		         ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1478 			if (state == SCE_H_ASPAT) {
1479 				aspScript = segIsScriptingIndicator(styler,
1480 				                                    styler.GetStartSegment(), i - 1, aspScript);
1481 			}
1482 			// Bounce out of any ASP mode
1483 			switch (state) {
1484 			case SCE_HJ_WORD:
1485 				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1486 				break;
1487 			case SCE_HB_WORD:
1488 				classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1489 				break;
1490 			case SCE_HP_WORD:
1491 				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1492 				break;
1493 			case SCE_HPHP_WORD:
1494 				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1495 				break;
1496 			case SCE_H_XCCOMMENT:
1497 				styler.ColourTo(i - 1, state);
1498 				break;
1499 			default :
1500 				styler.ColourTo(i - 1, StateToPrint);
1501 				break;
1502 			}
1503 			if (scriptLanguage != eScriptSGML) {
1504 				i++;
1505 				visibleChars++;
1506 			}
1507 			if (ch == '%')
1508 				styler.ColourTo(i, SCE_H_ASP);
1509 			else if (scriptLanguage == eScriptXML)
1510 				styler.ColourTo(i, SCE_H_XMLEND);
1511 			else if (scriptLanguage == eScriptSGML)
1512 				styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1513 			else
1514 				styler.ColourTo(i, SCE_H_QUESTION);
1515 			state = beforePreProc;
1516 			if (inScriptType == eNonHtmlScriptPreProc)
1517 				inScriptType = eNonHtmlScript;
1518 			else
1519 				inScriptType = eHtml;
1520 			// Unfold all scripting languages, except for XML tag
1521 			if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1522 				levelCurrent--;
1523 			}
1524 			scriptLanguage = beforeLanguage;
1525 			continue;
1526 		}
1527 		/////////////////////////////////////
1528 
1529 		switch (state) {
1530 		case SCE_H_DEFAULT:
1531 			if (ch == '<') {
1532 				// in HTML, fold on tag open and unfold on tag close
1533 				tagOpened = true;
1534 				tagClosing = (chNext == '/');
1535 				styler.ColourTo(i - 1, StateToPrint);
1536 				if (chNext != '!')
1537 					state = SCE_H_TAGUNKNOWN;
1538 			} else if (ch == '&') {
1539 				styler.ColourTo(i - 1, SCE_H_DEFAULT);
1540 				state = SCE_H_ENTITY;
1541 			}
1542 			break;
1543 		case SCE_H_SGML_DEFAULT:
1544 		case SCE_H_SGML_BLOCK_DEFAULT:
1545 //			if (scriptLanguage == eScriptSGMLblock)
1546 //				StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1547 
1548 			if (ch == '\"') {
1549 				styler.ColourTo(i - 1, StateToPrint);
1550 				state = SCE_H_SGML_DOUBLESTRING;
1551 			} else if (ch == '\'') {
1552 				styler.ColourTo(i - 1, StateToPrint);
1553 				state = SCE_H_SGML_SIMPLESTRING;
1554 			} else if ((ch == '-') && (chPrev == '-')) {
1555 				if (static_cast<Sci_Position>(styler.GetStartSegment()) <= (i - 2)) {
1556 					styler.ColourTo(i - 2, StateToPrint);
1557 				}
1558 				state = SCE_H_SGML_COMMENT;
1559 			} else if (IsASCII(ch) && isalpha(ch) && (chPrev == '%')) {
1560 				styler.ColourTo(i - 2, StateToPrint);
1561 				state = SCE_H_SGML_ENTITY;
1562 			} else if (ch == '#') {
1563 				styler.ColourTo(i - 1, StateToPrint);
1564 				state = SCE_H_SGML_SPECIAL;
1565 			} else if (ch == '[') {
1566 				styler.ColourTo(i - 1, StateToPrint);
1567 				scriptLanguage = eScriptSGMLblock;
1568 				state = SCE_H_SGML_BLOCK_DEFAULT;
1569 			} else if (ch == ']') {
1570 				if (scriptLanguage == eScriptSGMLblock) {
1571 					styler.ColourTo(i, StateToPrint);
1572 					scriptLanguage = eScriptSGML;
1573 				} else {
1574 					styler.ColourTo(i - 1, StateToPrint);
1575 					styler.ColourTo(i, SCE_H_SGML_ERROR);
1576 				}
1577 				state = SCE_H_SGML_DEFAULT;
1578 			} else if (scriptLanguage == eScriptSGMLblock) {
1579 				if ((ch == '!') && (chPrev == '<')) {
1580 					styler.ColourTo(i - 2, StateToPrint);
1581 					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1582 					state = SCE_H_SGML_COMMAND;
1583 				} else if (ch == '>') {
1584 					styler.ColourTo(i - 1, StateToPrint);
1585 					styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1586 				}
1587 			}
1588 			break;
1589 		case SCE_H_SGML_COMMAND:
1590 			if ((ch == '-') && (chPrev == '-')) {
1591 				styler.ColourTo(i - 2, StateToPrint);
1592 				state = SCE_H_SGML_COMMENT;
1593 			} else if (!issgmlwordchar(ch)) {
1594 				if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1595 					styler.ColourTo(i - 1, StateToPrint);
1596 					state = SCE_H_SGML_1ST_PARAM;
1597 				} else {
1598 					state = SCE_H_SGML_ERROR;
1599 				}
1600 			}
1601 			break;
1602 		case SCE_H_SGML_1ST_PARAM:
1603 			// wait for the beginning of the word
1604 			if ((ch == '-') && (chPrev == '-')) {
1605 				if (scriptLanguage == eScriptSGMLblock) {
1606 					styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1607 				} else {
1608 					styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1609 				}
1610 				state = SCE_H_SGML_1ST_PARAM_COMMENT;
1611 			} else if (issgmlwordchar(ch)) {
1612 				if (scriptLanguage == eScriptSGMLblock) {
1613 					styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1614 				} else {
1615 					styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1616 				}
1617 				// find the length of the word
1618 				int size = 1;
1619 				while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1620 					size++;
1621 				styler.ColourTo(i + size - 1, StateToPrint);
1622 				i += size - 1;
1623 				visibleChars += size - 1;
1624 				ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1625 				if (scriptLanguage == eScriptSGMLblock) {
1626 					state = SCE_H_SGML_BLOCK_DEFAULT;
1627 				} else {
1628 					state = SCE_H_SGML_DEFAULT;
1629 				}
1630 				continue;
1631 			}
1632 			break;
1633 		case SCE_H_SGML_ERROR:
1634 			if ((ch == '-') && (chPrev == '-')) {
1635 				styler.ColourTo(i - 2, StateToPrint);
1636 				state = SCE_H_SGML_COMMENT;
1637 			}
1638 			break;
1639 		case SCE_H_SGML_DOUBLESTRING:
1640 			if (ch == '\"') {
1641 				styler.ColourTo(i, StateToPrint);
1642 				state = SCE_H_SGML_DEFAULT;
1643 			}
1644 			break;
1645 		case SCE_H_SGML_SIMPLESTRING:
1646 			if (ch == '\'') {
1647 				styler.ColourTo(i, StateToPrint);
1648 				state = SCE_H_SGML_DEFAULT;
1649 			}
1650 			break;
1651 		case SCE_H_SGML_COMMENT:
1652 			if ((ch == '-') && (chPrev == '-')) {
1653 				styler.ColourTo(i, StateToPrint);
1654 				state = SCE_H_SGML_DEFAULT;
1655 			}
1656 			break;
1657 		case SCE_H_CDATA:
1658 			if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1659 				styler.ColourTo(i, StateToPrint);
1660 				state = SCE_H_DEFAULT;
1661 				levelCurrent--;
1662 			}
1663 			break;
1664 		case SCE_H_COMMENT:
1665 			if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1666 				styler.ColourTo(i, StateToPrint);
1667 				state = SCE_H_DEFAULT;
1668 				levelCurrent--;
1669 			}
1670 			break;
1671 		case SCE_H_SGML_1ST_PARAM_COMMENT:
1672 			if ((ch == '-') && (chPrev == '-')) {
1673 				styler.ColourTo(i, SCE_H_SGML_COMMENT);
1674 				state = SCE_H_SGML_1ST_PARAM;
1675 			}
1676 			break;
1677 		case SCE_H_SGML_SPECIAL:
1678 			if (!(IsASCII(ch) && isupper(ch))) {
1679 				styler.ColourTo(i - 1, StateToPrint);
1680 				if (isalnum(ch)) {
1681 					state = SCE_H_SGML_ERROR;
1682 				} else {
1683 					state = SCE_H_SGML_DEFAULT;
1684 				}
1685 			}
1686 			break;
1687 		case SCE_H_SGML_ENTITY:
1688 			if (ch == ';') {
1689 				styler.ColourTo(i, StateToPrint);
1690 				state = SCE_H_SGML_DEFAULT;
1691 			} else if (!(IsASCII(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1692 				styler.ColourTo(i, SCE_H_SGML_ERROR);
1693 				state = SCE_H_SGML_DEFAULT;
1694 			}
1695 			break;
1696 		case SCE_H_ENTITY:
1697 			if (ch == ';') {
1698 				styler.ColourTo(i, StateToPrint);
1699 				state = SCE_H_DEFAULT;
1700 			}
1701 			if (ch != '#' && !(IsASCII(ch) && isalnum(ch))	// Should check that '#' follows '&', but it is unlikely anyway...
1702 				&& ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1703 				if (!IsASCII(ch))	// Possibly start of a multibyte character so don't allow this byte to be in entity style
1704 					styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1705 				else
1706 					styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1707 				state = SCE_H_DEFAULT;
1708 			}
1709 			break;
1710 		case SCE_H_TAGUNKNOWN:
1711 			if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1712 				int eClass = classifyTagHTML(styler.GetStartSegment(),
1713 					i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts, nonFoldingTags);
1714 				if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1715 					if (!tagClosing) {
1716 						inScriptType = eNonHtmlScript;
1717 						scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1718 					} else {
1719 						scriptLanguage = eScriptNone;
1720 					}
1721 					eClass = SCE_H_TAG;
1722 				}
1723 				if (ch == '>') {
1724 					styler.ColourTo(i, eClass);
1725 					if (inScriptType == eNonHtmlScript) {
1726 						state = StateForScript(scriptLanguage);
1727 					} else {
1728 						state = SCE_H_DEFAULT;
1729 					}
1730 					tagOpened = false;
1731 					if (!tagDontFold) {
1732 						if (tagClosing) {
1733 							levelCurrent--;
1734 						} else {
1735 							levelCurrent++;
1736 						}
1737 					}
1738 					tagClosing = false;
1739 				} else if (ch == '/' && chNext == '>') {
1740 					if (eClass == SCE_H_TAGUNKNOWN) {
1741 						styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1742 					} else {
1743 						styler.ColourTo(i - 1, StateToPrint);
1744 						styler.ColourTo(i + 1, SCE_H_TAGEND);
1745 					}
1746 					i++;
1747 					ch = chNext;
1748 					state = SCE_H_DEFAULT;
1749 					tagOpened = false;
1750 				} else {
1751 					if (eClass != SCE_H_TAGUNKNOWN) {
1752 						if (eClass == SCE_H_SGML_DEFAULT) {
1753 							state = SCE_H_SGML_DEFAULT;
1754 						} else {
1755 							state = SCE_H_OTHER;
1756 						}
1757 					}
1758 				}
1759 			}
1760 			break;
1761 		case SCE_H_ATTRIBUTE:
1762 			if (!setAttributeContinue.Contains(ch)) {
1763 				if (inScriptType == eNonHtmlScript) {
1764 					const int scriptLanguagePrev = scriptLanguage;
1765 					clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1766 					scriptLanguage = clientScript;
1767 					if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1768 						inScriptType = eHtml;
1769 				}
1770 				classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1771 				if (ch == '>') {
1772 					styler.ColourTo(i, SCE_H_TAG);
1773 					if (inScriptType == eNonHtmlScript) {
1774 						state = StateForScript(scriptLanguage);
1775 					} else {
1776 						state = SCE_H_DEFAULT;
1777 					}
1778 					tagOpened = false;
1779 					if (!tagDontFold) {
1780 						if (tagClosing) {
1781 							levelCurrent--;
1782 						} else {
1783 							levelCurrent++;
1784 						}
1785 					}
1786 					tagClosing = false;
1787 				} else if (ch == '=') {
1788 					styler.ColourTo(i, SCE_H_OTHER);
1789 					state = SCE_H_VALUE;
1790 				} else {
1791 					state = SCE_H_OTHER;
1792 				}
1793 			}
1794 			break;
1795 		case SCE_H_OTHER:
1796 			if (ch == '>') {
1797 				styler.ColourTo(i - 1, StateToPrint);
1798 				styler.ColourTo(i, SCE_H_TAG);
1799 				if (inScriptType == eNonHtmlScript) {
1800 					state = StateForScript(scriptLanguage);
1801 				} else {
1802 					state = SCE_H_DEFAULT;
1803 				}
1804 				tagOpened = false;
1805 				if (!tagDontFold) {
1806 					if (tagClosing) {
1807 						levelCurrent--;
1808 					} else {
1809 						levelCurrent++;
1810 					}
1811 				}
1812 				tagClosing = false;
1813 			} else if (ch == '\"') {
1814 				styler.ColourTo(i - 1, StateToPrint);
1815 				state = SCE_H_DOUBLESTRING;
1816 			} else if (ch == '\'') {
1817 				styler.ColourTo(i - 1, StateToPrint);
1818 				state = SCE_H_SINGLESTRING;
1819 			} else if (ch == '=') {
1820 				styler.ColourTo(i, StateToPrint);
1821 				state = SCE_H_VALUE;
1822 			} else if (ch == '/' && chNext == '>') {
1823 				styler.ColourTo(i - 1, StateToPrint);
1824 				styler.ColourTo(i + 1, SCE_H_TAGEND);
1825 				i++;
1826 				ch = chNext;
1827 				state = SCE_H_DEFAULT;
1828 				tagOpened = false;
1829 			} else if (ch == '?' && chNext == '>') {
1830 				styler.ColourTo(i - 1, StateToPrint);
1831 				styler.ColourTo(i + 1, SCE_H_XMLEND);
1832 				i++;
1833 				ch = chNext;
1834 				state = SCE_H_DEFAULT;
1835 			} else if (setHTMLWord.Contains(ch)) {
1836 				styler.ColourTo(i - 1, StateToPrint);
1837 				state = SCE_H_ATTRIBUTE;
1838 			}
1839 			break;
1840 		case SCE_H_DOUBLESTRING:
1841 			if (ch == '\"') {
1842 				if (inScriptType == eNonHtmlScript) {
1843 					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1844 				}
1845 				styler.ColourTo(i, SCE_H_DOUBLESTRING);
1846 				state = SCE_H_OTHER;
1847 			}
1848 			break;
1849 		case SCE_H_SINGLESTRING:
1850 			if (ch == '\'') {
1851 				if (inScriptType == eNonHtmlScript) {
1852 					scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1853 				}
1854 				styler.ColourTo(i, SCE_H_SINGLESTRING);
1855 				state = SCE_H_OTHER;
1856 			}
1857 			break;
1858 		case SCE_H_VALUE:
1859 			if (!setHTMLWord.Contains(ch)) {
1860 				if (ch == '\"' && chPrev == '=') {
1861 					// Should really test for being first character
1862 					state = SCE_H_DOUBLESTRING;
1863 				} else if (ch == '\'' && chPrev == '=') {
1864 					state = SCE_H_SINGLESTRING;
1865 				} else {
1866 					if (IsNumber(styler.GetStartSegment(), styler)) {
1867 						styler.ColourTo(i - 1, SCE_H_NUMBER);
1868 					} else {
1869 						styler.ColourTo(i - 1, StateToPrint);
1870 					}
1871 					if (ch == '>') {
1872 						styler.ColourTo(i, SCE_H_TAG);
1873 						if (inScriptType == eNonHtmlScript) {
1874 							state = StateForScript(scriptLanguage);
1875 						} else {
1876 							state = SCE_H_DEFAULT;
1877 						}
1878 						tagOpened = false;
1879 						if (!tagDontFold) {
1880 							if (tagClosing) {
1881 								levelCurrent--;
1882 							} else {
1883 								levelCurrent++;
1884 							}
1885 						}
1886 						tagClosing = false;
1887 					} else {
1888 						state = SCE_H_OTHER;
1889 					}
1890 				}
1891 			}
1892 			break;
1893 		case SCE_HJ_DEFAULT:
1894 		case SCE_HJ_START:
1895 		case SCE_HJ_SYMBOLS:
1896 			if (IsAWordStart(ch)) {
1897 				styler.ColourTo(i - 1, StateToPrint);
1898 				state = SCE_HJ_WORD;
1899 			} else if (ch == '/' && chNext == '*') {
1900 				styler.ColourTo(i - 1, StateToPrint);
1901 				if (chNext2 == '*')
1902 					state = SCE_HJ_COMMENTDOC;
1903 				else
1904 					state = SCE_HJ_COMMENT;
1905 				if (chNext2 == '/') {
1906 					// Eat the * so it isn't used for the end of the comment
1907 					i++;
1908 				}
1909 			} else if (ch == '/' && chNext == '/') {
1910 				styler.ColourTo(i - 1, StateToPrint);
1911 				state = SCE_HJ_COMMENTLINE;
1912 			} else if (ch == '/' && setOKBeforeJSRE.Contains(chPrevNonWhite)) {
1913 				styler.ColourTo(i - 1, StateToPrint);
1914 				state = SCE_HJ_REGEX;
1915 			} else if (ch == '\"') {
1916 				styler.ColourTo(i - 1, StateToPrint);
1917 				state = SCE_HJ_DOUBLESTRING;
1918 			} else if (ch == '\'') {
1919 				styler.ColourTo(i - 1, StateToPrint);
1920 				state = SCE_HJ_SINGLESTRING;
1921 			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1922 			           styler.SafeGetCharAt(i + 3) == '-') {
1923 				styler.ColourTo(i - 1, StateToPrint);
1924 				state = SCE_HJ_COMMENTLINE;
1925 			} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1926 				styler.ColourTo(i - 1, StateToPrint);
1927 				state = SCE_HJ_COMMENTLINE;
1928 				i += 2;
1929 			} else if (IsOperator(ch)) {
1930 				styler.ColourTo(i - 1, StateToPrint);
1931 				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1932 				state = SCE_HJ_DEFAULT;
1933 			} else if ((ch == ' ') || (ch == '\t')) {
1934 				if (state == SCE_HJ_START) {
1935 					styler.ColourTo(i - 1, StateToPrint);
1936 					state = SCE_HJ_DEFAULT;
1937 				}
1938 			}
1939 			break;
1940 		case SCE_HJ_WORD:
1941 			if (!IsAWordChar(ch)) {
1942 				classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1943 				//styler.ColourTo(i - 1, eHTJSKeyword);
1944 				state = SCE_HJ_DEFAULT;
1945 				if (ch == '/' && chNext == '*') {
1946 					if (chNext2 == '*')
1947 						state = SCE_HJ_COMMENTDOC;
1948 					else
1949 						state = SCE_HJ_COMMENT;
1950 				} else if (ch == '/' && chNext == '/') {
1951 					state = SCE_HJ_COMMENTLINE;
1952 				} else if (ch == '\"') {
1953 					state = SCE_HJ_DOUBLESTRING;
1954 				} else if (ch == '\'') {
1955 					state = SCE_HJ_SINGLESTRING;
1956 				} else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1957 					styler.ColourTo(i - 1, StateToPrint);
1958 					state = SCE_HJ_COMMENTLINE;
1959 					i += 2;
1960 				} else if (IsOperator(ch)) {
1961 					styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1962 					state = SCE_HJ_DEFAULT;
1963 				}
1964 			}
1965 			break;
1966 		case SCE_HJ_COMMENT:
1967 		case SCE_HJ_COMMENTDOC:
1968 			if (ch == '/' && chPrev == '*') {
1969 				styler.ColourTo(i, StateToPrint);
1970 				state = SCE_HJ_DEFAULT;
1971 				ch = ' ';
1972 			}
1973 			break;
1974 		case SCE_HJ_COMMENTLINE:
1975 			if (ch == '\r' || ch == '\n') {
1976 				styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
1977 				state = SCE_HJ_DEFAULT;
1978 				ch = ' ';
1979 			}
1980 			break;
1981 		case SCE_HJ_DOUBLESTRING:
1982 			if (ch == '\\') {
1983 				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
1984 					i++;
1985 				}
1986 			} else if (ch == '\"') {
1987 				styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
1988 				state = SCE_HJ_DEFAULT;
1989 			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1990 				styler.ColourTo(i - 1, StateToPrint);
1991 				state = SCE_HJ_COMMENTLINE;
1992 				i += 2;
1993 			} else if (isLineEnd(ch)) {
1994 				styler.ColourTo(i - 1, StateToPrint);
1995 				state = SCE_HJ_STRINGEOL;
1996 			}
1997 			break;
1998 		case SCE_HJ_SINGLESTRING:
1999 			if (ch == '\\') {
2000 				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2001 					i++;
2002 				}
2003 			} else if (ch == '\'') {
2004 				styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
2005 				state = SCE_HJ_DEFAULT;
2006 			} else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
2007 				styler.ColourTo(i - 1, StateToPrint);
2008 				state = SCE_HJ_COMMENTLINE;
2009 				i += 2;
2010 			} else if (isLineEnd(ch)) {
2011 				styler.ColourTo(i - 1, StateToPrint);
2012 				if (chPrev != '\\' && (chPrev2 != '\\' || chPrev != '\r' || ch != '\n')) {
2013 					state = SCE_HJ_STRINGEOL;
2014 				}
2015 			}
2016 			break;
2017 		case SCE_HJ_STRINGEOL:
2018 			if (!isLineEnd(ch)) {
2019 				styler.ColourTo(i - 1, StateToPrint);
2020 				state = SCE_HJ_DEFAULT;
2021 			} else if (!isLineEnd(chNext)) {
2022 				styler.ColourTo(i, StateToPrint);
2023 				state = SCE_HJ_DEFAULT;
2024 			}
2025 			break;
2026 		case SCE_HJ_REGEX:
2027 			if (ch == '\r' || ch == '\n' || ch == '/') {
2028 				if (ch == '/') {
2029 					while (IsASCII(chNext) && islower(chNext)) {   // gobble regex flags
2030 						i++;
2031 						ch = chNext;
2032 						chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2033 					}
2034 				}
2035 				styler.ColourTo(i, StateToPrint);
2036 				state = SCE_HJ_DEFAULT;
2037 			} else if (ch == '\\') {
2038 				// Gobble up the quoted character
2039 				if (chNext == '\\' || chNext == '/') {
2040 					i++;
2041 					ch = chNext;
2042 					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2043 				}
2044 			}
2045 			break;
2046 		case SCE_HB_DEFAULT:
2047 		case SCE_HB_START:
2048 			if (IsAWordStart(ch)) {
2049 				styler.ColourTo(i - 1, StateToPrint);
2050 				state = SCE_HB_WORD;
2051 			} else if (ch == '\'') {
2052 				styler.ColourTo(i - 1, StateToPrint);
2053 				state = SCE_HB_COMMENTLINE;
2054 			} else if (ch == '\"') {
2055 				styler.ColourTo(i - 1, StateToPrint);
2056 				state = SCE_HB_STRING;
2057 			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2058 			           styler.SafeGetCharAt(i + 3) == '-') {
2059 				styler.ColourTo(i - 1, StateToPrint);
2060 				state = SCE_HB_COMMENTLINE;
2061 			} else if (IsOperator(ch)) {
2062 				styler.ColourTo(i - 1, StateToPrint);
2063 				styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2064 				state = SCE_HB_DEFAULT;
2065 			} else if ((ch == ' ') || (ch == '\t')) {
2066 				if (state == SCE_HB_START) {
2067 					styler.ColourTo(i - 1, StateToPrint);
2068 					state = SCE_HB_DEFAULT;
2069 				}
2070 			}
2071 			break;
2072 		case SCE_HB_WORD:
2073 			if (!IsAWordChar(ch)) {
2074 				state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
2075 				if (state == SCE_HB_DEFAULT) {
2076 					if (ch == '\"') {
2077 						state = SCE_HB_STRING;
2078 					} else if (ch == '\'') {
2079 						state = SCE_HB_COMMENTLINE;
2080 					} else if (IsOperator(ch)) {
2081 						styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2082 						state = SCE_HB_DEFAULT;
2083 					}
2084 				}
2085 			}
2086 			break;
2087 		case SCE_HB_STRING:
2088 			if (ch == '\"') {
2089 				styler.ColourTo(i, StateToPrint);
2090 				state = SCE_HB_DEFAULT;
2091 			} else if (ch == '\r' || ch == '\n') {
2092 				styler.ColourTo(i - 1, StateToPrint);
2093 				state = SCE_HB_STRINGEOL;
2094 			}
2095 			break;
2096 		case SCE_HB_COMMENTLINE:
2097 			if (ch == '\r' || ch == '\n') {
2098 				styler.ColourTo(i - 1, StateToPrint);
2099 				state = SCE_HB_DEFAULT;
2100 			}
2101 			break;
2102 		case SCE_HB_STRINGEOL:
2103 			if (!isLineEnd(ch)) {
2104 				styler.ColourTo(i - 1, StateToPrint);
2105 				state = SCE_HB_DEFAULT;
2106 			} else if (!isLineEnd(chNext)) {
2107 				styler.ColourTo(i, StateToPrint);
2108 				state = SCE_HB_DEFAULT;
2109 			}
2110 			break;
2111 		case SCE_HP_DEFAULT:
2112 		case SCE_HP_START:
2113 			if (IsAWordStart(ch)) {
2114 				styler.ColourTo(i - 1, StateToPrint);
2115 				state = SCE_HP_WORD;
2116 			} else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2117 			           styler.SafeGetCharAt(i + 3) == '-') {
2118 				styler.ColourTo(i - 1, StateToPrint);
2119 				state = SCE_HP_COMMENTLINE;
2120 			} else if (ch == '#') {
2121 				styler.ColourTo(i - 1, StateToPrint);
2122 				state = SCE_HP_COMMENTLINE;
2123 			} else if (ch == '\"') {
2124 				styler.ColourTo(i - 1, StateToPrint);
2125 				if (chNext == '\"' && chNext2 == '\"') {
2126 					i += 2;
2127 					state = SCE_HP_TRIPLEDOUBLE;
2128 					ch = ' ';
2129 					chPrev = ' ';
2130 					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2131 				} else {
2132 					//					state = statePrintForState(SCE_HP_STRING,inScriptType);
2133 					state = SCE_HP_STRING;
2134 				}
2135 			} else if (ch == '\'') {
2136 				styler.ColourTo(i - 1, StateToPrint);
2137 				if (chNext == '\'' && chNext2 == '\'') {
2138 					i += 2;
2139 					state = SCE_HP_TRIPLE;
2140 					ch = ' ';
2141 					chPrev = ' ';
2142 					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2143 				} else {
2144 					state = SCE_HP_CHARACTER;
2145 				}
2146 			} else if (IsOperator(ch)) {
2147 				styler.ColourTo(i - 1, StateToPrint);
2148 				styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2149 			} else if ((ch == ' ') || (ch == '\t')) {
2150 				if (state == SCE_HP_START) {
2151 					styler.ColourTo(i - 1, StateToPrint);
2152 					state = SCE_HP_DEFAULT;
2153 				}
2154 			}
2155 			break;
2156 		case SCE_HP_WORD:
2157 			if (!IsAWordChar(ch)) {
2158 				classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
2159 				state = SCE_HP_DEFAULT;
2160 				if (ch == '#') {
2161 					state = SCE_HP_COMMENTLINE;
2162 				} else if (ch == '\"') {
2163 					if (chNext == '\"' && chNext2 == '\"') {
2164 						i += 2;
2165 						state = SCE_HP_TRIPLEDOUBLE;
2166 						ch = ' ';
2167 						chPrev = ' ';
2168 						chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2169 					} else {
2170 						state = SCE_HP_STRING;
2171 					}
2172 				} else if (ch == '\'') {
2173 					if (chNext == '\'' && chNext2 == '\'') {
2174 						i += 2;
2175 						state = SCE_HP_TRIPLE;
2176 						ch = ' ';
2177 						chPrev = ' ';
2178 						chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2179 					} else {
2180 						state = SCE_HP_CHARACTER;
2181 					}
2182 				} else if (IsOperator(ch)) {
2183 					styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2184 				}
2185 			}
2186 			break;
2187 		case SCE_HP_COMMENTLINE:
2188 			if (ch == '\r' || ch == '\n') {
2189 				styler.ColourTo(i - 1, StateToPrint);
2190 				state = SCE_HP_DEFAULT;
2191 			}
2192 			break;
2193 		case SCE_HP_STRING:
2194 			if (ch == '\\') {
2195 				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2196 					i++;
2197 					ch = chNext;
2198 					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2199 				}
2200 			} else if (ch == '\"') {
2201 				styler.ColourTo(i, StateToPrint);
2202 				state = SCE_HP_DEFAULT;
2203 			}
2204 			break;
2205 		case SCE_HP_CHARACTER:
2206 			if (ch == '\\') {
2207 				if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2208 					i++;
2209 					ch = chNext;
2210 					chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2211 				}
2212 			} else if (ch == '\'') {
2213 				styler.ColourTo(i, StateToPrint);
2214 				state = SCE_HP_DEFAULT;
2215 			}
2216 			break;
2217 		case SCE_HP_TRIPLE:
2218 			if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
2219 				styler.ColourTo(i, StateToPrint);
2220 				state = SCE_HP_DEFAULT;
2221 			}
2222 			break;
2223 		case SCE_HP_TRIPLEDOUBLE:
2224 			if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
2225 				styler.ColourTo(i, StateToPrint);
2226 				state = SCE_HP_DEFAULT;
2227 			}
2228 			break;
2229 			///////////// start - PHP state handling
2230 		case SCE_HPHP_WORD:
2231 			if (!IsAWordChar(ch)) {
2232 				classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
2233 				if (ch == '/' && chNext == '*') {
2234 					i++;
2235 					state = SCE_HPHP_COMMENT;
2236 				} else if (ch == '/' && chNext == '/') {
2237 					i++;
2238 					state = SCE_HPHP_COMMENTLINE;
2239 				} else if (ch == '#') {
2240 					state = SCE_HPHP_COMMENTLINE;
2241 				} else if (ch == '\"') {
2242 					state = SCE_HPHP_HSTRING;
2243 					phpStringDelimiter = "\"";
2244 				} else if (styler.Match(i, "<<<")) {
2245 					bool isSimpleString = false;
2246 					i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2247 					if (!phpStringDelimiter.empty()) {
2248 						state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2249 						if (foldHeredoc) levelCurrent++;
2250 					}
2251 				} else if (ch == '\'') {
2252 					state = SCE_HPHP_SIMPLESTRING;
2253 					phpStringDelimiter = "\'";
2254 				} else if (ch == '$' && IsPhpWordStart(chNext)) {
2255 					state = SCE_HPHP_VARIABLE;
2256 				} else if (IsOperator(ch)) {
2257 					state = SCE_HPHP_OPERATOR;
2258 				} else {
2259 					state = SCE_HPHP_DEFAULT;
2260 				}
2261 			}
2262 			break;
2263 		case SCE_HPHP_NUMBER:
2264 			// recognize bases 8,10 or 16 integers OR floating-point numbers
2265 			if (!IsADigit(ch)
2266 				&& strchr(".xXabcdefABCDEF", ch) == NULL
2267 				&& ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
2268 				styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
2269 				if (IsOperator(ch))
2270 					state = SCE_HPHP_OPERATOR;
2271 				else
2272 					state = SCE_HPHP_DEFAULT;
2273 			}
2274 			break;
2275 		case SCE_HPHP_VARIABLE:
2276 			if (!IsPhpWordChar(chNext)) {
2277 				styler.ColourTo(i, SCE_HPHP_VARIABLE);
2278 				state = SCE_HPHP_DEFAULT;
2279 			}
2280 			break;
2281 		case SCE_HPHP_COMMENT:
2282 			if (ch == '/' && chPrev == '*') {
2283 				styler.ColourTo(i, StateToPrint);
2284 				state = SCE_HPHP_DEFAULT;
2285 			}
2286 			break;
2287 		case SCE_HPHP_COMMENTLINE:
2288 			if (ch == '\r' || ch == '\n') {
2289 				styler.ColourTo(i - 1, StateToPrint);
2290 				state = SCE_HPHP_DEFAULT;
2291 			}
2292 			break;
2293 		case SCE_HPHP_HSTRING:
2294 			if (ch == '\\' && ((phpStringDelimiter == "\"") || chNext == '$' || chNext == '{')) {
2295 				// skip the next char
2296 				i++;
2297 			} else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
2298 				&& IsPhpWordStart(chNext2)) {
2299 				styler.ColourTo(i - 1, StateToPrint);
2300 				state = SCE_HPHP_COMPLEX_VARIABLE;
2301 			} else if (ch == '$' && IsPhpWordStart(chNext)) {
2302 				styler.ColourTo(i - 1, StateToPrint);
2303 				state = SCE_HPHP_HSTRING_VARIABLE;
2304 			} else if (styler.Match(i, phpStringDelimiter.c_str())) {
2305 				if (phpStringDelimiter == "\"") {
2306 					styler.ColourTo(i, StateToPrint);
2307 					state = SCE_HPHP_DEFAULT;
2308 				} else if (isLineEnd(chPrev)) {
2309 					const int psdLength = static_cast<int>(phpStringDelimiter.length());
2310 					const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
2311 					const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
2312 					if (isLineEnd(chAfterPsd) ||
2313 						(chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
2314 							i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2315 						styler.ColourTo(i, StateToPrint);
2316 						state = SCE_HPHP_DEFAULT;
2317 						if (foldHeredoc) levelCurrent--;
2318 					}
2319 				}
2320 			}
2321 			break;
2322 		case SCE_HPHP_SIMPLESTRING:
2323 			if (phpStringDelimiter == "\'") {
2324 				if (ch == '\\') {
2325 					// skip the next char
2326 					i++;
2327 				} else if (ch == '\'') {
2328 					styler.ColourTo(i, StateToPrint);
2329 					state = SCE_HPHP_DEFAULT;
2330 				}
2331 			} else if (isLineEnd(chPrev) && styler.Match(i, phpStringDelimiter.c_str())) {
2332 				const int psdLength = static_cast<int>(phpStringDelimiter.length());
2333 				const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
2334 				const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
2335 				if (isLineEnd(chAfterPsd) ||
2336 				(chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
2337 					i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2338 					styler.ColourTo(i, StateToPrint);
2339 					state = SCE_HPHP_DEFAULT;
2340 					if (foldHeredoc) levelCurrent--;
2341 				}
2342 			}
2343 			break;
2344 		case SCE_HPHP_HSTRING_VARIABLE:
2345 			if (!IsPhpWordChar(chNext)) {
2346 				styler.ColourTo(i, StateToPrint);
2347 				state = SCE_HPHP_HSTRING;
2348 			}
2349 			break;
2350 		case SCE_HPHP_COMPLEX_VARIABLE:
2351 			if (ch == '}') {
2352 				styler.ColourTo(i, StateToPrint);
2353 				state = SCE_HPHP_HSTRING;
2354 			}
2355 			break;
2356 		case SCE_HPHP_OPERATOR:
2357 		case SCE_HPHP_DEFAULT:
2358 			styler.ColourTo(i - 1, StateToPrint);
2359 			if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
2360 				state = SCE_HPHP_NUMBER;
2361 			} else if (IsAWordStart(ch)) {
2362 				state = SCE_HPHP_WORD;
2363 			} else if (ch == '/' && chNext == '*') {
2364 				i++;
2365 				state = SCE_HPHP_COMMENT;
2366 			} else if (ch == '/' && chNext == '/') {
2367 				i++;
2368 				state = SCE_HPHP_COMMENTLINE;
2369 			} else if (ch == '#') {
2370 				state = SCE_HPHP_COMMENTLINE;
2371 			} else if (ch == '\"') {
2372 				state = SCE_HPHP_HSTRING;
2373 				phpStringDelimiter = "\"";
2374 			} else if (styler.Match(i, "<<<")) {
2375 				bool isSimpleString = false;
2376 				i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2377 				if (!phpStringDelimiter.empty()) {
2378 					state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2379 					if (foldHeredoc) levelCurrent++;
2380 				}
2381 			} else if (ch == '\'') {
2382 				state = SCE_HPHP_SIMPLESTRING;
2383 				phpStringDelimiter = "\'";
2384 			} else if (ch == '$' && IsPhpWordStart(chNext)) {
2385 				state = SCE_HPHP_VARIABLE;
2386 			} else if (IsOperator(ch)) {
2387 				state = SCE_HPHP_OPERATOR;
2388 			} else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
2389 				state = SCE_HPHP_DEFAULT;
2390 			}
2391 			break;
2392 			///////////// end - PHP state handling
2393 		}
2394 
2395 		// Some of the above terminated their lexeme but since the same character starts
2396 		// the same class again, only reenter if non empty segment.
2397 
2398 		const bool nonEmptySegment = i >= static_cast<Sci_Position>(styler.GetStartSegment());
2399 		if (state == SCE_HB_DEFAULT) {    // One of the above succeeded
2400 			if ((ch == '\"') && (nonEmptySegment)) {
2401 				state = SCE_HB_STRING;
2402 			} else if (ch == '\'') {
2403 				state = SCE_HB_COMMENTLINE;
2404 			} else if (IsAWordStart(ch)) {
2405 				state = SCE_HB_WORD;
2406 			} else if (IsOperator(ch)) {
2407 				styler.ColourTo(i, SCE_HB_DEFAULT);
2408 			}
2409 		} else if (state == SCE_HBA_DEFAULT) {    // One of the above succeeded
2410 			if ((ch == '\"') && (nonEmptySegment)) {
2411 				state = SCE_HBA_STRING;
2412 			} else if (ch == '\'') {
2413 				state = SCE_HBA_COMMENTLINE;
2414 			} else if (IsAWordStart(ch)) {
2415 				state = SCE_HBA_WORD;
2416 			} else if (IsOperator(ch)) {
2417 				styler.ColourTo(i, SCE_HBA_DEFAULT);
2418 			}
2419 		} else if (state == SCE_HJ_DEFAULT) {    // One of the above succeeded
2420 			if (ch == '/' && chNext == '*') {
2421 				if (styler.SafeGetCharAt(i + 2) == '*')
2422 					state = SCE_HJ_COMMENTDOC;
2423 				else
2424 					state = SCE_HJ_COMMENT;
2425 			} else if (ch == '/' && chNext == '/') {
2426 				state = SCE_HJ_COMMENTLINE;
2427 			} else if ((ch == '\"') && (nonEmptySegment)) {
2428 				state = SCE_HJ_DOUBLESTRING;
2429 			} else if ((ch == '\'') && (nonEmptySegment)) {
2430 				state = SCE_HJ_SINGLESTRING;
2431 			} else if (IsAWordStart(ch)) {
2432 				state = SCE_HJ_WORD;
2433 			} else if (IsOperator(ch)) {
2434 				styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2435 			}
2436 		}
2437 	}
2438 
2439 	switch (state) {
2440 	case SCE_HJ_WORD:
2441 		classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
2442 		break;
2443 	case SCE_HB_WORD:
2444 		classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
2445 		break;
2446 	case SCE_HP_WORD:
2447 		classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType, isMako);
2448 		break;
2449 	case SCE_HPHP_WORD:
2450 		classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2451 		break;
2452 	default:
2453 		StateToPrint = statePrintForState(state, inScriptType);
2454 		if (static_cast<Sci_Position>(styler.GetStartSegment()) < lengthDoc)
2455 			styler.ColourTo(lengthDoc - 1, StateToPrint);
2456 		break;
2457 	}
2458 
2459 	// Fill in the real level of the next line, keeping the current flags as they will be filled in later
2460 	if (fold) {
2461 		const int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2462 		styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2463 	}
2464 	styler.Flush();
2465 }
2466 
// Module objects for the three lexers provided through LexerHTML.
// Each one is constructed from an SCLEX_* identifier, one of the static
// LexerHTML::LexerFactory* functions, a name string and a word list
// description array. The HTML and XML modules share htmlWordListDesc;
// the PHP script module uses phpscriptWordListDesc.
// NOTE(review): the name strings are presumably the lexer names clients use
// for lookup - confirm against LexerModule.
2467 LexerModule lmHTML(SCLEX_HTML, LexerHTML::LexerFactoryHTML, "hypertext", htmlWordListDesc);
2468 LexerModule lmXML(SCLEX_XML, LexerHTML::LexerFactoryXML, "xml", htmlWordListDesc);
2469 LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, LexerHTML::LexerFactoryPHPScript, "phpscript", phpscriptWordListDesc);
2470