1 // Scintilla source code edit control
2 /** @file LexHTML.cxx
3 ** Lexer for HTML.
4 **/
5 // Copyright 1998-2005 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7
8 #include <stdlib.h>
9 #include <string.h>
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <assert.h>
13 #include <ctype.h>
14 #include <string>
15 #include <map>
16 #include <set>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 #include "WordList.h"
22 #include "LexAccessor.h"
23 #include "Accessor.h"
24 #include "StyleContext.h"
25 #include "CharacterSet.h"
26 #include "LexerModule.h"
27 #include "OptionSet.h"
28 #include "DefaultLexer.h"
29
30 using namespace Scintilla;
31
32 namespace {
33
34 #define SCE_HA_JS (SCE_HJA_START - SCE_HJ_START)
35 #define SCE_HA_VBS (SCE_HBA_START - SCE_HB_START)
36 #define SCE_HA_PYTHON (SCE_HPA_START - SCE_HP_START)
37
// Language (or special markup section) that a run of text is lexed as.
// The numeric values are significant: asp.default.language is documented
// in terms of them (VBScript is 2, Python is 3).
enum script_type {
	eScriptNone = 0,
	eScriptJS,
	eScriptVBS,
	eScriptPython,
	eScriptPHP,
	eScriptXML,
	eScriptSGML,
	eScriptSGMLblock,
	eScriptComment
};
// Kind of region being lexed. statePrintForState() leaves script styles
// untouched for eNonHtmlScript and shifts them to the server-side style
// range for every other mode.
enum script_mode {
	eHtml = 0,
	eNonHtmlScript,
	eNonHtmlPreProc,
	eNonHtmlScriptPreProc
};
40
// Returns true when ch is an ASCII letter or digit, '.' or '_'.
// Anything outside 0..0x7F is rejected before isalnum() is consulted: a
// negative value (for example a high byte held in a signed char) must not
// be passed to isalnum(), whose behaviour is undefined for such arguments.
inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_';
}
44
// Returns true when ch may begin a word: an ASCII letter or digit, or '_'.
// Anything outside 0..0x7F is rejected before isalnum() is consulted: a
// negative value (for example a high byte held in a signed char) must not
// be passed to isalnum(), whose behaviour is undefined for such arguments.
inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_';
}
48
// Returns true when ch is one of the punctuation characters treated as an
// operator. Note that '.' IS part of the set. Letters and digits are never
// operators; no separate alphanumeric test is needed because none of them
// appears in the list below.
inline bool IsOperator(int ch) {
	switch (ch) {
	case '%': case '^': case '&': case '*':
	case '(': case ')': case '-': case '+':
	case '=': case '|': case '{': case '}':
	case '[': case ']': case ':': case ';':
	case '<': case '>': case ',': case '/':
	case '?': case '!': case '.': case '~':
		return true;
	default:
		return false;
	}
}
62
GetTextSegment(Accessor & styler,Sci_PositionU start,Sci_PositionU end,char * s,size_t len)63 void GetTextSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end, char *s, size_t len) {
64 Sci_PositionU i = 0;
65 for (; (i < end - start + 1) && (i < len-1); i++) {
66 s[i] = MakeLowerCase(styler[start + i]);
67 }
68 s[i] = '\0';
69 }
70
GetStringSegment(Accessor & styler,Sci_PositionU start,Sci_PositionU end)71 std::string GetStringSegment(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
72 std::string s;
73 Sci_PositionU i = 0;
74 for (; (i < end - start + 1); i++) {
75 s.push_back(MakeLowerCase(styler[start + i]));
76 }
77 return s;
78 }
79
GetNextWord(Accessor & styler,Sci_PositionU start)80 std::string GetNextWord(Accessor &styler, Sci_PositionU start) {
81 std::string ret;
82 Sci_PositionU i = 0;
83 for (; i < 200; i++) { // Put an upper limit to bound time taken for unexpected text.
84 const char ch = styler.SafeGetCharAt(start + i);
85 if ((i == 0) && !IsAWordStart(ch))
86 break;
87 if ((i > 0) && !IsAWordChar(ch))
88 break;
89 ret.push_back(ch);
90 }
91 return ret;
92 }
93
segIsScriptingIndicator(Accessor & styler,Sci_PositionU start,Sci_PositionU end,script_type prevValue)94 script_type segIsScriptingIndicator(Accessor &styler, Sci_PositionU start, Sci_PositionU end, script_type prevValue) {
95 char s[100];
96 GetTextSegment(styler, start, end, s, sizeof(s));
97 //Platform::DebugPrintf("Scripting indicator [%s]\n", s);
98 if (strstr(s, "src")) // External script
99 return eScriptNone;
100 if (strstr(s, "vbs"))
101 return eScriptVBS;
102 if (strstr(s, "pyth"))
103 return eScriptPython;
104 if (strstr(s, "javas"))
105 return eScriptJS;
106 if (strstr(s, "jscr"))
107 return eScriptJS;
108 if (strstr(s, "php"))
109 return eScriptPHP;
110 if (strstr(s, "xml")) {
111 const char *xml = strstr(s, "xml");
112 for (const char *t=s; t<xml; t++) {
113 if (!IsASpace(*t)) {
114 return prevValue;
115 }
116 }
117 return eScriptXML;
118 }
119
120 return prevValue;
121 }
122
PrintScriptingIndicatorOffset(Accessor & styler,Sci_PositionU start,Sci_PositionU end)123 int PrintScriptingIndicatorOffset(Accessor &styler, Sci_PositionU start, Sci_PositionU end) {
124 int iResult = 0;
125 std::string s = GetStringSegment(styler, start, end);
126 if (0 == strncmp(s.c_str(), "php", 3)) {
127 iResult = 3;
128 }
129 return iResult;
130 }
131
ScriptOfState(int state)132 script_type ScriptOfState(int state) {
133 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
134 return eScriptPython;
135 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
136 return eScriptVBS;
137 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
138 return eScriptJS;
139 } else if ((state >= SCE_HPHP_DEFAULT) && (state <= SCE_HPHP_COMMENTLINE)) {
140 return eScriptPHP;
141 } else if ((state >= SCE_H_SGML_DEFAULT) && (state < SCE_H_SGML_BLOCK_DEFAULT)) {
142 return eScriptSGML;
143 } else if (state == SCE_H_SGML_BLOCK_DEFAULT) {
144 return eScriptSGMLblock;
145 } else {
146 return eScriptNone;
147 }
148 }
149
statePrintForState(int state,script_mode inScriptType)150 int statePrintForState(int state, script_mode inScriptType) {
151 int StateToPrint = state;
152
153 if (state >= SCE_HJ_START) {
154 if ((state >= SCE_HP_START) && (state <= SCE_HP_IDENTIFIER)) {
155 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_PYTHON);
156 } else if ((state >= SCE_HB_START) && (state <= SCE_HB_STRINGEOL)) {
157 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_VBS);
158 } else if ((state >= SCE_HJ_START) && (state <= SCE_HJ_REGEX)) {
159 StateToPrint = state + ((inScriptType == eNonHtmlScript) ? 0 : SCE_HA_JS);
160 }
161 }
162
163 return StateToPrint;
164 }
165
stateForPrintState(int StateToPrint)166 int stateForPrintState(int StateToPrint) {
167 int state;
168
169 if ((StateToPrint >= SCE_HPA_START) && (StateToPrint <= SCE_HPA_IDENTIFIER)) {
170 state = StateToPrint - SCE_HA_PYTHON;
171 } else if ((StateToPrint >= SCE_HBA_START) && (StateToPrint <= SCE_HBA_STRINGEOL)) {
172 state = StateToPrint - SCE_HA_VBS;
173 } else if ((StateToPrint >= SCE_HJA_START) && (StateToPrint <= SCE_HJA_REGEX)) {
174 state = StateToPrint - SCE_HA_JS;
175 } else {
176 state = StateToPrint;
177 }
178
179 return state;
180 }
181
IsNumber(Sci_PositionU start,Accessor & styler)182 inline bool IsNumber(Sci_PositionU start, Accessor &styler) {
183 return IsADigit(styler[start]) || (styler[start] == '.') ||
184 (styler[start] == '-') || (styler[start] == '#');
185 }
186
isStringState(int state)187 inline bool isStringState(int state) {
188 bool bResult;
189
190 switch (state) {
191 case SCE_HJ_DOUBLESTRING:
192 case SCE_HJ_SINGLESTRING:
193 case SCE_HJA_DOUBLESTRING:
194 case SCE_HJA_SINGLESTRING:
195 case SCE_HB_STRING:
196 case SCE_HBA_STRING:
197 case SCE_HP_STRING:
198 case SCE_HP_CHARACTER:
199 case SCE_HP_TRIPLE:
200 case SCE_HP_TRIPLEDOUBLE:
201 case SCE_HPA_STRING:
202 case SCE_HPA_CHARACTER:
203 case SCE_HPA_TRIPLE:
204 case SCE_HPA_TRIPLEDOUBLE:
205 case SCE_HPHP_HSTRING:
206 case SCE_HPHP_SIMPLESTRING:
207 case SCE_HPHP_HSTRING_VARIABLE:
208 case SCE_HPHP_COMPLEX_VARIABLE:
209 bResult = true;
210 break;
211 default :
212 bResult = false;
213 break;
214 }
215 return bResult;
216 }
217
stateAllowsTermination(int state)218 inline bool stateAllowsTermination(int state) {
219 bool allowTermination = !isStringState(state);
220 if (allowTermination) {
221 switch (state) {
222 case SCE_HB_COMMENTLINE:
223 case SCE_HPHP_COMMENT:
224 case SCE_HP_COMMENTLINE:
225 case SCE_HPA_COMMENTLINE:
226 allowTermination = false;
227 }
228 }
229 return allowTermination;
230 }
231
232 // not really well done, since it's only comments that should lex the %> and <%
isCommentASPState(int state)233 inline bool isCommentASPState(int state) {
234 bool bResult;
235
236 switch (state) {
237 case SCE_HJ_COMMENT:
238 case SCE_HJ_COMMENTLINE:
239 case SCE_HJ_COMMENTDOC:
240 case SCE_HB_COMMENTLINE:
241 case SCE_HP_COMMENTLINE:
242 case SCE_HPHP_COMMENT:
243 case SCE_HPHP_COMMENTLINE:
244 bResult = true;
245 break;
246 default :
247 bResult = false;
248 break;
249 }
250 return bResult;
251 }
252
classifyAttribHTML(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler)253 void classifyAttribHTML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
254 const bool wordIsNumber = IsNumber(start, styler);
255 char chAttr = SCE_H_ATTRIBUTEUNKNOWN;
256 if (wordIsNumber) {
257 chAttr = SCE_H_NUMBER;
258 } else {
259 std::string s = GetStringSegment(styler, start, end);
260 if (keywords.InList(s.c_str()))
261 chAttr = SCE_H_ATTRIBUTE;
262 }
263 if ((chAttr == SCE_H_ATTRIBUTEUNKNOWN) && !keywords)
264 // No keywords -> all are known
265 chAttr = SCE_H_ATTRIBUTE;
266 styler.ColourTo(end, chAttr);
267 }
268
269 // https://html.spec.whatwg.org/multipage/custom-elements.html#custom-elements-core-concepts
isHTMLCustomElement(const std::string & tag)270 bool isHTMLCustomElement(const std::string &tag) {
271 // check valid HTML custom element name: starts with an ASCII lower alpha and contains hyphen.
272 // IsUpperOrLowerCase() is used for `html.tags.case.sensitive=1`.
273 if (tag.length() < 2 || !IsUpperOrLowerCase(tag[0])) {
274 return false;
275 }
276 if (tag.find('-') == std::string::npos) {
277 return false;
278 }
279 return true;
280 }
281
classifyTagHTML(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler,bool & tagDontFold,bool caseSensitive,bool isXml,bool allowScripts,const std::set<std::string> & nonFoldingTags)282 int classifyTagHTML(Sci_PositionU start, Sci_PositionU end,
283 const WordList &keywords, Accessor &styler, bool &tagDontFold,
284 bool caseSensitive, bool isXml, bool allowScripts,
285 const std::set<std::string> &nonFoldingTags) {
286 std::string tag;
287 // Copy after the '<' and stop before ' '
288 for (Sci_PositionU cPos = start; cPos <= end; cPos++) {
289 const char ch = styler[cPos];
290 if (IsASpace(ch)) {
291 break;
292 }
293 if ((ch != '<') && (ch != '/')) {
294 tag.push_back(caseSensitive ? ch : MakeLowerCase(ch));
295 }
296 }
297 // if the current language is XML, I can fold any tag
298 // if the current language is HTML, I don't want to fold certain tags (input, meta, etc.)
299 //...to find it in the list of no-container-tags
300 tagDontFold = (!isXml) && (nonFoldingTags.count(tag) > 0);
301 // No keywords -> all are known
302 char chAttr = SCE_H_TAGUNKNOWN;
303 if (!tag.empty() && (tag[0] == '!')) {
304 chAttr = SCE_H_SGML_DEFAULT;
305 } else if (!keywords || keywords.InList(tag.c_str())) {
306 chAttr = SCE_H_TAG;
307 } else if (!isXml && isHTMLCustomElement(tag)) {
308 chAttr = SCE_H_TAG;
309 }
310 if (chAttr != SCE_H_TAGUNKNOWN) {
311 styler.ColourTo(end, chAttr);
312 }
313 if (chAttr == SCE_H_TAG) {
314 if (allowScripts && (tag == "script")) {
315 // check to see if this is a self-closing tag by sniffing ahead
316 bool isSelfClose = false;
317 for (Sci_PositionU cPos = end; cPos <= end + 200; cPos++) {
318 const char ch = styler.SafeGetCharAt(cPos, '\0');
319 if (ch == '\0' || ch == '>')
320 break;
321 else if (ch == '/' && styler.SafeGetCharAt(cPos + 1, '\0') == '>') {
322 isSelfClose = true;
323 break;
324 }
325 }
326
327 // do not enter a script state if the tag self-closed
328 if (!isSelfClose)
329 chAttr = SCE_H_SCRIPT;
330 } else if (!isXml && (tag == "comment")) {
331 chAttr = SCE_H_COMMENT;
332 }
333 }
334 return chAttr;
335 }
336
classifyWordHTJS(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler,script_mode inScriptType)337 void classifyWordHTJS(Sci_PositionU start, Sci_PositionU end,
338 const WordList &keywords, Accessor &styler, script_mode inScriptType) {
339 char s[30 + 1];
340 Sci_PositionU i = 0;
341 for (; i < end - start + 1 && i < 30; i++) {
342 s[i] = styler[start + i];
343 }
344 s[i] = '\0';
345
346 char chAttr = SCE_HJ_WORD;
347 const bool wordIsNumber = IsADigit(s[0]) || ((s[0] == '.') && IsADigit(s[1]));
348 if (wordIsNumber) {
349 chAttr = SCE_HJ_NUMBER;
350 } else if (keywords.InList(s)) {
351 chAttr = SCE_HJ_KEYWORD;
352 }
353 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
354 }
355
classifyWordHTVB(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler,script_mode inScriptType)356 int classifyWordHTVB(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, script_mode inScriptType) {
357 char chAttr = SCE_HB_IDENTIFIER;
358 const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.');
359 if (wordIsNumber) {
360 chAttr = SCE_HB_NUMBER;
361 } else {
362 std::string s = GetStringSegment(styler, start, end);
363 if (keywords.InList(s.c_str())) {
364 chAttr = SCE_HB_WORD;
365 if (s == "rem")
366 chAttr = SCE_HB_COMMENTLINE;
367 }
368 }
369 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
370 if (chAttr == SCE_HB_COMMENTLINE)
371 return SCE_HB_COMMENTLINE;
372 else
373 return SCE_HB_DEFAULT;
374 }
375
classifyWordHTPy(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler,std::string & prevWord,script_mode inScriptType,bool isMako)376 void classifyWordHTPy(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler, std::string &prevWord, script_mode inScriptType, bool isMako) {
377 const bool wordIsNumber = IsADigit(styler[start]);
378 std::string s;
379 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
380 s.push_back(styler[start + i]);
381 }
382 char chAttr = SCE_HP_IDENTIFIER;
383 if (prevWord == "class")
384 chAttr = SCE_HP_CLASSNAME;
385 else if (prevWord == "def")
386 chAttr = SCE_HP_DEFNAME;
387 else if (wordIsNumber)
388 chAttr = SCE_HP_NUMBER;
389 else if (keywords.InList(s.c_str()))
390 chAttr = SCE_HP_WORD;
391 else if (isMako && (s == "block"))
392 chAttr = SCE_HP_WORD;
393 styler.ColourTo(end, statePrintForState(chAttr, inScriptType));
394 prevWord = s;
395 }
396
397 // Update the word colour to default or keyword
398 // Called when in a PHP word
classifyWordHTPHP(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler)399 void classifyWordHTPHP(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
400 char chAttr = SCE_HPHP_DEFAULT;
401 const bool wordIsNumber = IsADigit(styler[start]) || (styler[start] == '.' && start+1 <= end && IsADigit(styler[start+1]));
402 if (wordIsNumber) {
403 chAttr = SCE_HPHP_NUMBER;
404 } else {
405 std::string s = GetStringSegment(styler, start, end);
406 if (keywords.InList(s.c_str()))
407 chAttr = SCE_HPHP_WORD;
408 }
409 styler.ColourTo(end, chAttr);
410 }
411
isWordHSGML(Sci_PositionU start,Sci_PositionU end,const WordList & keywords,Accessor & styler)412 bool isWordHSGML(Sci_PositionU start, Sci_PositionU end, const WordList &keywords, Accessor &styler) {
413 std::string s;
414 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
415 s.push_back(styler[start + i]);
416 }
417 return keywords.InList(s.c_str());
418 }
419
isWordCdata(Sci_PositionU start,Sci_PositionU end,Accessor & styler)420 bool isWordCdata(Sci_PositionU start, Sci_PositionU end, Accessor &styler) {
421 std::string s;
422 for (Sci_PositionU i = 0; i < end - start + 1 && i < 30; i++) {
423 s.push_back(styler[start + i]);
424 }
425 return s == "[CDATA[";
426 }
427
428 // Return the first state to reach when entering a scripting language
StateForScript(script_type scriptLanguage)429 int StateForScript(script_type scriptLanguage) {
430 int Result;
431 switch (scriptLanguage) {
432 case eScriptVBS:
433 Result = SCE_HB_START;
434 break;
435 case eScriptPython:
436 Result = SCE_HP_START;
437 break;
438 case eScriptPHP:
439 Result = SCE_HPHP_DEFAULT;
440 break;
441 case eScriptXML:
442 Result = SCE_H_TAGUNKNOWN;
443 break;
444 case eScriptSGML:
445 Result = SCE_H_SGML_DEFAULT;
446 break;
447 case eScriptComment:
448 Result = SCE_H_COMMENT;
449 break;
450 default :
451 Result = SCE_HJ_START;
452 break;
453 }
454 return Result;
455 }
456
issgmlwordchar(int ch)457 inline bool issgmlwordchar(int ch) {
458 return !IsASCII(ch) ||
459 (isalnum(ch) || ch == '.' || ch == '_' || ch == ':' || ch == '!' || ch == '#' || ch == '[');
460 }
461
IsPhpWordStart(int ch)462 inline bool IsPhpWordStart(int ch) {
463 return (IsASCII(ch) && (isalpha(ch) || (ch == '_'))) || (ch >= 0x7f);
464 }
465
IsPhpWordChar(int ch)466 inline bool IsPhpWordChar(int ch) {
467 return IsADigit(ch) || IsPhpWordStart(ch);
468 }
469
InTagState(int state)470 bool InTagState(int state) {
471 return state == SCE_H_TAG || state == SCE_H_TAGUNKNOWN ||
472 state == SCE_H_SCRIPT ||
473 state == SCE_H_ATTRIBUTE || state == SCE_H_ATTRIBUTEUNKNOWN ||
474 state == SCE_H_NUMBER || state == SCE_H_OTHER ||
475 state == SCE_H_DOUBLESTRING || state == SCE_H_SINGLESTRING;
476 }
477
IsCommentState(const int state)478 bool IsCommentState(const int state) {
479 return state == SCE_H_COMMENT || state == SCE_H_SGML_COMMENT;
480 }
481
IsScriptCommentState(const int state)482 bool IsScriptCommentState(const int state) {
483 return state == SCE_HJ_COMMENT || state == SCE_HJ_COMMENTLINE || state == SCE_HJA_COMMENT ||
484 state == SCE_HJA_COMMENTLINE || state == SCE_HB_COMMENTLINE || state == SCE_HBA_COMMENTLINE;
485 }
486
// Returns true when ch is a carriage return or a line feed.
bool isLineEnd(int ch) {
	switch (ch) {
	case '\r':
	case '\n':
		return true;
	default:
		return false;
	}
}
490
isMakoBlockEnd(const int ch,const int chNext,const std::string & blockType)491 bool isMakoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
492 if (blockType.empty()) {
493 return ((ch == '%') && (chNext == '>'));
494 } else if ((blockType == "inherit") ||
495 (blockType == "namespace") ||
496 (blockType == "include") ||
497 (blockType == "page")) {
498 return ((ch == '/') && (chNext == '>'));
499 } else if (blockType == "%") {
500 if (ch == '/' && isLineEnd(chNext))
501 return true;
502 else
503 return isLineEnd(ch);
504 } else if (blockType == "{") {
505 return ch == '}';
506 } else {
507 return (ch == '>');
508 }
509 }
510
// Returns true when ch and chNext together terminate a Django block of the
// given type: "%}" for type "%", "}}" for type "{". Every other block
// type, including the empty one, never ends here.
bool isDjangoBlockEnd(const int ch, const int chNext, const std::string &blockType) {
	// Both recognised terminators finish with '}'.
	if (chNext != '}')
		return false;
	if (blockType == "%")
		return ch == '%';
	if (blockType == "{")
		return ch == '}';
	return false;
}
522
isPHPStringState(int state)523 bool isPHPStringState(int state) {
524 return
525 (state == SCE_HPHP_HSTRING) ||
526 (state == SCE_HPHP_SIMPLESTRING) ||
527 (state == SCE_HPHP_HSTRING_VARIABLE) ||
528 (state == SCE_HPHP_COMPLEX_VARIABLE);
529 }
530
// Reads a PHP string delimiter word starting at position i.
//
// Leading spaces and tabs are skipped. The word must begin with an
// IsPhpWordStart character and continue with IsPhpWordChar characters up to
// the end of the line (or of the document). It may instead be enclosed in
// single quotes, in which case isSimpleString is set to true and the
// closing quote must be directly followed by a line end.
//
// On success the word (without quotes) is stored in phpStringDelimiter and
// the position of the last character consumed is returned. On failure
// phpStringDelimiter is left empty and the position before the original i
// is returned.
//
// isSimpleString is only ever set to true here, never reset; the caller is
// expected to initialise it.
//
// Characters are converted through unsigned char before classification:
// IsPhpWordStart accepts values >= 0x7f, which a sign-extended high byte
// would never satisfy where plain char is signed, making the outcome
// depend on the platform.
Sci_Position FindPhpStringDelimiter(std::string &phpStringDelimiter, Sci_Position i, const Sci_Position lengthDoc, Accessor &styler, bool &isSimpleString) {
	Sci_Position j;
	const Sci_Position beginning = i - 1;
	bool isValidSimpleString = false;

	while (i < lengthDoc && (styler[i] == ' ' || styler[i] == '\t'))
		i++;
	char ch = styler.SafeGetCharAt(i);
	const char chNext = styler.SafeGetCharAt(i + 1);
	phpStringDelimiter.clear();
	if (!IsPhpWordStart(static_cast<unsigned char>(ch))) {
		if (ch == '\'' && IsPhpWordStart(static_cast<unsigned char>(chNext))) {
			// Quoted form: step over the opening quote.
			i++;
			ch = chNext;
			isSimpleString = true;
		} else {
			return beginning;
		}
	}
	phpStringDelimiter.push_back(ch);
	i++;
	for (j = i; j < lengthDoc && !isLineEnd(styler[j]); j++) {
		const char chCurrent = styler[j];
		if (!IsPhpWordChar(static_cast<unsigned char>(chCurrent))) {
			if (isSimpleString && (chCurrent == '\'') && isLineEnd(styler.SafeGetCharAt(j + 1))) {
				isValidSimpleString = true;
				// Step past the closing quote so that j - 1 below is its position.
				j++;
				break;
			} else {
				phpStringDelimiter.clear();
				return beginning;
			}
		}
		phpStringDelimiter.push_back(chCurrent);
	}
	if (isSimpleString && !isValidSimpleString) {
		// The opening quote was never matched by a closing one.
		phpStringDelimiter.clear();
		return beginning;
	}
	return j - 1;
}
571
572 // Options used for LexerHTML
573 struct OptionsHTML {
574 int aspDefaultLanguage = eScriptJS;
575 bool caseSensitive = false;
576 bool allowScripts = true;
577 bool isMako = false;
578 bool isDjango = false;
579 bool fold = false;
580 bool foldHTML = false;
581 bool foldHTMLPreprocessor = true;
582 bool foldCompact = true;
583 bool foldComment = false;
584 bool foldHeredoc = false;
585 bool foldXmlAtTagOpen = false;
OptionsHTML__anon61d637fe0111::OptionsHTML586 OptionsHTML() noexcept {
587 }
588 };
589
// Descriptions of the keyword lists, in list order, ended by a null
// pointer. Used by OptionSetHTML unless it is built for a PHP script.
const char * const htmlWordListDesc[] = {
	"HTML elements and attributes",
	"JavaScript keywords",
	"VBScript keywords",
	"Python keywords",
	"PHP keywords",
	"SGML and DTD keywords",
	nullptr,
};
599
// Descriptions of the keyword lists when OptionSetHTML is built for a PHP
// script: only the PHP list keeps a description, the other slots are
// present but unused. Ended by a null pointer.
const char * const phpscriptWordListDesc[] = {
	"", //Unused
	"", //Unused
	"", //Unused
	"", //Unused
	"PHP keywords",
	"", //Unused
	nullptr,
};
609
610 struct OptionSetHTML : public OptionSet<OptionsHTML> {
OptionSetHTML__anon61d637fe0111::OptionSetHTML611 OptionSetHTML(bool isPHPScript_) {
612
613 DefineProperty("asp.default.language", &OptionsHTML::aspDefaultLanguage,
614 "Script in ASP code is initially assumed to be in JavaScript. "
615 "To change this to VBScript set asp.default.language to 2. Python is 3.");
616
617 DefineProperty("html.tags.case.sensitive", &OptionsHTML::caseSensitive,
618 "For XML and HTML, setting this property to 1 will make tags match in a case "
619 "sensitive way which is the expected behaviour for XML and XHTML.");
620
621 DefineProperty("lexer.xml.allow.scripts", &OptionsHTML::allowScripts,
622 "Set to 0 to disable scripts in XML.");
623
624 DefineProperty("lexer.html.mako", &OptionsHTML::isMako,
625 "Set to 1 to enable the mako template language.");
626
627 DefineProperty("lexer.html.django", &OptionsHTML::isDjango,
628 "Set to 1 to enable the django template language.");
629
630 DefineProperty("fold", &OptionsHTML::fold);
631
632 DefineProperty("fold.html", &OptionsHTML::foldHTML,
633 "Folding is turned on or off for HTML and XML files with this option. "
634 "The fold option must also be on for folding to occur.");
635
636 DefineProperty("fold.html.preprocessor", &OptionsHTML::foldHTMLPreprocessor,
637 "Folding is turned on or off for scripts embedded in HTML files with this option. "
638 "The default is on.");
639
640 DefineProperty("fold.compact", &OptionsHTML::foldCompact);
641
642 DefineProperty("fold.hypertext.comment", &OptionsHTML::foldComment,
643 "Allow folding for comments in scripts embedded in HTML. "
644 "The default is off.");
645
646 DefineProperty("fold.hypertext.heredoc", &OptionsHTML::foldHeredoc,
647 "Allow folding for heredocs in scripts embedded in HTML. "
648 "The default is off.");
649
650 DefineProperty("fold.xml.at.tag.open", &OptionsHTML::foldXmlAtTagOpen,
651 "Enable folding for XML at the start of open tag. "
652 "The default is off.");
653
654 DefineWordListSets(isPHPScript_ ? phpscriptWordListDesc : htmlWordListDesc);
655 }
656 };
657
658 LexicalClass lexicalClassesHTML[] = {
659 // Lexer HTML SCLEX_HTML SCE_H_ SCE_HJ_ SCE_HJA_ SCE_HB_ SCE_HBA_ SCE_HP_ SCE_HPHP_ SCE_HPA_:
660 0, "SCE_H_DEFAULT", "default", "Text",
661 1, "SCE_H_TAG", "tag", "Tags",
662 2, "SCE_H_ERRORTAGUNKNOWN", "error tag", "Unknown Tags",
663 3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
664 4, "SCE_H_ATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
665 5, "SCE_H_NUMBER", "literal numeric", "Numbers",
666 6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
667 7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
668 8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
669 9, "SCE_H_COMMENT", "comment", "Comment",
670 10, "SCE_H_ENTITY", "literal", "Entities",
671 11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
672 12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
673 13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
674 14, "SCE_H_SCRIPT", "error", "Internal state which should never be visible",
675 15, "SCE_H_ASP", "preprocessor", "ASP <% ... %>",
676 16, "SCE_H_ASPAT", "preprocessor", "ASP <% ... %>",
677 17, "SCE_H_CDATA", "literal", "CDATA",
678 18, "SCE_H_QUESTION", "preprocessor", "PHP",
679 19, "SCE_H_VALUE", "literal string", "Unquoted values",
680 20, "SCE_H_XCCOMMENT", "comment", "JSP Comment <%-- ... --%>",
681 21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
682 22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
683 23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
684 24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
685 25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
686 26, "SCE_H_SGML_ERROR", "error", "SGML error",
687 27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
688 28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
689 29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
690 30, "SCE_H_SGML_1ST_PARAM_COMMENT", "error comment", "SGML first parameter - lexer internal. It is an error if any text is in this style.",
691 31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
692 32, "", "predefined", "",
693 33, "", "predefined", "",
694 34, "", "predefined", "",
695 35, "", "predefined", "",
696 36, "", "predefined", "",
697 37, "", "predefined", "",
698 38, "", "predefined", "",
699 39, "", "predefined", "",
700 40, "SCE_HJ_START", "client javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
701 41, "SCE_HJ_DEFAULT", "client javascript default", "JS Default",
702 42, "SCE_HJ_COMMENT", "client javascript comment", "JS Comment",
703 43, "SCE_HJ_COMMENTLINE", "client javascript comment line", "JS Line Comment",
704 44, "SCE_HJ_COMMENTDOC", "client javascript comment documentation", "JS Doc comment",
705 45, "SCE_HJ_NUMBER", "client javascript literal numeric", "JS Number",
706 46, "SCE_HJ_WORD", "client javascript identifier", "JS Word",
707 47, "SCE_HJ_KEYWORD", "client javascript keyword", "JS Keyword",
708 48, "SCE_HJ_DOUBLESTRING", "client javascript literal string", "JS Double quoted string",
709 49, "SCE_HJ_SINGLESTRING", "client javascript literal string", "JS Single quoted string",
710 50, "SCE_HJ_SYMBOLS", "client javascript operator", "JS Symbols",
711 51, "SCE_HJ_STRINGEOL", "client javascript error literal string", "JavaScript EOL",
712 52, "SCE_HJ_REGEX", "client javascript literal regex", "JavaScript RegEx",
713 53, "", "unused", "",
714 54, "", "unused", "",
715 55, "SCE_HJA_START", "server javascript default", "JS Start - allows eol filled background to not start on same line as SCRIPT tag",
716 56, "SCE_HJA_DEFAULT", "server javascript default", "JS Default",
717 57, "SCE_HJA_COMMENT", "server javascript comment", "JS Comment",
718 58, "SCE_HJA_COMMENTLINE", "server javascript comment line", "JS Line Comment",
719 59, "SCE_HJA_COMMENTDOC", "server javascript comment documentation", "JS Doc comment",
720 60, "SCE_HJA_NUMBER", "server javascript literal numeric", "JS Number",
721 61, "SCE_HJA_WORD", "server javascript identifier", "JS Word",
722 62, "SCE_HJA_KEYWORD", "server javascript keyword", "JS Keyword",
723 63, "SCE_HJA_DOUBLESTRING", "server javascript literal string", "JS Double quoted string",
724 64, "SCE_HJA_SINGLESTRING", "server javascript literal string", "JS Single quoted string",
725 65, "SCE_HJA_SYMBOLS", "server javascript operator", "JS Symbols",
726 66, "SCE_HJA_STRINGEOL", "server javascript error literal string", "JavaScript EOL",
727 67, "SCE_HJA_REGEX", "server javascript literal regex", "JavaScript RegEx",
728 68, "", "unused", "",
729 69, "", "unused", "",
730 70, "SCE_HB_START", "client basic default", "Start",
731 71, "SCE_HB_DEFAULT", "client basic default", "Default",
732 72, "SCE_HB_COMMENTLINE", "client basic comment line", "Comment",
733 73, "SCE_HB_NUMBER", "client basic literal numeric", "Number",
734 74, "SCE_HB_WORD", "client basic keyword", "KeyWord",
735 75, "SCE_HB_STRING", "client basic literal string", "String",
736 76, "SCE_HB_IDENTIFIER", "client basic identifier", "Identifier",
737 77, "SCE_HB_STRINGEOL", "client basic literal string", "Unterminated string",
738 78, "", "unused", "",
739 79, "", "unused", "",
740 80, "SCE_HBA_START", "server basic default", "Start",
741 81, "SCE_HBA_DEFAULT", "server basic default", "Default",
742 82, "SCE_HBA_COMMENTLINE", "server basic comment line", "Comment",
743 83, "SCE_HBA_NUMBER", "server basic literal numeric", "Number",
744 84, "SCE_HBA_WORD", "server basic keyword", "KeyWord",
745 85, "SCE_HBA_STRING", "server basic literal string", "String",
746 86, "SCE_HBA_IDENTIFIER", "server basic identifier", "Identifier",
747 87, "SCE_HBA_STRINGEOL", "server basic literal string", "Unterminated string",
748 88, "", "unused", "",
749 89, "", "unused", "",
750 90, "SCE_HP_START", "client python default", "Embedded Python",
751 91, "SCE_HP_DEFAULT", "client python default", "Embedded Python",
752 92, "SCE_HP_COMMENTLINE", "client python comment line", "Comment",
753 93, "SCE_HP_NUMBER", "client python literal numeric", "Number",
754 94, "SCE_HP_STRING", "client python literal string", "String",
755 95, "SCE_HP_CHARACTER", "client python literal string character", "Single quoted string",
756 96, "SCE_HP_WORD", "client python keyword", "Keyword",
757 97, "SCE_HP_TRIPLE", "client python literal string", "Triple quotes",
758 98, "SCE_HP_TRIPLEDOUBLE", "client python literal string", "Triple double quotes",
759 99, "SCE_HP_CLASSNAME", "client python identifier", "Class name definition",
760 100, "SCE_HP_DEFNAME", "client python identifier", "Function or method name definition",
761 101, "SCE_HP_OPERATOR", "client python operator", "Operators",
762 102, "SCE_HP_IDENTIFIER", "client python identifier", "Identifiers",
763 103, "", "unused", "",
764 104, "SCE_HPHP_COMPLEX_VARIABLE", "server php identifier", "PHP complex variable",
765 105, "SCE_HPA_START", "server python default", "ASP Python",
766 106, "SCE_HPA_DEFAULT", "server python default", "ASP Python",
767 107, "SCE_HPA_COMMENTLINE", "server python comment line", "Comment",
768 108, "SCE_HPA_NUMBER", "server python literal numeric", "Number",
769 109, "SCE_HPA_STRING", "server python literal string", "String",
770 110, "SCE_HPA_CHARACTER", "server python literal string character", "Single quoted string",
771 111, "SCE_HPA_WORD", "server python keyword", "Keyword",
772 112, "SCE_HPA_TRIPLE", "server python literal string", "Triple quotes",
773 113, "SCE_HPA_TRIPLEDOUBLE", "server python literal string", "Triple double quotes",
774 114, "SCE_HPA_CLASSNAME", "server python identifier", "Class name definition",
775 115, "SCE_HPA_DEFNAME", "server python identifier", "Function or method name definition",
776 116, "SCE_HPA_OPERATOR", "server python operator", "Operators",
777 117, "SCE_HPA_IDENTIFIER", "server python identifier", "Identifiers",
778 118, "SCE_HPHP_DEFAULT", "server php default", "Default",
779 119, "SCE_HPHP_HSTRING", "server php literal string", "Double quoted String",
780 120, "SCE_HPHP_SIMPLESTRING", "server php literal string", "Single quoted string",
781 121, "SCE_HPHP_WORD", "server php keyword", "Keyword",
782 122, "SCE_HPHP_NUMBER", "server php literal numeric", "Number",
783 123, "SCE_HPHP_VARIABLE", "server php identifier", "Variable",
784 124, "SCE_HPHP_COMMENT", "server php comment", "Comment",
785 125, "SCE_HPHP_COMMENTLINE", "server php comment line", "One line comment",
786 126, "SCE_HPHP_HSTRING_VARIABLE", "server php literal string identifier", "PHP variable in double quoted string",
787 127, "SCE_HPHP_OPERATOR", "server php operator", "PHP operator",
788 };
789
790 LexicalClass lexicalClassesXML[] = {
791 // Lexer.Secondary XML SCLEX_XML SCE_H_:
792 0, "SCE_H_DEFAULT", "default", "Default",
793 1, "SCE_H_TAG", "tag", "Tags",
794 2, "SCE_H_TAGUNKNOWN", "error tag", "Unknown Tags",
795 3, "SCE_H_ATTRIBUTE", "attribute", "Attributes",
796 4, "SCE_H_ERRORATTRIBUTEUNKNOWN", "error attribute", "Unknown Attributes",
797 5, "SCE_H_NUMBER", "literal numeric", "Numbers",
798 6, "SCE_H_DOUBLESTRING", "literal string", "Double quoted strings",
799 7, "SCE_H_SINGLESTRING", "literal string", "Single quoted strings",
800 8, "SCE_H_OTHER", "tag operator", "Other inside tag, including space and '='",
801 9, "SCE_H_COMMENT", "comment", "Comment",
802 10, "SCE_H_ENTITY", "literal", "Entities",
803 11, "SCE_H_TAGEND", "tag", "XML style tag ends '/>'",
804 12, "SCE_H_XMLSTART", "identifier", "XML identifier start '<?'",
805 13, "SCE_H_XMLEND", "identifier", "XML identifier end '?>'",
806 14, "", "unused", "",
807 15, "", "unused", "",
808 16, "", "unused", "",
809 17, "SCE_H_CDATA", "literal", "CDATA",
810 18, "SCE_H_QUESTION", "preprocessor", "Question",
811 19, "SCE_H_VALUE", "literal string", "Unquoted Value",
812 20, "", "unused", "",
813 21, "SCE_H_SGML_DEFAULT", "default", "SGML tags <! ... >",
814 22, "SCE_H_SGML_COMMAND", "preprocessor", "SGML command",
815 23, "SCE_H_SGML_1ST_PARAM", "preprocessor", "SGML 1st param",
816 24, "SCE_H_SGML_DOUBLESTRING", "literal string", "SGML double string",
817 25, "SCE_H_SGML_SIMPLESTRING", "literal string", "SGML single string",
818 26, "SCE_H_SGML_ERROR", "error", "SGML error",
819 27, "SCE_H_SGML_SPECIAL", "literal", "SGML special (#XXXX type)",
820 28, "SCE_H_SGML_ENTITY", "literal", "SGML entity",
821 29, "SCE_H_SGML_COMMENT", "comment", "SGML comment",
822 30, "", "unused", "",
823 31, "SCE_H_SGML_BLOCK_DEFAULT", "default", "SGML block",
824 };
825
// Lower-case tag names that must never start a fold region. The list is
// copied into LexerHTML::nonFoldingTags when a lexer is constructed.
const char *tagsThatDoNotFold[] = {
    "area", "base", "basefont", "br",
    "col", "command",
    "embed",
    "frame",
    "hr",
    "img", "input", "isindex",
    "keygen",
    "link",
    "meta",
    "param",
    "source",
    "track",
    "wbr"
};
847
848 }
849
850 class LexerHTML : public DefaultLexer {
851 bool isXml;
852 bool isPHPScript;
853 WordList keywords;
854 WordList keywords2;
855 WordList keywords3;
856 WordList keywords4;
857 WordList keywords5;
858 WordList keywords6; // SGML (DTD) keywords
859 OptionsHTML options;
860 OptionSetHTML osHTML;
861 std::set<std::string> nonFoldingTags;
862 public:
LexerHTML(bool isXml_,bool isPHPScript_)863 explicit LexerHTML(bool isXml_, bool isPHPScript_) :
864 DefaultLexer(
865 isXml_ ? "xml" : (isPHPScript_ ? "phpscript" : "hypertext"),
866 isXml_ ? SCLEX_XML : (isPHPScript_ ? SCLEX_PHPSCRIPT : SCLEX_HTML),
867 isXml_ ? lexicalClassesHTML : lexicalClassesXML,
868 isXml_ ? std::size(lexicalClassesHTML) : std::size(lexicalClassesXML)),
869 isXml(isXml_),
870 isPHPScript(isPHPScript_),
871 osHTML(isPHPScript_),
872 nonFoldingTags(std::begin(tagsThatDoNotFold), std::end(tagsThatDoNotFold)) {
873 }
~LexerHTML()874 ~LexerHTML() override {
875 }
Release()876 void SCI_METHOD Release() override {
877 delete this;
878 }
PropertyNames()879 const char *SCI_METHOD PropertyNames() override {
880 return osHTML.PropertyNames();
881 }
PropertyType(const char * name)882 int SCI_METHOD PropertyType(const char *name) override {
883 return osHTML.PropertyType(name);
884 }
DescribeProperty(const char * name)885 const char *SCI_METHOD DescribeProperty(const char *name) override {
886 return osHTML.DescribeProperty(name);
887 }
888 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
PropertyGet(const char * key)889 const char * SCI_METHOD PropertyGet(const char *key) override {
890 return osHTML.PropertyGet(key);
891 }
DescribeWordListSets()892 const char *SCI_METHOD DescribeWordListSets() override {
893 return osHTML.DescribeWordListSets();
894 }
895 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
896 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
897 // No Fold as all folding performs in Lex.
898
LexerFactoryHTML()899 static ILexer5 *LexerFactoryHTML() {
900 return new LexerHTML(false, false);
901 }
LexerFactoryXML()902 static ILexer5 *LexerFactoryXML() {
903 return new LexerHTML(true, false);
904 }
LexerFactoryPHPScript()905 static ILexer5 *LexerFactoryPHPScript() {
906 return new LexerHTML(false, true);
907 }
908 };
909
PropertySet(const char * key,const char * val)910 Sci_Position SCI_METHOD LexerHTML::PropertySet(const char *key, const char *val) {
911 if (osHTML.PropertySet(&options, key, val)) {
912 return 0;
913 }
914 return -1;
915 }
916
WordListSet(int n,const char * wl)917 Sci_Position SCI_METHOD LexerHTML::WordListSet(int n, const char *wl) {
918 WordList *wordListN = 0;
919 switch (n) {
920 case 0:
921 wordListN = &keywords;
922 break;
923 case 1:
924 wordListN = &keywords2;
925 break;
926 case 2:
927 wordListN = &keywords3;
928 break;
929 case 3:
930 wordListN = &keywords4;
931 break;
932 case 4:
933 wordListN = &keywords5;
934 break;
935 case 5:
936 wordListN = &keywords6;
937 break;
938 }
939 Sci_Position firstModification = -1;
940 if (wordListN) {
941 WordList wlNew;
942 wlNew.Set(wl);
943 if (*wordListN != wlNew) {
944 wordListN->Set(wl);
945 firstModification = 0;
946 }
947 }
948 return firstModification;
949 }
950
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)951 void SCI_METHOD LexerHTML::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
952 Accessor styler(pAccess, nullptr);
953 if (isPHPScript && (startPos == 0)) {
954 initStyle = SCE_HPHP_DEFAULT;
955 }
956 styler.StartAt(startPos);
957 std::string prevWord;
958 std::string phpStringDelimiter;
959 int StateToPrint = initStyle;
960 int state = stateForPrintState(StateToPrint);
961 std::string makoBlockType;
962 int makoComment = 0;
963 std::string djangoBlockType;
964 // If inside a tag, it may be a script tag, so reread from the start of line starting tag to ensure any language tags are seen
965 if (InTagState(state)) {
966 while ((startPos > 0) && (InTagState(styler.StyleAt(startPos - 1)))) {
967 const Sci_Position backLineStart = styler.LineStart(styler.GetLine(startPos-1));
968 length += startPos - backLineStart;
969 startPos = backLineStart;
970 }
971 state = SCE_H_DEFAULT;
972 }
973 // String can be heredoc, must find a delimiter first. Reread from beginning of line containing the string, to get the correct lineState
974 if (isPHPStringState(state)) {
975 while (startPos > 0 && (isPHPStringState(state) || !isLineEnd(styler[startPos - 1]))) {
976 startPos--;
977 length++;
978 state = styler.StyleAt(startPos);
979 }
980 if (startPos == 0)
981 state = SCE_H_DEFAULT;
982 }
983 styler.StartAt(startPos);
984
985 /* Nothing handles getting out of these, so we need not start in any of them.
986 * As we're at line start and they can't span lines, we'll re-detect them anyway */
987 switch (state) {
988 case SCE_H_QUESTION:
989 case SCE_H_XMLSTART:
990 case SCE_H_XMLEND:
991 case SCE_H_ASP:
992 state = SCE_H_DEFAULT;
993 break;
994 }
995
996 Sci_Position lineCurrent = styler.GetLine(startPos);
997 int lineState;
998 if (lineCurrent > 0) {
999 lineState = styler.GetLineState(lineCurrent-1);
1000 } else {
1001 // Default client and ASP scripting language is JavaScript
1002 lineState = eScriptJS << 8;
1003 lineState |= options.aspDefaultLanguage << 4;
1004 }
1005 script_mode inScriptType = static_cast<script_mode>((lineState >> 0) & 0x03); // 2 bits of scripting mode
1006
1007 bool tagOpened = (lineState >> 2) & 0x01; // 1 bit to know if we are in an opened tag
1008 bool tagClosing = (lineState >> 3) & 0x01; // 1 bit to know if we are in a closing tag
1009 bool tagDontFold = false; //some HTML tags should not be folded
1010 script_type aspScript = static_cast<script_type>((lineState >> 4) & 0x0F); // 4 bits of script name
1011 script_type clientScript = static_cast<script_type>((lineState >> 8) & 0x0F); // 4 bits of script name
1012 int beforePreProc = (lineState >> 12) & 0xFF; // 8 bits of state
1013
1014 script_type scriptLanguage = ScriptOfState(state);
1015 // If eNonHtmlScript coincides with SCE_H_COMMENT, assume eScriptComment
1016 if (inScriptType == eNonHtmlScript && state == SCE_H_COMMENT) {
1017 scriptLanguage = eScriptComment;
1018 }
1019 script_type beforeLanguage = ScriptOfState(beforePreProc);
1020 const bool foldHTML = options.foldHTML;
1021 const bool fold = foldHTML && options.fold;
1022 const bool foldHTMLPreprocessor = foldHTML && options.foldHTMLPreprocessor;
1023 const bool foldCompact = options.foldCompact;
1024 const bool foldComment = fold && options.foldComment;
1025 const bool foldHeredoc = fold && options.foldHeredoc;
1026 const bool foldXmlAtTagOpen = isXml && fold && options.foldXmlAtTagOpen;
1027 const bool caseSensitive = options.caseSensitive;
1028 const bool allowScripts = options.allowScripts;
1029 const bool isMako = options.isMako;
1030 const bool isDjango = options.isDjango;
1031 const CharacterSet setHTMLWord(CharacterSet::setAlphaNum, ".-_:!#", 0x80, true);
1032 const CharacterSet setTagContinue(CharacterSet::setAlphaNum, ".-_:!#[", 0x80, true);
1033 const CharacterSet setAttributeContinue(CharacterSet::setAlphaNum, ".-_:!#/", 0x80, true);
1034 // TODO: also handle + and - (except if they're part of ++ or --) and return keywords
1035 const CharacterSet setOKBeforeJSRE(CharacterSet::setNone, "([{=,:;!%^&*|?~");
1036
1037 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
1038 int levelCurrent = levelPrev;
1039 int visibleChars = 0;
1040 int lineStartVisibleChars = 0;
1041
1042 int chPrev = ' ';
1043 int ch = ' ';
1044 int chPrevNonWhite = ' ';
1045 // look back to set chPrevNonWhite properly for better regex colouring
1046 if (scriptLanguage == eScriptJS && startPos > 0) {
1047 Sci_Position back = startPos;
1048 int style = 0;
1049 while (--back) {
1050 style = styler.StyleAt(back);
1051 if (style < SCE_HJ_DEFAULT || style > SCE_HJ_COMMENTDOC)
1052 // includes SCE_HJ_COMMENT & SCE_HJ_COMMENTLINE
1053 break;
1054 }
1055 if (style == SCE_HJ_SYMBOLS) {
1056 chPrevNonWhite = static_cast<unsigned char>(styler.SafeGetCharAt(back));
1057 }
1058 }
1059
1060 styler.StartSegment(startPos);
1061 const Sci_Position lengthDoc = startPos + length;
1062 for (Sci_Position i = startPos; i < lengthDoc; i++) {
1063 const int chPrev2 = chPrev;
1064 chPrev = ch;
1065 if (!IsASpace(ch) && state != SCE_HJ_COMMENT &&
1066 state != SCE_HJ_COMMENTLINE && state != SCE_HJ_COMMENTDOC)
1067 chPrevNonWhite = ch;
1068 ch = static_cast<unsigned char>(styler[i]);
1069 int chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
1070 const int chNext2 = static_cast<unsigned char>(styler.SafeGetCharAt(i + 2));
1071
1072 // Handle DBCS codepages
1073 if (styler.IsLeadByte(static_cast<char>(ch))) {
1074 chPrev = ' ';
1075 i += 1;
1076 continue;
1077 }
1078
1079 if ((!IsASpace(ch) || !foldCompact) && fold)
1080 visibleChars++;
1081 if (!IsASpace(ch))
1082 lineStartVisibleChars++;
1083
1084 // decide what is the current state to print (depending of the script tag)
1085 StateToPrint = statePrintForState(state, inScriptType);
1086
1087 // handle script folding
1088 if (fold) {
1089 switch (scriptLanguage) {
1090 case eScriptJS:
1091 case eScriptPHP:
1092 //not currently supported case eScriptVBS:
1093
1094 if ((state != SCE_HPHP_COMMENT) && (state != SCE_HPHP_COMMENTLINE) && (state != SCE_HJ_COMMENT) && (state != SCE_HJ_COMMENTLINE) && (state != SCE_HJ_COMMENTDOC) && (!isStringState(state))) {
1095 //Platform::DebugPrintf("state=%d, StateToPrint=%d, initStyle=%d\n", state, StateToPrint, initStyle);
1096 //if ((state == SCE_HPHP_OPERATOR) || (state == SCE_HPHP_DEFAULT) || (state == SCE_HJ_SYMBOLS) || (state == SCE_HJ_START) || (state == SCE_HJ_DEFAULT)) {
1097 if (ch == '#') {
1098 Sci_Position j = i + 1;
1099 while ((j < lengthDoc) && IsASpaceOrTab(styler.SafeGetCharAt(j))) {
1100 j++;
1101 }
1102 if (styler.Match(j, "region") || styler.Match(j, "if")) {
1103 levelCurrent++;
1104 } else if (styler.Match(j, "end")) {
1105 levelCurrent--;
1106 }
1107 } else if ((ch == '{') || (ch == '}') || (foldComment && (ch == '/') && (chNext == '*'))) {
1108 levelCurrent += (((ch == '{') || (ch == '/')) ? 1 : -1);
1109 }
1110 } else if (((state == SCE_HPHP_COMMENT) || (state == SCE_HJ_COMMENT)) && foldComment && (ch == '*') && (chNext == '/')) {
1111 levelCurrent--;
1112 }
1113 break;
1114 case eScriptPython:
1115 if (state != SCE_HP_COMMENTLINE && !isMako) {
1116 if ((ch == ':') && ((chNext == '\n') || (chNext == '\r' && chNext2 == '\n'))) {
1117 levelCurrent++;
1118 } else if ((ch == '\n') && !((chNext == '\r') && (chNext2 == '\n')) && (chNext != '\n')) {
1119 // check if the number of tabs is lower than the level
1120 int Findlevel = (levelCurrent & ~SC_FOLDLEVELBASE) * 8;
1121 for (Sci_Position j = 0; Findlevel > 0; j++) {
1122 const char chTmp = styler.SafeGetCharAt(i + j + 1);
1123 if (chTmp == '\t') {
1124 Findlevel -= 8;
1125 } else if (chTmp == ' ') {
1126 Findlevel--;
1127 } else {
1128 break;
1129 }
1130 }
1131
1132 if (Findlevel > 0) {
1133 levelCurrent -= Findlevel / 8;
1134 if (Findlevel % 8)
1135 levelCurrent--;
1136 }
1137 }
1138 }
1139 break;
1140 default:
1141 break;
1142 }
1143 }
1144
1145 if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
1146 // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
1147 // Avoid triggering two times on Dos/Win
1148 // New line -> record any line state onto /next/ line
1149 if (fold) {
1150 int lev = levelPrev;
1151 if (visibleChars == 0)
1152 lev |= SC_FOLDLEVELWHITEFLAG;
1153 if ((levelCurrent > levelPrev) && (visibleChars > 0))
1154 lev |= SC_FOLDLEVELHEADERFLAG;
1155
1156 styler.SetLevel(lineCurrent, lev);
1157 visibleChars = 0;
1158 levelPrev = levelCurrent;
1159 }
1160 styler.SetLineState(lineCurrent,
1161 ((inScriptType & 0x03) << 0) |
1162 ((tagOpened ? 1 : 0) << 2) |
1163 ((tagClosing ? 1 : 0) << 3) |
1164 ((aspScript & 0x0F) << 4) |
1165 ((clientScript & 0x0F) << 8) |
1166 ((beforePreProc & 0xFF) << 12));
1167 lineCurrent++;
1168 lineStartVisibleChars = 0;
1169 }
1170
1171 // handle start of Mako comment line
1172 if (isMako && ch == '#' && chNext == '#') {
1173 makoComment = 1;
1174 state = SCE_HP_COMMENTLINE;
1175 }
1176
1177 // handle end of Mako comment line
1178 else if (isMako && makoComment && (ch == '\r' || ch == '\n')) {
1179 makoComment = 0;
1180 styler.ColourTo(i - 1, StateToPrint);
1181 if (scriptLanguage == eScriptPython) {
1182 state = SCE_HP_DEFAULT;
1183 } else {
1184 state = SCE_H_DEFAULT;
1185 }
1186 }
1187 // Allow falling through to mako handling code if newline is going to end a block
1188 if (((ch == '\r' && chNext != '\n') || (ch == '\n')) &&
1189 (!isMako || (makoBlockType != "%"))) {
1190 }
1191 // Ignore everything in mako comment until the line ends
1192 else if (isMako && makoComment) {
1193 }
1194
1195 // generic end of script processing
1196 else if ((inScriptType == eNonHtmlScript) && (ch == '<') && (chNext == '/')) {
1197 // Check if it's the end of the script tag (or any other HTML tag)
1198 switch (state) {
1199 // in these cases, you can embed HTML tags (to confirm !!!!!!!!!!!!!!!!!!!!!!)
1200 case SCE_H_DOUBLESTRING:
1201 case SCE_H_SINGLESTRING:
1202 case SCE_HJ_COMMENT:
1203 case SCE_HJ_COMMENTDOC:
1204 //case SCE_HJ_COMMENTLINE: // removed as this is a common thing done to hide
1205 // the end of script marker from some JS interpreters.
1206 case SCE_HB_COMMENTLINE:
1207 case SCE_HBA_COMMENTLINE:
1208 case SCE_HJ_DOUBLESTRING:
1209 case SCE_HJ_SINGLESTRING:
1210 case SCE_HJ_REGEX:
1211 case SCE_HB_STRING:
1212 case SCE_HBA_STRING:
1213 case SCE_HP_STRING:
1214 case SCE_HP_TRIPLE:
1215 case SCE_HP_TRIPLEDOUBLE:
1216 case SCE_HPHP_HSTRING:
1217 case SCE_HPHP_SIMPLESTRING:
1218 case SCE_HPHP_COMMENT:
1219 case SCE_HPHP_COMMENTLINE:
1220 break;
1221 default :
1222 // check if the closing tag is a script tag
1223 if (const char *tag =
1224 state == SCE_HJ_COMMENTLINE || isXml ? "script" :
1225 state == SCE_H_COMMENT ? "comment" : 0) {
1226 Sci_Position j = i + 2;
1227 int chr;
1228 do {
1229 chr = static_cast<int>(*tag++);
1230 } while (chr != 0 && chr == MakeLowerCase(styler.SafeGetCharAt(j++)));
1231 if (chr != 0) break;
1232 }
1233 // closing tag of the script (it's a closing HTML tag anyway)
1234 styler.ColourTo(i - 1, StateToPrint);
1235 state = SCE_H_TAGUNKNOWN;
1236 inScriptType = eHtml;
1237 scriptLanguage = eScriptNone;
1238 clientScript = eScriptJS;
1239 i += 2;
1240 visibleChars += 2;
1241 tagClosing = true;
1242 if (foldXmlAtTagOpen) {
1243 levelCurrent--;
1244 }
1245 continue;
1246 }
1247 }
1248
1249 /////////////////////////////////////
1250 // handle the start of PHP pre-processor = Non-HTML
1251 else if ((state != SCE_H_ASPAT) &&
1252 !isStringState(state) &&
1253 (state != SCE_HPHP_COMMENT) &&
1254 (state != SCE_HPHP_COMMENTLINE) &&
1255 (ch == '<') &&
1256 (chNext == '?') &&
1257 !IsScriptCommentState(state)) {
1258 beforeLanguage = scriptLanguage;
1259 scriptLanguage = segIsScriptingIndicator(styler, i + 2, i + 6, isXml ? eScriptXML : eScriptPHP);
1260 if ((scriptLanguage != eScriptPHP) && (isStringState(state) || (state==SCE_H_COMMENT))) continue;
1261 styler.ColourTo(i - 1, StateToPrint);
1262 beforePreProc = state;
1263 i++;
1264 visibleChars++;
1265 i += PrintScriptingIndicatorOffset(styler, styler.GetStartSegment() + 2, i + 6);
1266 if (scriptLanguage == eScriptXML)
1267 styler.ColourTo(i, SCE_H_XMLSTART);
1268 else
1269 styler.ColourTo(i, SCE_H_QUESTION);
1270 state = StateForScript(scriptLanguage);
1271 if (inScriptType == eNonHtmlScript)
1272 inScriptType = eNonHtmlScriptPreProc;
1273 else
1274 inScriptType = eNonHtmlPreProc;
1275 // Fold whole script, but not if the XML first tag (all XML-like tags in this case)
1276 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1277 levelCurrent++;
1278 }
1279 // should be better
1280 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1281 continue;
1282 }
1283
1284 // handle the start Mako template Python code
1285 else if (isMako && scriptLanguage == eScriptNone && ((ch == '<' && chNext == '%') ||
1286 (lineStartVisibleChars == 1 && ch == '%') ||
1287 (lineStartVisibleChars == 1 && ch == '/' && chNext == '%') ||
1288 (ch == '$' && chNext == '{') ||
1289 (ch == '<' && chNext == '/' && chNext2 == '%'))) {
1290 if (ch == '%' || ch == '/')
1291 makoBlockType = "%";
1292 else if (ch == '$')
1293 makoBlockType = "{";
1294 else if (chNext == '/')
1295 makoBlockType = GetNextWord(styler, i+3);
1296 else
1297 makoBlockType = GetNextWord(styler, i+2);
1298 styler.ColourTo(i - 1, StateToPrint);
1299 beforePreProc = state;
1300 if (inScriptType == eNonHtmlScript)
1301 inScriptType = eNonHtmlScriptPreProc;
1302 else
1303 inScriptType = eNonHtmlPreProc;
1304
1305 if (chNext == '/') {
1306 i += 2;
1307 visibleChars += 2;
1308 } else if (ch != '%') {
1309 i++;
1310 visibleChars++;
1311 }
1312 state = SCE_HP_START;
1313 scriptLanguage = eScriptPython;
1314 styler.ColourTo(i, SCE_H_ASP);
1315 if (ch != '%' && ch != '$' && ch != '/') {
1316 i += makoBlockType.length();
1317 visibleChars += static_cast<int>(makoBlockType.length());
1318 if (keywords4.InList(makoBlockType.c_str()))
1319 styler.ColourTo(i, SCE_HP_WORD);
1320 else
1321 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1322 }
1323
1324 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1325 continue;
1326 }
1327
1328 // handle the start/end of Django comment
1329 else if (isDjango && state != SCE_H_COMMENT && (ch == '{' && chNext == '#')) {
1330 styler.ColourTo(i - 1, StateToPrint);
1331 beforePreProc = state;
1332 beforeLanguage = scriptLanguage;
1333 if (inScriptType == eNonHtmlScript)
1334 inScriptType = eNonHtmlScriptPreProc;
1335 else
1336 inScriptType = eNonHtmlPreProc;
1337 i += 1;
1338 visibleChars += 1;
1339 scriptLanguage = eScriptComment;
1340 state = SCE_H_COMMENT;
1341 styler.ColourTo(i, SCE_H_ASP);
1342 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1343 continue;
1344 } else if (isDjango && state == SCE_H_COMMENT && (ch == '#' && chNext == '}')) {
1345 styler.ColourTo(i - 1, StateToPrint);
1346 i += 1;
1347 visibleChars += 1;
1348 styler.ColourTo(i, SCE_H_ASP);
1349 state = beforePreProc;
1350 if (inScriptType == eNonHtmlScriptPreProc)
1351 inScriptType = eNonHtmlScript;
1352 else
1353 inScriptType = eHtml;
1354 scriptLanguage = beforeLanguage;
1355 continue;
1356 }
1357
1358 // handle the start Django template code
1359 else if (isDjango && scriptLanguage != eScriptPython && scriptLanguage != eScriptComment && (ch == '{' && (chNext == '%' || chNext == '{'))) {
1360 if (chNext == '%')
1361 djangoBlockType = "%";
1362 else
1363 djangoBlockType = "{";
1364 styler.ColourTo(i - 1, StateToPrint);
1365 beforePreProc = state;
1366 if (inScriptType == eNonHtmlScript)
1367 inScriptType = eNonHtmlScriptPreProc;
1368 else
1369 inScriptType = eNonHtmlPreProc;
1370
1371 i += 1;
1372 visibleChars += 1;
1373 state = SCE_HP_START;
1374 beforeLanguage = scriptLanguage;
1375 scriptLanguage = eScriptPython;
1376 styler.ColourTo(i, SCE_H_ASP);
1377
1378 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1379 continue;
1380 }
1381
1382 // handle the start of ASP pre-processor = Non-HTML
1383 else if (!isMako && !isDjango && !isCommentASPState(state) && (ch == '<') && (chNext == '%') && !isPHPStringState(state)) {
1384 styler.ColourTo(i - 1, StateToPrint);
1385 beforePreProc = state;
1386 if (inScriptType == eNonHtmlScript)
1387 inScriptType = eNonHtmlScriptPreProc;
1388 else
1389 inScriptType = eNonHtmlPreProc;
1390
1391 if (chNext2 == '@') {
1392 i += 2; // place as if it was the second next char treated
1393 visibleChars += 2;
1394 state = SCE_H_ASPAT;
1395 } else if ((chNext2 == '-') && (styler.SafeGetCharAt(i + 3) == '-')) {
1396 styler.ColourTo(i + 3, SCE_H_ASP);
1397 state = SCE_H_XCCOMMENT;
1398 scriptLanguage = eScriptVBS;
1399 continue;
1400 } else {
1401 if (chNext2 == '=') {
1402 i += 2; // place as if it was the second next char treated
1403 visibleChars += 2;
1404 } else {
1405 i++; // place as if it was the next char treated
1406 visibleChars++;
1407 }
1408
1409 state = StateForScript(aspScript);
1410 }
1411 scriptLanguage = eScriptVBS;
1412 styler.ColourTo(i, SCE_H_ASP);
1413 // fold whole script
1414 if (foldHTMLPreprocessor)
1415 levelCurrent++;
1416 // should be better
1417 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1418 continue;
1419 }
1420
1421 /////////////////////////////////////
1422 // handle the start of SGML language (DTD)
1423 else if (((scriptLanguage == eScriptNone) || (scriptLanguage == eScriptXML)) &&
1424 (chPrev == '<') &&
1425 (ch == '!') &&
1426 (StateToPrint != SCE_H_CDATA) &&
1427 (!IsCommentState(StateToPrint)) &&
1428 (!IsScriptCommentState(StateToPrint))) {
1429 beforePreProc = state;
1430 styler.ColourTo(i - 2, StateToPrint);
1431 if ((chNext == '-') && (chNext2 == '-')) {
1432 state = SCE_H_COMMENT; // wait for a pending command
1433 styler.ColourTo(i + 2, SCE_H_COMMENT);
1434 i += 2; // follow styling after the --
1435 } else if (isWordCdata(i + 1, i + 7, styler)) {
1436 state = SCE_H_CDATA;
1437 } else {
1438 styler.ColourTo(i, SCE_H_SGML_DEFAULT); // <! is default
1439 scriptLanguage = eScriptSGML;
1440 state = SCE_H_SGML_COMMAND; // wait for a pending command
1441 }
1442 // fold whole tag (-- when closing the tag)
1443 if (foldHTMLPreprocessor || state == SCE_H_COMMENT || state == SCE_H_CDATA)
1444 levelCurrent++;
1445 continue;
1446 }
1447
1448 // handle the end of Mako Python code
1449 else if (isMako &&
1450 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1451 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1452 isMakoBlockEnd(ch, chNext, makoBlockType)) {
1453 if (state == SCE_H_ASPAT) {
1454 aspScript = segIsScriptingIndicator(styler,
1455 styler.GetStartSegment(), i - 1, aspScript);
1456 }
1457 if (state == SCE_HP_WORD) {
1458 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1459 } else {
1460 styler.ColourTo(i - 1, StateToPrint);
1461 }
1462 if ((makoBlockType != "%") && (makoBlockType != "{") && ch != '>') {
1463 i++;
1464 visibleChars++;
1465 }
1466 else if ((makoBlockType == "%") && ch == '/') {
1467 i++;
1468 visibleChars++;
1469 }
1470 if ((makoBlockType != "%") || ch == '/') {
1471 styler.ColourTo(i, SCE_H_ASP);
1472 }
1473 state = beforePreProc;
1474 if (inScriptType == eNonHtmlScriptPreProc)
1475 inScriptType = eNonHtmlScript;
1476 else
1477 inScriptType = eHtml;
1478 scriptLanguage = eScriptNone;
1479 continue;
1480 }
1481
1482 // handle the end of Django template code
1483 else if (isDjango &&
1484 ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1485 (scriptLanguage != eScriptNone) && stateAllowsTermination(state) &&
1486 isDjangoBlockEnd(ch, chNext, djangoBlockType)) {
1487 if (state == SCE_H_ASPAT) {
1488 aspScript = segIsScriptingIndicator(styler,
1489 styler.GetStartSegment(), i - 1, aspScript);
1490 }
1491 if (state == SCE_HP_WORD) {
1492 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1493 } else {
1494 styler.ColourTo(i - 1, StateToPrint);
1495 }
1496 i += 1;
1497 visibleChars += 1;
1498 styler.ColourTo(i, SCE_H_ASP);
1499 state = beforePreProc;
1500 if (inScriptType == eNonHtmlScriptPreProc)
1501 inScriptType = eNonHtmlScript;
1502 else
1503 inScriptType = eHtml;
1504 scriptLanguage = beforeLanguage;
1505 continue;
1506 }
1507
1508 // handle the end of a pre-processor = Non-HTML
1509 else if ((!isMako && !isDjango && ((inScriptType == eNonHtmlPreProc) || (inScriptType == eNonHtmlScriptPreProc)) &&
1510 (((scriptLanguage != eScriptNone) && stateAllowsTermination(state))) &&
1511 (((ch == '%') || (ch == '?')) && (chNext == '>'))) ||
1512 ((scriptLanguage == eScriptSGML) && (ch == '>') && (state != SCE_H_SGML_COMMENT))) {
1513 if (state == SCE_H_ASPAT) {
1514 aspScript = segIsScriptingIndicator(styler,
1515 styler.GetStartSegment(), i - 1, aspScript);
1516 }
1517 // Bounce out of any ASP mode
1518 switch (state) {
1519 case SCE_HJ_WORD:
1520 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1521 break;
1522 case SCE_HB_WORD:
1523 classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
1524 break;
1525 case SCE_HP_WORD:
1526 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
1527 break;
1528 case SCE_HPHP_WORD:
1529 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
1530 break;
1531 case SCE_H_XCCOMMENT:
1532 styler.ColourTo(i - 1, state);
1533 break;
1534 default :
1535 styler.ColourTo(i - 1, StateToPrint);
1536 break;
1537 }
1538 if (scriptLanguage != eScriptSGML) {
1539 i++;
1540 visibleChars++;
1541 }
1542 if (ch == '%')
1543 styler.ColourTo(i, SCE_H_ASP);
1544 else if (scriptLanguage == eScriptXML)
1545 styler.ColourTo(i, SCE_H_XMLEND);
1546 else if (scriptLanguage == eScriptSGML)
1547 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1548 else
1549 styler.ColourTo(i, SCE_H_QUESTION);
1550 state = beforePreProc;
1551 if (inScriptType == eNonHtmlScriptPreProc)
1552 inScriptType = eNonHtmlScript;
1553 else
1554 inScriptType = eHtml;
1555 // Unfold all scripting languages, except for XML tag
1556 if (foldHTMLPreprocessor && (scriptLanguage != eScriptXML)) {
1557 levelCurrent--;
1558 }
1559 scriptLanguage = beforeLanguage;
1560 continue;
1561 }
1562 /////////////////////////////////////
1563
1564 switch (state) {
1565 case SCE_H_DEFAULT:
1566 if (ch == '<') {
1567 // in HTML, fold on tag open and unfold on tag close
1568 tagOpened = true;
1569 tagClosing = (chNext == '/');
1570 if (foldXmlAtTagOpen && !(chNext == '/' || chNext == '?' || chNext == '!' || chNext == '-' || chNext == '%')) {
1571 levelCurrent++;
1572 }
1573 if (foldXmlAtTagOpen && chNext == '/') {
1574 levelCurrent--;
1575 }
1576 styler.ColourTo(i - 1, StateToPrint);
1577 if (chNext != '!')
1578 state = SCE_H_TAGUNKNOWN;
1579 } else if (ch == '&') {
1580 styler.ColourTo(i - 1, SCE_H_DEFAULT);
1581 state = SCE_H_ENTITY;
1582 }
1583 break;
1584 case SCE_H_SGML_DEFAULT:
1585 case SCE_H_SGML_BLOCK_DEFAULT:
1586 // if (scriptLanguage == eScriptSGMLblock)
1587 // StateToPrint = SCE_H_SGML_BLOCK_DEFAULT;
1588
1589 if (ch == '\"') {
1590 styler.ColourTo(i - 1, StateToPrint);
1591 state = SCE_H_SGML_DOUBLESTRING;
1592 } else if (ch == '\'') {
1593 styler.ColourTo(i - 1, StateToPrint);
1594 state = SCE_H_SGML_SIMPLESTRING;
1595 } else if ((ch == '-') && (chPrev == '-')) {
1596 if (static_cast<Sci_Position>(styler.GetStartSegment()) <= (i - 2)) {
1597 styler.ColourTo(i - 2, StateToPrint);
1598 }
1599 state = SCE_H_SGML_COMMENT;
1600 } else if (IsASCII(ch) && isalpha(ch) && (chPrev == '%')) {
1601 styler.ColourTo(i - 2, StateToPrint);
1602 state = SCE_H_SGML_ENTITY;
1603 } else if (ch == '#') {
1604 styler.ColourTo(i - 1, StateToPrint);
1605 state = SCE_H_SGML_SPECIAL;
1606 } else if (ch == '[') {
1607 styler.ColourTo(i - 1, StateToPrint);
1608 scriptLanguage = eScriptSGMLblock;
1609 state = SCE_H_SGML_BLOCK_DEFAULT;
1610 } else if (ch == ']') {
1611 if (scriptLanguage == eScriptSGMLblock) {
1612 styler.ColourTo(i, StateToPrint);
1613 scriptLanguage = eScriptSGML;
1614 } else {
1615 styler.ColourTo(i - 1, StateToPrint);
1616 styler.ColourTo(i, SCE_H_SGML_ERROR);
1617 }
1618 state = SCE_H_SGML_DEFAULT;
1619 } else if (scriptLanguage == eScriptSGMLblock) {
1620 if ((ch == '!') && (chPrev == '<')) {
1621 styler.ColourTo(i - 2, StateToPrint);
1622 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1623 state = SCE_H_SGML_COMMAND;
1624 } else if (ch == '>') {
1625 styler.ColourTo(i - 1, StateToPrint);
1626 styler.ColourTo(i, SCE_H_SGML_DEFAULT);
1627 }
1628 }
1629 break;
1630 case SCE_H_SGML_COMMAND:
1631 if ((ch == '-') && (chPrev == '-')) {
1632 styler.ColourTo(i - 2, StateToPrint);
1633 state = SCE_H_SGML_COMMENT;
1634 } else if (!issgmlwordchar(ch)) {
1635 if (isWordHSGML(styler.GetStartSegment(), i - 1, keywords6, styler)) {
1636 styler.ColourTo(i - 1, StateToPrint);
1637 state = SCE_H_SGML_1ST_PARAM;
1638 } else {
1639 state = SCE_H_SGML_ERROR;
1640 }
1641 }
1642 break;
1643 case SCE_H_SGML_1ST_PARAM:
1644 // wait for the beginning of the word
1645 if ((ch == '-') && (chPrev == '-')) {
1646 if (scriptLanguage == eScriptSGMLblock) {
1647 styler.ColourTo(i - 2, SCE_H_SGML_BLOCK_DEFAULT);
1648 } else {
1649 styler.ColourTo(i - 2, SCE_H_SGML_DEFAULT);
1650 }
1651 state = SCE_H_SGML_1ST_PARAM_COMMENT;
1652 } else if (issgmlwordchar(ch)) {
1653 if (scriptLanguage == eScriptSGMLblock) {
1654 styler.ColourTo(i - 1, SCE_H_SGML_BLOCK_DEFAULT);
1655 } else {
1656 styler.ColourTo(i - 1, SCE_H_SGML_DEFAULT);
1657 }
1658 // find the length of the word
1659 int size = 1;
1660 while (setHTMLWord.Contains(static_cast<unsigned char>(styler.SafeGetCharAt(i + size))))
1661 size++;
1662 styler.ColourTo(i + size - 1, StateToPrint);
1663 i += size - 1;
1664 visibleChars += size - 1;
1665 ch = static_cast<unsigned char>(styler.SafeGetCharAt(i));
1666 if (scriptLanguage == eScriptSGMLblock) {
1667 state = SCE_H_SGML_BLOCK_DEFAULT;
1668 } else {
1669 state = SCE_H_SGML_DEFAULT;
1670 }
1671 continue;
1672 }
1673 break;
1674 case SCE_H_SGML_ERROR:
1675 if ((ch == '-') && (chPrev == '-')) {
1676 styler.ColourTo(i - 2, StateToPrint);
1677 state = SCE_H_SGML_COMMENT;
1678 }
1679 break;
1680 case SCE_H_SGML_DOUBLESTRING:
1681 if (ch == '\"') {
1682 styler.ColourTo(i, StateToPrint);
1683 state = SCE_H_SGML_DEFAULT;
1684 }
1685 break;
1686 case SCE_H_SGML_SIMPLESTRING:
1687 if (ch == '\'') {
1688 styler.ColourTo(i, StateToPrint);
1689 state = SCE_H_SGML_DEFAULT;
1690 }
1691 break;
1692 case SCE_H_SGML_COMMENT:
1693 if ((ch == '-') && (chPrev == '-')) {
1694 styler.ColourTo(i, StateToPrint);
1695 state = SCE_H_SGML_DEFAULT;
1696 }
1697 break;
1698 case SCE_H_CDATA:
1699 if ((chPrev2 == ']') && (chPrev == ']') && (ch == '>')) {
1700 styler.ColourTo(i, StateToPrint);
1701 state = SCE_H_DEFAULT;
1702 levelCurrent--;
1703 }
1704 break;
1705 case SCE_H_COMMENT:
1706 if ((scriptLanguage != eScriptComment) && (chPrev2 == '-') && (chPrev == '-') && (ch == '>')) {
1707 styler.ColourTo(i, StateToPrint);
1708 state = SCE_H_DEFAULT;
1709 levelCurrent--;
1710 }
1711 break;
1712 case SCE_H_SGML_1ST_PARAM_COMMENT:
1713 if ((ch == '-') && (chPrev == '-')) {
1714 styler.ColourTo(i, SCE_H_SGML_COMMENT);
1715 state = SCE_H_SGML_1ST_PARAM;
1716 }
1717 break;
1718 case SCE_H_SGML_SPECIAL:
1719 if (!(IsASCII(ch) && isupper(ch))) {
1720 styler.ColourTo(i - 1, StateToPrint);
1721 if (isalnum(ch)) {
1722 state = SCE_H_SGML_ERROR;
1723 } else {
1724 state = SCE_H_SGML_DEFAULT;
1725 }
1726 }
1727 break;
1728 case SCE_H_SGML_ENTITY:
1729 if (ch == ';') {
1730 styler.ColourTo(i, StateToPrint);
1731 state = SCE_H_SGML_DEFAULT;
1732 } else if (!(IsASCII(ch) && isalnum(ch)) && ch != '-' && ch != '.') {
1733 styler.ColourTo(i, SCE_H_SGML_ERROR);
1734 state = SCE_H_SGML_DEFAULT;
1735 }
1736 break;
1737 case SCE_H_ENTITY:
1738 if (ch == ';') {
1739 styler.ColourTo(i, StateToPrint);
1740 state = SCE_H_DEFAULT;
1741 }
1742 if (ch != '#' && !(IsASCII(ch) && isalnum(ch)) // Should check that '#' follows '&', but it is unlikely anyway...
1743 && ch != '.' && ch != '-' && ch != '_' && ch != ':') { // valid in XML
1744 if (!IsASCII(ch)) // Possibly start of a multibyte character so don't allow this byte to be in entity style
1745 styler.ColourTo(i-1, SCE_H_TAGUNKNOWN);
1746 else
1747 styler.ColourTo(i, SCE_H_TAGUNKNOWN);
1748 state = SCE_H_DEFAULT;
1749 }
1750 break;
1751 case SCE_H_TAGUNKNOWN:
1752 if (!setTagContinue.Contains(ch) && !((ch == '/') && (chPrev == '<'))) {
1753 int eClass = classifyTagHTML(styler.GetStartSegment(),
1754 i - 1, keywords, styler, tagDontFold, caseSensitive, isXml, allowScripts, nonFoldingTags);
1755 if (eClass == SCE_H_SCRIPT || eClass == SCE_H_COMMENT) {
1756 if (!tagClosing) {
1757 inScriptType = eNonHtmlScript;
1758 scriptLanguage = eClass == SCE_H_SCRIPT ? clientScript : eScriptComment;
1759 } else {
1760 scriptLanguage = eScriptNone;
1761 }
1762 eClass = SCE_H_TAG;
1763 }
1764 if (ch == '>') {
1765 styler.ColourTo(i, eClass);
1766 if (inScriptType == eNonHtmlScript) {
1767 state = StateForScript(scriptLanguage);
1768 } else {
1769 state = SCE_H_DEFAULT;
1770 }
1771 tagOpened = false;
1772 if (!(foldXmlAtTagOpen || tagDontFold)) {
1773 if (tagClosing) {
1774 levelCurrent--;
1775 } else {
1776 levelCurrent++;
1777 }
1778 }
1779 tagClosing = false;
1780 } else if (ch == '/' && chNext == '>') {
1781 if (eClass == SCE_H_TAGUNKNOWN) {
1782 styler.ColourTo(i + 1, SCE_H_TAGUNKNOWN);
1783 } else {
1784 styler.ColourTo(i - 1, StateToPrint);
1785 styler.ColourTo(i + 1, SCE_H_TAGEND);
1786 }
1787 i++;
1788 ch = chNext;
1789 state = SCE_H_DEFAULT;
1790 tagOpened = false;
1791 if (foldXmlAtTagOpen) {
1792 levelCurrent--;
1793 }
1794 } else {
1795 if (eClass != SCE_H_TAGUNKNOWN) {
1796 if (eClass == SCE_H_SGML_DEFAULT) {
1797 state = SCE_H_SGML_DEFAULT;
1798 } else {
1799 state = SCE_H_OTHER;
1800 }
1801 }
1802 }
1803 }
1804 break;
1805 case SCE_H_ATTRIBUTE:
1806 if (!setAttributeContinue.Contains(ch)) {
1807 if (inScriptType == eNonHtmlScript) {
1808 const int scriptLanguagePrev = scriptLanguage;
1809 clientScript = segIsScriptingIndicator(styler, styler.GetStartSegment(), i - 1, scriptLanguage);
1810 scriptLanguage = clientScript;
1811 if ((scriptLanguagePrev != scriptLanguage) && (scriptLanguage == eScriptNone))
1812 inScriptType = eHtml;
1813 }
1814 classifyAttribHTML(styler.GetStartSegment(), i - 1, keywords, styler);
1815 if (ch == '>') {
1816 styler.ColourTo(i, SCE_H_TAG);
1817 if (inScriptType == eNonHtmlScript) {
1818 state = StateForScript(scriptLanguage);
1819 } else {
1820 state = SCE_H_DEFAULT;
1821 }
1822 tagOpened = false;
1823 if (!(foldXmlAtTagOpen || tagDontFold)) {
1824 if (tagClosing) {
1825 levelCurrent--;
1826 } else {
1827 levelCurrent++;
1828 }
1829 }
1830 tagClosing = false;
1831 } else if (ch == '=') {
1832 styler.ColourTo(i, SCE_H_OTHER);
1833 state = SCE_H_VALUE;
1834 } else {
1835 state = SCE_H_OTHER;
1836 }
1837 }
1838 break;
1839 case SCE_H_OTHER:
1840 if (ch == '>') {
1841 styler.ColourTo(i - 1, StateToPrint);
1842 styler.ColourTo(i, SCE_H_TAG);
1843 if (inScriptType == eNonHtmlScript) {
1844 state = StateForScript(scriptLanguage);
1845 } else {
1846 state = SCE_H_DEFAULT;
1847 }
1848 tagOpened = false;
1849 if (!(foldXmlAtTagOpen || tagDontFold)) {
1850 if (tagClosing) {
1851 levelCurrent--;
1852 } else {
1853 levelCurrent++;
1854 }
1855 }
1856 tagClosing = false;
1857 } else if (ch == '\"') {
1858 styler.ColourTo(i - 1, StateToPrint);
1859 state = SCE_H_DOUBLESTRING;
1860 } else if (ch == '\'') {
1861 styler.ColourTo(i - 1, StateToPrint);
1862 state = SCE_H_SINGLESTRING;
1863 } else if (ch == '=') {
1864 styler.ColourTo(i, StateToPrint);
1865 state = SCE_H_VALUE;
1866 } else if (ch == '/' && chNext == '>') {
1867 styler.ColourTo(i - 1, StateToPrint);
1868 styler.ColourTo(i + 1, SCE_H_TAGEND);
1869 i++;
1870 ch = chNext;
1871 state = SCE_H_DEFAULT;
1872 tagOpened = false;
1873 if (foldXmlAtTagOpen) {
1874 levelCurrent--;
1875 }
1876 } else if (ch == '?' && chNext == '>') {
1877 styler.ColourTo(i - 1, StateToPrint);
1878 styler.ColourTo(i + 1, SCE_H_XMLEND);
1879 i++;
1880 ch = chNext;
1881 state = SCE_H_DEFAULT;
1882 } else if (setHTMLWord.Contains(ch)) {
1883 styler.ColourTo(i - 1, StateToPrint);
1884 state = SCE_H_ATTRIBUTE;
1885 }
1886 break;
1887 case SCE_H_DOUBLESTRING:
1888 if (ch == '\"') {
1889 if (inScriptType == eNonHtmlScript) {
1890 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1891 }
1892 styler.ColourTo(i, SCE_H_DOUBLESTRING);
1893 state = SCE_H_OTHER;
1894 }
1895 break;
1896 case SCE_H_SINGLESTRING:
1897 if (ch == '\'') {
1898 if (inScriptType == eNonHtmlScript) {
1899 scriptLanguage = segIsScriptingIndicator(styler, styler.GetStartSegment(), i, scriptLanguage);
1900 }
1901 styler.ColourTo(i, SCE_H_SINGLESTRING);
1902 state = SCE_H_OTHER;
1903 }
1904 break;
1905 case SCE_H_VALUE:
1906 if (!setHTMLWord.Contains(ch)) {
1907 if (ch == '\"' && chPrev == '=') {
1908 // Should really test for being first character
1909 state = SCE_H_DOUBLESTRING;
1910 } else if (ch == '\'' && chPrev == '=') {
1911 state = SCE_H_SINGLESTRING;
1912 } else {
1913 if (IsNumber(styler.GetStartSegment(), styler)) {
1914 styler.ColourTo(i - 1, SCE_H_NUMBER);
1915 } else {
1916 styler.ColourTo(i - 1, StateToPrint);
1917 }
1918 if (ch == '>') {
1919 styler.ColourTo(i, SCE_H_TAG);
1920 if (inScriptType == eNonHtmlScript) {
1921 state = StateForScript(scriptLanguage);
1922 } else {
1923 state = SCE_H_DEFAULT;
1924 }
1925 tagOpened = false;
1926 if (!tagDontFold) {
1927 if (tagClosing) {
1928 levelCurrent--;
1929 } else {
1930 levelCurrent++;
1931 }
1932 }
1933 tagClosing = false;
1934 } else {
1935 state = SCE_H_OTHER;
1936 }
1937 }
1938 }
1939 break;
1940 case SCE_HJ_DEFAULT:
1941 case SCE_HJ_START:
1942 case SCE_HJ_SYMBOLS:
1943 if (IsAWordStart(ch)) {
1944 styler.ColourTo(i - 1, StateToPrint);
1945 state = SCE_HJ_WORD;
1946 } else if (ch == '/' && chNext == '*') {
1947 styler.ColourTo(i - 1, StateToPrint);
1948 if (chNext2 == '*')
1949 state = SCE_HJ_COMMENTDOC;
1950 else
1951 state = SCE_HJ_COMMENT;
1952 if (chNext2 == '/') {
1953 // Eat the * so it isn't used for the end of the comment
1954 i++;
1955 }
1956 } else if (ch == '/' && chNext == '/') {
1957 styler.ColourTo(i - 1, StateToPrint);
1958 state = SCE_HJ_COMMENTLINE;
1959 } else if (ch == '/' && setOKBeforeJSRE.Contains(chPrevNonWhite)) {
1960 styler.ColourTo(i - 1, StateToPrint);
1961 state = SCE_HJ_REGEX;
1962 } else if (ch == '\"') {
1963 styler.ColourTo(i - 1, StateToPrint);
1964 state = SCE_HJ_DOUBLESTRING;
1965 } else if (ch == '\'') {
1966 styler.ColourTo(i - 1, StateToPrint);
1967 state = SCE_HJ_SINGLESTRING;
1968 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
1969 styler.SafeGetCharAt(i + 3) == '-') {
1970 styler.ColourTo(i - 1, StateToPrint);
1971 state = SCE_HJ_COMMENTLINE;
1972 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
1973 styler.ColourTo(i - 1, StateToPrint);
1974 state = SCE_HJ_COMMENTLINE;
1975 i += 2;
1976 } else if (IsOperator(ch)) {
1977 styler.ColourTo(i - 1, StateToPrint);
1978 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
1979 state = SCE_HJ_DEFAULT;
1980 } else if ((ch == ' ') || (ch == '\t')) {
1981 if (state == SCE_HJ_START) {
1982 styler.ColourTo(i - 1, StateToPrint);
1983 state = SCE_HJ_DEFAULT;
1984 }
1985 }
1986 break;
1987 case SCE_HJ_WORD:
1988 if (!IsAWordChar(ch)) {
1989 classifyWordHTJS(styler.GetStartSegment(), i - 1, keywords2, styler, inScriptType);
1990 //styler.ColourTo(i - 1, eHTJSKeyword);
1991 state = SCE_HJ_DEFAULT;
1992 if (ch == '/' && chNext == '*') {
1993 if (chNext2 == '*')
1994 state = SCE_HJ_COMMENTDOC;
1995 else
1996 state = SCE_HJ_COMMENT;
1997 } else if (ch == '/' && chNext == '/') {
1998 state = SCE_HJ_COMMENTLINE;
1999 } else if (ch == '\"') {
2000 state = SCE_HJ_DOUBLESTRING;
2001 } else if (ch == '\'') {
2002 state = SCE_HJ_SINGLESTRING;
2003 } else if ((ch == '-') && (chNext == '-') && (chNext2 == '>')) {
2004 styler.ColourTo(i - 1, StateToPrint);
2005 state = SCE_HJ_COMMENTLINE;
2006 i += 2;
2007 } else if (IsOperator(ch)) {
2008 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2009 state = SCE_HJ_DEFAULT;
2010 }
2011 }
2012 break;
2013 case SCE_HJ_COMMENT:
2014 case SCE_HJ_COMMENTDOC:
2015 if (ch == '/' && chPrev == '*') {
2016 styler.ColourTo(i, StateToPrint);
2017 state = SCE_HJ_DEFAULT;
2018 ch = ' ';
2019 }
2020 break;
2021 case SCE_HJ_COMMENTLINE:
2022 if (ch == '\r' || ch == '\n') {
2023 styler.ColourTo(i - 1, statePrintForState(SCE_HJ_COMMENTLINE, inScriptType));
2024 state = SCE_HJ_DEFAULT;
2025 ch = ' ';
2026 }
2027 break;
2028 case SCE_HJ_DOUBLESTRING:
2029 if (ch == '\\') {
2030 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2031 i++;
2032 }
2033 } else if (ch == '\"') {
2034 styler.ColourTo(i, statePrintForState(SCE_HJ_DOUBLESTRING, inScriptType));
2035 state = SCE_HJ_DEFAULT;
2036 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
2037 styler.ColourTo(i - 1, StateToPrint);
2038 state = SCE_HJ_COMMENTLINE;
2039 i += 2;
2040 } else if (isLineEnd(ch)) {
2041 styler.ColourTo(i - 1, StateToPrint);
2042 state = SCE_HJ_STRINGEOL;
2043 }
2044 break;
2045 case SCE_HJ_SINGLESTRING:
2046 if (ch == '\\') {
2047 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2048 i++;
2049 }
2050 } else if (ch == '\'') {
2051 styler.ColourTo(i, statePrintForState(SCE_HJ_SINGLESTRING, inScriptType));
2052 state = SCE_HJ_DEFAULT;
2053 } else if ((inScriptType == eNonHtmlScript) && (ch == '-') && (chNext == '-') && (chNext2 == '>')) {
2054 styler.ColourTo(i - 1, StateToPrint);
2055 state = SCE_HJ_COMMENTLINE;
2056 i += 2;
2057 } else if (isLineEnd(ch)) {
2058 styler.ColourTo(i - 1, StateToPrint);
2059 if (chPrev != '\\' && (chPrev2 != '\\' || chPrev != '\r' || ch != '\n')) {
2060 state = SCE_HJ_STRINGEOL;
2061 }
2062 }
2063 break;
2064 case SCE_HJ_STRINGEOL:
2065 if (!isLineEnd(ch)) {
2066 styler.ColourTo(i - 1, StateToPrint);
2067 state = SCE_HJ_DEFAULT;
2068 } else if (!isLineEnd(chNext)) {
2069 styler.ColourTo(i, StateToPrint);
2070 state = SCE_HJ_DEFAULT;
2071 }
2072 break;
2073 case SCE_HJ_REGEX:
2074 if (ch == '\r' || ch == '\n' || ch == '/') {
2075 if (ch == '/') {
2076 while (IsASCII(chNext) && islower(chNext)) { // gobble regex flags
2077 i++;
2078 ch = chNext;
2079 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2080 }
2081 }
2082 styler.ColourTo(i, StateToPrint);
2083 state = SCE_HJ_DEFAULT;
2084 } else if (ch == '\\') {
2085 // Gobble up the quoted character
2086 if (chNext == '\\' || chNext == '/') {
2087 i++;
2088 ch = chNext;
2089 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2090 }
2091 }
2092 break;
2093 case SCE_HB_DEFAULT:
2094 case SCE_HB_START:
2095 if (IsAWordStart(ch)) {
2096 styler.ColourTo(i - 1, StateToPrint);
2097 state = SCE_HB_WORD;
2098 } else if (ch == '\'') {
2099 styler.ColourTo(i - 1, StateToPrint);
2100 state = SCE_HB_COMMENTLINE;
2101 } else if (ch == '\"') {
2102 styler.ColourTo(i - 1, StateToPrint);
2103 state = SCE_HB_STRING;
2104 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2105 styler.SafeGetCharAt(i + 3) == '-') {
2106 styler.ColourTo(i - 1, StateToPrint);
2107 state = SCE_HB_COMMENTLINE;
2108 } else if (IsOperator(ch)) {
2109 styler.ColourTo(i - 1, StateToPrint);
2110 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2111 state = SCE_HB_DEFAULT;
2112 } else if ((ch == ' ') || (ch == '\t')) {
2113 if (state == SCE_HB_START) {
2114 styler.ColourTo(i - 1, StateToPrint);
2115 state = SCE_HB_DEFAULT;
2116 }
2117 }
2118 break;
2119 case SCE_HB_WORD:
2120 if (!IsAWordChar(ch)) {
2121 state = classifyWordHTVB(styler.GetStartSegment(), i - 1, keywords3, styler, inScriptType);
2122 if (state == SCE_HB_DEFAULT) {
2123 if (ch == '\"') {
2124 state = SCE_HB_STRING;
2125 } else if (ch == '\'') {
2126 state = SCE_HB_COMMENTLINE;
2127 } else if (IsOperator(ch)) {
2128 styler.ColourTo(i, statePrintForState(SCE_HB_DEFAULT, inScriptType));
2129 state = SCE_HB_DEFAULT;
2130 }
2131 }
2132 }
2133 break;
2134 case SCE_HB_STRING:
2135 if (ch == '\"') {
2136 styler.ColourTo(i, StateToPrint);
2137 state = SCE_HB_DEFAULT;
2138 } else if (ch == '\r' || ch == '\n') {
2139 styler.ColourTo(i - 1, StateToPrint);
2140 state = SCE_HB_STRINGEOL;
2141 }
2142 break;
2143 case SCE_HB_COMMENTLINE:
2144 if (ch == '\r' || ch == '\n') {
2145 styler.ColourTo(i - 1, StateToPrint);
2146 state = SCE_HB_DEFAULT;
2147 }
2148 break;
2149 case SCE_HB_STRINGEOL:
2150 if (!isLineEnd(ch)) {
2151 styler.ColourTo(i - 1, StateToPrint);
2152 state = SCE_HB_DEFAULT;
2153 } else if (!isLineEnd(chNext)) {
2154 styler.ColourTo(i, StateToPrint);
2155 state = SCE_HB_DEFAULT;
2156 }
2157 break;
2158 case SCE_HP_DEFAULT:
2159 case SCE_HP_START:
2160 if (IsAWordStart(ch)) {
2161 styler.ColourTo(i - 1, StateToPrint);
2162 state = SCE_HP_WORD;
2163 } else if ((ch == '<') && (chNext == '!') && (chNext2 == '-') &&
2164 styler.SafeGetCharAt(i + 3) == '-') {
2165 styler.ColourTo(i - 1, StateToPrint);
2166 state = SCE_HP_COMMENTLINE;
2167 } else if (ch == '#') {
2168 styler.ColourTo(i - 1, StateToPrint);
2169 state = SCE_HP_COMMENTLINE;
2170 } else if (ch == '\"') {
2171 styler.ColourTo(i - 1, StateToPrint);
2172 if (chNext == '\"' && chNext2 == '\"') {
2173 i += 2;
2174 state = SCE_HP_TRIPLEDOUBLE;
2175 ch = ' ';
2176 chPrev = ' ';
2177 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2178 } else {
2179 // state = statePrintForState(SCE_HP_STRING,inScriptType);
2180 state = SCE_HP_STRING;
2181 }
2182 } else if (ch == '\'') {
2183 styler.ColourTo(i - 1, StateToPrint);
2184 if (chNext == '\'' && chNext2 == '\'') {
2185 i += 2;
2186 state = SCE_HP_TRIPLE;
2187 ch = ' ';
2188 chPrev = ' ';
2189 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2190 } else {
2191 state = SCE_HP_CHARACTER;
2192 }
2193 } else if (IsOperator(ch)) {
2194 styler.ColourTo(i - 1, StateToPrint);
2195 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2196 } else if ((ch == ' ') || (ch == '\t')) {
2197 if (state == SCE_HP_START) {
2198 styler.ColourTo(i - 1, StateToPrint);
2199 state = SCE_HP_DEFAULT;
2200 }
2201 }
2202 break;
2203 case SCE_HP_WORD:
2204 if (!IsAWordChar(ch)) {
2205 classifyWordHTPy(styler.GetStartSegment(), i - 1, keywords4, styler, prevWord, inScriptType, isMako);
2206 state = SCE_HP_DEFAULT;
2207 if (ch == '#') {
2208 state = SCE_HP_COMMENTLINE;
2209 } else if (ch == '\"') {
2210 if (chNext == '\"' && chNext2 == '\"') {
2211 i += 2;
2212 state = SCE_HP_TRIPLEDOUBLE;
2213 ch = ' ';
2214 chPrev = ' ';
2215 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2216 } else {
2217 state = SCE_HP_STRING;
2218 }
2219 } else if (ch == '\'') {
2220 if (chNext == '\'' && chNext2 == '\'') {
2221 i += 2;
2222 state = SCE_HP_TRIPLE;
2223 ch = ' ';
2224 chPrev = ' ';
2225 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2226 } else {
2227 state = SCE_HP_CHARACTER;
2228 }
2229 } else if (IsOperator(ch)) {
2230 styler.ColourTo(i, statePrintForState(SCE_HP_OPERATOR, inScriptType));
2231 }
2232 }
2233 break;
2234 case SCE_HP_COMMENTLINE:
2235 if (ch == '\r' || ch == '\n') {
2236 styler.ColourTo(i - 1, StateToPrint);
2237 state = SCE_HP_DEFAULT;
2238 }
2239 break;
2240 case SCE_HP_STRING:
2241 if (ch == '\\') {
2242 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2243 i++;
2244 ch = chNext;
2245 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2246 }
2247 } else if (ch == '\"') {
2248 styler.ColourTo(i, StateToPrint);
2249 state = SCE_HP_DEFAULT;
2250 }
2251 break;
2252 case SCE_HP_CHARACTER:
2253 if (ch == '\\') {
2254 if (chNext == '\"' || chNext == '\'' || chNext == '\\') {
2255 i++;
2256 ch = chNext;
2257 chNext = static_cast<unsigned char>(styler.SafeGetCharAt(i + 1));
2258 }
2259 } else if (ch == '\'') {
2260 styler.ColourTo(i, StateToPrint);
2261 state = SCE_HP_DEFAULT;
2262 }
2263 break;
2264 case SCE_HP_TRIPLE:
2265 if (ch == '\'' && chPrev == '\'' && chPrev2 == '\'') {
2266 styler.ColourTo(i, StateToPrint);
2267 state = SCE_HP_DEFAULT;
2268 }
2269 break;
2270 case SCE_HP_TRIPLEDOUBLE:
2271 if (ch == '\"' && chPrev == '\"' && chPrev2 == '\"') {
2272 styler.ColourTo(i, StateToPrint);
2273 state = SCE_HP_DEFAULT;
2274 }
2275 break;
2276 ///////////// start - PHP state handling
2277 case SCE_HPHP_WORD:
2278 if (!IsAWordChar(ch)) {
2279 classifyWordHTPHP(styler.GetStartSegment(), i - 1, keywords5, styler);
2280 if (ch == '/' && chNext == '*') {
2281 i++;
2282 state = SCE_HPHP_COMMENT;
2283 } else if (ch == '/' && chNext == '/') {
2284 i++;
2285 state = SCE_HPHP_COMMENTLINE;
2286 } else if (ch == '#') {
2287 state = SCE_HPHP_COMMENTLINE;
2288 } else if (ch == '\"') {
2289 state = SCE_HPHP_HSTRING;
2290 phpStringDelimiter = "\"";
2291 } else if (styler.Match(i, "<<<")) {
2292 bool isSimpleString = false;
2293 i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2294 if (!phpStringDelimiter.empty()) {
2295 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2296 if (foldHeredoc) levelCurrent++;
2297 }
2298 } else if (ch == '\'') {
2299 state = SCE_HPHP_SIMPLESTRING;
2300 phpStringDelimiter = "\'";
2301 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2302 state = SCE_HPHP_VARIABLE;
2303 } else if (IsOperator(ch)) {
2304 state = SCE_HPHP_OPERATOR;
2305 } else {
2306 state = SCE_HPHP_DEFAULT;
2307 }
2308 }
2309 break;
2310 case SCE_HPHP_NUMBER:
2311 // recognize bases 8,10 or 16 integers OR floating-point numbers
2312 if (!IsADigit(ch)
2313 && strchr(".xXabcdefABCDEF", ch) == NULL
2314 && ((ch != '-' && ch != '+') || (chPrev != 'e' && chPrev != 'E'))) {
2315 styler.ColourTo(i - 1, SCE_HPHP_NUMBER);
2316 if (IsOperator(ch))
2317 state = SCE_HPHP_OPERATOR;
2318 else
2319 state = SCE_HPHP_DEFAULT;
2320 }
2321 break;
2322 case SCE_HPHP_VARIABLE:
2323 if (!IsPhpWordChar(chNext)) {
2324 styler.ColourTo(i, SCE_HPHP_VARIABLE);
2325 state = SCE_HPHP_DEFAULT;
2326 }
2327 break;
2328 case SCE_HPHP_COMMENT:
2329 if (ch == '/' && chPrev == '*') {
2330 styler.ColourTo(i, StateToPrint);
2331 state = SCE_HPHP_DEFAULT;
2332 }
2333 break;
2334 case SCE_HPHP_COMMENTLINE:
2335 if (ch == '\r' || ch == '\n') {
2336 styler.ColourTo(i - 1, StateToPrint);
2337 state = SCE_HPHP_DEFAULT;
2338 }
2339 break;
2340 case SCE_HPHP_HSTRING:
2341 if (ch == '\\' && ((phpStringDelimiter == "\"") || chNext == '$' || chNext == '{')) {
2342 // skip the next char
2343 i++;
2344 } else if (((ch == '{' && chNext == '$') || (ch == '$' && chNext == '{'))
2345 && IsPhpWordStart(chNext2)) {
2346 styler.ColourTo(i - 1, StateToPrint);
2347 state = SCE_HPHP_COMPLEX_VARIABLE;
2348 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2349 styler.ColourTo(i - 1, StateToPrint);
2350 state = SCE_HPHP_HSTRING_VARIABLE;
2351 } else if (styler.Match(i, phpStringDelimiter.c_str())) {
2352 if (phpStringDelimiter == "\"") {
2353 styler.ColourTo(i, StateToPrint);
2354 state = SCE_HPHP_DEFAULT;
2355 } else if (isLineEnd(chPrev)) {
2356 const int psdLength = static_cast<int>(phpStringDelimiter.length());
2357 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
2358 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
2359 if (isLineEnd(chAfterPsd) ||
2360 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
2361 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2362 styler.ColourTo(i, StateToPrint);
2363 state = SCE_HPHP_DEFAULT;
2364 if (foldHeredoc) levelCurrent--;
2365 }
2366 }
2367 }
2368 break;
2369 case SCE_HPHP_SIMPLESTRING:
2370 if (phpStringDelimiter == "\'") {
2371 if (ch == '\\') {
2372 // skip the next char
2373 i++;
2374 } else if (ch == '\'') {
2375 styler.ColourTo(i, StateToPrint);
2376 state = SCE_HPHP_DEFAULT;
2377 }
2378 } else if (isLineEnd(chPrev) && styler.Match(i, phpStringDelimiter.c_str())) {
2379 const int psdLength = static_cast<int>(phpStringDelimiter.length());
2380 const char chAfterPsd = styler.SafeGetCharAt(i + psdLength);
2381 const char chAfterPsd2 = styler.SafeGetCharAt(i + psdLength + 1);
2382 if (isLineEnd(chAfterPsd) ||
2383 (chAfterPsd == ';' && isLineEnd(chAfterPsd2))) {
2384 i += (((i + psdLength) < lengthDoc) ? psdLength : lengthDoc) - 1;
2385 styler.ColourTo(i, StateToPrint);
2386 state = SCE_HPHP_DEFAULT;
2387 if (foldHeredoc) levelCurrent--;
2388 }
2389 }
2390 break;
2391 case SCE_HPHP_HSTRING_VARIABLE:
2392 if (!IsPhpWordChar(chNext)) {
2393 styler.ColourTo(i, StateToPrint);
2394 state = SCE_HPHP_HSTRING;
2395 }
2396 break;
2397 case SCE_HPHP_COMPLEX_VARIABLE:
2398 if (ch == '}') {
2399 styler.ColourTo(i, StateToPrint);
2400 state = SCE_HPHP_HSTRING;
2401 }
2402 break;
2403 case SCE_HPHP_OPERATOR:
2404 case SCE_HPHP_DEFAULT:
2405 styler.ColourTo(i - 1, StateToPrint);
2406 if (IsADigit(ch) || (ch == '.' && IsADigit(chNext))) {
2407 state = SCE_HPHP_NUMBER;
2408 } else if (IsAWordStart(ch)) {
2409 state = SCE_HPHP_WORD;
2410 } else if (ch == '/' && chNext == '*') {
2411 i++;
2412 state = SCE_HPHP_COMMENT;
2413 } else if (ch == '/' && chNext == '/') {
2414 i++;
2415 state = SCE_HPHP_COMMENTLINE;
2416 } else if (ch == '#') {
2417 state = SCE_HPHP_COMMENTLINE;
2418 } else if (ch == '\"') {
2419 state = SCE_HPHP_HSTRING;
2420 phpStringDelimiter = "\"";
2421 } else if (styler.Match(i, "<<<")) {
2422 bool isSimpleString = false;
2423 i = FindPhpStringDelimiter(phpStringDelimiter, i + 3, lengthDoc, styler, isSimpleString);
2424 if (!phpStringDelimiter.empty()) {
2425 state = (isSimpleString ? SCE_HPHP_SIMPLESTRING : SCE_HPHP_HSTRING);
2426 if (foldHeredoc) levelCurrent++;
2427 }
2428 } else if (ch == '\'') {
2429 state = SCE_HPHP_SIMPLESTRING;
2430 phpStringDelimiter = "\'";
2431 } else if (ch == '$' && IsPhpWordStart(chNext)) {
2432 state = SCE_HPHP_VARIABLE;
2433 } else if (IsOperator(ch)) {
2434 state = SCE_HPHP_OPERATOR;
2435 } else if ((state == SCE_HPHP_OPERATOR) && (IsASpace(ch))) {
2436 state = SCE_HPHP_DEFAULT;
2437 }
2438 break;
2439 ///////////// end - PHP state handling
2440 }
2441
2442 // Some of the above terminated their lexeme but since the same character starts
2443 // the same class again, only reenter if non empty segment.
2444
2445 const bool nonEmptySegment = i >= static_cast<Sci_Position>(styler.GetStartSegment());
2446 if (state == SCE_HB_DEFAULT) { // One of the above succeeded
2447 if ((ch == '\"') && (nonEmptySegment)) {
2448 state = SCE_HB_STRING;
2449 } else if (ch == '\'') {
2450 state = SCE_HB_COMMENTLINE;
2451 } else if (IsAWordStart(ch)) {
2452 state = SCE_HB_WORD;
2453 } else if (IsOperator(ch)) {
2454 styler.ColourTo(i, SCE_HB_DEFAULT);
2455 }
2456 } else if (state == SCE_HBA_DEFAULT) { // One of the above succeeded
2457 if ((ch == '\"') && (nonEmptySegment)) {
2458 state = SCE_HBA_STRING;
2459 } else if (ch == '\'') {
2460 state = SCE_HBA_COMMENTLINE;
2461 } else if (IsAWordStart(ch)) {
2462 state = SCE_HBA_WORD;
2463 } else if (IsOperator(ch)) {
2464 styler.ColourTo(i, SCE_HBA_DEFAULT);
2465 }
2466 } else if (state == SCE_HJ_DEFAULT) { // One of the above succeeded
2467 if (ch == '/' && chNext == '*') {
2468 if (styler.SafeGetCharAt(i + 2) == '*')
2469 state = SCE_HJ_COMMENTDOC;
2470 else
2471 state = SCE_HJ_COMMENT;
2472 } else if (ch == '/' && chNext == '/') {
2473 state = SCE_HJ_COMMENTLINE;
2474 } else if ((ch == '\"') && (nonEmptySegment)) {
2475 state = SCE_HJ_DOUBLESTRING;
2476 } else if ((ch == '\'') && (nonEmptySegment)) {
2477 state = SCE_HJ_SINGLESTRING;
2478 } else if (IsAWordStart(ch)) {
2479 state = SCE_HJ_WORD;
2480 } else if (IsOperator(ch)) {
2481 styler.ColourTo(i, statePrintForState(SCE_HJ_SYMBOLS, inScriptType));
2482 }
2483 }
2484 }
2485
2486 switch (state) {
2487 case SCE_HJ_WORD:
2488 classifyWordHTJS(styler.GetStartSegment(), lengthDoc - 1, keywords2, styler, inScriptType);
2489 break;
2490 case SCE_HB_WORD:
2491 classifyWordHTVB(styler.GetStartSegment(), lengthDoc - 1, keywords3, styler, inScriptType);
2492 break;
2493 case SCE_HP_WORD:
2494 classifyWordHTPy(styler.GetStartSegment(), lengthDoc - 1, keywords4, styler, prevWord, inScriptType, isMako);
2495 break;
2496 case SCE_HPHP_WORD:
2497 classifyWordHTPHP(styler.GetStartSegment(), lengthDoc - 1, keywords5, styler);
2498 break;
2499 default:
2500 StateToPrint = statePrintForState(state, inScriptType);
2501 if (static_cast<Sci_Position>(styler.GetStartSegment()) < lengthDoc)
2502 styler.ColourTo(lengthDoc - 1, StateToPrint);
2503 break;
2504 }
2505
2506 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
2507 if (fold) {
2508 const int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
2509 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
2510 }
2511 styler.Flush();
2512 }
2513
2514 LexerModule lmHTML(SCLEX_HTML, LexerHTML::LexerFactoryHTML, "hypertext", htmlWordListDesc);
2515 LexerModule lmXML(SCLEX_XML, LexerHTML::LexerFactoryXML, "xml", htmlWordListDesc);
2516 LexerModule lmPHPSCRIPT(SCLEX_PHPSCRIPT, LexerHTML::LexerFactoryPHPScript, "phpscript", phpscriptWordListDesc);
2517