1 // Scintilla source code edit control
2 /** @file LexHollywood.cxx
3 ** Lexer for Hollywood
4 ** Written by Andreas Falkenhahn, based on the BlitzBasic/PureBasic/Lua lexers
5 ** Thanks to Nicholai Benalal
6 ** For more information on Hollywood, see http://www.hollywood-mal.com/
7 ** Mail me (andreas <at> airsoftsoftwair <dot> de) for any bugs.
8 ** This code is subject to the same license terms as the rest of the Scintilla project:
9 ** The License.txt file describes the conditions under which this software may be distributed.
10 **/
11
12 #include <stdlib.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16 #include <assert.h>
17 #include <ctype.h>
18
19 #include <string>
20 #include <map>
21
22 #include "ILexer.h"
23 #include "Scintilla.h"
24 #include "SciLexer.h"
25
26 #include "WordList.h"
27 #include "LexAccessor.h"
28 #include "StyleContext.h"
29 #include "CharacterSet.h"
30 #include "LexerModule.h"
31 #include "OptionSet.h"
32 #include "DefaultLexer.h"
33
34 using namespace Scintilla;
35
/* Per-character class flags for the 7-bit ASCII range, indexed by character code.
 * Each entry is a bitwise OR of:
 *   1  - whitespace
 *   2  - operator
 *   4  - identifier
 *   8  - decimal digit
 *   16 - hex digit
 *   32 - bin digit (no entry in this table currently sets this bit)
 *   64 - letter
 * The table is read-only; index it only with values in the range 0..127.
 */
static const int character_classification[128] =
{
	0, // NUL ($0)
	0, // SOH ($1)
	0, // STX ($2)
	0, // ETX ($3)
	0, // EOT ($4)
	0, // ENQ ($5)
	0, // ACK ($6)
	0, // BEL ($7)
	0, // BS ($8)
	1, // HT ($9)
	1, // LF ($A)
	0, // VT ($B)
	0, // FF ($C)
	1, // CR ($D)
	0, // SO ($E)
	0, // SI ($F)
	0, // DLE ($10)
	0, // DC1 ($11)
	0, // DC2 ($12)
	0, // DC3 ($13)
	0, // DC4 ($14)
	0, // NAK ($15)
	0, // SYN ($16)
	0, // ETB ($17)
	0, // CAN ($18)
	0, // EM ($19)
	0, // SUB ($1A)
	0, // ESC ($1B)
	0, // FS ($1C)
	0, // GS ($1D)
	0, // RS ($1E)
	0, // US ($1F)
	1, // space ($20)
	4, // ! ($21)
	0, // " ($22)
	0, // # ($23)
	4, // $ ($24)
	2, // % ($25)
	2, // & ($26)
	2, // ' ($27)
	2, // ( ($28)
	2, // ) ($29)
	2, // * ($2A)
	2, // + ($2B)
	2, // , ($2C)
	2, // - ($2D)
	// NB: we treat "." as an identifier although it is also an operator and a decimal digit.
	// The reason why we treat it as an identifier is to support syntax highlighting for
	// plugin commands which always use a "." in their names, e.g. pdf.OpenDocument();
	// the decimal digit case is handled manually in the lexer so that 3.1415 and .123 are
	// styled correctly. The collateral damage of treating "." as an identifier is that "."
	// is never styled SCE_HOLLYWOOD_OPERATOR.
	4, // . ($2E)
	2, // / ($2F)
	28, // 0 ($30)
	28, // 1 ($31)
	28, // 2 ($32)
	28, // 3 ($33)
	28, // 4 ($34)
	28, // 5 ($35)
	28, // 6 ($36)
	28, // 7 ($37)
	28, // 8 ($38)
	28, // 9 ($39)
	2, // : ($3A)
	2, // ; ($3B)
	2, // < ($3C)
	2, // = ($3D)
	2, // > ($3E)
	2, // ? ($3F)
	0, // @ ($40)
	84, // A ($41)
	84, // B ($42)
	84, // C ($43)
	84, // D ($44)
	84, // E ($45)
	84, // F ($46)
	68, // G ($47)
	68, // H ($48)
	68, // I ($49)
	68, // J ($4A)
	68, // K ($4B)
	68, // L ($4C)
	68, // M ($4D)
	68, // N ($4E)
	68, // O ($4F)
	68, // P ($50)
	68, // Q ($51)
	68, // R ($52)
	68, // S ($53)
	68, // T ($54)
	68, // U ($55)
	68, // V ($56)
	68, // W ($57)
	68, // X ($58)
	68, // Y ($59)
	68, // Z ($5A)
	2, // [ ($5B)
	2, // \ ($5C)
	2, // ] ($5D)
	2, // ^ ($5E)
	68, // _ ($5F)
	2, // ` ($60)
	84, // a ($61)
	84, // b ($62)
	84, // c ($63)
	84, // d ($64)
	84, // e ($65)
	84, // f ($66)
	68, // g ($67)
	68, // h ($68)
	68, // i ($69)
	68, // j ($6A)
	68, // k ($6B)
	68, // l ($6C)
	68, // m ($6D)
	68, // n ($6E)
	68, // o ($6F)
	68, // p ($70)
	68, // q ($71)
	68, // r ($72)
	68, // s ($73)
	68, // t ($74)
	68, // u ($75)
	68, // v ($76)
	68, // w ($77)
	68, // x ($78)
	68, // y ($79)
	68, // z ($7A)
	2, // { ($7B)
	2, // | ($7C)
	2, // } ($7D)
	2, // ~ ($7E)
	0, // DEL ($7F)
};
182
IsSpace(int c)183 static bool IsSpace(int c) {
184 return c < 128 && (character_classification[c] & 1);
185 }
186
IsOperator(int c)187 static bool IsOperator(int c) {
188 return c < 128 && (character_classification[c] & 2);
189 }
190
IsIdentifier(int c)191 static bool IsIdentifier(int c) {
192 return c < 128 && (character_classification[c] & 4);
193 }
194
IsDigit(int c)195 static bool IsDigit(int c) {
196 return c < 128 && (character_classification[c] & 8);
197 }
198
IsHexDigit(int c)199 static bool IsHexDigit(int c) {
200 return c < 128 && (character_classification[c] & 16);
201 }
202
// Maps the ASCII capitals 'A'..'Z' onto 'a'..'z'; every other value is
// returned untouched. Independent of the current locale.
static int LowerCase(int c)
{
	const bool isAsciiUpper = ('A' <= c) && (c <= 'Z');
	return isAsciiUpper ? (c - 'A' + 'a') : c;
}
209
// Fold-level delta for a token: +1 for "function", -1 for "endfunction",
// 0 for anything else. The comparison is case-sensitive, so the caller is
// expected to pass an already lower-cased token.
static int CheckHollywoodFoldPoint(char const *token) {
	const bool opensFold = (strcmp(token, "function") == 0);
	if (opensFold)
		return 1;

	const bool closesFold = (strcmp(token, "endfunction") == 0);
	return closesFold ? -1 : 0;
}
219
220 // An individual named option for use in an OptionSet
221
// Settings consulted by LexerHollywood; both switches start out disabled.
struct OptionsHollywood {
	bool fold;         // master switch checked at the top of Fold()
	bool foldCompact;  // when set, Fold() flags lines without visible characters as white
	OptionsHollywood() :
		fold(false),
		foldCompact(false) {
	}
};
231
// Descriptions of the four keyword sets, terminated by a null pointer.
// The order corresponds to the word list indices 0..3.
static const char * const hollywoodWordListDesc[] = {
	"Hollywood keywords",                // 0
	"Hollywood standard API functions",  // 1
	"Hollywood plugin API functions",    // 2
	"Hollywood plugin methods",          // 3
	nullptr
};
239
240 struct OptionSetHollywood : public OptionSet<OptionsHollywood> {
OptionSetHollywoodOptionSetHollywood241 OptionSetHollywood(const char * const wordListDescriptions[]) {
242 DefineProperty("fold", &OptionsHollywood::fold);
243 DefineProperty("fold.compact", &OptionsHollywood::foldCompact);
244 DefineWordListSets(wordListDescriptions);
245 }
246 };
247
248 class LexerHollywood : public DefaultLexer {
249 int (*CheckFoldPoint)(char const *);
250 WordList keywordlists[4];
251 OptionsHollywood options;
252 OptionSetHollywood osHollywood;
253 public:
LexerHollywood(int (* CheckFoldPoint_)(char const *),const char * const wordListDescriptions[])254 LexerHollywood(int (*CheckFoldPoint_)(char const *), const char * const wordListDescriptions[]) :
255 DefaultLexer("hollywood", SCLEX_HOLLYWOOD),
256 CheckFoldPoint(CheckFoldPoint_),
257 osHollywood(wordListDescriptions) {
258 }
~LexerHollywood()259 virtual ~LexerHollywood() {
260 }
Release()261 void SCI_METHOD Release() override {
262 delete this;
263 }
Version() const264 int SCI_METHOD Version() const override {
265 return lvRelease5;
266 }
PropertyNames()267 const char * SCI_METHOD PropertyNames() override {
268 return osHollywood.PropertyNames();
269 }
PropertyType(const char * name)270 int SCI_METHOD PropertyType(const char *name) override {
271 return osHollywood.PropertyType(name);
272 }
DescribeProperty(const char * name)273 const char * SCI_METHOD DescribeProperty(const char *name) override {
274 return osHollywood.DescribeProperty(name);
275 }
276 Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override;
PropertyGet(const char * key)277 const char * SCI_METHOD PropertyGet(const char* key) override {
278 return osHollywood.PropertyGet(key);
279 }
DescribeWordListSets()280 const char * SCI_METHOD DescribeWordListSets() override {
281 return osHollywood.DescribeWordListSets();
282 }
283 Sci_Position SCI_METHOD WordListSet(int n, const char *wl) override;
284 void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
285 void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
286
PrivateCall(int,void *)287 void * SCI_METHOD PrivateCall(int, void *) override {
288 return 0;
289 }
LexerFactoryHollywood()290 static ILexer5 *LexerFactoryHollywood() {
291 return new LexerHollywood(CheckHollywoodFoldPoint, hollywoodWordListDesc);
292 }
293 };
294
PropertySet(const char * key,const char * val)295 Sci_Position SCI_METHOD LexerHollywood::PropertySet(const char *key, const char *val) {
296 if (osHollywood.PropertySet(&options, key, val)) {
297 return 0;
298 }
299 return -1;
300 }
301
WordListSet(int n,const char * wl)302 Sci_Position SCI_METHOD LexerHollywood::WordListSet(int n, const char *wl) {
303 WordList *wordListN = 0;
304 switch (n) {
305 case 0:
306 wordListN = &keywordlists[0];
307 break;
308 case 1:
309 wordListN = &keywordlists[1];
310 break;
311 case 2:
312 wordListN = &keywordlists[2];
313 break;
314 case 3:
315 wordListN = &keywordlists[3];
316 break;
317 }
318 Sci_Position firstModification = -1;
319 if (wordListN) {
320 WordList wlNew;
321 wlNew.Set(wl);
322 if (*wordListN != wlNew) {
323 wordListN->Set(wl);
324 firstModification = 0;
325 }
326 }
327 return firstModification;
328 }
329
Lex(Sci_PositionU startPos,Sci_Position length,int initStyle,IDocument * pAccess)330 void SCI_METHOD LexerHollywood::Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) {
331 LexAccessor styler(pAccess);
332
333 styler.StartAt(startPos);
334 bool inString = false;
335
336 StyleContext sc(startPos, length, initStyle, styler);
337
338 // Can't use sc.More() here else we miss the last character
339 for (; ; sc.Forward())
340 {
341 if (sc.atLineStart) inString = false;
342
343 if (sc.ch == '\"' && sc.chPrev != '\\') inString = !inString;
344
345 if (sc.state == SCE_HOLLYWOOD_IDENTIFIER) {
346 if (!IsIdentifier(sc.ch)) {
347 char s[100];
348 int kstates[4] = {
349 SCE_HOLLYWOOD_KEYWORD,
350 SCE_HOLLYWOOD_STDAPI,
351 SCE_HOLLYWOOD_PLUGINAPI,
352 SCE_HOLLYWOOD_PLUGINMETHOD,
353 };
354 sc.GetCurrentLowered(s, sizeof(s));
355 for (int i = 0; i < 4; i++) {
356 if (keywordlists[i].InList(s)) {
357 sc.ChangeState(kstates[i]);
358 }
359 }
360 sc.SetState(SCE_HOLLYWOOD_DEFAULT);
361 }
362 } else if (sc.state == SCE_HOLLYWOOD_OPERATOR) {
363
364 // always reset to default on operators because otherwise
365 // comments won't be recognized in sequences like "+/* Hello*/"
366 // --> "+/*" would be recognized as a sequence of operators
367
368 // if (!IsOperator(sc.ch)) sc.SetState(SCE_HOLLYWOOD_DEFAULT);
369 sc.SetState(SCE_HOLLYWOOD_DEFAULT);
370
371 } else if (sc.state == SCE_HOLLYWOOD_PREPROCESSOR) {
372 if (!IsIdentifier(sc.ch))
373 sc.SetState(SCE_HOLLYWOOD_DEFAULT);
374 } else if (sc.state == SCE_HOLLYWOOD_CONSTANT) {
375 if (!IsIdentifier(sc.ch))
376 sc.SetState(SCE_HOLLYWOOD_DEFAULT);
377 } else if (sc.state == SCE_HOLLYWOOD_NUMBER) {
378 if (!IsDigit(sc.ch) && sc.ch != '.')
379 sc.SetState(SCE_HOLLYWOOD_DEFAULT);
380 } else if (sc.state == SCE_HOLLYWOOD_HEXNUMBER) {
381 if (!IsHexDigit(sc.ch))
382 sc.SetState(SCE_HOLLYWOOD_DEFAULT);
383 } else if (sc.state == SCE_HOLLYWOOD_STRING) {
384 if (sc.ch == '"') {
385 sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
386 }
387 if (sc.atLineEnd) {
388 sc.SetState(SCE_HOLLYWOOD_DEFAULT);
389 }
390 } else if (sc.state == SCE_HOLLYWOOD_COMMENT) {
391 if (sc.atLineEnd) {
392 sc.SetState(SCE_HOLLYWOOD_DEFAULT);
393 }
394 } else if (sc.state == SCE_HOLLYWOOD_COMMENTBLOCK) {
395 if (sc.Match("*/") && !inString) {
396 sc.Forward();
397 sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
398 }
399 } else if (sc.state == SCE_HOLLYWOOD_STRINGBLOCK) {
400 if (sc.Match("]]") && !inString) {
401 sc.Forward();
402 sc.ForwardSetState(SCE_HOLLYWOOD_DEFAULT);
403 }
404 }
405
406 if (sc.state == SCE_HOLLYWOOD_DEFAULT) {
407 if (sc.Match(';')) {
408 sc.SetState(SCE_HOLLYWOOD_COMMENT);
409 } else if (sc.Match("/*")) {
410 sc.SetState(SCE_HOLLYWOOD_COMMENTBLOCK);
411 sc.Forward();
412 } else if (sc.Match("[[")) {
413 sc.SetState(SCE_HOLLYWOOD_STRINGBLOCK);
414 sc.Forward();
415 } else if (sc.Match('"')) {
416 sc.SetState(SCE_HOLLYWOOD_STRING);
417 } else if (sc.Match('$')) {
418 sc.SetState(SCE_HOLLYWOOD_HEXNUMBER);
419 } else if (sc.Match("0x") || sc.Match("0X")) { // must be before IsDigit() because of 0x
420 sc.SetState(SCE_HOLLYWOOD_HEXNUMBER);
421 sc.Forward();
422 } else if (sc.ch == '.' && (sc.chNext >= '0' && sc.chNext <= '9')) { // ".1234" style numbers
423 sc.SetState(SCE_HOLLYWOOD_NUMBER);
424 sc.Forward();
425 } else if (IsDigit(sc.ch)) {
426 sc.SetState(SCE_HOLLYWOOD_NUMBER);
427 } else if (sc.Match('#')) {
428 sc.SetState(SCE_HOLLYWOOD_CONSTANT);
429 } else if (sc.Match('@')) {
430 sc.SetState(SCE_HOLLYWOOD_PREPROCESSOR);
431 } else if (IsOperator(sc.ch)) {
432 sc.SetState(SCE_HOLLYWOOD_OPERATOR);
433 } else if (IsIdentifier(sc.ch)) {
434 sc.SetState(SCE_HOLLYWOOD_IDENTIFIER);
435 }
436 }
437
438 if (!sc.More())
439 break;
440 }
441 sc.Complete();
442 }
443
Fold(Sci_PositionU startPos,Sci_Position length,int,IDocument * pAccess)444 void SCI_METHOD LexerHollywood::Fold(Sci_PositionU startPos, Sci_Position length, int /* initStyle */, IDocument *pAccess) {
445
446 if (!options.fold)
447 return;
448
449 LexAccessor styler(pAccess);
450
451 Sci_PositionU lengthDoc = startPos + length;
452 int visibleChars = 0;
453 Sci_Position lineCurrent = styler.GetLine(startPos);
454 int levelPrev = styler.LevelAt(lineCurrent) & SC_FOLDLEVELNUMBERMASK;
455 int levelCurrent = levelPrev;
456 char chNext = styler[startPos];
457 int styleNext = styler.StyleAt(startPos);
458 int done = 0;
459 char word[256];
460 int wordlen = 0;
461
462 for (Sci_PositionU i = startPos; i < lengthDoc; i++) {
463 char ch = chNext;
464 chNext = styler.SafeGetCharAt(i + 1);
465 int style = styleNext;
466 styleNext = styler.StyleAt(i + 1);
467 bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
468 if (!done) {
469 if (wordlen) { // are we scanning a token already?
470 word[wordlen] = static_cast<char>(LowerCase(ch));
471 if (!IsIdentifier(ch)) { // done with token
472 word[wordlen] = '\0';
473 levelCurrent += CheckFoldPoint(word);
474 done = 1;
475 } else if (wordlen < 255) {
476 wordlen++;
477 }
478 } else { // start scanning at first non-whitespace character
479 if (!IsSpace(ch)) {
480 if (style != SCE_HOLLYWOOD_COMMENTBLOCK && IsIdentifier(ch)) {
481 word[0] = static_cast<char>(LowerCase(ch));
482 wordlen = 1;
483 } else // done with this line
484 done = 1;
485 }
486 }
487 }
488
489 if (atEOL) {
490 int lev = levelPrev;
491 if (visibleChars == 0 && options.foldCompact) {
492 lev |= SC_FOLDLEVELWHITEFLAG;
493 }
494 if ((levelCurrent > levelPrev) && (visibleChars > 0)) {
495 lev |= SC_FOLDLEVELHEADERFLAG;
496 }
497 if (lev != styler.LevelAt(lineCurrent)) {
498 styler.SetLevel(lineCurrent, lev);
499 }
500 lineCurrent++;
501 levelPrev = levelCurrent;
502 visibleChars = 0;
503 done = 0;
504 wordlen = 0;
505 }
506 if (!IsSpace(ch)) {
507 visibleChars++;
508 }
509 }
510 // Fill in the real level of the next line, keeping the current flags as they will be filled in later
511
512 int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
513 styler.SetLevel(lineCurrent, levelPrev | flagsNext);
514 }
515
// Module object for the Hollywood lexer: associates the SCLEX_HOLLYWOOD id and
// the name "hollywood" with the lexer factory and the word list descriptions.
LexerModule lmHollywood(SCLEX_HOLLYWOOD, LexerHollywood::LexerFactoryHollywood, "hollywood", hollywoodWordListDesc);
517