1 // Scintilla source code edit control
2 /** @file LexKVIrc.cxx
3  ** Lexer for KVIrc script.
4  **/
5 // Copyright 2013 by OmegaPhil <OmegaPhil+scintilla@gmail.com>, based in
6 // part from LexPython Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
7 // and LexCmake Copyright 2007 by Cristian Adam <cristian [dot] adam [at] gmx [dot] net>
8 
9 // The License.txt file describes the conditions under which this software may be distributed.
10 
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17 
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
32 
33 
34 /* KVIrc Script syntactic rules: http://www.kvirc.net/doc/doc_syntactic_rules.html */
35 
36 /* Utility functions */
/* Reports whether ch may appear inside a KVIrc word.
 *
 * ch: character code to classify.
 * Returns true for ASCII letters and digits, '_', '.' and ':'; false for
 * everything else, including negative values and codes >= 0x80. */
static inline bool IsAWordChar(int ch) {

    /* The <ctype.h> classifiers are only defined for EOF and values
     * representable as unsigned char, so anything outside the ASCII
     * range (negative values included) is rejected before isalnum is
     * consulted */
    if (ch < 0 || ch >= 0x80)
        return false;

    /* Keyword list includes modules, i.e. words including '.', and
     * alias namespaces include ':' */
    return isalnum(ch) || ch == '_' || ch == '.' || ch == ':';
}
/* Reports whether ch may begin a KVIrc word.
 *
 * ch: character code to classify.
 * Returns true for ASCII letters and digits and '_'; false for everything
 * else, including negative values and codes >= 0x80. */
static inline bool IsAWordStart(int ch) {

    /* The <ctype.h> classifiers are only defined for EOF and values
     * representable as unsigned char, so anything outside the ASCII
     * range (negative values included) is rejected before isalnum is
     * consulted */
    if (ch < 0 || ch >= 0x80)
        return false;

    /* Functions (start with '$') are treated separately to keywords */
    return isalnum(ch) || ch == '_';
}
49 
50 /* Interface function called by Scintilla to request some text to be
51  syntax highlighted */
ColouriseKVIrcDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)52 static void ColouriseKVIrcDoc(Sci_PositionU startPos, Sci_Position length,
53                               int initStyle, WordList *keywordlists[],
54                               Accessor &styler)
55 {
56     /* Fetching style context */
57     StyleContext sc(startPos, length, initStyle, styler);
58 
59     /* Accessing keywords and function-marking keywords */
60     WordList &keywords = *keywordlists[0];
61     WordList &functionKeywords = *keywordlists[1];
62 
63     /* Looping for all characters - only automatically moving forward
64      * when asked for (transitions leaving strings and keywords do this
65      * already) */
66     bool next = true;
67     for( ; sc.More(); next ? sc.Forward() : (void)0 )
68     {
69         /* Resetting next */
70         next = true;
71 
72         /* Dealing with different states */
73         switch (sc.state)
74         {
75             case SCE_KVIRC_DEFAULT:
76 
77                 /* Detecting single-line comments
78                  * Unfortunately KVIrc script allows raw '#<channel
79                  * name>' to be used, and appending # to an array returns
80                  * its length...
81                  * Going for a compromise where single line comments not
82                  * starting on a newline are allowed in all cases except
83                  * when they are preceeded with an opening bracket or comma
84                  * (this will probably be the most common style a valid
85                  * string-less channel name will be used with), with the
86                  * array length case included
87                  */
88                 if (
89                     (sc.ch == '#' && sc.atLineStart) ||
90                     (sc.ch == '#' && (
91                         sc.chPrev != '(' && sc.chPrev != ',' &&
92                         sc.chPrev != ']')
93                     )
94                 )
95                 {
96                     sc.SetState(SCE_KVIRC_COMMENT);
97                     break;
98                 }
99 
100                 /* Detecting multi-line comments */
101                 if (sc.Match('/', '*'))
102                 {
103                     sc.SetState(SCE_KVIRC_COMMENTBLOCK);
104                     break;
105                 }
106 
107                 /* Detecting strings */
108                 if (sc.ch == '"')
109                 {
110                     sc.SetState(SCE_KVIRC_STRING);
111                     break;
112                 }
113 
114                 /* Detecting functions */
115                 if (sc.ch == '$')
116                 {
117                     sc.SetState(SCE_KVIRC_FUNCTION);
118                     break;
119                 }
120 
121                 /* Detecting variables */
122                 if (sc.ch == '%')
123                 {
124                     sc.SetState(SCE_KVIRC_VARIABLE);
125                     break;
126                 }
127 
128                 /* Detecting numbers - isdigit is unsafe as it does not
129                  * validate, use CharacterSet.h functions */
130                 if (IsADigit(sc.ch))
131                 {
132                     sc.SetState(SCE_KVIRC_NUMBER);
133                     break;
134                 }
135 
136                 /* Detecting words */
137                 if (IsAWordStart(sc.ch) && IsAWordChar(sc.chNext))
138                 {
139                     sc.SetState(SCE_KVIRC_WORD);
140                     sc.Forward();
141                     break;
142                 }
143 
144                 /* Detecting operators */
145                 if (isoperator(sc.ch))
146                 {
147                     sc.SetState(SCE_KVIRC_OPERATOR);
148                     break;
149                 }
150 
151                 break;
152 
153             case SCE_KVIRC_COMMENT:
154 
155                 /* Breaking out of single line comment when a newline
156                  * is introduced */
157                 if (sc.ch == '\r' || sc.ch == '\n')
158                 {
159                     sc.SetState(SCE_KVIRC_DEFAULT);
160                     break;
161                 }
162 
163                 break;
164 
165             case SCE_KVIRC_COMMENTBLOCK:
166 
167                 /* Detecting end of multi-line comment */
168                 if (sc.Match('*', '/'))
169                 {
170                     // Moving the current position forward two characters
171                     // so that '*/' is included in the comment
172                     sc.Forward(2);
173                     sc.SetState(SCE_KVIRC_DEFAULT);
174 
175                     /* Comment has been exited and the current position
176                      * moved forward, yet the new current character
177                      * has yet to be defined - loop without moving
178                      * forward again */
179                     next = false;
180                     break;
181                 }
182 
183                 break;
184 
185             case SCE_KVIRC_STRING:
186 
187                 /* Detecting end of string - closing speechmarks */
188                 if (sc.ch == '"')
189                 {
190                     /* Allowing escaped speechmarks to pass */
191                     if (sc.chPrev == '\\')
192                         break;
193 
194                     /* Moving the current position forward to capture the
195                      * terminating speechmarks, and ending string */
196                     sc.ForwardSetState(SCE_KVIRC_DEFAULT);
197 
198                     /* String has been exited and the current position
199                      * moved forward, yet the new current character
200                      * has yet to be defined - loop without moving
201                      * forward again */
202                     next = false;
203                     break;
204                 }
205 
206                 /* Functions and variables are now highlighted in strings
207                  * Detecting functions */
208                 if (sc.ch == '$')
209                 {
210                     /* Allowing escaped functions to pass */
211                     if (sc.chPrev == '\\')
212                         break;
213 
214                     sc.SetState(SCE_KVIRC_STRING_FUNCTION);
215                     break;
216                 }
217 
218                 /* Detecting variables */
219                 if (sc.ch == '%')
220                 {
221                     /* Allowing escaped variables to pass */
222                     if (sc.chPrev == '\\')
223                         break;
224 
225                     sc.SetState(SCE_KVIRC_STRING_VARIABLE);
226                     break;
227                 }
228 
229                 /* Breaking out of a string when a newline is introduced */
230                 if (sc.ch == '\r' || sc.ch == '\n')
231                 {
232                     /* Allowing escaped newlines */
233                     if (sc.chPrev == '\\')
234                         break;
235 
236                     sc.SetState(SCE_KVIRC_DEFAULT);
237                     break;
238                 }
239 
240                 break;
241 
242             case SCE_KVIRC_FUNCTION:
243             case SCE_KVIRC_VARIABLE:
244 
245                 /* Detecting the end of a function/variable (word) */
246                 if (!IsAWordChar(sc.ch))
247                 {
248                     sc.SetState(SCE_KVIRC_DEFAULT);
249 
250                     /* Word has been exited yet the current character
251                      * has yet to be defined - loop without moving
252                      * forward again */
253                     next = false;
254                     break;
255                 }
256 
257                 break;
258 
259             case SCE_KVIRC_STRING_FUNCTION:
260             case SCE_KVIRC_STRING_VARIABLE:
261 
262                 /* A function or variable in a string
263                  * Detecting the end of a function/variable (word) */
264                 if (!IsAWordChar(sc.ch))
265                 {
266                     sc.SetState(SCE_KVIRC_STRING);
267 
268                     /* Word has been exited yet the current character
269                      * has yet to be defined - loop without moving
270                      * forward again */
271                     next = false;
272                     break;
273                 }
274 
275                 break;
276 
277             case SCE_KVIRC_NUMBER:
278 
279                 /* Detecting the end of a number */
280                 if (!IsADigit(sc.ch))
281                 {
282                     sc.SetState(SCE_KVIRC_DEFAULT);
283 
284                     /* Number has been exited yet the current character
285                      * has yet to be defined - loop without moving
286                      * forward */
287                     next = false;
288                     break;
289                 }
290 
291                 break;
292 
293             case SCE_KVIRC_OPERATOR:
294 
295                 /* Because '%' is an operator but is also the marker for
296                  * a variable, I need to always treat operators as single
297                  * character strings and therefore redo their detection
298                  * after every character */
299                 sc.SetState(SCE_KVIRC_DEFAULT);
300 
301                 /* Operator has been exited yet the current character
302                  * has yet to be defined - loop without moving
303                  * forward */
304                 next = false;
305                 break;
306 
307             case SCE_KVIRC_WORD:
308 
309                 /* Detecting the end of a word */
310                 if (!IsAWordChar(sc.ch))
311                 {
312                     /* Checking if the word was actually a keyword -
313                      * fetching the current word, NULL-terminated like
314                      * the keyword list */
315                     char s[100];
316                     Sci_Position wordLen = sc.currentPos - styler.GetStartSegment();
317                     if (wordLen > 99)
318                         wordLen = 99;  /* Include '\0' in buffer */
319                     Sci_Position i;
320                     for( i = 0; i < wordLen; ++i )
321                     {
322                         s[i] = styler.SafeGetCharAt( styler.GetStartSegment() + i );
323                     }
324                     s[wordLen] = '\0';
325 
326                     /* Actually detecting keywords and fixing the state */
327                     if (keywords.InList(s))
328                     {
329                         /* The SetState call actually commits the
330                          * previous keyword state */
331                         sc.ChangeState(SCE_KVIRC_KEYWORD);
332                     }
333                     else if (functionKeywords.InList(s))
334                     {
335                         // Detecting function keywords and fixing the state
336                         sc.ChangeState(SCE_KVIRC_FUNCTION_KEYWORD);
337                     }
338 
339                     /* Transitioning to default and committing the previous
340                      * word state */
341                     sc.SetState(SCE_KVIRC_DEFAULT);
342 
343                     /* Word has been exited yet the current character
344                      * has yet to be defined - loop without moving
345                      * forward again */
346                     next = false;
347                     break;
348                 }
349 
350                 break;
351         }
352     }
353 
354     /* Indicating processing is complete */
355     sc.Complete();
356 }
357 
FoldKVIrcDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)358 static void FoldKVIrcDoc(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/,
359                       WordList *[], Accessor &styler)
360 {
361     /* Based on CMake's folder */
362 
363     /* Exiting if folding isnt enabled */
364     if ( styler.GetPropertyInt("fold") == 0 )
365         return;
366 
367     /* Obtaining current line number*/
368     Sci_Position currentLine = styler.GetLine(startPos);
369 
370     /* Obtaining starting character - indentation is done on a line basis,
371      * not character */
372     Sci_PositionU safeStartPos = styler.LineStart( currentLine );
373 
374     /* Initialising current level - this is defined as indentation level
375      * in the low 12 bits, with flag bits in the upper four bits.
376      * It looks like two indentation states are maintained in the returned
377      * 32bit value - 'nextLevel' in the most-significant bits, 'currentLevel'
378      * in the least-significant bits. Since the next level is the most
379      * up to date, this must refer to the current state of indentation.
380      * So the code bitshifts the old current level out of existence to
381      * get at the actual current state of indentation
382      * Based on the LexerCPP.cxx line 958 comment */
383     int currentLevel = SC_FOLDLEVELBASE;
384     if (currentLine > 0)
385         currentLevel = styler.LevelAt(currentLine - 1) >> 16;
386     int nextLevel = currentLevel;
387 
388     // Looping for characters in range
389     for (Sci_PositionU i = safeStartPos; i < startPos + length; ++i)
390     {
391         /* Folding occurs after syntax highlighting, meaning Scintilla
392          * already knows where the comments are
393          * Fetching the current state */
394         int state = styler.StyleAt(i) & 31;
395 
396         switch( styler.SafeGetCharAt(i) )
397         {
398             case '{':
399 
400                 /* Indenting only when the braces are not contained in
401                  * a comment */
402                 if (state != SCE_KVIRC_COMMENT &&
403                     state != SCE_KVIRC_COMMENTBLOCK)
404                     ++nextLevel;
405                 break;
406 
407             case '}':
408 
409                 /* Outdenting only when the braces are not contained in
410                  * a comment */
411                 if (state != SCE_KVIRC_COMMENT &&
412                     state != SCE_KVIRC_COMMENTBLOCK)
413                     --nextLevel;
414                 break;
415 
416             case '\n':
417             case '\r':
418 
419                 /* Preparing indentation information to return - combining
420                  * current and next level data */
421                 int lev = currentLevel | nextLevel << 16;
422 
423                 /* If the next level increases the indent level, mark the
424                  * current line as a fold point - current level data is
425                  * in the least significant bits */
426                 if (nextLevel > currentLevel )
427                     lev |= SC_FOLDLEVELHEADERFLAG;
428 
429                 /* Updating indentation level if needed */
430                 if (lev != styler.LevelAt(currentLine))
431                     styler.SetLevel(currentLine, lev);
432 
433                 /* Updating variables */
434                 ++currentLine;
435                 currentLevel = nextLevel;
436 
437                 /* Dealing with problematic Windows newlines -
438                  * incrementing to avoid the extra newline breaking the
439                  * fold point */
440                 if (styler.SafeGetCharAt(i) == '\r' &&
441                     styler.SafeGetCharAt(i + 1) == '\n')
442                     ++i;
443                 break;
444         }
445     }
446 
447     /* At this point the data has ended, so presumably the end of the line?
448      * Preparing indentation information to return - combining current
449      * and next level data */
450     int lev = currentLevel | nextLevel << 16;
451 
452     /* If the next level increases the indent level, mark the current
453      * line as a fold point - current level data is in the least
454      * significant bits */
455     if (nextLevel > currentLevel )
456         lev |= SC_FOLDLEVELHEADERFLAG;
457 
458     /* Updating indentation level if needed */
459     if (lev != styler.LevelAt(currentLine))
460         styler.SetLevel(currentLine, lev);
461 }
462 
/* Names of the word lists this lexer accepts, terminated by a null
 * pointer. Presumably the order matches the keywordlists[] indices read
 * by ColouriseKVIrcDoc (0: keywords, 1: function keywords). */
static const char *const kvircWordListDesc[] = { "primary", "function_keywords", 0 };
469 
470 
471 /* Registering functions and wordlists */
472 LexerModule lmKVIrc(SCLEX_KVIRC, ColouriseKVIrcDoc, "kvirc", FoldKVIrcDoc,
473                     kvircWordListDesc);
474