1 // Scintilla source code edit control
2 /** @file LexKVIrc.cxx
3  ** Lexer for KVIrc script.
4  **/
5 // Copyright 2013 by OmegaPhil <OmegaPhil+scintilla@gmail.com>, based in
6 // part from LexPython Copyright 1998-2002 by Neil Hodgson <neilh@scintilla.org>
7 // and LexCmake Copyright 2007 by Cristian Adam <cristian [dot] adam [at] gmx [dot] net>
8 
9 // The License.txt file describes the conditions under which this software may be distributed.
10 
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17 
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 
29 using namespace Scintilla;
30 
31 
32 /* KVIrc Script syntactic rules: http://www.kvirc.net/doc/doc_syntactic_rules.html */
33 
34 /* Utility functions */
/* Returns true when ch may appear inside a word.
 *
 * Accepted characters are ASCII letters and digits, '_', and also '.' and
 * ':' - the keyword list includes modules (words containing '.') and alias
 * namespaces contain ':'.
 *
 * Anything outside 0..0x7F is rejected before isalnum() is consulted:
 * passing isalnum() a value that is neither EOF nor representable as an
 * unsigned char is undefined behaviour, so negative input must never
 * reach it. */
static inline bool IsAWordChar(int ch) {

    if (ch < 0 || ch >= 0x80)
        return false;

    return isalnum(ch) || ch == '_' || ch == '.' || ch == ':';
}
/* Returns true when ch may begin a word: an ASCII letter, digit or '_'.
 *
 * Functions (which start with '$') are treated separately to keywords, so
 * '$' is deliberately not accepted here.
 *
 * Anything outside 0..0x7F is rejected before isalnum() is consulted:
 * passing isalnum() a value that is neither EOF nor representable as an
 * unsigned char is undefined behaviour, so negative input must never
 * reach it. */
static inline bool IsAWordStart(int ch) {

    if (ch < 0 || ch >= 0x80)
        return false;

    return isalnum(ch) || ch == '_';
}
47 
48 /* Interface function called by Scintilla to request some text to be
49  syntax highlighted */
ColouriseKVIrcDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)50 static void ColouriseKVIrcDoc(Sci_PositionU startPos, Sci_Position length,
51                               int initStyle, WordList *keywordlists[],
52                               Accessor &styler)
53 {
54     /* Fetching style context */
55     StyleContext sc(startPos, length, initStyle, styler);
56 
57     /* Accessing keywords and function-marking keywords */
58     WordList &keywords = *keywordlists[0];
59     WordList &functionKeywords = *keywordlists[1];
60 
61     /* Looping for all characters - only automatically moving forward
62      * when asked for (transitions leaving strings and keywords do this
63      * already) */
64     bool next = true;
65     for( ; sc.More(); next ? sc.Forward() : (void)0 )
66     {
67         /* Resetting next */
68         next = true;
69 
70         /* Dealing with different states */
71         switch (sc.state)
72         {
73             case SCE_KVIRC_DEFAULT:
74 
75                 /* Detecting single-line comments
76                  * Unfortunately KVIrc script allows raw '#<channel
77                  * name>' to be used, and appending # to an array returns
78                  * its length...
79                  * Going for a compromise where single line comments not
80                  * starting on a newline are allowed in all cases except
81                  * when they are preceeded with an opening bracket or comma
82                  * (this will probably be the most common style a valid
83                  * string-less channel name will be used with), with the
84                  * array length case included
85                  */
86                 if (
87                     (sc.ch == '#' && sc.atLineStart) ||
88                     (sc.ch == '#' && (
89                         sc.chPrev != '(' && sc.chPrev != ',' &&
90                         sc.chPrev != ']')
91                     )
92                 )
93                 {
94                     sc.SetState(SCE_KVIRC_COMMENT);
95                     break;
96                 }
97 
98                 /* Detecting multi-line comments */
99                 if (sc.Match('/', '*'))
100                 {
101                     sc.SetState(SCE_KVIRC_COMMENTBLOCK);
102                     break;
103                 }
104 
105                 /* Detecting strings */
106                 if (sc.ch == '"')
107                 {
108                     sc.SetState(SCE_KVIRC_STRING);
109                     break;
110                 }
111 
112                 /* Detecting functions */
113                 if (sc.ch == '$')
114                 {
115                     sc.SetState(SCE_KVIRC_FUNCTION);
116                     break;
117                 }
118 
119                 /* Detecting variables */
120                 if (sc.ch == '%')
121                 {
122                     sc.SetState(SCE_KVIRC_VARIABLE);
123                     break;
124                 }
125 
126                 /* Detecting numbers - isdigit is unsafe as it does not
127                  * validate, use CharacterSet.h functions */
128                 if (IsADigit(sc.ch))
129                 {
130                     sc.SetState(SCE_KVIRC_NUMBER);
131                     break;
132                 }
133 
134                 /* Detecting words */
135                 if (IsAWordStart(sc.ch) && IsAWordChar(sc.chNext))
136                 {
137                     sc.SetState(SCE_KVIRC_WORD);
138                     sc.Forward();
139                     break;
140                 }
141 
142                 /* Detecting operators */
143                 if (isoperator(sc.ch))
144                 {
145                     sc.SetState(SCE_KVIRC_OPERATOR);
146                     break;
147                 }
148 
149                 break;
150 
151             case SCE_KVIRC_COMMENT:
152 
153                 /* Breaking out of single line comment when a newline
154                  * is introduced */
155                 if (sc.ch == '\r' || sc.ch == '\n')
156                 {
157                     sc.SetState(SCE_KVIRC_DEFAULT);
158                     break;
159                 }
160 
161                 break;
162 
163             case SCE_KVIRC_COMMENTBLOCK:
164 
165                 /* Detecting end of multi-line comment */
166                 if (sc.Match('*', '/'))
167                 {
168                     // Moving the current position forward two characters
169                     // so that '*/' is included in the comment
170                     sc.Forward(2);
171                     sc.SetState(SCE_KVIRC_DEFAULT);
172 
173                     /* Comment has been exited and the current position
174                      * moved forward, yet the new current character
175                      * has yet to be defined - loop without moving
176                      * forward again */
177                     next = false;
178                     break;
179                 }
180 
181                 break;
182 
183             case SCE_KVIRC_STRING:
184 
185                 /* Detecting end of string - closing speechmarks */
186                 if (sc.ch == '"')
187                 {
188                     /* Allowing escaped speechmarks to pass */
189                     if (sc.chPrev == '\\')
190                         break;
191 
192                     /* Moving the current position forward to capture the
193                      * terminating speechmarks, and ending string */
194                     sc.ForwardSetState(SCE_KVIRC_DEFAULT);
195 
196                     /* String has been exited and the current position
197                      * moved forward, yet the new current character
198                      * has yet to be defined - loop without moving
199                      * forward again */
200                     next = false;
201                     break;
202                 }
203 
204                 /* Functions and variables are now highlighted in strings
205                  * Detecting functions */
206                 if (sc.ch == '$')
207                 {
208                     /* Allowing escaped functions to pass */
209                     if (sc.chPrev == '\\')
210                         break;
211 
212                     sc.SetState(SCE_KVIRC_STRING_FUNCTION);
213                     break;
214                 }
215 
216                 /* Detecting variables */
217                 if (sc.ch == '%')
218                 {
219                     /* Allowing escaped variables to pass */
220                     if (sc.chPrev == '\\')
221                         break;
222 
223                     sc.SetState(SCE_KVIRC_STRING_VARIABLE);
224                     break;
225                 }
226 
227                 /* Breaking out of a string when a newline is introduced */
228                 if (sc.ch == '\r' || sc.ch == '\n')
229                 {
230                     /* Allowing escaped newlines */
231                     if (sc.chPrev == '\\')
232                         break;
233 
234                     sc.SetState(SCE_KVIRC_DEFAULT);
235                     break;
236                 }
237 
238                 break;
239 
240             case SCE_KVIRC_FUNCTION:
241             case SCE_KVIRC_VARIABLE:
242 
243                 /* Detecting the end of a function/variable (word) */
244                 if (!IsAWordChar(sc.ch))
245                 {
246                     sc.SetState(SCE_KVIRC_DEFAULT);
247 
248                     /* Word has been exited yet the current character
249                      * has yet to be defined - loop without moving
250                      * forward again */
251                     next = false;
252                     break;
253                 }
254 
255                 break;
256 
257             case SCE_KVIRC_STRING_FUNCTION:
258             case SCE_KVIRC_STRING_VARIABLE:
259 
260                 /* A function or variable in a string
261                  * Detecting the end of a function/variable (word) */
262                 if (!IsAWordChar(sc.ch))
263                 {
264                     sc.SetState(SCE_KVIRC_STRING);
265 
266                     /* Word has been exited yet the current character
267                      * has yet to be defined - loop without moving
268                      * forward again */
269                     next = false;
270                     break;
271                 }
272 
273                 break;
274 
275             case SCE_KVIRC_NUMBER:
276 
277                 /* Detecting the end of a number */
278                 if (!IsADigit(sc.ch))
279                 {
280                     sc.SetState(SCE_KVIRC_DEFAULT);
281 
282                     /* Number has been exited yet the current character
283                      * has yet to be defined - loop without moving
284                      * forward */
285                     next = false;
286                     break;
287                 }
288 
289                 break;
290 
291             case SCE_KVIRC_OPERATOR:
292 
293                 /* Because '%' is an operator but is also the marker for
294                  * a variable, I need to always treat operators as single
295                  * character strings and therefore redo their detection
296                  * after every character */
297                 sc.SetState(SCE_KVIRC_DEFAULT);
298 
299                 /* Operator has been exited yet the current character
300                  * has yet to be defined - loop without moving
301                  * forward */
302                 next = false;
303                 break;
304 
305             case SCE_KVIRC_WORD:
306 
307                 /* Detecting the end of a word */
308                 if (!IsAWordChar(sc.ch))
309                 {
310                     /* Checking if the word was actually a keyword -
311                      * fetching the current word, NULL-terminated like
312                      * the keyword list */
313                     char s[100];
314                     Sci_Position wordLen = sc.currentPos - styler.GetStartSegment();
315                     if (wordLen > 99)
316                         wordLen = 99;  /* Include '\0' in buffer */
317                     Sci_Position i;
318                     for( i = 0; i < wordLen; ++i )
319                     {
320                         s[i] = styler.SafeGetCharAt( styler.GetStartSegment() + i );
321                     }
322                     s[wordLen] = '\0';
323 
324                     /* Actually detecting keywords and fixing the state */
325                     if (keywords.InList(s))
326                     {
327                         /* The SetState call actually commits the
328                          * previous keyword state */
329                         sc.ChangeState(SCE_KVIRC_KEYWORD);
330                     }
331                     else if (functionKeywords.InList(s))
332                     {
333                         // Detecting function keywords and fixing the state
334                         sc.ChangeState(SCE_KVIRC_FUNCTION_KEYWORD);
335                     }
336 
337                     /* Transitioning to default and committing the previous
338                      * word state */
339                     sc.SetState(SCE_KVIRC_DEFAULT);
340 
341                     /* Word has been exited yet the current character
342                      * has yet to be defined - loop without moving
343                      * forward again */
344                     next = false;
345                     break;
346                 }
347 
348                 break;
349         }
350     }
351 
352     /* Indicating processing is complete */
353     sc.Complete();
354 }
355 
FoldKVIrcDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)356 static void FoldKVIrcDoc(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/,
357                       WordList *[], Accessor &styler)
358 {
359     /* Based on CMake's folder */
360 
361     /* Exiting if folding isnt enabled */
362     if ( styler.GetPropertyInt("fold") == 0 )
363         return;
364 
365     /* Obtaining current line number*/
366     Sci_Position currentLine = styler.GetLine(startPos);
367 
368     /* Obtaining starting character - indentation is done on a line basis,
369      * not character */
370     Sci_PositionU safeStartPos = styler.LineStart( currentLine );
371 
372     /* Initialising current level - this is defined as indentation level
373      * in the low 12 bits, with flag bits in the upper four bits.
374      * It looks like two indentation states are maintained in the returned
375      * 32bit value - 'nextLevel' in the most-significant bits, 'currentLevel'
376      * in the least-significant bits. Since the next level is the most
377      * up to date, this must refer to the current state of indentation.
378      * So the code bitshifts the old current level out of existence to
379      * get at the actual current state of indentation
380      * Based on the LexerCPP.cxx line 958 comment */
381     int currentLevel = SC_FOLDLEVELBASE;
382     if (currentLine > 0)
383         currentLevel = styler.LevelAt(currentLine - 1) >> 16;
384     int nextLevel = currentLevel;
385 
386     // Looping for characters in range
387     for (Sci_PositionU i = safeStartPos; i < startPos + length; ++i)
388     {
389         /* Folding occurs after syntax highlighting, meaning Scintilla
390          * already knows where the comments are
391          * Fetching the current state */
392         int state = styler.StyleAt(i) & 31;
393 
394         switch( styler.SafeGetCharAt(i) )
395         {
396             case '{':
397 
398                 /* Indenting only when the braces are not contained in
399                  * a comment */
400                 if (state != SCE_KVIRC_COMMENT &&
401                     state != SCE_KVIRC_COMMENTBLOCK)
402                     ++nextLevel;
403                 break;
404 
405             case '}':
406 
407                 /* Outdenting only when the braces are not contained in
408                  * a comment */
409                 if (state != SCE_KVIRC_COMMENT &&
410                     state != SCE_KVIRC_COMMENTBLOCK)
411                     --nextLevel;
412                 break;
413 
414             case '\n':
415             case '\r':
416 
417                 /* Preparing indentation information to return - combining
418                  * current and next level data */
419                 int lev = currentLevel | nextLevel << 16;
420 
421                 /* If the next level increases the indent level, mark the
422                  * current line as a fold point - current level data is
423                  * in the least significant bits */
424                 if (nextLevel > currentLevel )
425                     lev |= SC_FOLDLEVELHEADERFLAG;
426 
427                 /* Updating indentation level if needed */
428                 if (lev != styler.LevelAt(currentLine))
429                     styler.SetLevel(currentLine, lev);
430 
431                 /* Updating variables */
432                 ++currentLine;
433                 currentLevel = nextLevel;
434 
435                 /* Dealing with problematic Windows newlines -
436                  * incrementing to avoid the extra newline breaking the
437                  * fold point */
438                 if (styler.SafeGetCharAt(i) == '\r' &&
439                     styler.SafeGetCharAt(i + 1) == '\n')
440                     ++i;
441                 break;
442         }
443     }
444 
445     /* At this point the data has ended, so presumably the end of the line?
446      * Preparing indentation information to return - combining current
447      * and next level data */
448     int lev = currentLevel | nextLevel << 16;
449 
450     /* If the next level increases the indent level, mark the current
451      * line as a fold point - current level data is in the least
452      * significant bits */
453     if (nextLevel > currentLevel )
454         lev |= SC_FOLDLEVELHEADERFLAG;
455 
456     /* Updating indentation level if needed */
457     if (lev != styler.LevelAt(currentLine))
458         styler.SetLevel(currentLine, lev);
459 }
460 
/* Descriptions of the word lists used by this lexer, terminated by a null
 * entry. The order presumably matches the keywordlists[] indices read by
 * ColouriseKVIrcDoc (0: keywords, 1: function-marking keywords). */
static const char *const kvircWordListDesc[] = {
    "primary",
    "function_keywords",
    0
};
467 
468 
469 /* Registering functions and wordlists */
470 LexerModule lmKVIrc(SCLEX_KVIRC, ColouriseKVIrcDoc, "kvirc", FoldKVIrcDoc,
471                     kvircWordListDesc);
472