1 // Scintilla source code edit control
2 /** @file LexA68k.cxx
 ** Lexer for 68k assembly sources
4  ** Written by Martial Demolins AKA Folco
5  **/
6 // Copyright 2010 Martial Demolins <mdemolins(a)gmail.com>
7 // The License.txt file describes the conditions under which this software
8 // may be distributed.
9 
10 
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17 
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 
29 using namespace Scintilla;
30 
31 
// Codes returned by GetOperatorType to classify the char(s) it is given
enum {
    NO_OPERATOR    = 0,     // The char is not an operator
    OPERATOR_1CHAR = 1,     // The operator is one char long
    OPERATOR_2CHAR = 2      // The operator is two chars long
};
36 
37 
/**
 *  IsIdentifierStart
 *
 *  Return true if the given char is a valid identifier first char:
 *  an ASCII letter, '_' or '\'.
 *
 *  The <ctype.h> classifiers have undefined behaviour for a value that is
 *  neither EOF nor representable as an unsigned char, so only values of the
 *  ASCII range are handed to isalpha(). Any other value is rejected.
 */

static inline bool IsIdentifierStart (const int ch)
{
    const bool isAsciiLetter = (ch >= 0) && (ch < 0x80) && isalpha(ch);

    return isAsciiLetter || (ch == '_') || (ch == '\\');
}
48 
49 
/**
 *  IsIdentifierChar
 *
 *  Return true if the given char is a valid identifier char:
 *  an ASCII letter or digit, '_', '@', ':' or '.'.
 *
 *  The <ctype.h> classifiers have undefined behaviour for a value that is
 *  neither EOF nor representable as an unsigned char, so only values of the
 *  ASCII range are handed to isalnum(). Any other value is rejected.
 */

static inline bool IsIdentifierChar (const int ch)
{
    const bool isAsciiAlnum = (ch >= 0) && (ch < 0x80) && isalnum(ch);

    return isAsciiAlnum || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.');
}
60 
61 
62 /**
63  *  GetOperatorType
64  *
65  *  Return:
66  *  NO_OPERATOR     if char is not an operator
67  *  OPERATOR_1CHAR  if the operator is one char long
68  *  OPERATOR_2CHAR  if the operator is two chars long
69  */
70 
GetOperatorType(const int ch1,const int ch2)71 static inline int GetOperatorType (const int ch1, const int ch2)
72 {
73     int OpType = NO_OPERATOR;
74 
75     if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') ||
76         (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ','))
77         OpType = OPERATOR_1CHAR;
78 
79     else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>'))
80         OpType = OPERATOR_2CHAR;
81 
82     return OpType;
83 }
84 
85 
/**
 *  IsBin
 *
 *  Return true if the given char is a binary digit ('0' or '1')
 */

static inline bool IsBin (const int ch)
{
    switch (ch) {
        case '0':
        case '1':
            return true;

        default:
            return false;
    }
}
96 
97 
/**
 *  IsDoxygenChar
 *
 *  Return true if the char may be part of a Doxygen keyword:
 *  an ASCII letter, '$', '[', ']', '{' or '}'.
 *
 *  The <ctype.h> classifiers have undefined behaviour for a value that is
 *  neither EOF nor representable as an unsigned char, so only values of the
 *  ASCII range are handed to isalpha(). Any other value is rejected.
 */

static inline bool IsDoxygenChar (const int ch)
{
    const bool isAsciiLetter = (ch >= 0) && (ch < 0x80) && isalpha(ch);

    return isAsciiLetter || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
}
108 
109 
110 /**
111  *  ColouriseA68kDoc
112  *
113  *  Main function, which colourises a 68k source
114  */
115 
ColouriseA68kDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)116 static void ColouriseA68kDoc (Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler)
117 {
118     // Used to buffer a string, to be able to compare it using built-in functions
119     char Buffer[100];
120 
121 
122     // Used to know the length of an operator
123     int OpType;
124 
125 
126     // Get references to keywords lists
127     WordList &cpuInstruction = *keywordlists[0];
128     WordList &registers = *keywordlists[1];
129     WordList &directive = *keywordlists[2];
130     WordList &extInstruction = *keywordlists[3];
131     WordList &alert          = *keywordlists[4];
132     WordList &doxygenKeyword = *keywordlists[5];
133 
134 
135     // Instanciate a context for our source
136     StyleContext sc(startPos, length, initStyle, styler);
137 
138 
139     /************************************************************
140     *
141     *   Parse the source
142     *
143     ************************************************************/
144 
145     for ( ; sc.More(); sc.Forward())
146     {
147         /************************************************************
148         *
149         *   A style always terminates at the end of a line, even for
150         *   comments (no multi-lines comments)
151         *
152         ************************************************************/
153         if (sc.atLineStart) {
154             sc.SetState(SCE_A68K_DEFAULT);
155         }
156 
157 
158         /************************************************************
159         *
160         *   If we are not in "default style", check if the style continues
161         *   In this case, we just have to loop
162         *
163         ************************************************************/
164 
165         if (sc.state != SCE_A68K_DEFAULT)
166         {
167             if (   ((sc.state == SCE_A68K_NUMBER_DEC)        && isdigit(sc.ch))                      // Decimal number
168                 || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch))                                      // Binary number
169                 || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch))                                   // Hexa number
170                 || ((sc.state == SCE_A68K_MACRO_ARG)         && isdigit(sc.ch))                      // Macro argument
171                 || ((sc.state == SCE_A68K_STRING1)    && (sc.ch != '\''))                                   // String single-quoted
172                 || ((sc.state == SCE_A68K_STRING2)    && (sc.ch != '\"'))                                   // String double-quoted
173                 || ((sc.state == SCE_A68K_MACRO_DECLARATION) && IsIdentifierChar(sc.ch))             // Macro declaration (or global label, we don't know at this point)
174                 || ((sc.state == SCE_A68K_IDENTIFIER)        && IsIdentifierChar(sc.ch))             // Identifier
175                 || ((sc.state == SCE_A68K_LABEL)             && IsIdentifierChar(sc.ch))             // Label (local)
176                 || ((sc.state == SCE_A68K_COMMENT_DOXYGEN)   && IsDoxygenChar(sc.ch))                // Doxygen keyword
177                 || ((sc.state == SCE_A68K_COMMENT_SPECIAL)   && isalpha(sc.ch))                      // Alert
178                 || ((sc.state == SCE_A68K_COMMENT)           && !isalpha(sc.ch) && (sc.ch != '\\'))) // Normal comment
179             {
180                 continue;
181             }
182 
183         /************************************************************
184         *
185         *   Check if current state terminates
186         *
187         ************************************************************/
188 
189             // Strings: include terminal ' or " in the current string by skipping it
190             if ((sc.state == SCE_A68K_STRING1) || (sc.state == SCE_A68K_STRING2)) {
191                 sc.Forward();
192                 }
193 
194 
195             // If a macro declaration was terminated with ':', it was a label
196             else if ((sc.state == SCE_A68K_MACRO_DECLARATION) && (sc.chPrev == ':')) {
197                 sc.ChangeState(SCE_A68K_LABEL);
198             }
199 
200 
201             // If it wasn't a Doxygen keyword, change it to normal comment
202             else if (sc.state == SCE_A68K_COMMENT_DOXYGEN) {
203                 sc.GetCurrent(Buffer, sizeof(Buffer));
204                 if (!doxygenKeyword.InList(Buffer)) {
205                     sc.ChangeState(SCE_A68K_COMMENT);
206                 }
207                 sc.SetState(SCE_A68K_COMMENT);
208                 continue;
209             }
210 
211 
212             // If it wasn't an Alert, change it to normal comment
213             else if (sc.state == SCE_A68K_COMMENT_SPECIAL) {
214                 sc.GetCurrent(Buffer, sizeof(Buffer));
215                 if (!alert.InList(Buffer)) {
216                     sc.ChangeState(SCE_A68K_COMMENT);
217                 }
218                 // Reset style to normal comment, or to Doxygen keyword if it begins with '\'
219                 if (sc.ch == '\\') {
220                     sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
221                 }
222                 else {
223                 sc.SetState(SCE_A68K_COMMENT);
224                 }
225                 continue;
226             }
227 
228 
229             // If we are in a comment, it's a Doxygen keyword or an Alert
230             else if (sc.state == SCE_A68K_COMMENT) {
231                 if (sc.ch == '\\') {
232                     sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
233                 }
234                 else {
235                     sc.SetState(SCE_A68K_COMMENT_SPECIAL);
236                 }
237                 continue;
238             }
239 
240 
241             // Check if we are at the end of an identifier
242             // In this case, colourise it if was a keyword.
243             else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch)) {
244                 sc.GetCurrentLowered(Buffer, sizeof(Buffer));                           // Buffer the string of the current context
245                 if (cpuInstruction.InList(Buffer)) {                                    // And check if it belongs to a keyword list
246                     sc.ChangeState(SCE_A68K_CPUINSTRUCTION);
247                 }
248                 else if (extInstruction.InList(Buffer)) {
249                     sc.ChangeState(SCE_A68K_EXTINSTRUCTION);
250                 }
251                 else if (registers.InList(Buffer)) {
252                     sc.ChangeState(SCE_A68K_REGISTER);
253                 }
254                 else if (directive.InList(Buffer)) {
255                     sc.ChangeState(SCE_A68K_DIRECTIVE);
256                 }
257             }
258 
259             // All special contexts are now handled.Come back to default style
260             sc.SetState(SCE_A68K_DEFAULT);
261         }
262 
263 
264         /************************************************************
265         *
266         *   Check if we must enter a new state
267         *
268         ************************************************************/
269 
270         // Something which begins at the beginning of a line, and with
271         // - '\' + an identifier start char, or
272         // - '\\@' + an identifier start char
273         // is a local label (second case is used for macro local labels). We set it already as a label, it can't be a macro/equ declaration
274         if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.chNext) && (sc.ch == '\\')) {
275             sc.SetState(SCE_A68K_LABEL);
276         }
277 
278         if (sc.atLineStart && (sc.ch < 0x80) && (sc.ch == '\\') && (sc.chNext == '\\')) {
279             sc.Forward(2);
280             if ((sc.ch == '@') && IsIdentifierStart(sc.chNext)) {
281                 sc.ChangeState(SCE_A68K_LABEL);
282                 sc.SetState(SCE_A68K_LABEL);
283             }
284         }
285 
286         // Label and macro identifiers start at the beginning of a line
287         // We set both as a macro id, but if it wasn't one (':' at the end),
288         // it will be changed as a label.
289         if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {
290             sc.SetState(SCE_A68K_MACRO_DECLARATION);
291         }
292         else if ((sc.ch < 0x80) && (sc.ch == ';')) {                            // Default: alert in a comment. If it doesn't match
293             sc.SetState(SCE_A68K_COMMENT);                                      // with an alert, it will be toggle to a normal comment
294         }
295         else if ((sc.ch < 0x80) && isdigit(sc.ch)) {                            // Decimal numbers haven't prefix
296             sc.SetState(SCE_A68K_NUMBER_DEC);
297         }
298         else if ((sc.ch < 0x80) && (sc.ch == '%')) {                            // Binary numbers are prefixed with '%'
299             sc.SetState(SCE_A68K_NUMBER_BIN);
300         }
301         else if ((sc.ch < 0x80) && (sc.ch == '$')) {                            // Hexadecimal numbers are prefixed with '$'
302             sc.SetState(SCE_A68K_NUMBER_HEX);
303         }
304         else if ((sc.ch < 0x80) && (sc.ch == '\'')) {                           // String (single-quoted)
305             sc.SetState(SCE_A68K_STRING1);
306         }
307         else if ((sc.ch < 0x80) && (sc.ch == '\"')) {                           // String (double-quoted)
308             sc.SetState(SCE_A68K_STRING2);
309         }
310         else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) {   // Replacement symbols in macro are prefixed with '\'
311             sc.SetState(SCE_A68K_MACRO_ARG);
312         }
313         else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {                  // An identifier: constant, label, etc...
314             sc.SetState(SCE_A68K_IDENTIFIER);
315         }
316         else {
317             if (sc.ch < 0x80) {
318                 OpType = GetOperatorType(sc.ch, sc.chNext);                     // Check if current char is an operator
319                 if (OpType != NO_OPERATOR) {
320                     sc.SetState(SCE_A68K_OPERATOR);
321                     if (OpType == OPERATOR_2CHAR) {                             // Check if the operator is 2 bytes long
322                         sc.ForwardSetState(SCE_A68K_OPERATOR);                  // (>> or <<)
323                     }
324                 }
325             }
326         }
327     }                                                                           // End of for()
328     sc.Complete();
329 }
330 
331 
// Names of the keyword lists
// The entries are in the same order as the keywordlists[] indices read by
// ColouriseA68kDoc (0: CPU instructions ... 5: Doxygen keywords).
// The last entry is 0 (presumably the end-of-list marker expected by LexerModule - TODO confirm).

static const char * const a68kWordListDesc[] =
{
    "CPU instructions",
    "Registers",
    "Directives",
    "Extended instructions",
    "Comment special words",
    "Doxygen keywords",
    0
};

// Lexer module object for SCLEX_A68K, built from ColouriseA68kDoc, the name "a68k"
// and the keyword list names above
LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k", 0, a68kWordListDesc);
346