1 // Scintilla source code edit control
2 /** @file LexA68k.cxx
3  ** Lexer for Assembler, just for the MASM syntax
4  ** Written by Martial Demolins AKA Folco
5  **/
6 // Copyright 2010 Martial Demolins <mdemolins(a)gmail.com>
7 // The License.txt file describes the conditions under which this software
8 // may be distributed.
9 
10 
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17 
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
32 
33 
34 // Return values for GetOperatorType
35 #define NO_OPERATOR     0
36 #define OPERATOR_1CHAR  1
37 #define OPERATOR_2CHAR  2
38 
39 
40 /**
41  *  IsIdentifierStart
42  *
43  *  Return true if the given char is a valid identifier first char
44  */
45 
IsIdentifierStart(const int ch)46 static inline bool IsIdentifierStart (const int ch)
47 {
48     return (isalpha(ch) || (ch == '_') || (ch == '\\'));
49 }
50 
51 
52 /**
53  *  IsIdentifierChar
54  *
55  *  Return true if the given char is a valid identifier char
56  */
57 
IsIdentifierChar(const int ch)58 static inline bool IsIdentifierChar (const int ch)
59 {
60     return (isalnum(ch) || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.'));
61 }
62 
63 
64 /**
65  *  GetOperatorType
66  *
67  *  Return:
68  *  NO_OPERATOR     if char is not an operator
69  *  OPERATOR_1CHAR  if the operator is one char long
70  *  OPERATOR_2CHAR  if the operator is two chars long
71  */
72 
GetOperatorType(const int ch1,const int ch2)73 static inline int GetOperatorType (const int ch1, const int ch2)
74 {
75     int OpType = NO_OPERATOR;
76 
77     if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') ||
78         (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ','))
79         OpType = OPERATOR_1CHAR;
80 
81     else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>'))
82         OpType = OPERATOR_2CHAR;
83 
84     return OpType;
85 }
86 
87 
88 /**
89  *  IsBin
90  *
91  *  Return true if the given char is 0 or 1
92  */
93 
IsBin(const int ch)94 static inline bool IsBin (const int ch)
95 {
96     return (ch == '0') || (ch == '1');
97 }
98 
99 
100 /**
101  *  IsDoxygenChar
102  *
103  *  Return true if the char may be part of a Doxygen keyword
104  */
105 
IsDoxygenChar(const int ch)106 static inline bool IsDoxygenChar (const int ch)
107 {
108     return isalpha(ch) || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
109 }
110 
111 
112 /**
113  *  ColouriseA68kDoc
114  *
115  *  Main function, which colourises a 68k source
116  */
117 
ColouriseA68kDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)118 static void ColouriseA68kDoc (Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler)
119 {
120     // Used to buffer a string, to be able to compare it using built-in functions
121     char Buffer[100];
122 
123 
124     // Used to know the length of an operator
125     int OpType;
126 
127 
128     // Get references to keywords lists
129     WordList &cpuInstruction = *keywordlists[0];
130     WordList &registers = *keywordlists[1];
131     WordList &directive = *keywordlists[2];
132     WordList &extInstruction = *keywordlists[3];
133     WordList &alert          = *keywordlists[4];
134     WordList &doxygenKeyword = *keywordlists[5];
135 
136 
137     // Instanciate a context for our source
138     StyleContext sc(startPos, length, initStyle, styler);
139 
140 
141     /************************************************************
142     *
143     *   Parse the source
144     *
145     ************************************************************/
146 
147     for ( ; sc.More(); sc.Forward())
148     {
149         /************************************************************
150         *
151         *   A style always terminates at the end of a line, even for
152         *   comments (no multi-lines comments)
153         *
154         ************************************************************/
155         if (sc.atLineStart) {
156             sc.SetState(SCE_A68K_DEFAULT);
157         }
158 
159 
160         /************************************************************
161         *
162         *   If we are not in "default style", check if the style continues
163         *   In this case, we just have to loop
164         *
165         ************************************************************/
166 
167         if (sc.state != SCE_A68K_DEFAULT)
168         {
169             if (   ((sc.state == SCE_A68K_NUMBER_DEC)        && isdigit(sc.ch))                      // Decimal number
170                 || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch))                                      // Binary number
171                 || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch))                                   // Hexa number
172                 || ((sc.state == SCE_A68K_MACRO_ARG)         && isdigit(sc.ch))                      // Macro argument
173                 || ((sc.state == SCE_A68K_STRING1)    && (sc.ch != '\''))                                   // String single-quoted
174                 || ((sc.state == SCE_A68K_STRING2)    && (sc.ch != '\"'))                                   // String double-quoted
175                 || ((sc.state == SCE_A68K_MACRO_DECLARATION) && IsIdentifierChar(sc.ch))             // Macro declaration (or global label, we don't know at this point)
176                 || ((sc.state == SCE_A68K_IDENTIFIER)        && IsIdentifierChar(sc.ch))             // Identifier
177                 || ((sc.state == SCE_A68K_LABEL)             && IsIdentifierChar(sc.ch))             // Label (local)
178                 || ((sc.state == SCE_A68K_COMMENT_DOXYGEN)   && IsDoxygenChar(sc.ch))                // Doxygen keyword
179                 || ((sc.state == SCE_A68K_COMMENT_SPECIAL)   && isalpha(sc.ch))                      // Alert
180                 || ((sc.state == SCE_A68K_COMMENT)           && !isalpha(sc.ch) && (sc.ch != '\\'))) // Normal comment
181             {
182                 continue;
183             }
184 
185         /************************************************************
186         *
187         *   Check if current state terminates
188         *
189         ************************************************************/
190 
191             // Strings: include terminal ' or " in the current string by skipping it
192             if ((sc.state == SCE_A68K_STRING1) || (sc.state == SCE_A68K_STRING2)) {
193                 sc.Forward();
194                 }
195 
196 
197             // If a macro declaration was terminated with ':', it was a label
198             else if ((sc.state == SCE_A68K_MACRO_DECLARATION) && (sc.chPrev == ':')) {
199                 sc.ChangeState(SCE_A68K_LABEL);
200             }
201 
202 
203             // If it wasn't a Doxygen keyword, change it to normal comment
204             else if (sc.state == SCE_A68K_COMMENT_DOXYGEN) {
205                 sc.GetCurrent(Buffer, sizeof(Buffer));
206                 if (!doxygenKeyword.InList(Buffer)) {
207                     sc.ChangeState(SCE_A68K_COMMENT);
208                 }
209                 sc.SetState(SCE_A68K_COMMENT);
210                 continue;
211             }
212 
213 
214             // If it wasn't an Alert, change it to normal comment
215             else if (sc.state == SCE_A68K_COMMENT_SPECIAL) {
216                 sc.GetCurrent(Buffer, sizeof(Buffer));
217                 if (!alert.InList(Buffer)) {
218                     sc.ChangeState(SCE_A68K_COMMENT);
219                 }
220                 // Reset style to normal comment, or to Doxygen keyword if it begins with '\'
221                 if (sc.ch == '\\') {
222                     sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
223                 }
224                 else {
225                 sc.SetState(SCE_A68K_COMMENT);
226                 }
227                 continue;
228             }
229 
230 
231             // If we are in a comment, it's a Doxygen keyword or an Alert
232             else if (sc.state == SCE_A68K_COMMENT) {
233                 if (sc.ch == '\\') {
234                     sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
235                 }
236                 else {
237                     sc.SetState(SCE_A68K_COMMENT_SPECIAL);
238                 }
239                 continue;
240             }
241 
242 
243             // Check if we are at the end of an identifier
244             // In this case, colourise it if was a keyword.
245             else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch)) {
246                 sc.GetCurrentLowered(Buffer, sizeof(Buffer));                           // Buffer the string of the current context
247                 if (cpuInstruction.InList(Buffer)) {                                    // And check if it belongs to a keyword list
248                     sc.ChangeState(SCE_A68K_CPUINSTRUCTION);
249                 }
250                 else if (extInstruction.InList(Buffer)) {
251                     sc.ChangeState(SCE_A68K_EXTINSTRUCTION);
252                 }
253                 else if (registers.InList(Buffer)) {
254                     sc.ChangeState(SCE_A68K_REGISTER);
255                 }
256                 else if (directive.InList(Buffer)) {
257                     sc.ChangeState(SCE_A68K_DIRECTIVE);
258                 }
259             }
260 
261             // All special contexts are now handled.Come back to default style
262             sc.SetState(SCE_A68K_DEFAULT);
263         }
264 
265 
266         /************************************************************
267         *
268         *   Check if we must enter a new state
269         *
270         ************************************************************/
271 
272         // Something which begins at the beginning of a line, and with
273         // - '\' + an identifier start char, or
274         // - '\\@' + an identifier start char
275         // is a local label (second case is used for macro local labels). We set it already as a label, it can't be a macro/equ declaration
276         if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.chNext) && (sc.ch == '\\')) {
277             sc.SetState(SCE_A68K_LABEL);
278         }
279 
280         if (sc.atLineStart && (sc.ch < 0x80) && (sc.ch == '\\') && (sc.chNext == '\\')) {
281             sc.Forward(2);
282             if ((sc.ch == '@') && IsIdentifierStart(sc.chNext)) {
283                 sc.ChangeState(SCE_A68K_LABEL);
284                 sc.SetState(SCE_A68K_LABEL);
285             }
286         }
287 
288         // Label and macro identifiers start at the beginning of a line
289         // We set both as a macro id, but if it wasn't one (':' at the end),
290         // it will be changed as a label.
291         if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {
292             sc.SetState(SCE_A68K_MACRO_DECLARATION);
293         }
294         else if ((sc.ch < 0x80) && (sc.ch == ';')) {                            // Default: alert in a comment. If it doesn't match
295             sc.SetState(SCE_A68K_COMMENT);                                      // with an alert, it will be toggle to a normal comment
296         }
297         else if ((sc.ch < 0x80) && isdigit(sc.ch)) {                            // Decimal numbers haven't prefix
298             sc.SetState(SCE_A68K_NUMBER_DEC);
299         }
300         else if ((sc.ch < 0x80) && (sc.ch == '%')) {                            // Binary numbers are prefixed with '%'
301             sc.SetState(SCE_A68K_NUMBER_BIN);
302         }
303         else if ((sc.ch < 0x80) && (sc.ch == '$')) {                            // Hexadecimal numbers are prefixed with '$'
304             sc.SetState(SCE_A68K_NUMBER_HEX);
305         }
306         else if ((sc.ch < 0x80) && (sc.ch == '\'')) {                           // String (single-quoted)
307             sc.SetState(SCE_A68K_STRING1);
308         }
309         else if ((sc.ch < 0x80) && (sc.ch == '\"')) {                           // String (double-quoted)
310             sc.SetState(SCE_A68K_STRING2);
311         }
312         else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) {   // Replacement symbols in macro are prefixed with '\'
313             sc.SetState(SCE_A68K_MACRO_ARG);
314         }
315         else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {                  // An identifier: constant, label, etc...
316             sc.SetState(SCE_A68K_IDENTIFIER);
317         }
318         else {
319             if (sc.ch < 0x80) {
320                 OpType = GetOperatorType(sc.ch, sc.chNext);                     // Check if current char is an operator
321                 if (OpType != NO_OPERATOR) {
322                     sc.SetState(SCE_A68K_OPERATOR);
323                     if (OpType == OPERATOR_2CHAR) {                             // Check if the operator is 2 bytes long
324                         sc.ForwardSetState(SCE_A68K_OPERATOR);                  // (>> or <<)
325                     }
326                 }
327             }
328         }
329     }                                                                           // End of for()
330     sc.Complete();
331 }
332 
333 
334 // Names of the keyword lists
335 
336 static const char * const a68kWordListDesc[] =
337 {
338     "CPU instructions",
339     "Registers",
340     "Directives",
341     "Extended instructions",
342     "Comment special words",
343     "Doxygen keywords",
344     0
345 };
346 
347 LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k", 0, a68kWordListDesc);
348