// Scintilla source code edit control
/** @file LexA68k.cxx
 ** Lexer for 68k assembler sources (A68k syntax)
 ** Written by Martial Demolins AKA Folco
 **/
// Copyright 2010 Martial Demolins <mdemolins(a)gmail.com>
// The License.txt file describes the conditions under which this software
// may be distributed.
9
10
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
32
33
// Return values for GetOperatorType.
// Declared as enumerators rather than macros so that the names obey normal
// scoping rules and stay visible in a debugger. Names and values are unchanged.
enum {
	NO_OPERATOR    = 0,		// The char is not an operator
	OPERATOR_1CHAR = 1,		// The operator is one char long
	OPERATOR_2CHAR = 2		// The operator is two chars long
};
38
39
/**
 * IsIdentifierStart
 *
 * Return true if the given char is a valid identifier first char:
 * an ASCII letter, '_' or '\'.
 *
 * Any value outside the 7-bit ASCII range yields false. The range is checked
 * before isalpha() is called, because handing isalpha() a value which is
 * neither representable as an unsigned char nor EOF is undefined behaviour.
 */

static inline bool IsIdentifierStart (const int ch)
{
	if ((ch < 0) || (ch >= 0x80)) {
		return false;
	}

	return (isalpha(ch) || (ch == '_') || (ch == '\\'));
}
50
51
/**
 * IsIdentifierChar
 *
 * Return true if the given char is a valid identifier char:
 * an ASCII letter or digit, '_', '@', ':' or '.'.
 *
 * Any value outside the 7-bit ASCII range yields false. The range is checked
 * before isalnum() is called, because handing isalnum() a value which is
 * neither representable as an unsigned char nor EOF is undefined behaviour.
 */

static inline bool IsIdentifierChar (const int ch)
{
	if ((ch < 0) || (ch >= 0x80)) {
		return false;
	}

	return (isalnum(ch) || (ch == '_') || (ch == '@') || (ch == ':') || (ch == '.'));
}
62
63
64 /**
65 * GetOperatorType
66 *
67 * Return:
68 * NO_OPERATOR if char is not an operator
69 * OPERATOR_1CHAR if the operator is one char long
70 * OPERATOR_2CHAR if the operator is two chars long
71 */
72
GetOperatorType(const int ch1,const int ch2)73 static inline int GetOperatorType (const int ch1, const int ch2)
74 {
75 int OpType = NO_OPERATOR;
76
77 if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') ||
78 (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ','))
79 OpType = OPERATOR_1CHAR;
80
81 else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>'))
82 OpType = OPERATOR_2CHAR;
83
84 return OpType;
85 }
86
87
/**
 * IsBin
 *
 * Tell whether the given char is a binary digit.
 * Return true for '0' and '1', false for anything else.
 */

static inline bool IsBin (const int ch)
{
	switch (ch) {
	case '0':
	case '1':
		return true;

	default:
		return false;
	}
}
98
99
/**
 * IsDoxygenChar
 *
 * Return true if the char may be part of a Doxygen keyword:
 * an ASCII letter, '$', '[', ']', '{' or '}'.
 *
 * Any value outside the 7-bit ASCII range yields false. The range is checked
 * before isalpha() is called, because handing isalpha() a value which is
 * neither representable as an unsigned char nor EOF is undefined behaviour.
 */

static inline bool IsDoxygenChar (const int ch)
{
	if ((ch < 0) || (ch >= 0x80)) {
		return false;
	}

	return isalpha(ch) || (ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}');
}
110
111
112 /**
113 * ColouriseA68kDoc
114 *
115 * Main function, which colourises a 68k source
116 */
117
ColouriseA68kDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)118 static void ColouriseA68kDoc (Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler)
119 {
120 // Used to buffer a string, to be able to compare it using built-in functions
121 char Buffer[100];
122
123
124 // Used to know the length of an operator
125 int OpType;
126
127
128 // Get references to keywords lists
129 WordList &cpuInstruction = *keywordlists[0];
130 WordList ®isters = *keywordlists[1];
131 WordList &directive = *keywordlists[2];
132 WordList &extInstruction = *keywordlists[3];
133 WordList &alert = *keywordlists[4];
134 WordList &doxygenKeyword = *keywordlists[5];
135
136
137 // Instanciate a context for our source
138 StyleContext sc(startPos, length, initStyle, styler);
139
140
141 /************************************************************
142 *
143 * Parse the source
144 *
145 ************************************************************/
146
147 for ( ; sc.More(); sc.Forward())
148 {
149 /************************************************************
150 *
151 * A style always terminates at the end of a line, even for
152 * comments (no multi-lines comments)
153 *
154 ************************************************************/
155 if (sc.atLineStart) {
156 sc.SetState(SCE_A68K_DEFAULT);
157 }
158
159
160 /************************************************************
161 *
162 * If we are not in "default style", check if the style continues
163 * In this case, we just have to loop
164 *
165 ************************************************************/
166
167 if (sc.state != SCE_A68K_DEFAULT)
168 {
169 if ( ((sc.state == SCE_A68K_NUMBER_DEC) && isdigit(sc.ch)) // Decimal number
170 || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch)) // Binary number
171 || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch)) // Hexa number
172 || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Macro argument
173 || ((sc.state == SCE_A68K_STRING1) && (sc.ch != '\'')) // String single-quoted
174 || ((sc.state == SCE_A68K_STRING2) && (sc.ch != '\"')) // String double-quoted
175 || ((sc.state == SCE_A68K_MACRO_DECLARATION) && IsIdentifierChar(sc.ch)) // Macro declaration (or global label, we don't know at this point)
176 || ((sc.state == SCE_A68K_IDENTIFIER) && IsIdentifierChar(sc.ch)) // Identifier
177 || ((sc.state == SCE_A68K_LABEL) && IsIdentifierChar(sc.ch)) // Label (local)
178 || ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && IsDoxygenChar(sc.ch)) // Doxygen keyword
179 || ((sc.state == SCE_A68K_COMMENT_SPECIAL) && isalpha(sc.ch)) // Alert
180 || ((sc.state == SCE_A68K_COMMENT) && !isalpha(sc.ch) && (sc.ch != '\\'))) // Normal comment
181 {
182 continue;
183 }
184
185 /************************************************************
186 *
187 * Check if current state terminates
188 *
189 ************************************************************/
190
191 // Strings: include terminal ' or " in the current string by skipping it
192 if ((sc.state == SCE_A68K_STRING1) || (sc.state == SCE_A68K_STRING2)) {
193 sc.Forward();
194 }
195
196
197 // If a macro declaration was terminated with ':', it was a label
198 else if ((sc.state == SCE_A68K_MACRO_DECLARATION) && (sc.chPrev == ':')) {
199 sc.ChangeState(SCE_A68K_LABEL);
200 }
201
202
203 // If it wasn't a Doxygen keyword, change it to normal comment
204 else if (sc.state == SCE_A68K_COMMENT_DOXYGEN) {
205 sc.GetCurrent(Buffer, sizeof(Buffer));
206 if (!doxygenKeyword.InList(Buffer)) {
207 sc.ChangeState(SCE_A68K_COMMENT);
208 }
209 sc.SetState(SCE_A68K_COMMENT);
210 continue;
211 }
212
213
214 // If it wasn't an Alert, change it to normal comment
215 else if (sc.state == SCE_A68K_COMMENT_SPECIAL) {
216 sc.GetCurrent(Buffer, sizeof(Buffer));
217 if (!alert.InList(Buffer)) {
218 sc.ChangeState(SCE_A68K_COMMENT);
219 }
220 // Reset style to normal comment, or to Doxygen keyword if it begins with '\'
221 if (sc.ch == '\\') {
222 sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
223 }
224 else {
225 sc.SetState(SCE_A68K_COMMENT);
226 }
227 continue;
228 }
229
230
231 // If we are in a comment, it's a Doxygen keyword or an Alert
232 else if (sc.state == SCE_A68K_COMMENT) {
233 if (sc.ch == '\\') {
234 sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
235 }
236 else {
237 sc.SetState(SCE_A68K_COMMENT_SPECIAL);
238 }
239 continue;
240 }
241
242
243 // Check if we are at the end of an identifier
244 // In this case, colourise it if was a keyword.
245 else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch)) {
246 sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context
247 if (cpuInstruction.InList(Buffer)) { // And check if it belongs to a keyword list
248 sc.ChangeState(SCE_A68K_CPUINSTRUCTION);
249 }
250 else if (extInstruction.InList(Buffer)) {
251 sc.ChangeState(SCE_A68K_EXTINSTRUCTION);
252 }
253 else if (registers.InList(Buffer)) {
254 sc.ChangeState(SCE_A68K_REGISTER);
255 }
256 else if (directive.InList(Buffer)) {
257 sc.ChangeState(SCE_A68K_DIRECTIVE);
258 }
259 }
260
261 // All special contexts are now handled.Come back to default style
262 sc.SetState(SCE_A68K_DEFAULT);
263 }
264
265
266 /************************************************************
267 *
268 * Check if we must enter a new state
269 *
270 ************************************************************/
271
272 // Something which begins at the beginning of a line, and with
273 // - '\' + an identifier start char, or
274 // - '\\@' + an identifier start char
275 // is a local label (second case is used for macro local labels). We set it already as a label, it can't be a macro/equ declaration
276 if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.chNext) && (sc.ch == '\\')) {
277 sc.SetState(SCE_A68K_LABEL);
278 }
279
280 if (sc.atLineStart && (sc.ch < 0x80) && (sc.ch == '\\') && (sc.chNext == '\\')) {
281 sc.Forward(2);
282 if ((sc.ch == '@') && IsIdentifierStart(sc.chNext)) {
283 sc.ChangeState(SCE_A68K_LABEL);
284 sc.SetState(SCE_A68K_LABEL);
285 }
286 }
287
288 // Label and macro identifiers start at the beginning of a line
289 // We set both as a macro id, but if it wasn't one (':' at the end),
290 // it will be changed as a label.
291 if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {
292 sc.SetState(SCE_A68K_MACRO_DECLARATION);
293 }
294 else if ((sc.ch < 0x80) && (sc.ch == ';')) { // Default: alert in a comment. If it doesn't match
295 sc.SetState(SCE_A68K_COMMENT); // with an alert, it will be toggle to a normal comment
296 }
297 else if ((sc.ch < 0x80) && isdigit(sc.ch)) { // Decimal numbers haven't prefix
298 sc.SetState(SCE_A68K_NUMBER_DEC);
299 }
300 else if ((sc.ch < 0x80) && (sc.ch == '%')) { // Binary numbers are prefixed with '%'
301 sc.SetState(SCE_A68K_NUMBER_BIN);
302 }
303 else if ((sc.ch < 0x80) && (sc.ch == '$')) { // Hexadecimal numbers are prefixed with '$'
304 sc.SetState(SCE_A68K_NUMBER_HEX);
305 }
306 else if ((sc.ch < 0x80) && (sc.ch == '\'')) { // String (single-quoted)
307 sc.SetState(SCE_A68K_STRING1);
308 }
309 else if ((sc.ch < 0x80) && (sc.ch == '\"')) { // String (double-quoted)
310 sc.SetState(SCE_A68K_STRING2);
311 }
312 else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) { // Replacement symbols in macro are prefixed with '\'
313 sc.SetState(SCE_A68K_MACRO_ARG);
314 }
315 else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { // An identifier: constant, label, etc...
316 sc.SetState(SCE_A68K_IDENTIFIER);
317 }
318 else {
319 if (sc.ch < 0x80) {
320 OpType = GetOperatorType(sc.ch, sc.chNext); // Check if current char is an operator
321 if (OpType != NO_OPERATOR) {
322 sc.SetState(SCE_A68K_OPERATOR);
323 if (OpType == OPERATOR_2CHAR) { // Check if the operator is 2 bytes long
324 sc.ForwardSetState(SCE_A68K_OPERATOR); // (>> or <<)
325 }
326 }
327 }
328 }
329 } // End of for()
330 sc.Complete();
331 }
332
333
// Descriptions of the keyword lists, one entry per list.
// Entries appear in the order of the list indices; a null pointer ends the table.

static const char * const a68kWordListDesc[] = {
	"CPU instructions",			// list 0
	"Registers",				// list 1
	"Directives",				// list 2
	"Extended instructions",	// list 3
	"Comment special words",	// list 4
	"Doxygen keywords",			// list 5
	0							// end of table
};
346
347 LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k", 0, a68kWordListDesc);
348