// Scintilla source code edit control
/** @file LexA68k.cxx
 ** Lexer for 68k assembler sources
 ** Written by Martial Demolins AKA Folco
 **/
// Copyright 2010 Martial Demolins <mdemolins(a)gmail.com>
// The License.txt file describes the conditions under which this software
// may be distributed.
9
10
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28
29 using namespace Scintilla;
30
31
// Possible results of GetOperatorType: how many characters the operator
// found at the current position spans (0 meaning "not an operator").
enum {
    NO_OPERATOR    = 0,
    OPERATOR_1CHAR = 1,
    OPERATOR_2CHAR = 2
};
36
37
/**
 *  IsIdentifierStart
 *
 *  Return true if the given char is a valid identifier first char:
 *  an ASCII letter, '_' or '\'.
 *
 *  Any value outside the ASCII range (including negative values) yields
 *  false. The <ctype.h> classifiers are only defined for EOF and values
 *  representable as unsigned char, so the range is checked before isalpha()
 *  is consulted; this also keeps the result independent of the C locale
 *  for bytes >= 0x80.
 */

static inline bool IsIdentifierStart (const int ch)
{
    if ((ch == '_') || (ch == '\\')) {
        return true;
    }
    return (ch >= 0) && (ch < 0x80) && (isalpha(ch) != 0);
}
48
49
/**
 *  IsIdentifierChar
 *
 *  Return true if the given char is a valid identifier char:
 *  an ASCII letter or digit, '_', '@', ':' or '.'.
 *
 *  Any value outside the ASCII range (including negative values) yields
 *  false. The range is checked before isalnum() is consulted because the
 *  <ctype.h> classifiers are only defined for EOF and values representable
 *  as unsigned char.
 */

static inline bool IsIdentifierChar (const int ch)
{
    if ((ch == '_') || (ch == '@') || (ch == ':') || (ch == '.')) {
        return true;
    }
    return (ch >= 0) && (ch < 0x80) && (isalnum(ch) != 0);
}
60
61
62 /**
63 * GetOperatorType
64 *
65 * Return:
66 * NO_OPERATOR if char is not an operator
67 * OPERATOR_1CHAR if the operator is one char long
68 * OPERATOR_2CHAR if the operator is two chars long
69 */
70
GetOperatorType(const int ch1,const int ch2)71 static inline int GetOperatorType (const int ch1, const int ch2)
72 {
73 int OpType = NO_OPERATOR;
74
75 if ((ch1 == '+') || (ch1 == '-') || (ch1 == '*') || (ch1 == '/') || (ch1 == '#') ||
76 (ch1 == '(') || (ch1 == ')') || (ch1 == '~') || (ch1 == '&') || (ch1 == '|') || (ch1 == ','))
77 OpType = OPERATOR_1CHAR;
78
79 else if ((ch1 == ch2) && (ch1 == '<' || ch1 == '>'))
80 OpType = OPERATOR_2CHAR;
81
82 return OpType;
83 }
84
85
/**
 *  IsBin
 *
 *  Return true if the given char is a binary digit, i.e. '0' or '1'
 */

static inline bool IsBin (const int ch)
{
    // '0' and '1' are adjacent character codes, so a range test covers both
    return (ch >= '0') && (ch <= '1');
}
96
97
/**
 *  IsDoxygenChar
 *
 *  Return true if the char may be part of a Doxygen keyword:
 *  an ASCII letter, '$', '[', ']', '{' or '}'.
 *
 *  Any value outside the ASCII range (including negative values) yields
 *  false. The range is checked before isalpha() is consulted because the
 *  <ctype.h> classifiers are only defined for EOF and values representable
 *  as unsigned char.
 */

static inline bool IsDoxygenChar (const int ch)
{
    if ((ch == '$') || (ch == '[') || (ch == ']') || (ch == '{') || (ch == '}')) {
        return true;
    }
    return (ch >= 0) && (ch < 0x80) && (isalpha(ch) != 0);
}
108
109
110 /**
111 * ColouriseA68kDoc
112 *
113 * Main function, which colourises a 68k source
114 */
115
ColouriseA68kDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)116 static void ColouriseA68kDoc (Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[], Accessor &styler)
117 {
118 // Used to buffer a string, to be able to compare it using built-in functions
119 char Buffer[100];
120
121
122 // Used to know the length of an operator
123 int OpType;
124
125
126 // Get references to keywords lists
127 WordList &cpuInstruction = *keywordlists[0];
128 WordList ®isters = *keywordlists[1];
129 WordList &directive = *keywordlists[2];
130 WordList &extInstruction = *keywordlists[3];
131 WordList &alert = *keywordlists[4];
132 WordList &doxygenKeyword = *keywordlists[5];
133
134
135 // Instanciate a context for our source
136 StyleContext sc(startPos, length, initStyle, styler);
137
138
139 /************************************************************
140 *
141 * Parse the source
142 *
143 ************************************************************/
144
145 for ( ; sc.More(); sc.Forward())
146 {
147 /************************************************************
148 *
149 * A style always terminates at the end of a line, even for
150 * comments (no multi-lines comments)
151 *
152 ************************************************************/
153 if (sc.atLineStart) {
154 sc.SetState(SCE_A68K_DEFAULT);
155 }
156
157
158 /************************************************************
159 *
160 * If we are not in "default style", check if the style continues
161 * In this case, we just have to loop
162 *
163 ************************************************************/
164
165 if (sc.state != SCE_A68K_DEFAULT)
166 {
167 if ( ((sc.state == SCE_A68K_NUMBER_DEC) && isdigit(sc.ch)) // Decimal number
168 || ((sc.state == SCE_A68K_NUMBER_BIN) && IsBin(sc.ch)) // Binary number
169 || ((sc.state == SCE_A68K_NUMBER_HEX) && isxdigit(sc.ch)) // Hexa number
170 || ((sc.state == SCE_A68K_MACRO_ARG) && isdigit(sc.ch)) // Macro argument
171 || ((sc.state == SCE_A68K_STRING1) && (sc.ch != '\'')) // String single-quoted
172 || ((sc.state == SCE_A68K_STRING2) && (sc.ch != '\"')) // String double-quoted
173 || ((sc.state == SCE_A68K_MACRO_DECLARATION) && IsIdentifierChar(sc.ch)) // Macro declaration (or global label, we don't know at this point)
174 || ((sc.state == SCE_A68K_IDENTIFIER) && IsIdentifierChar(sc.ch)) // Identifier
175 || ((sc.state == SCE_A68K_LABEL) && IsIdentifierChar(sc.ch)) // Label (local)
176 || ((sc.state == SCE_A68K_COMMENT_DOXYGEN) && IsDoxygenChar(sc.ch)) // Doxygen keyword
177 || ((sc.state == SCE_A68K_COMMENT_SPECIAL) && isalpha(sc.ch)) // Alert
178 || ((sc.state == SCE_A68K_COMMENT) && !isalpha(sc.ch) && (sc.ch != '\\'))) // Normal comment
179 {
180 continue;
181 }
182
183 /************************************************************
184 *
185 * Check if current state terminates
186 *
187 ************************************************************/
188
189 // Strings: include terminal ' or " in the current string by skipping it
190 if ((sc.state == SCE_A68K_STRING1) || (sc.state == SCE_A68K_STRING2)) {
191 sc.Forward();
192 }
193
194
195 // If a macro declaration was terminated with ':', it was a label
196 else if ((sc.state == SCE_A68K_MACRO_DECLARATION) && (sc.chPrev == ':')) {
197 sc.ChangeState(SCE_A68K_LABEL);
198 }
199
200
201 // If it wasn't a Doxygen keyword, change it to normal comment
202 else if (sc.state == SCE_A68K_COMMENT_DOXYGEN) {
203 sc.GetCurrent(Buffer, sizeof(Buffer));
204 if (!doxygenKeyword.InList(Buffer)) {
205 sc.ChangeState(SCE_A68K_COMMENT);
206 }
207 sc.SetState(SCE_A68K_COMMENT);
208 continue;
209 }
210
211
212 // If it wasn't an Alert, change it to normal comment
213 else if (sc.state == SCE_A68K_COMMENT_SPECIAL) {
214 sc.GetCurrent(Buffer, sizeof(Buffer));
215 if (!alert.InList(Buffer)) {
216 sc.ChangeState(SCE_A68K_COMMENT);
217 }
218 // Reset style to normal comment, or to Doxygen keyword if it begins with '\'
219 if (sc.ch == '\\') {
220 sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
221 }
222 else {
223 sc.SetState(SCE_A68K_COMMENT);
224 }
225 continue;
226 }
227
228
229 // If we are in a comment, it's a Doxygen keyword or an Alert
230 else if (sc.state == SCE_A68K_COMMENT) {
231 if (sc.ch == '\\') {
232 sc.SetState(SCE_A68K_COMMENT_DOXYGEN);
233 }
234 else {
235 sc.SetState(SCE_A68K_COMMENT_SPECIAL);
236 }
237 continue;
238 }
239
240
241 // Check if we are at the end of an identifier
242 // In this case, colourise it if was a keyword.
243 else if ((sc.state == SCE_A68K_IDENTIFIER) && !IsIdentifierChar(sc.ch)) {
244 sc.GetCurrentLowered(Buffer, sizeof(Buffer)); // Buffer the string of the current context
245 if (cpuInstruction.InList(Buffer)) { // And check if it belongs to a keyword list
246 sc.ChangeState(SCE_A68K_CPUINSTRUCTION);
247 }
248 else if (extInstruction.InList(Buffer)) {
249 sc.ChangeState(SCE_A68K_EXTINSTRUCTION);
250 }
251 else if (registers.InList(Buffer)) {
252 sc.ChangeState(SCE_A68K_REGISTER);
253 }
254 else if (directive.InList(Buffer)) {
255 sc.ChangeState(SCE_A68K_DIRECTIVE);
256 }
257 }
258
259 // All special contexts are now handled.Come back to default style
260 sc.SetState(SCE_A68K_DEFAULT);
261 }
262
263
264 /************************************************************
265 *
266 * Check if we must enter a new state
267 *
268 ************************************************************/
269
270 // Something which begins at the beginning of a line, and with
271 // - '\' + an identifier start char, or
272 // - '\\@' + an identifier start char
273 // is a local label (second case is used for macro local labels). We set it already as a label, it can't be a macro/equ declaration
274 if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.chNext) && (sc.ch == '\\')) {
275 sc.SetState(SCE_A68K_LABEL);
276 }
277
278 if (sc.atLineStart && (sc.ch < 0x80) && (sc.ch == '\\') && (sc.chNext == '\\')) {
279 sc.Forward(2);
280 if ((sc.ch == '@') && IsIdentifierStart(sc.chNext)) {
281 sc.ChangeState(SCE_A68K_LABEL);
282 sc.SetState(SCE_A68K_LABEL);
283 }
284 }
285
286 // Label and macro identifiers start at the beginning of a line
287 // We set both as a macro id, but if it wasn't one (':' at the end),
288 // it will be changed as a label.
289 if (sc.atLineStart && (sc.ch < 0x80) && IsIdentifierStart(sc.ch)) {
290 sc.SetState(SCE_A68K_MACRO_DECLARATION);
291 }
292 else if ((sc.ch < 0x80) && (sc.ch == ';')) { // Default: alert in a comment. If it doesn't match
293 sc.SetState(SCE_A68K_COMMENT); // with an alert, it will be toggle to a normal comment
294 }
295 else if ((sc.ch < 0x80) && isdigit(sc.ch)) { // Decimal numbers haven't prefix
296 sc.SetState(SCE_A68K_NUMBER_DEC);
297 }
298 else if ((sc.ch < 0x80) && (sc.ch == '%')) { // Binary numbers are prefixed with '%'
299 sc.SetState(SCE_A68K_NUMBER_BIN);
300 }
301 else if ((sc.ch < 0x80) && (sc.ch == '$')) { // Hexadecimal numbers are prefixed with '$'
302 sc.SetState(SCE_A68K_NUMBER_HEX);
303 }
304 else if ((sc.ch < 0x80) && (sc.ch == '\'')) { // String (single-quoted)
305 sc.SetState(SCE_A68K_STRING1);
306 }
307 else if ((sc.ch < 0x80) && (sc.ch == '\"')) { // String (double-quoted)
308 sc.SetState(SCE_A68K_STRING2);
309 }
310 else if ((sc.ch < 0x80) && (sc.ch == '\\') && (isdigit(sc.chNext))) { // Replacement symbols in macro are prefixed with '\'
311 sc.SetState(SCE_A68K_MACRO_ARG);
312 }
313 else if ((sc.ch < 0x80) && IsIdentifierStart(sc.ch)) { // An identifier: constant, label, etc...
314 sc.SetState(SCE_A68K_IDENTIFIER);
315 }
316 else {
317 if (sc.ch < 0x80) {
318 OpType = GetOperatorType(sc.ch, sc.chNext); // Check if current char is an operator
319 if (OpType != NO_OPERATOR) {
320 sc.SetState(SCE_A68K_OPERATOR);
321 if (OpType == OPERATOR_2CHAR) { // Check if the operator is 2 bytes long
322 sc.ForwardSetState(SCE_A68K_OPERATOR); // (>> or <<)
323 }
324 }
325 }
326 }
327 } // End of for()
328 sc.Complete();
329 }
330
331
// Names of the keyword lists, terminated by a null pointer.
// The order is the same as the keywordlists[] indices read by ColouriseA68kDoc.

static const char * const a68kWordListDesc[] = {
    "CPU instructions",         // 0
    "Registers",                // 1
    "Directives",               // 2
    "Extended instructions",    // 3
    "Comment special words",    // 4
    "Doxygen keywords",         // 5
    NULL
};
344
345 LexerModule lmA68k(SCLEX_A68K, ColouriseA68kDoc, "a68k", 0, a68kWordListDesc);
346