1 // Scintilla source code edit control
2 /** @file LexAsm.cxx
3  ** Lexer for Assembler, just for the MASM syntax
4  ** Written by The Black Horus
5  ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
6  ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
7  ** Converted to lexer object and added further folding features/properties by "Udo Lechner" <dlchnr(at)gmx(dot)net>
8  **/
9 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
10 // The License.txt file describes the conditions under which this software may be distributed.
11 
12 #include <stdlib.h>
13 #include <string.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16 #include <assert.h>
17 #include <ctype.h>
18 
19 #include <string>
20 #include <map>
21 #include <set>
22 
23 #include "ILexer.h"
24 #include "Scintilla.h"
25 #include "SciLexer.h"
26 
27 #include "WordList.h"
28 #include "LexAccessor.h"
29 #include "StyleContext.h"
30 #include "CharacterSet.h"
31 #include "LexerModule.h"
32 #include "OptionSet.h"
33 
34 #ifdef SCI_NAMESPACE
35 using namespace Scintilla;
36 #endif
37 
/**
 * Test whether @a ch may appear inside an assembler word (identifier or number).
 *
 * Accepted characters are the ASCII letters and digits plus '.', '_' and '?'.
 * Anything outside 0..0x7F is rejected up front: handing a negative value
 * (other than EOF) to a <ctype.h> classifier is undefined behaviour, so the
 * range check must come before the isalnum() call.
 *
 * @param ch Character code to classify.
 * @return true when @a ch is a word character, false otherwise.
 */
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '.' || ch == '_' || ch == '?';
}
42 
/**
 * Test whether @a ch may begin an assembler word.
 *
 * Accepted characters are the ASCII letters and digits plus
 * '_', '.', '%', '@', '$' and '?'.
 * Values outside 0..0x7F are rejected before isalnum() is consulted, because
 * passing a negative value (other than EOF) to a <ctype.h> classifier is
 * undefined behaviour.
 *
 * @param ch Character code to classify.
 * @return true when @a ch can start a word, false otherwise.
 */
static inline bool IsAWordStart(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == '_' || ch == '.' ||
		ch == '%' || ch == '@' || ch == '$' || ch == '?';
}
47 
/**
 * Test whether @a ch is one of the single-character assembler operators.
 *
 * The operator set is: * / - + ( ) = ^ [ ] < & > , | ~ % :
 * '.' is deliberately left out as it is used to make up numbers.
 *
 * None of the operators is alphanumeric, so no <ctype.h> pre-check is needed;
 * a plain switch gives the same answer for every input and stays well defined
 * for negative or non-ASCII values.
 *
 * @param ch Character code to classify.
 * @return true when @a ch is an operator character, false otherwise.
 */
static inline bool IsAsmOperator(const int ch) {
	switch (ch) {
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=': case '^':
	case '[': case ']': case '<': case '&':
	case '>': case ',': case '|': case '~':
	case '%': case ':':
		return true;
	default:
		return false;
	}
}
60 
IsStreamCommentStyle(int style)61 static bool IsStreamCommentStyle(int style) {
62 	return style == SCE_ASM_COMMENTDIRECTIVE || style == SCE_ASM_COMMENTBLOCK;
63 }
64 
/**
 * Map an ASCII upper-case letter to its lower-case counterpart.
 * Every other value is returned unchanged.
 */
static inline int LowerCase(int c) {
	const bool isUpper = (c >= 'A') && (c <= 'Z');
	return isUpper ? c + ('a' - 'A') : c;
}
70 
71 // An individual named option for use in an OptionSet
72 
73 // Options used for LexerAsm
// Plain holder for every user-settable option of the Asm lexer.
// The constructor establishes the defaults.
struct OptionsAsm {
	std::string delimiter;          // COMMENT directive delimiter; empty selects '~'
	bool fold;                      // master switch checked first by Fold()
	bool foldSyntaxBased;           // fold on directives from the fold start/end word lists
	bool foldCommentMultiline;      // fold multi-line comments
	bool foldCommentExplicit;       // fold on explicit fold markers
	std::string foldExplicitStart;  // custom fold-start marker, replaces ";{"
	std::string foldExplicitEnd;    // custom fold-end marker, replaces ";}"
	bool foldExplicitAnywhere;      // accept explicit markers outside line comments
	bool foldCompact;               // flag lines without visible characters as white
	OptionsAsm() :
		delimiter(""),
		fold(false),
		foldSyntaxBased(true),
		foldCommentMultiline(false),
		foldCommentExplicit(false),
		foldExplicitStart(""),
		foldExplicitEnd(""),
		foldExplicitAnywhere(false),
		foldCompact(true) {
	}
};
96 
// Human-readable names of the keyword sets used by the Asm lexer, ended by a
// 0 entry. The table is handed to DefineWordListSets() in OptionSetAsm and to
// the lmAsm module definition.
// NOTE(review): the order presumably matches the index n handled in
// LexerAsm::WordListSet (0 = CPU instructions ... 7 = Directives4Foldend) - confirm.
static const char * const asmWordListDesc[] = {
	"CPU instructions",
	"FPU instructions",
	"Registers",
	"Directives",
	"Directive operands",
	"Extended instructions",
	"Directives4Foldstart",
	"Directives4Foldend",
	0
};
108 
// Binds each property name understood by the Asm lexer to the OptionsAsm
// member that stores its value, together with an optional description, and
// publishes the keyword-set descriptions.
struct OptionSetAsm : public OptionSet<OptionsAsm> {
	OptionSetAsm() {
		// Delimiter character for the COMMENT directive (Lex() falls back to '~' when empty).
		DefineProperty("lexer.asm.comment.delimiter", &OptionsAsm::delimiter,
			"Character used for COMMENT directive's delimiter, replacing the standard \"~\".");

		// Master folding switch; registered without a description.
		DefineProperty("fold", &OptionsAsm::fold);

		DefineProperty("fold.asm.syntax.based", &OptionsAsm::foldSyntaxBased,
			"Set this property to 0 to disable syntax based folding.");

		DefineProperty("fold.asm.comment.multiline", &OptionsAsm::foldCommentMultiline,
			"Set this property to 1 to enable folding multi-line comments.");

		DefineProperty("fold.asm.comment.explicit", &OptionsAsm::foldCommentExplicit,
			"This option enables folding explicit fold points when using the Asm lexer. "
			"Explicit fold points allows adding extra folding by placing a ;{ comment at the start and a ;} "
			"at the end of a section that should fold.");

		// Custom markers; Fold() only honours them when both start and end are non-empty.
		DefineProperty("fold.asm.explicit.start", &OptionsAsm::foldExplicitStart,
			"The string to use for explicit fold start points, replacing the standard ;{.");

		DefineProperty("fold.asm.explicit.end", &OptionsAsm::foldExplicitEnd,
			"The string to use for explicit fold end points, replacing the standard ;}.");

		DefineProperty("fold.asm.explicit.anywhere", &OptionsAsm::foldExplicitAnywhere,
			"Set this property to 1 to enable explicit fold points anywhere, not just in line comments.");

		// Registered without a description.
		DefineProperty("fold.compact", &OptionsAsm::foldCompact);

		DefineWordListSets(asmWordListDesc);
	}
};
141 
// Lexer object for assembler source implementing the ILexer interface.
// It owns the eight keyword lists, the current option values and the option
// set describing them; Lex() and Fold() are defined out of line.
class LexerAsm : public ILexer {
	// Keyword lists, each replaced through WordListSet(n, ...) with n = 0..7
	// in the order declared here.
	WordList cpuInstruction;
	WordList mathInstruction;
	WordList registers;
	WordList directive;
	WordList directiveOperand;
	WordList extInstruction;
	WordList directives4foldstart;
	WordList directives4foldend;
	// Current option values and the property table that writes into them.
	OptionsAsm options;
	OptionSetAsm osAsm;
public:
	LexerAsm() {
	}
	virtual ~LexerAsm() {
	}
	// Destroys this lexer; the object must not be used afterwards.
	void SCI_METHOD Release() {
		delete this;
	}
	// Reports the lexer interface version implemented (lvOriginal).
	int SCI_METHOD Version() const {
		return lvOriginal;
	}
	// The three property queries below forward to the option set.
	const char * SCI_METHOD PropertyNames() {
		return osAsm.PropertyNames();
	}
	int SCI_METHOD PropertyType(const char *name) {
		return osAsm.PropertyType(name);
	}
	const char * SCI_METHOD DescribeProperty(const char *name) {
		return osAsm.DescribeProperty(name);
	}
	// Stores a property value; returns 0 or -1 (see definition).
	int SCI_METHOD PropertySet(const char *key, const char *val);
	// Forwards to the option set's keyword-set descriptions.
	const char * SCI_METHOD DescribeWordListSets() {
		return osAsm.DescribeWordListSets();
	}
	// Replaces keyword list n; returns 0 when the list changed, -1 otherwise.
	int SCI_METHOD WordListSet(int n, const char *wl);
	// Assigns styles to the given document range.
	void SCI_METHOD Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess);
	// Computes fold levels for the given document range.
	void SCI_METHOD Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess);

	// No private operations are supported; both arguments are ignored.
	void * SCI_METHOD PrivateCall(int, void *) {
		return 0;
	}

	// Factory handed to the LexerModule definition; the caller receives a
	// heap-allocated lexer, which Release() deletes.
	static ILexer *LexerFactoryAsm() {
		return new LexerAsm();
	}
};
189 
PropertySet(const char * key,const char * val)190 int SCI_METHOD LexerAsm::PropertySet(const char *key, const char *val) {
191 	if (osAsm.PropertySet(&options, key, val)) {
192 		return 0;
193 	}
194 	return -1;
195 }
196 
WordListSet(int n,const char * wl)197 int SCI_METHOD LexerAsm::WordListSet(int n, const char *wl) {
198 	WordList *wordListN = 0;
199 	switch (n) {
200 	case 0:
201 		wordListN = &cpuInstruction;
202 		break;
203 	case 1:
204 		wordListN = &mathInstruction;
205 		break;
206 	case 2:
207 		wordListN = &registers;
208 		break;
209 	case 3:
210 		wordListN = &directive;
211 		break;
212 	case 4:
213 		wordListN = &directiveOperand;
214 		break;
215 	case 5:
216 		wordListN = &extInstruction;
217 		break;
218 	case 6:
219 		wordListN = &directives4foldstart;
220 		break;
221 	case 7:
222 		wordListN = &directives4foldend;
223 		break;
224 	}
225 	int firstModification = -1;
226 	if (wordListN) {
227 		WordList wlNew;
228 		wlNew.Set(wl);
229 		if (*wordListN != wlNew) {
230 			wordListN->Set(wl);
231 			firstModification = 0;
232 		}
233 	}
234 	return firstModification;
235 }
236 
// Assign styles to the document range [startPos, startPos + length).
//
//   startPos  - position at which styling starts
//   length    - number of characters to style
//   initStyle - style in effect at startPos; SCE_ASM_STRINGEOL is reset to default
//   pAccess   - document being styled
//
// Each loop iteration runs three phases on the current character: line
// continuation handling, deciding whether the current state ends here, and
// (when in the default state) deciding which state starts here.
void SCI_METHOD LexerAsm::Lex(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {
	LexAccessor styler(pAccess);

	// Do not leak onto next line
	if (initStyle == SCE_ASM_STRINGEOL)
		initStyle = SCE_ASM_DEFAULT;

	StyleContext sc(startPos, length, initStyle, styler);

	for (; sc.More(); sc.Forward())
	{

		// Prevent SCE_ASM_STRINGEOL from leaking back to previous line
		if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
			sc.SetState(SCE_ASM_STRING);
		} else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
			sc.SetState(SCE_ASM_CHARACTER);
		}

		// Handle line continuation generically.
		// A backslash directly before a line end is skipped together with the
		// line end (CR, LF or CR LF) without any state change.
		if (sc.ch == '\\') {
			if (sc.chNext == '\n' || sc.chNext == '\r') {
				sc.Forward();
				if (sc.ch == '\r' && sc.chNext == '\n') {
					sc.Forward();
				}
				continue;
			}
		}

		// Determine if the current state should terminate.
		if (sc.state == SCE_ASM_OPERATOR) {
			if (!IsAsmOperator(sc.ch)) {
			    sc.SetState(SCE_ASM_DEFAULT);
			}
		} else if (sc.state == SCE_ASM_NUMBER) {
			if (!IsAWordChar(sc.ch)) {
				sc.SetState(SCE_ASM_DEFAULT);
			}
		} else if (sc.state == SCE_ASM_IDENTIFIER) {
			if (!IsAWordChar(sc.ch) ) {
				// End of a word: classify its lower-cased text against the
				// word lists; the first list containing it wins.
				char s[100];
				sc.GetCurrentLowered(s, sizeof(s));
				bool IsDirective = false;

				if (cpuInstruction.InList(s)) {
					sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
				} else if (mathInstruction.InList(s)) {
					sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
				} else if (registers.InList(s)) {
					sc.ChangeState(SCE_ASM_REGISTER);
				}  else if (directive.InList(s)) {
					sc.ChangeState(SCE_ASM_DIRECTIVE);
					IsDirective = true;
				} else if (directiveOperand.InList(s)) {
					sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
				} else if (extInstruction.InList(s)) {
					sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
				}
				sc.SetState(SCE_ASM_DEFAULT);
				// The "comment" directive opens a delimited comment: skip spaces
				// and tabs on the same line, then enter the comment-directive
				// state if the next character is the delimiter.
				if (IsDirective && !strcmp(s, "comment")) {
					// Only the first character of the configured delimiter is used; '~' when unset.
					char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
					while (IsASpaceOrTab(sc.ch) && !sc.atLineEnd) {
						sc.ForwardSetState(SCE_ASM_DEFAULT);
					}
					if (sc.ch == delimiter) {
						sc.SetState(SCE_ASM_COMMENTDIRECTIVE);
					}
				}
			}
		} else if (sc.state == SCE_ASM_COMMENTDIRECTIVE) {
			char delimiter = options.delimiter.empty() ? '~' : options.delimiter.c_str()[0];
			if (sc.ch == delimiter) {
				// Closing delimiter: the rest of its line stays in the comment
				// state, after which default styling resumes.
				while (!sc.atLineEnd) {
					sc.Forward();
				}
				sc.SetState(SCE_ASM_DEFAULT);
			}
		} else if (sc.state == SCE_ASM_COMMENT ) {
			// Line comments run to the end of the line.
			if (sc.atLineEnd) {
				sc.SetState(SCE_ASM_DEFAULT);
			}
		} else if (sc.state == SCE_ASM_STRING) {
			if (sc.ch == '\\') {
				// Step over an escaped quote or backslash so it cannot end the string.
				if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
					sc.Forward();
				}
			} else if (sc.ch == '\"') {
				sc.ForwardSetState(SCE_ASM_DEFAULT);
			} else if (sc.atLineEnd) {
				// Unterminated string: switch its style to SCE_ASM_STRINGEOL.
				sc.ChangeState(SCE_ASM_STRINGEOL);
				sc.ForwardSetState(SCE_ASM_DEFAULT);
			}
		} else if (sc.state == SCE_ASM_CHARACTER) {
			// Same rules as for strings, with a single quote as terminator.
			if (sc.ch == '\\') {
				if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
					sc.Forward();
				}
			} else if (sc.ch == '\'') {
				sc.ForwardSetState(SCE_ASM_DEFAULT);
			} else if (sc.atLineEnd) {
				sc.ChangeState(SCE_ASM_STRINGEOL);
				sc.ForwardSetState(SCE_ASM_DEFAULT);
			}
		}

		// Determine if a new state should be entered.
		// The order of the tests matters: digits (and '.' followed by a digit)
		// are tried before IsAWordStart, which also accepts them.
		if (sc.state == SCE_ASM_DEFAULT) {
			if (sc.ch == ';'){
				sc.SetState(SCE_ASM_COMMENT);
			} else if (isascii(sc.ch) && (isdigit(sc.ch) || (sc.ch == '.' && isascii(sc.chNext) && isdigit(sc.chNext)))) {
				sc.SetState(SCE_ASM_NUMBER);
			} else if (IsAWordStart(sc.ch)) {
				sc.SetState(SCE_ASM_IDENTIFIER);
			} else if (sc.ch == '\"') {
				sc.SetState(SCE_ASM_STRING);
			} else if (sc.ch == '\'') {
				sc.SetState(SCE_ASM_CHARACTER);
			} else if (IsAsmOperator(sc.ch)) {
				sc.SetState(SCE_ASM_OPERATOR);
			}
		}

	}
	sc.Complete();
}
363 
// Store both the current line's fold level and the next line's in the
// level store to make it easy to pick up with each increment
// and to make it possible to fiddle the current level for "else".
367 
// Compute fold levels for the lines in [startPos, startPos + length).
//
//   startPos  - position at which folding starts
//   length    - number of characters to process
//   initStyle - style before startPos; seeds the "previous style" of the first character
//   pAccess   - document whose line levels are updated
//
// Does nothing unless the "fold" option is set. Each line's stored level word
// holds the level of the line itself in the low bits and the level for the
// following line shifted left by 16, plus the white/header flags.
void SCI_METHOD LexerAsm::Fold(unsigned int startPos, int length, int initStyle, IDocument *pAccess) {

	if (!options.fold)
		return;

	LexAccessor styler(pAccess);

	unsigned int endPos = startPos + length;
	int visibleChars = 0;
	int lineCurrent = styler.GetLine(startPos);
	int levelCurrent = SC_FOLDLEVELBASE;
	// Resume from the "next level" half stored for the previous line.
	if (lineCurrent > 0)
		levelCurrent = styler.LevelAt(lineCurrent-1) >> 16;
	int levelNext = levelCurrent;
	char chNext = styler[startPos];
	int styleNext = styler.StyleAt(startPos);
	int style = initStyle;
	// Accumulates the lower-cased text of the directive being scanned.
	char word[100];
	int wordlen = 0;
	// Custom markers apply only when both the start and the end string are set.
	const bool userDefinedFoldMarkers = !options.foldExplicitStart.empty() && !options.foldExplicitEnd.empty();
	for (unsigned int i = startPos; i < endPos; i++) {
		char ch = chNext;
		chNext = styler.SafeGetCharAt(i + 1);
		int stylePrev = style;
		style = styleNext;
		styleNext = styler.StyleAt(i + 1);
		// A lone CR, or LF (including the LF of a CR LF pair), ends the line.
		bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
		// Multi-line comments: open a fold on entering a stream comment style
		// and close it on leaving.
		if (options.foldCommentMultiline && IsStreamCommentStyle(style)) {
			if (!IsStreamCommentStyle(stylePrev)) {
				levelNext++;
			} else if (!IsStreamCommentStyle(styleNext) && !atEOL) {
				// Comments don't end at end of line and the next character may be unstyled.
				levelNext--;
			}
		}
		// Explicit fold points: the configured marker strings, or ";{" / ";}" by
		// default, looked for in line comments or anywhere if so configured.
		if (options.foldCommentExplicit && ((style == SCE_ASM_COMMENT) || options.foldExplicitAnywhere)) {
			if (userDefinedFoldMarkers) {
				if (styler.Match(i, options.foldExplicitStart.c_str())) {
 					levelNext++;
				} else if (styler.Match(i, options.foldExplicitEnd.c_str())) {
 					levelNext--;
 				}
			} else {
				if (ch == ';') {
					if (chNext == '{') {
						levelNext++;
					} else if (chNext == '}') {
						levelNext--;
					}
				}
 			}
 		}
		// Syntax-based folding: collect the characters styled as a directive
		// and look the finished word up in the fold start/end lists.
		if (options.foldSyntaxBased && (style == SCE_ASM_DIRECTIVE)) {
			word[wordlen++] = static_cast<char>(LowerCase(ch));
			// On filling the buffer, word[0] is set to NUL and collection goes
			// on from index 1, so the resulting C string is empty.
			if (wordlen == 100) {                   // prevent overflow
				word[0] = '\0';
				wordlen = 1;
			}
			if (styleNext != SCE_ASM_DIRECTIVE) {   // reading directive ready
				word[wordlen] = '\0';
				wordlen = 0;
				if (directives4foldstart.InList(word)) {
					levelNext++;
				} else if (directives4foldend.InList(word)){
					levelNext--;
				}
			}
		}
		if (!IsASpace(ch))
			visibleChars++;
		// At each line end, and at the last character of the range, store the
		// level word for the current line.
		if (atEOL || (i == endPos-1)) {
			int levelUse = levelCurrent;
			int lev = levelUse | levelNext << 16;
			if (visibleChars == 0 && options.foldCompact)
				lev |= SC_FOLDLEVELWHITEFLAG;
			// A line after which the level rises is a fold header.
			if (levelUse < levelNext)
				lev |= SC_FOLDLEVELHEADERFLAG;
			if (lev != styler.LevelAt(lineCurrent)) {
				styler.SetLevel(lineCurrent, lev);
			}
			lineCurrent++;
			levelCurrent = levelNext;
			if (atEOL && (i == static_cast<unsigned int>(styler.Length()-1))) {
				// There is an empty line at end of file so give it same level and empty
				styler.SetLevel(lineCurrent, (levelCurrent | levelCurrent << 16) | SC_FOLDLEVELWHITEFLAG);
			}
			visibleChars = 0;
		}
	}
}
458 
// Module definition for the Asm lexer: ties the SCLEX_ASM identifier and the
// name "asm" to the LexerAsm factory and the keyword-set descriptions.
LexerModule lmAsm(SCLEX_ASM, LexerAsm::LexerFactoryAsm, "asm", asmWordListDesc);
460 
461