1 // Scintilla source code edit control
2 /** @file LexAsm.cxx
3  ** Lexer for Assembler, just for the MASM syntax
4  ** Written by The Black Horus
5  ** Enhancements and NASM stuff by Kein-Hong Man, 2003-10
6  ** SCE_ASM_COMMENTBLOCK and SCE_ASM_CHARACTER are for future GNU as colouring
7  **/
8 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
9 // The License.txt file describes the conditions under which this software may be distributed.
10 
11 #include <stdlib.h>
12 #include <string.h>
13 #include <ctype.h>
14 #include <stdio.h>
15 #include <stdarg.h>
16 
17 #include "Platform.h"
18 
19 #include "PropSet.h"
20 #include "Accessor.h"
21 #include "StyleContext.h"
22 #include "KeyWords.h"
23 #include "Scintilla.h"
24 #include "SciLexer.h"
25 
26 
// Tells whether ch can continue an identifier or a number token:
// an ASCII letter or digit, or one of '.', '_' and '?'.
// Values of 0x80 and above are never word characters.
static inline bool IsAWordChar(const int ch) {
	if (ch >= 0x80) {
		return false;
	}
	switch (ch) {
	case '.':
	case '_':
	case '?':
		return true;
	default:
		return isalnum(ch) != 0;
	}
}
31 
// Tells whether ch can begin an identifier: an ASCII letter or digit, or one
// of '_', '.', '%', '@', '$' and '?'. Values of 0x80 and above never qualify.
// Digits are accepted here; ColouriseAsmDoc tests for a number start before
// it consults this function.
static inline bool IsAWordStart(const int ch) {
	if (ch >= 0x80) {
		return false;
	}
	if (isalnum(ch)) {
		return true;
	}
	if (ch == '_' || ch == '.') {
		return true;
	}
	if (ch == '%' || ch == '@') {
		return true;
	}
	return ch == '$' || ch == '?';
}
36 
// Tells whether ch is one of the single-character assembler operators:
//   * / - + ( ) = ^ [ ] < & > , | ~ % :
// Every other character, including letters, digits and bytes with the high
// bit set, yields false.
//
// The function takes a plain char, which may be signed, so it deliberately
// avoids the <ctype.h> classifiers: passing them a negative value other than
// EOF is undefined behaviour. No operator character is alphanumeric, so the
// explicit comparison alone decides the result.
static inline bool IsAsmOperator(char ch) {
	switch (ch) {
	// '.' left out as it is used to make up numbers
	case '*': case '/': case '-': case '+':
	case '(': case ')': case '=': case '^':
	case '[': case ']': case '<': case '&':
	case '>': case ',': case '|': case '~':
	case '%': case ':':
		return true;
	default:
		return false;
	}
}
49 
ColouriseAsmDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)50 static void ColouriseAsmDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
51                             Accessor &styler) {
52 
53 	WordList &cpuInstruction = *keywordlists[0];
54 	WordList &mathInstruction = *keywordlists[1];
55 	WordList &registers = *keywordlists[2];
56 	WordList &directive = *keywordlists[3];
57 	WordList &directiveOperand = *keywordlists[4];
58 	WordList &extInstruction = *keywordlists[5];
59 
60 	// Do not leak onto next line
61 	if (initStyle == SCE_ASM_STRINGEOL)
62 		initStyle = SCE_ASM_DEFAULT;
63 
64 	StyleContext sc(startPos, length, initStyle, styler);
65 
66 	for (; sc.More(); sc.Forward())
67 	{
68 
69 		// Prevent SCE_ASM_STRINGEOL from leaking back to previous line
70 		if (sc.atLineStart && (sc.state == SCE_ASM_STRING)) {
71 			sc.SetState(SCE_ASM_STRING);
72 		} else if (sc.atLineStart && (sc.state == SCE_ASM_CHARACTER)) {
73 			sc.SetState(SCE_ASM_CHARACTER);
74 		}
75 
76 		// Handle line continuation generically.
77 		if (sc.ch == '\\') {
78 			if (sc.chNext == '\n' || sc.chNext == '\r') {
79 				sc.Forward();
80 				if (sc.ch == '\r' && sc.chNext == '\n') {
81 					sc.Forward();
82 				}
83 				continue;
84 			}
85 		}
86 
87 		// Determine if the current state should terminate.
88 		if (sc.state == SCE_ASM_OPERATOR) {
89 			if (!IsAsmOperator(static_cast<char>(sc.ch))) {
90 			    sc.SetState(SCE_ASM_DEFAULT);
91 			}
92 		}else if (sc.state == SCE_ASM_NUMBER) {
93 			if (!IsAWordChar(sc.ch)) {
94 				sc.SetState(SCE_ASM_DEFAULT);
95 			}
96 		} else if (sc.state == SCE_ASM_IDENTIFIER) {
97 			if (!IsAWordChar(sc.ch) ) {
98 				char s[100];
99 				sc.GetCurrentLowered(s, sizeof(s));
100 
101 				if (cpuInstruction.InList(s)) {
102 					sc.ChangeState(SCE_ASM_CPUINSTRUCTION);
103 				} else if (mathInstruction.InList(s)) {
104 					sc.ChangeState(SCE_ASM_MATHINSTRUCTION);
105 				} else if (registers.InList(s)) {
106 					sc.ChangeState(SCE_ASM_REGISTER);
107 				}  else if (directive.InList(s)) {
108 					sc.ChangeState(SCE_ASM_DIRECTIVE);
109 				} else if (directiveOperand.InList(s)) {
110 					sc.ChangeState(SCE_ASM_DIRECTIVEOPERAND);
111 				} else if (extInstruction.InList(s)) {
112 					sc.ChangeState(SCE_ASM_EXTINSTRUCTION);
113 				}
114 				sc.SetState(SCE_ASM_DEFAULT);
115 			}
116 		}
117 		else if (sc.state == SCE_ASM_COMMENT ) {
118 			if (sc.atLineEnd) {
119 				sc.SetState(SCE_ASM_DEFAULT);
120 			}
121 		} else if (sc.state == SCE_ASM_STRING) {
122 			if (sc.ch == '\\') {
123 				if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
124 					sc.Forward();
125 				}
126 			} else if (sc.ch == '\"') {
127 				sc.ForwardSetState(SCE_ASM_DEFAULT);
128 			} else if (sc.atLineEnd) {
129 				sc.ChangeState(SCE_ASM_STRINGEOL);
130 				sc.ForwardSetState(SCE_ASM_DEFAULT);
131 			}
132 		} else if (sc.state == SCE_ASM_CHARACTER) {
133 			if (sc.ch == '\\') {
134 				if (sc.chNext == '\"' || sc.chNext == '\'' || sc.chNext == '\\') {
135 					sc.Forward();
136 				}
137 			} else if (sc.ch == '\'') {
138 				sc.ForwardSetState(SCE_ASM_DEFAULT);
139 			} else if (sc.atLineEnd) {
140 				sc.ChangeState(SCE_ASM_STRINGEOL);
141 				sc.ForwardSetState(SCE_ASM_DEFAULT);
142 			}
143 		}
144 
145 		// Determine if a new state should be entered.
146 		if (sc.state == SCE_ASM_DEFAULT) {
147 			if (sc.ch == ';'){
148 				sc.SetState(SCE_ASM_COMMENT);
149 			} else if (isdigit(sc.ch) || (sc.ch == '.' && isdigit(sc.chNext))) {
150 				sc.SetState(SCE_ASM_NUMBER);
151 			} else if (IsAWordStart(sc.ch)) {
152 				sc.SetState(SCE_ASM_IDENTIFIER);
153 			} else if (sc.ch == '\"') {
154 				sc.SetState(SCE_ASM_STRING);
155 			} else if (sc.ch == '\'') {
156 				sc.SetState(SCE_ASM_CHARACTER);
157 			} else if (IsAsmOperator(static_cast<char>(sc.ch))) {
158 				sc.SetState(SCE_ASM_OPERATOR);
159 			}
160 		}
161 
162 	}
163 	sc.Complete();
164 }
165 
// Descriptions of the keyword lists used by this lexer, ending with a 0 entry.
// The entries appear in the same order as the keywordlists[] indices that
// ColouriseAsmDoc reads (0 = CPU instructions ... 5 = extended instructions).
static const char * const asmWordListDesc[] = {
	"CPU instructions",
	"FPU instructions",
	"Registers",
	"Directives",
	"Directive operands",
	"Extended instructions",
	0
};
175 
// Lexer module object for the assembler lexer: ties the SCLEX_ASM id and the
// name "asm" to ColouriseAsmDoc and the keyword list descriptions.
// NOTE(review): the fourth argument is 0; presumably no folding function is
// supplied - confirm against the LexerModule constructor.
LexerModule lmAsm(SCLEX_ASM, ColouriseAsmDoc, "asm", 0, asmWordListDesc);
177 
178