1 // Scintilla source code edit control
2 /** @file LexMMIXAL.cxx
3  ** Lexer for MMIX Assembler Language.
 ** Written by Christoph Hösler <christoph.hoesler@student.uni-tuebingen.de>
5  ** For information about MMIX visit http://www-cs-faculty.stanford.edu/~knuth/mmix.html
6  **/
7 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
8 // The License.txt file describes the conditions under which this software may be distributed.
9 
10 #include <stdlib.h>
11 #include <string.h>
12 #include <stdio.h>
13 #include <stdarg.h>
14 #include <assert.h>
15 #include <ctype.h>
16 
17 #include "ILexer.h"
18 #include "Scintilla.h"
19 #include "SciLexer.h"
20 
21 #include "WordList.h"
22 #include "LexAccessor.h"
23 #include "Accessor.h"
24 #include "StyleContext.h"
25 #include "CharacterSet.h"
26 #include "LexerModule.h"
27 
28 #ifdef SCI_NAMESPACE
29 using namespace Scintilla;
30 #endif
31 
32 
IsAWordChar(const int ch)33 static inline bool IsAWordChar(const int ch) {
34 	return (ch < 0x80) && (isalnum(ch) || ch == ':' || ch == '_');
35 }
36 
isMMIXALOperator(char ch)37 inline bool isMMIXALOperator(char ch) {
38 	if (IsASCII(ch) && isalnum(ch))
39 		return false;
40 	if (ch == '+' || ch == '-' || ch == '|' || ch == '^' ||
41 		ch == '*' || ch == '/' ||
42 		ch == '%' || ch == '<' || ch == '>' || ch == '&' ||
43 		ch == '~' || ch == '$' ||
44 		ch == ',' || ch == '(' || ch == ')' ||
45 		ch == '[' || ch == ']')
46 		return true;
47 	return false;
48 }
49 
ColouriseMMIXALDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)50 static void ColouriseMMIXALDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
51                             Accessor &styler) {
52 
53 	WordList &opcodes = *keywordlists[0];
54 	WordList &special_register = *keywordlists[1];
55 	WordList &predef_symbols = *keywordlists[2];
56 
57 	StyleContext sc(startPos, length, initStyle, styler);
58 
59 	for (; sc.More(); sc.Forward())
60 	{
61 		// No EOL continuation
62 		if (sc.atLineStart) {
63 			if (sc.ch ==  '@' && sc.chNext == 'i') {
64 				sc.SetState(SCE_MMIXAL_INCLUDE);
65 			} else {
66 				sc.SetState(SCE_MMIXAL_LEADWS);
67 			}
68 		}
69 
70 		// Check if first non whitespace character in line is alphanumeric
71 		if (sc.state == SCE_MMIXAL_LEADWS && !isspace(sc.ch)) {	// LEADWS
72 			if(!IsAWordChar(sc.ch)) {
73 				sc.SetState(SCE_MMIXAL_COMMENT);
74 			} else {
75 				if(sc.atLineStart) {
76 					sc.SetState(SCE_MMIXAL_LABEL);
77 				} else {
78 					sc.SetState(SCE_MMIXAL_OPCODE_PRE);
79 				}
80 			}
81 		}
82 
83 		// Determine if the current state should terminate.
84 		if (sc.state == SCE_MMIXAL_OPERATOR) {			// OPERATOR
85 			sc.SetState(SCE_MMIXAL_OPERANDS);
86 		} else if (sc.state == SCE_MMIXAL_NUMBER) {		// NUMBER
87 			if (!isdigit(sc.ch)) {
88 				if (IsAWordChar(sc.ch)) {
89 					char s[100];
90 					sc.GetCurrent(s, sizeof(s));
91 					sc.ChangeState(SCE_MMIXAL_REF);
92 					sc.SetState(SCE_MMIXAL_REF);
93 				} else {
94 					sc.SetState(SCE_MMIXAL_OPERANDS);
95 				}
96 			}
97 		} else if (sc.state == SCE_MMIXAL_LABEL) {			// LABEL
98 			if (!IsAWordChar(sc.ch) ) {
99 				sc.SetState(SCE_MMIXAL_OPCODE_PRE);
100 			}
101 		} else if (sc.state == SCE_MMIXAL_REF) {			// REF
102 			if (!IsAWordChar(sc.ch) ) {
103 				char s[100];
104 				sc.GetCurrent(s, sizeof(s));
105 				if (*s == ':') {	// ignore base prefix for match
106 					for (size_t i = 0; i != sizeof(s)-1; ++i) {
107 						*(s+i) = *(s+i+1);
108 					}
109 				}
110 				if (special_register.InList(s)) {
111 					sc.ChangeState(SCE_MMIXAL_REGISTER);
112 				} else if (predef_symbols.InList(s)) {
113 					sc.ChangeState(SCE_MMIXAL_SYMBOL);
114 				}
115 				sc.SetState(SCE_MMIXAL_OPERANDS);
116 			}
117 		} else if (sc.state == SCE_MMIXAL_OPCODE_PRE) {	// OPCODE_PRE
118 				if (!isspace(sc.ch)) {
119 					sc.SetState(SCE_MMIXAL_OPCODE);
120 				}
121 		} else if (sc.state == SCE_MMIXAL_OPCODE) {		// OPCODE
122 			if (!IsAWordChar(sc.ch) ) {
123 				char s[100];
124 				sc.GetCurrent(s, sizeof(s));
125 				if (opcodes.InList(s)) {
126 					sc.ChangeState(SCE_MMIXAL_OPCODE_VALID);
127 				} else {
128 					sc.ChangeState(SCE_MMIXAL_OPCODE_UNKNOWN);
129 				}
130 				sc.SetState(SCE_MMIXAL_OPCODE_POST);
131 			}
132 		} else if (sc.state == SCE_MMIXAL_STRING) {		// STRING
133 			if (sc.ch == '\"') {
134 				sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
135 			} else if (sc.atLineEnd) {
136 				sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
137 			}
138 		} else if (sc.state == SCE_MMIXAL_CHAR) {			// CHAR
139 			if (sc.ch == '\'') {
140 				sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
141 			} else if (sc.atLineEnd) {
142 				sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
143 			}
144 		} else if (sc.state == SCE_MMIXAL_REGISTER) {		// REGISTER
145 			if (!isdigit(sc.ch)) {
146 				sc.SetState(SCE_MMIXAL_OPERANDS);
147 			}
148 		} else if (sc.state == SCE_MMIXAL_HEX) {			// HEX
149 			if (!isxdigit(sc.ch)) {
150 				sc.SetState(SCE_MMIXAL_OPERANDS);
151 			}
152 		}
153 
154 		// Determine if a new state should be entered.
155 		if (sc.state == SCE_MMIXAL_OPCODE_POST ||		// OPCODE_POST
156 			sc.state == SCE_MMIXAL_OPERANDS) {			// OPERANDS
157 			if (sc.state == SCE_MMIXAL_OPERANDS && isspace(sc.ch)) {
158 				if (!sc.atLineEnd) {
159 					sc.SetState(SCE_MMIXAL_COMMENT);
160 				}
161 			} else if (isdigit(sc.ch)) {
162 				sc.SetState(SCE_MMIXAL_NUMBER);
163 			} else if (IsAWordChar(sc.ch) || sc.Match('@')) {
164 				sc.SetState(SCE_MMIXAL_REF);
165 			} else if (sc.Match('\"')) {
166 				sc.SetState(SCE_MMIXAL_STRING);
167 			} else if (sc.Match('\'')) {
168 				sc.SetState(SCE_MMIXAL_CHAR);
169 			} else if (sc.Match('$')) {
170 				sc.SetState(SCE_MMIXAL_REGISTER);
171 			} else if (sc.Match('#')) {
172 				sc.SetState(SCE_MMIXAL_HEX);
173 			} else if (isMMIXALOperator(static_cast<char>(sc.ch))) {
174 				sc.SetState(SCE_MMIXAL_OPERATOR);
175 			}
176 		}
177 	}
178 	sc.Complete();
179 }
180 
// Descriptions of the keyword lists used by this lexer. The entries are in
// the same order in which ColouriseMMIXALDoc indexes keywordlists[]:
// [0] opcodes, [1] special registers, [2] predefined symbols.
static const char * const MMIXALWordListDesc[] = {
	"Operation Codes",
	"Special Register",
	"Predefined Symbols",
	0	// presumably an end-of-list sentinel -- confirm against LexerModule
};
187 
// Lexer module object for MMIXAL: associates the SCLEX_MMIXAL identifier and
// the name "mmixal" with ColouriseMMIXALDoc and MMIXALWordListDesc.
// NOTE(review): the 0 argument is presumably the (absent) folding function --
// confirm against the LexerModule constructor.
LexerModule lmMMIXAL(SCLEX_MMIXAL, ColouriseMMIXALDoc, "mmixal", 0, MMIXALWordListDesc);
189 
190