1 // Scintilla source code edit control
2 // Encoding: UTF-8
3 /** @file LexMMIXAL.cxx
4  ** Lexer for MMIX Assembler Language.
5  ** Written by Christoph Hösler <christoph.hoesler@student.uni-tuebingen.de>
6  ** For information about MMIX visit http://www-cs-faculty.stanford.edu/~knuth/mmix.html
7  **/
8 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
9 // The License.txt file describes the conditions under which this software may be distributed.
10 
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17 
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 
29 using namespace Scintilla;
30 
31 
/**
 * Tell whether @a ch may be part of an MMIXAL word (label, opcode or symbol
 * reference): an ASCII letter or digit, ':' or '_'.
 *
 * The <ctype.h> classifiers are only defined for EOF and for values that fit
 * in an unsigned char, so everything outside the ASCII range - negative
 * values included - is rejected before isalnum() is consulted.
 */
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80)
		return false;
	return isalnum(ch) || ch == ':' || ch == '_';
}
35 
/**
 * Tell whether @a ch is one of the characters styled as an MMIXAL operator:
 *   + - | ^ * / % < > & ~ $ , ( ) [ ]
 *
 * No separate alphanumeric pre-check is needed: the set consists of
 * punctuation only, so letters and digits end up in the default case.
 */
static inline bool isMMIXALOperator(char ch) {
	switch (ch) {
	case '+':
	case '-':
	case '|':
	case '^':
	case '*':
	case '/':
	case '%':
	case '<':
	case '>':
	case '&':
	case '~':
	case '$':
	case ',':
	case '(':
	case ')':
	case '[':
	case ']':
		return true;
	default:
		return false;
	}
}
48 
ColouriseMMIXALDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)49 static void ColouriseMMIXALDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
50                             Accessor &styler) {
51 
52 	WordList &opcodes = *keywordlists[0];
53 	WordList &special_register = *keywordlists[1];
54 	WordList &predef_symbols = *keywordlists[2];
55 
56 	StyleContext sc(startPos, length, initStyle, styler);
57 
58 	for (; sc.More(); sc.Forward())
59 	{
60 		// No EOL continuation
61 		if (sc.atLineStart) {
62 			if (sc.ch ==  '@' && sc.chNext == 'i') {
63 				sc.SetState(SCE_MMIXAL_INCLUDE);
64 			} else {
65 				sc.SetState(SCE_MMIXAL_LEADWS);
66 			}
67 		}
68 
69 		// Check if first non whitespace character in line is alphanumeric
70 		if (sc.state == SCE_MMIXAL_LEADWS && !isspace(sc.ch)) {	// LEADWS
71 			if(!IsAWordChar(sc.ch)) {
72 				sc.SetState(SCE_MMIXAL_COMMENT);
73 			} else {
74 				if(sc.atLineStart) {
75 					sc.SetState(SCE_MMIXAL_LABEL);
76 				} else {
77 					sc.SetState(SCE_MMIXAL_OPCODE_PRE);
78 				}
79 			}
80 		}
81 
82 		// Determine if the current state should terminate.
83 		if (sc.state == SCE_MMIXAL_OPERATOR) {			// OPERATOR
84 			sc.SetState(SCE_MMIXAL_OPERANDS);
85 		} else if (sc.state == SCE_MMIXAL_NUMBER) {		// NUMBER
86 			if (!isdigit(sc.ch)) {
87 				if (IsAWordChar(sc.ch)) {
88 					sc.ChangeState(SCE_MMIXAL_REF);
89 					sc.SetState(SCE_MMIXAL_REF);
90 				} else {
91 					sc.SetState(SCE_MMIXAL_OPERANDS);
92 				}
93 			}
94 		} else if (sc.state == SCE_MMIXAL_LABEL) {			// LABEL
95 			if (!IsAWordChar(sc.ch) ) {
96 				sc.SetState(SCE_MMIXAL_OPCODE_PRE);
97 			}
98 		} else if (sc.state == SCE_MMIXAL_REF) {			// REF
99 			if (!IsAWordChar(sc.ch) ) {
100 				char s0[100];
101 				sc.GetCurrent(s0, sizeof(s0));
102 				const char *s = s0;
103 				if (*s == ':') {	// ignore base prefix for match
104 					++s;
105 				}
106 				if (special_register.InList(s)) {
107 					sc.ChangeState(SCE_MMIXAL_REGISTER);
108 				} else if (predef_symbols.InList(s)) {
109 					sc.ChangeState(SCE_MMIXAL_SYMBOL);
110 				}
111 				sc.SetState(SCE_MMIXAL_OPERANDS);
112 			}
113 		} else if (sc.state == SCE_MMIXAL_OPCODE_PRE) {	// OPCODE_PRE
114 				if (!isspace(sc.ch)) {
115 					sc.SetState(SCE_MMIXAL_OPCODE);
116 				}
117 		} else if (sc.state == SCE_MMIXAL_OPCODE) {		// OPCODE
118 			if (!IsAWordChar(sc.ch) ) {
119 				char s[100];
120 				sc.GetCurrent(s, sizeof(s));
121 				if (opcodes.InList(s)) {
122 					sc.ChangeState(SCE_MMIXAL_OPCODE_VALID);
123 				} else {
124 					sc.ChangeState(SCE_MMIXAL_OPCODE_UNKNOWN);
125 				}
126 				sc.SetState(SCE_MMIXAL_OPCODE_POST);
127 			}
128 		} else if (sc.state == SCE_MMIXAL_STRING) {		// STRING
129 			if (sc.ch == '\"') {
130 				sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
131 			} else if (sc.atLineEnd) {
132 				sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
133 			}
134 		} else if (sc.state == SCE_MMIXAL_CHAR) {			// CHAR
135 			if (sc.ch == '\'') {
136 				sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
137 			} else if (sc.atLineEnd) {
138 				sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
139 			}
140 		} else if (sc.state == SCE_MMIXAL_REGISTER) {		// REGISTER
141 			if (!isdigit(sc.ch)) {
142 				sc.SetState(SCE_MMIXAL_OPERANDS);
143 			}
144 		} else if (sc.state == SCE_MMIXAL_HEX) {			// HEX
145 			if (!isxdigit(sc.ch)) {
146 				sc.SetState(SCE_MMIXAL_OPERANDS);
147 			}
148 		}
149 
150 		// Determine if a new state should be entered.
151 		if (sc.state == SCE_MMIXAL_OPCODE_POST ||		// OPCODE_POST
152 			sc.state == SCE_MMIXAL_OPERANDS) {			// OPERANDS
153 			if (sc.state == SCE_MMIXAL_OPERANDS && isspace(sc.ch)) {
154 				sc.SetState(SCE_MMIXAL_COMMENT);
155 			} else if (isdigit(sc.ch)) {
156 				sc.SetState(SCE_MMIXAL_NUMBER);
157 			} else if (IsAWordChar(sc.ch) || sc.Match('@')) {
158 				sc.SetState(SCE_MMIXAL_REF);
159 			} else if (sc.Match('\"')) {
160 				sc.SetState(SCE_MMIXAL_STRING);
161 			} else if (sc.Match('\'')) {
162 				sc.SetState(SCE_MMIXAL_CHAR);
163 			} else if (sc.Match('$')) {
164 				sc.SetState(SCE_MMIXAL_REGISTER);
165 			} else if (sc.Match('#')) {
166 				sc.SetState(SCE_MMIXAL_HEX);
167 			} else if (isMMIXALOperator(static_cast<char>(sc.ch))) {
168 				sc.SetState(SCE_MMIXAL_OPERATOR);
169 			}
170 		}
171 	}
172 	sc.Complete();
173 }
174 
// Descriptions of the keyword lists used by this lexer, in the same order in
// which ColouriseMMIXALDoc reads them: keywordlists[0] holds the operation
// codes, keywordlists[1] the special registers and keywordlists[2] the
// predefined symbols.
// The final 0 is a null entry after the last description (presumably the
// end-of-list marker expected by LexerModule - confirm against its declaration).
static const char * const MMIXALWordListDesc[] = {
	"Operation Codes",
	"Special Register",
	"Predefined Symbols",
	0
};
181 
// Lexer module object for MMIXAL: ties the SCLEX_MMIXAL identifier and the
// name "mmixal" to ColouriseMMIXALDoc and the word list descriptions above.
// The fourth argument is 0 (presumably "no folding function" - confirm against
// the LexerModule constructor).
LexerModule lmMMIXAL(SCLEX_MMIXAL, ColouriseMMIXALDoc, "mmixal", 0, MMIXALWordListDesc);
183 
184