1 // Scintilla source code edit control
2 // Encoding: UTF-8
3 /** @file LexMMIXAL.cxx
4 ** Lexer for MMIX Assembler Language.
5 ** Written by Christoph Hösler <christoph.hoesler@student.uni-tuebingen.de>
6 ** For information about MMIX visit http://www-cs-faculty.stanford.edu/~knuth/mmix.html
7 **/
8 // Copyright 1998-2003 by Neil Hodgson <neilh@scintilla.org>
9 // The License.txt file describes the conditions under which this software may be distributed.
10
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28
29 using namespace Scintilla;
30
31
/**
 * Test whether @a ch can be part of an MMIXAL word (label, opcode or reference).
 *
 * Word characters are the ASCII letters and digits plus ':' and '_'.
 * Any value outside the 7-bit ASCII range 0..0x7F is rejected before the
 * <ctype.h> call: isalnum() is only defined for arguments representable as
 * unsigned char (or EOF), so negative values must not reach it either.
 *
 * @param ch Character value to classify.
 * @return true if @a ch is an ASCII alphanumeric, ':' or '_'.
 */
static inline bool IsAWordChar(const int ch) {
	if (ch < 0 || ch >= 0x80) {
		return false;
	}
	return isalnum(ch) || ch == ':' || ch == '_';
}
35
isMMIXALOperator(char ch)36 static inline bool isMMIXALOperator(char ch) {
37 if (IsASCII(ch) && isalnum(ch))
38 return false;
39 if (ch == '+' || ch == '-' || ch == '|' || ch == '^' ||
40 ch == '*' || ch == '/' ||
41 ch == '%' || ch == '<' || ch == '>' || ch == '&' ||
42 ch == '~' || ch == '$' ||
43 ch == ',' || ch == '(' || ch == ')' ||
44 ch == '[' || ch == ']')
45 return true;
46 return false;
47 }
48
ColouriseMMIXALDoc(Sci_PositionU startPos,Sci_Position length,int initStyle,WordList * keywordlists[],Accessor & styler)49 static void ColouriseMMIXALDoc(Sci_PositionU startPos, Sci_Position length, int initStyle, WordList *keywordlists[],
50 Accessor &styler) {
51
52 WordList &opcodes = *keywordlists[0];
53 WordList &special_register = *keywordlists[1];
54 WordList &predef_symbols = *keywordlists[2];
55
56 StyleContext sc(startPos, length, initStyle, styler);
57
58 for (; sc.More(); sc.Forward())
59 {
60 // No EOL continuation
61 if (sc.atLineStart) {
62 if (sc.ch == '@' && sc.chNext == 'i') {
63 sc.SetState(SCE_MMIXAL_INCLUDE);
64 } else {
65 sc.SetState(SCE_MMIXAL_LEADWS);
66 }
67 }
68
69 // Check if first non whitespace character in line is alphanumeric
70 if (sc.state == SCE_MMIXAL_LEADWS && !isspace(sc.ch)) { // LEADWS
71 if(!IsAWordChar(sc.ch)) {
72 sc.SetState(SCE_MMIXAL_COMMENT);
73 } else {
74 if(sc.atLineStart) {
75 sc.SetState(SCE_MMIXAL_LABEL);
76 } else {
77 sc.SetState(SCE_MMIXAL_OPCODE_PRE);
78 }
79 }
80 }
81
82 // Determine if the current state should terminate.
83 if (sc.state == SCE_MMIXAL_OPERATOR) { // OPERATOR
84 sc.SetState(SCE_MMIXAL_OPERANDS);
85 } else if (sc.state == SCE_MMIXAL_NUMBER) { // NUMBER
86 if (!isdigit(sc.ch)) {
87 if (IsAWordChar(sc.ch)) {
88 sc.ChangeState(SCE_MMIXAL_REF);
89 sc.SetState(SCE_MMIXAL_REF);
90 } else {
91 sc.SetState(SCE_MMIXAL_OPERANDS);
92 }
93 }
94 } else if (sc.state == SCE_MMIXAL_LABEL) { // LABEL
95 if (!IsAWordChar(sc.ch) ) {
96 sc.SetState(SCE_MMIXAL_OPCODE_PRE);
97 }
98 } else if (sc.state == SCE_MMIXAL_REF) { // REF
99 if (!IsAWordChar(sc.ch) ) {
100 char s0[100];
101 sc.GetCurrent(s0, sizeof(s0));
102 const char *s = s0;
103 if (*s == ':') { // ignore base prefix for match
104 ++s;
105 }
106 if (special_register.InList(s)) {
107 sc.ChangeState(SCE_MMIXAL_REGISTER);
108 } else if (predef_symbols.InList(s)) {
109 sc.ChangeState(SCE_MMIXAL_SYMBOL);
110 }
111 sc.SetState(SCE_MMIXAL_OPERANDS);
112 }
113 } else if (sc.state == SCE_MMIXAL_OPCODE_PRE) { // OPCODE_PRE
114 if (!isspace(sc.ch)) {
115 sc.SetState(SCE_MMIXAL_OPCODE);
116 }
117 } else if (sc.state == SCE_MMIXAL_OPCODE) { // OPCODE
118 if (!IsAWordChar(sc.ch) ) {
119 char s[100];
120 sc.GetCurrent(s, sizeof(s));
121 if (opcodes.InList(s)) {
122 sc.ChangeState(SCE_MMIXAL_OPCODE_VALID);
123 } else {
124 sc.ChangeState(SCE_MMIXAL_OPCODE_UNKNOWN);
125 }
126 sc.SetState(SCE_MMIXAL_OPCODE_POST);
127 }
128 } else if (sc.state == SCE_MMIXAL_STRING) { // STRING
129 if (sc.ch == '\"') {
130 sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
131 } else if (sc.atLineEnd) {
132 sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
133 }
134 } else if (sc.state == SCE_MMIXAL_CHAR) { // CHAR
135 if (sc.ch == '\'') {
136 sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
137 } else if (sc.atLineEnd) {
138 sc.ForwardSetState(SCE_MMIXAL_OPERANDS);
139 }
140 } else if (sc.state == SCE_MMIXAL_REGISTER) { // REGISTER
141 if (!isdigit(sc.ch)) {
142 sc.SetState(SCE_MMIXAL_OPERANDS);
143 }
144 } else if (sc.state == SCE_MMIXAL_HEX) { // HEX
145 if (!isxdigit(sc.ch)) {
146 sc.SetState(SCE_MMIXAL_OPERANDS);
147 }
148 }
149
150 // Determine if a new state should be entered.
151 if (sc.state == SCE_MMIXAL_OPCODE_POST || // OPCODE_POST
152 sc.state == SCE_MMIXAL_OPERANDS) { // OPERANDS
153 if (sc.state == SCE_MMIXAL_OPERANDS && isspace(sc.ch)) {
154 sc.SetState(SCE_MMIXAL_COMMENT);
155 } else if (isdigit(sc.ch)) {
156 sc.SetState(SCE_MMIXAL_NUMBER);
157 } else if (IsAWordChar(sc.ch) || sc.Match('@')) {
158 sc.SetState(SCE_MMIXAL_REF);
159 } else if (sc.Match('\"')) {
160 sc.SetState(SCE_MMIXAL_STRING);
161 } else if (sc.Match('\'')) {
162 sc.SetState(SCE_MMIXAL_CHAR);
163 } else if (sc.Match('$')) {
164 sc.SetState(SCE_MMIXAL_REGISTER);
165 } else if (sc.Match('#')) {
166 sc.SetState(SCE_MMIXAL_HEX);
167 } else if (isMMIXALOperator(static_cast<char>(sc.ch))) {
168 sc.SetState(SCE_MMIXAL_OPERATOR);
169 }
170 }
171 }
172 sc.Complete();
173 }
174
// Descriptions of the keyword lists used by this lexer, terminated by a
// null entry. The order matches the keywordlists[] indices read in
// ColouriseMMIXALDoc.
static const char * const MMIXALWordListDesc[] = {
	"Operation Codes",		// keywordlists[0]
	"Special Register",		// keywordlists[1]
	"Predefined Symbols",	// keywordlists[2]
	0						// end-of-list marker
};
181
182 LexerModule lmMMIXAL(SCLEX_MMIXAL, ColouriseMMIXALDoc, "mmixal", 0, MMIXALWordListDesc);
183
184