1 // Scintilla source code edit control
2 /** @file LexCOBOL.cxx
3  ** Lexer for COBOL
4  ** Based on LexPascal.cxx
5  ** Written by Laurent le Tynevez
6  ** Updated by Simon Steele <s.steele@pnotepad.org> September 2002
7  ** Updated by Mathias Rauen <scite@madshi.net> May 2003 (Delphi adjustments)
8  ** Updated by Rod Falck, Aug 2006 Converted to COBOL
9  **/
10 
11 #include <stdlib.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <stdarg.h>
15 #include <assert.h>
16 #include <ctype.h>
17 
18 #include "ILexer.h"
19 #include "Scintilla.h"
20 #include "SciLexer.h"
21 
22 #include "WordList.h"
23 #include "LexAccessor.h"
24 #include "Accessor.h"
25 #include "StyleContext.h"
26 #include "CharacterSet.h"
27 #include "LexerModule.h"
28 
29 #ifdef SCI_NAMESPACE
30 using namespace Scintilla;
31 #endif
32 
33 #define IN_DIVISION 0x01
34 #define IN_DECLARATIVES 0x02
35 #define IN_SECTION 0x04
36 #define IN_PARAGRAPH 0x08
37 #define IN_FLAGS 0xF
38 #define NOT_HEADER 0x10
39 
isCOBOLoperator(char ch)40 inline bool isCOBOLoperator(char ch)
41     {
42     return isoperator(ch);
43     }
44 
isCOBOLwordchar(char ch)45 inline bool isCOBOLwordchar(char ch)
46     {
47     return isascii(ch) && (isalnum(ch) || ch == '-');
48 
49     }
50 
isCOBOLwordstart(char ch)51 inline bool isCOBOLwordstart(char ch)
52     {
53     return isascii(ch) && isalnum(ch);
54     }
55 
CountBits(int nBits)56 static int CountBits(int nBits)
57 	{
58 	int count = 0;
59 	for (int i = 0; i < 32; ++i)
60 		{
61 		count += nBits & 1;
62 		nBits >>= 1;
63 		}
64 	return count;
65 	}
66 
getRange(unsigned int start,unsigned int end,Accessor & styler,char * s,unsigned int len)67 static void getRange(unsigned int start,
68         unsigned int end,
69         Accessor &styler,
70         char *s,
71         unsigned int len) {
72     unsigned int i = 0;
73     while ((i < end - start + 1) && (i < len-1)) {
74         s[i] = static_cast<char>(tolower(styler[start + i]));
75         i++;
76     }
77     s[i] = '\0';
78 }
79 
ColourTo(Accessor & styler,unsigned int end,unsigned int attr)80 static void ColourTo(Accessor &styler, unsigned int end, unsigned int attr) {
81     styler.ColourTo(end, attr);
82 }
83 
84 
classifyWordCOBOL(unsigned int start,unsigned int end,WordList * keywordlists[],Accessor & styler,int nContainment,bool * bAarea)85 static int classifyWordCOBOL(unsigned int start, unsigned int end, /*WordList &keywords*/WordList *keywordlists[], Accessor &styler, int nContainment, bool *bAarea) {
86     int ret = 0;
87 
88     WordList& a_keywords = *keywordlists[0];
89     WordList& b_keywords = *keywordlists[1];
90     WordList& c_keywords = *keywordlists[2];
91 
92     char s[100];
93     getRange(start, end, styler, s, sizeof(s));
94 
95     char chAttr = SCE_C_IDENTIFIER;
96     if (isdigit(s[0]) || (s[0] == '.') || (s[0] == 'v')) {
97         chAttr = SCE_C_NUMBER;
98 		char *p = s + 1;
99 		while (*p) {
100 			if ((!isdigit(*p) && (*p) != 'v') && isCOBOLwordchar(*p)) {
101 				chAttr = SCE_C_IDENTIFIER;
102 			    break;
103 			}
104 			++p;
105 		}
106     }
107     else {
108         if (a_keywords.InList(s)) {
109             chAttr = SCE_C_WORD;
110         }
111         else if (b_keywords.InList(s)) {
112             chAttr = SCE_C_WORD2;
113         }
114         else if (c_keywords.InList(s)) {
115             chAttr = SCE_C_UUID;
116         }
117     }
118     if (*bAarea) {
119         if (strcmp(s, "division") == 0) {
120             ret = IN_DIVISION;
121 			// we've determined the containment, anything else is just ignored for those purposes
122 			*bAarea = false;
123 		} else if (strcmp(s, "declaratives") == 0) {
124             ret = IN_DIVISION | IN_DECLARATIVES;
125 			if (nContainment & IN_DECLARATIVES)
126 				ret |= NOT_HEADER | IN_SECTION;
127 			// we've determined the containment, anything else is just ignored for those purposes
128 			*bAarea = false;
129 		} else if (strcmp(s, "section") == 0) {
130             ret = (nContainment &~ IN_PARAGRAPH) | IN_SECTION;
131 			// we've determined the containment, anything else is just ignored for those purposes
132 			*bAarea = false;
133 		} else if (strcmp(s, "end") == 0 && (nContainment & IN_DECLARATIVES)) {
134             ret = IN_DIVISION | IN_DECLARATIVES | IN_SECTION | NOT_HEADER;
135 		} else {
136 			ret = nContainment | IN_PARAGRAPH;
137         }
138     }
139     ColourTo(styler, end, chAttr);
140     return ret;
141 }
142 
ColouriseCOBOLDoc(unsigned int startPos,int length,int initStyle,WordList * keywordlists[],Accessor & styler)143 static void ColouriseCOBOLDoc(unsigned int startPos, int length, int initStyle, WordList *keywordlists[],
144     Accessor &styler) {
145 
146     styler.StartAt(startPos);
147 
148     int state = initStyle;
149     if (state == SCE_C_CHARACTER)   // Does not leak onto next line
150         state = SCE_C_DEFAULT;
151     char chPrev = ' ';
152     char chNext = styler[startPos];
153     unsigned int lengthDoc = startPos + length;
154 
155     int nContainment;
156 
157     int currentLine = styler.GetLine(startPos);
158     if (currentLine > 0) {
159         styler.SetLineState(currentLine, styler.GetLineState(currentLine-1));
160         nContainment = styler.GetLineState(currentLine);
161 		nContainment &= ~NOT_HEADER;
162     } else {
163         styler.SetLineState(currentLine, 0);
164         nContainment = 0;
165     }
166 
167     styler.StartSegment(startPos);
168     bool bNewLine = true;
169     bool bAarea = !isspacechar(chNext);
170 	int column = 0;
171     for (unsigned int i = startPos; i < lengthDoc; i++) {
172         char ch = chNext;
173 
174         chNext = styler.SafeGetCharAt(i + 1);
175 
176 		++column;
177 
178         if (bNewLine) {
179 			column = 0;
180         }
181 		if (column <= 1 && !bAarea) {
182 			bAarea = !isspacechar(ch);
183 			}
184         bool bSetNewLine = false;
185         if ((ch == '\r' && chNext != '\n') || (ch == '\n')) {
186             // Trigger on CR only (Mac style) or either on LF from CR+LF (Dos/Win) or on LF alone (Unix)
187             // Avoid triggering two times on Dos/Win
188             // End of line
189             if (state == SCE_C_CHARACTER) {
190                 ColourTo(styler, i, state);
191                 state = SCE_C_DEFAULT;
192             }
193             styler.SetLineState(currentLine, nContainment);
194             currentLine++;
195             bSetNewLine = true;
196 			if (nContainment & NOT_HEADER)
197 				nContainment &= ~(NOT_HEADER | IN_DECLARATIVES | IN_SECTION);
198         }
199 
200         if (styler.IsLeadByte(ch)) {
201             chNext = styler.SafeGetCharAt(i + 2);
202             chPrev = ' ';
203             i += 1;
204             continue;
205         }
206 
207         if (state == SCE_C_DEFAULT) {
208             if (isCOBOLwordstart(ch) || (ch == '$' && isascii(chNext) && isalpha(chNext))) {
209                 ColourTo(styler, i-1, state);
210                 state = SCE_C_IDENTIFIER;
211             } else if (column == 6 && ch == '*') {
212             // Cobol comment line: asterisk in column 7.
213                 ColourTo(styler, i-1, state);
214                 state = SCE_C_COMMENTLINE;
215             } else if (ch == '*' && chNext == '>') {
216             // Cobol inline comment: asterisk, followed by greater than.
217                 ColourTo(styler, i-1, state);
218                 state = SCE_C_COMMENTLINE;
219             } else if (column == 0 && ch == '*' && chNext != '*') {
220                 ColourTo(styler, i-1, state);
221                 state = SCE_C_COMMENTLINE;
222             } else if (column == 0 && ch == '/' && chNext != '*') {
223                 ColourTo(styler, i-1, state);
224                 state = SCE_C_COMMENTLINE;
225             } else if (column == 0 && ch == '*' && chNext == '*') {
226                 ColourTo(styler, i-1, state);
227                 state = SCE_C_COMMENTDOC;
228             } else if (column == 0 && ch == '/' && chNext == '*') {
229                 ColourTo(styler, i-1, state);
230                 state = SCE_C_COMMENTDOC;
231             } else if (ch == '"') {
232                 ColourTo(styler, i-1, state);
233                 state = SCE_C_STRING;
234             } else if (ch == '\'') {
235                 ColourTo(styler, i-1, state);
236                 state = SCE_C_CHARACTER;
237             } else if (ch == '?' && column == 0) {
238                 ColourTo(styler, i-1, state);
239                 state = SCE_C_PREPROCESSOR;
240             } else if (isCOBOLoperator(ch)) {
241                 ColourTo(styler, i-1, state);
242                 ColourTo(styler, i, SCE_C_OPERATOR);
243             }
244         } else if (state == SCE_C_IDENTIFIER) {
245             if (!isCOBOLwordchar(ch)) {
246                 int lStateChange = classifyWordCOBOL(styler.GetStartSegment(), i - 1, keywordlists, styler, nContainment, &bAarea);
247 
248                 if(lStateChange != 0) {
249                     styler.SetLineState(currentLine, lStateChange);
250                     nContainment = lStateChange;
251                 }
252 
253                 state = SCE_C_DEFAULT;
254                 chNext = styler.SafeGetCharAt(i + 1);
255                 if (ch == '"') {
256                     state = SCE_C_STRING;
257                 } else if (ch == '\'') {
258                     state = SCE_C_CHARACTER;
259                 } else if (isCOBOLoperator(ch)) {
260                     ColourTo(styler, i, SCE_C_OPERATOR);
261                 }
262             }
263         } else {
264             if (state == SCE_C_PREPROCESSOR) {
265                 if ((ch == '\r' || ch == '\n') && !(chPrev == '\\' || chPrev == '\r')) {
266                     ColourTo(styler, i-1, state);
267                     state = SCE_C_DEFAULT;
268                 }
269             } else if (state == SCE_C_COMMENT) {
270                 if (ch == '\r' || ch == '\n') {
271                     ColourTo(styler, i, state);
272                     state = SCE_C_DEFAULT;
273                 }
274             } else if (state == SCE_C_COMMENTDOC) {
275                 if (ch == '\r' || ch == '\n') {
276                     if (((i > styler.GetStartSegment() + 2) || (
277                         (initStyle == SCE_C_COMMENTDOC) &&
278                         (styler.GetStartSegment() == static_cast<unsigned int>(startPos))))) {
279                             ColourTo(styler, i, state);
280                             state = SCE_C_DEFAULT;
281                     }
282                 }
283             } else if (state == SCE_C_COMMENTLINE) {
284                 if (ch == '\r' || ch == '\n') {
285                     ColourTo(styler, i-1, state);
286                     state = SCE_C_DEFAULT;
287                 }
288             } else if (state == SCE_C_STRING) {
289                 if (ch == '"') {
290                     ColourTo(styler, i, state);
291                     state = SCE_C_DEFAULT;
292                 }
293             } else if (state == SCE_C_CHARACTER) {
294                 if (ch == '\'') {
295                     ColourTo(styler, i, state);
296                     state = SCE_C_DEFAULT;
297                 }
298             }
299         }
300         chPrev = ch;
301         bNewLine = bSetNewLine;
302 		if (bNewLine)
303 			{
304 			bAarea = false;
305 			}
306     }
307     ColourTo(styler, lengthDoc - 1, state);
308 }
309 
FoldCOBOLDoc(unsigned int startPos,int length,int,WordList * [],Accessor & styler)310 static void FoldCOBOLDoc(unsigned int startPos, int length, int, WordList *[],
311                             Accessor &styler) {
312     bool foldCompact = styler.GetPropertyInt("fold.compact", 1) != 0;
313     unsigned int endPos = startPos + length;
314     int visibleChars = 0;
315     int lineCurrent = styler.GetLine(startPos);
316     int levelPrev = lineCurrent > 0 ? styler.LevelAt(lineCurrent - 1) & SC_FOLDLEVELNUMBERMASK : 0xFFF;
317     char chNext = styler[startPos];
318 
319     bool bNewLine = true;
320     bool bAarea = !isspacechar(chNext);
321 	int column = 0;
322 	bool bComment = false;
323     for (unsigned int i = startPos; i < endPos; i++) {
324         char ch = chNext;
325         chNext = styler.SafeGetCharAt(i + 1);
326 		++column;
327 
328         if (bNewLine) {
329 			column = 0;
330 			bComment = (ch == '*' || ch == '/' || ch == '?');
331         }
332 		if (column <= 1 && !bAarea) {
333 			bAarea = !isspacechar(ch);
334 			}
335         bool atEOL = (ch == '\r' && chNext != '\n') || (ch == '\n');
336         if (atEOL) {
337 			int nContainment = styler.GetLineState(lineCurrent);
338             int lev = CountBits(nContainment & IN_FLAGS) | SC_FOLDLEVELBASE;
339 			if (bAarea && !bComment)
340 				--lev;
341             if (visibleChars == 0 && foldCompact)
342                 lev |= SC_FOLDLEVELWHITEFLAG;
343             if ((bAarea) && (visibleChars > 0) && !(nContainment & NOT_HEADER) && !bComment)
344                 lev |= SC_FOLDLEVELHEADERFLAG;
345             if (lev != styler.LevelAt(lineCurrent)) {
346                 styler.SetLevel(lineCurrent, lev);
347             }
348 			if ((lev & SC_FOLDLEVELNUMBERMASK) <= (levelPrev & SC_FOLDLEVELNUMBERMASK)) {
349 				// this level is at the same level or less than the previous line
350 				// therefore these is nothing for the previous header to collapse, so remove the header
351 				styler.SetLevel(lineCurrent - 1, levelPrev & ~SC_FOLDLEVELHEADERFLAG);
352 			}
353             levelPrev = lev;
354             visibleChars = 0;
355 			bAarea = false;
356             bNewLine = true;
357             lineCurrent++;
358         } else {
359             bNewLine = false;
360         }
361 
362 
363         if (!isspacechar(ch))
364             visibleChars++;
365     }
366 
367     // Fill in the real level of the next line, keeping the current flags as they will be filled in later
368     int flagsNext = styler.LevelAt(lineCurrent) & ~SC_FOLDLEVELNUMBERMASK;
369     styler.SetLevel(lineCurrent, levelPrev | flagsNext);
370 }
371 
372 static const char * const COBOLWordListDesc[] = {
373     "A Keywords",
374     "B Keywords",
375     "Extended Keywords",
376     0
377 };
378 
379 LexerModule lmCOBOL(SCLEX_COBOL, ColouriseCOBOLDoc, "COBOL", FoldCOBOLDoc, COBOLWordListDesc);
380