// Scintilla source code edit control
/** @file LexYAML.cxx
 ** Lexer for YAML.
 **/
// Copyright 2003- by Sean O'Dell <sean@celsoft.com>
// The License.txt file describes the conditions under which this software may be distributed.
7 
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
#include <ctype.h>

#include <vector>

#include "ILexer.h"
#include "Scintilla.h"
#include "SciLexer.h"

#include "WordList.h"
#include "LexAccessor.h"
#include "Accessor.h"
#include "StyleContext.h"
#include "CharacterSet.h"
#include "LexerModule.h"

using namespace Scintilla;
27 
// Descriptions of the keyword lists used by this lexer (a single list,
// "Keywords"); the array is terminated by a 0 entry.
static const char *const yamlWordListDesc[] = { "Keywords", 0 };
32 
AtEOL(Accessor & styler,Sci_PositionU i)33 static inline bool AtEOL(Accessor &styler, Sci_PositionU i) {
34 	return (styler[i] == '\n') ||
35 		((styler[i] == '\r') && (styler.SafeGetCharAt(i + 1) != '\n'));
36 }
37 
// Counts the run of ' ' characters at the start of a NUL-terminated buffer.
// Only the space character is counted (tabs stop the count).
// Returns 0 for a NULL buffer.
static unsigned int SpaceCount(char* lineBuffer) {
	unsigned int count = 0;
	if (lineBuffer != NULL) {
		while (lineBuffer[count] == ' ')
			count++;
	}
	return count;
}
49 
KeywordAtChar(char * lineBuffer,char * startComment,const WordList & keywords)50 static bool KeywordAtChar(char* lineBuffer, char* startComment, const WordList &keywords) {
51 	if (lineBuffer == NULL || startComment <= lineBuffer)
52 		return false;
53 	char* endValue = startComment - 1;
54 	while (endValue >= lineBuffer && *endValue == ' ')
55 		endValue--;
56 	Sci_PositionU len = static_cast<Sci_PositionU>(endValue - lineBuffer) + 1;
57 	char s[100];
58 	if (len > (sizeof(s) / sizeof(s[0]) - 1))
59 		return false;
60 	strncpy(s, lineBuffer, len);
61 	s[len] = '\0';
62 	return (keywords.InList(s));
63 }
64 
// Per-line lexer state layout: the state kind lives in the bits above
// YAML_STATE_BITSIZE; the low 16 bits carry an indentation amount for
// block-text lines (see the uses with SetLineState/GetLineState).
#define YAML_STATE_BITSIZE		16
#define YAML_STATE_MASK			(0xFFFFu << YAML_STATE_BITSIZE)
#define YAML_STATE_DOCUMENT		(1 << YAML_STATE_BITSIZE)	// document marker line
#define YAML_STATE_VALUE		(2 << YAML_STATE_BITSIZE)	// "key: value" line
#define YAML_STATE_COMMENT		(3 << YAML_STATE_BITSIZE)	// whole-line comment
#define YAML_STATE_TEXT_PARENT	(4 << YAML_STATE_BITSIZE)	// line introducing a '|' or '>' block
#define YAML_STATE_TEXT			(5 << YAML_STATE_BITSIZE)	// line inside such a block
72 
ColouriseYAMLLine(char * lineBuffer,Sci_PositionU currentLine,Sci_PositionU lengthLine,Sci_PositionU startLine,Sci_PositionU endPos,WordList & keywords,Accessor & styler)73 static void ColouriseYAMLLine(
74 	char *lineBuffer,
75 	Sci_PositionU currentLine,
76 	Sci_PositionU lengthLine,
77 	Sci_PositionU startLine,
78 	Sci_PositionU endPos,
79 	WordList &keywords,
80 	Accessor &styler) {
81 
82 	Sci_PositionU i = 0;
83 	bool bInQuotes = false;
84 	unsigned int indentAmount = SpaceCount(lineBuffer);
85 
86 	if (currentLine > 0) {
87 		int parentLineState = styler.GetLineState(currentLine - 1);
88 
89 		if ((parentLineState&YAML_STATE_MASK) == YAML_STATE_TEXT || (parentLineState&YAML_STATE_MASK) == YAML_STATE_TEXT_PARENT) {
90 			unsigned int parentIndentAmount = parentLineState&(~YAML_STATE_MASK);
91 			if (indentAmount > parentIndentAmount) {
92 				styler.SetLineState(currentLine, YAML_STATE_TEXT | parentIndentAmount);
93 				styler.ColourTo(endPos, SCE_YAML_TEXT);
94 				return;
95 			}
96 		}
97 	}
98 	styler.SetLineState(currentLine, 0);
99 	if (strncmp(lineBuffer, "---", 3) == 0 || strncmp(lineBuffer, "...", 3) == 0) {	// Document marker
100 		styler.SetLineState(currentLine, YAML_STATE_DOCUMENT);
101 		styler.ColourTo(endPos, SCE_YAML_DOCUMENT);
102 		return;
103 	}
104 	// Skip initial spaces
105 	while ((i < lengthLine) && lineBuffer[i] == ' ') { // YAML always uses space, never TABS or anything else
106 		i++;
107 	}
108 	if (lineBuffer[i] == '\t') { // if we skipped all spaces, and we are NOT inside a text block, this is wrong
109 		styler.ColourTo(endPos, SCE_YAML_ERROR);
110 		return;
111 	}
112 	if (lineBuffer[i] == '#') {	// Comment
113 		styler.SetLineState(currentLine, YAML_STATE_COMMENT);
114 		styler.ColourTo(endPos, SCE_YAML_COMMENT);
115 		return;
116 	}
117 	while (i < lengthLine) {
118 		if (lineBuffer[i] == '\'' || lineBuffer[i] == '\"') {
119 			bInQuotes = !bInQuotes;
120 		} else if (lineBuffer[i] == '#' && isspacechar(lineBuffer[i - 1]) && !bInQuotes) {
121 			styler.ColourTo(startLine + i - 1, SCE_YAML_DEFAULT);
122 			styler.ColourTo(endPos, SCE_YAML_COMMENT);
123 			return;
124 		} else if (lineBuffer[i] == ':' && !bInQuotes) {
125 			styler.ColourTo(startLine + i - 1, SCE_YAML_IDENTIFIER);
126 			styler.ColourTo(startLine + i, SCE_YAML_OPERATOR);
127 			// Non-folding scalar
128 			i++;
129 			while ((i < lengthLine) && isspacechar(lineBuffer[i]))
130 				i++;
131 			Sci_PositionU endValue = lengthLine - 1;
132 			while ((endValue >= i) && isspacechar(lineBuffer[endValue]))
133 				endValue--;
134 			lineBuffer[endValue + 1] = '\0';
135 			if (lineBuffer[i] == '|' || lineBuffer[i] == '>') {
136 				i++;
137 				if (lineBuffer[i] == '+' || lineBuffer[i] == '-')
138 					i++;
139 				while ((i < lengthLine) && isspacechar(lineBuffer[i]))
140 					i++;
141 				if (lineBuffer[i] == '\0') {
142 					styler.SetLineState(currentLine, YAML_STATE_TEXT_PARENT | indentAmount);
143 					styler.ColourTo(endPos, SCE_YAML_DEFAULT);
144 					return;
145 				} else if (lineBuffer[i] == '#') {
146 					styler.SetLineState(currentLine, YAML_STATE_TEXT_PARENT | indentAmount);
147 					styler.ColourTo(startLine + i - 1, SCE_YAML_DEFAULT);
148 					styler.ColourTo(endPos, SCE_YAML_COMMENT);
149 					return;
150 				} else {
151 					styler.ColourTo(endPos, SCE_YAML_ERROR);
152 					return;
153 				}
154 			} else if (lineBuffer[i] == '#') {
155 				styler.ColourTo(startLine + i - 1, SCE_YAML_DEFAULT);
156 				styler.ColourTo(endPos, SCE_YAML_COMMENT);
157 				return;
158 			}
159 			Sci_PositionU startComment = i;
160 			bInQuotes = false;
161 			while (startComment < lengthLine) { // Comment must be space padded
162 				if (lineBuffer[startComment] == '\'' || lineBuffer[startComment] == '\"')
163 					bInQuotes = !bInQuotes;
164 				if (lineBuffer[startComment] == '#' && isspacechar(lineBuffer[startComment - 1]) && !bInQuotes)
165 					break;
166 				startComment++;
167 			}
168 			styler.SetLineState(currentLine, YAML_STATE_VALUE);
169 			if (lineBuffer[i] == '&' || lineBuffer[i] == '*') {
170 				styler.ColourTo(startLine + startComment - 1, SCE_YAML_REFERENCE);
171 				if (startComment < lengthLine)
172 					styler.ColourTo(endPos, SCE_YAML_COMMENT);
173 				return;
174 			}
175 			if (KeywordAtChar(&lineBuffer[i], &lineBuffer[startComment], keywords)) { // Convertible value (true/false, etc.)
176 				styler.ColourTo(startLine + startComment - 1, SCE_YAML_KEYWORD);
177 				if (startComment < lengthLine)
178 					styler.ColourTo(endPos, SCE_YAML_COMMENT);
179 				return;
180 			}
181 			Sci_PositionU i2 = i;
182 			while ((i < startComment) && lineBuffer[i]) {
183 				if (!(IsASCII(lineBuffer[i]) && isdigit(lineBuffer[i])) && lineBuffer[i] != '-'
184 				        && lineBuffer[i] != '.' && lineBuffer[i] != ',' && lineBuffer[i] != ' ') {
185 					styler.ColourTo(startLine + startComment - 1, SCE_YAML_DEFAULT);
186 					if (startComment < lengthLine)
187 						styler.ColourTo(endPos, SCE_YAML_COMMENT);
188 					return;
189 				}
190 				i++;
191 			}
192 			if (i > i2) {
193 				styler.ColourTo(startLine + startComment - 1, SCE_YAML_NUMBER);
194 				if (startComment < lengthLine)
195 					styler.ColourTo(endPos, SCE_YAML_COMMENT);
196 				return;
197 			}
198 			break; // shouldn't get here, but just in case, the rest of the line is coloured the default
199 		}
200 		i++;
201 	}
202 	styler.ColourTo(endPos, SCE_YAML_DEFAULT);
203 }
204 
ColouriseYAMLDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * keywordLists[],Accessor & styler)205 static void ColouriseYAMLDoc(Sci_PositionU startPos, Sci_Position length, int, WordList *keywordLists[], Accessor &styler) {
206 	char lineBuffer[1024] = "";
207 	styler.StartAt(startPos);
208 	styler.StartSegment(startPos);
209 	Sci_PositionU linePos = 0;
210 	Sci_PositionU startLine = startPos;
211 	Sci_PositionU endPos = startPos + length;
212 	Sci_PositionU maxPos = styler.Length();
213 	Sci_PositionU lineCurrent = styler.GetLine(startPos);
214 
215 	for (Sci_PositionU i = startPos; i < maxPos && i < endPos; i++) {
216 		lineBuffer[linePos++] = styler[i];
217 		if (AtEOL(styler, i) || (linePos >= sizeof(lineBuffer) - 1)) {
218 			// End of line (or of line buffer) met, colourise it
219 			lineBuffer[linePos] = '\0';
220 			ColouriseYAMLLine(lineBuffer, lineCurrent, linePos, startLine, i, *keywordLists[0], styler);
221 			linePos = 0;
222 			startLine = i + 1;
223 			lineCurrent++;
224 		}
225 	}
226 	if (linePos > 0) {	// Last line does not have ending characters
227 		ColouriseYAMLLine(lineBuffer, lineCurrent, linePos, startLine, startPos + length - 1, *keywordLists[0], styler);
228 	}
229 }
230 
IsCommentLine(Sci_Position line,Accessor & styler)231 static bool IsCommentLine(Sci_Position line, Accessor &styler) {
232 	Sci_Position pos = styler.LineStart(line);
233 	if (styler[pos] == '#')
234 		return true;
235 	return false;
236 }
237 
FoldYAMLDoc(Sci_PositionU startPos,Sci_Position length,int,WordList * [],Accessor & styler)238 static void FoldYAMLDoc(Sci_PositionU startPos, Sci_Position length, int /*initStyle - unused*/,
239                       WordList *[], Accessor &styler) {
240 	const Sci_Position maxPos = startPos + length;
241 	const Sci_Position maxLines = styler.GetLine(maxPos - 1);             // Requested last line
242 	const Sci_Position docLines = styler.GetLine(styler.Length() - 1);  // Available last line
243 	const bool foldComment = styler.GetPropertyInt("fold.comment.yaml") != 0;
244 
245 	// Backtrack to previous non-blank line so we can determine indent level
246 	// for any white space lines
247 	// and so we can fix any preceding fold level (which is why we go back
248 	// at least one line in all cases)
249 	int spaceFlags = 0;
250 	Sci_Position lineCurrent = styler.GetLine(startPos);
251 	int indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
252 	while (lineCurrent > 0) {
253 		lineCurrent--;
254 		indentCurrent = styler.IndentAmount(lineCurrent, &spaceFlags, NULL);
255 		if (!(indentCurrent & SC_FOLDLEVELWHITEFLAG) &&
256 		        (!IsCommentLine(lineCurrent, styler)))
257 			break;
258 	}
259 	int indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
260 
261 	// Set up initial loop state
262 	int prevComment = 0;
263 	if (lineCurrent >= 1)
264 		prevComment = foldComment && IsCommentLine(lineCurrent - 1, styler);
265 
266 	// Process all characters to end of requested range
267 	// or comment that hangs over the end of the range.  Cap processing in all cases
268 	// to end of document (in case of unclosed comment at end).
269 	while ((lineCurrent <= docLines) && ((lineCurrent <= maxLines) || prevComment)) {
270 
271 		// Gather info
272 		int lev = indentCurrent;
273 		Sci_Position lineNext = lineCurrent + 1;
274 		int indentNext = indentCurrent;
275 		if (lineNext <= docLines) {
276 			// Information about next line is only available if not at end of document
277 			indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
278 		}
279 		const int comment = foldComment && IsCommentLine(lineCurrent, styler);
280 		const int comment_start = (comment && !prevComment && (lineNext <= docLines) &&
281 		                           IsCommentLine(lineNext, styler) && (lev > SC_FOLDLEVELBASE));
282 		const int comment_continue = (comment && prevComment);
283 		if (!comment)
284 			indentCurrentLevel = indentCurrent & SC_FOLDLEVELNUMBERMASK;
285 		if (indentNext & SC_FOLDLEVELWHITEFLAG)
286 			indentNext = SC_FOLDLEVELWHITEFLAG | indentCurrentLevel;
287 
288 		if (comment_start) {
289 			// Place fold point at start of a block of comments
290 			lev |= SC_FOLDLEVELHEADERFLAG;
291 		} else if (comment_continue) {
292 			// Add level to rest of lines in the block
293 			lev = lev + 1;
294 		}
295 
296 		// Skip past any blank lines for next indent level info; we skip also
297 		// comments (all comments, not just those starting in column 0)
298 		// which effectively folds them into surrounding code rather
299 		// than screwing up folding.
300 
301 		while ((lineNext < docLines) &&
302 		        ((indentNext & SC_FOLDLEVELWHITEFLAG) ||
303 		         (lineNext <= docLines && IsCommentLine(lineNext, styler)))) {
304 
305 			lineNext++;
306 			indentNext = styler.IndentAmount(lineNext, &spaceFlags, NULL);
307 		}
308 
309 		const int levelAfterComments = indentNext & SC_FOLDLEVELNUMBERMASK;
310 		const int levelBeforeComments = Maximum(indentCurrentLevel,levelAfterComments);
311 
312 		// Now set all the indent levels on the lines we skipped
313 		// Do this from end to start.  Once we encounter one line
314 		// which is indented more than the line after the end of
315 		// the comment-block, use the level of the block before
316 
317 		Sci_Position skipLine = lineNext;
318 		int skipLevel = levelAfterComments;
319 
320 		while (--skipLine > lineCurrent) {
321 			int skipLineIndent = styler.IndentAmount(skipLine, &spaceFlags, NULL);
322 
323 			if ((skipLineIndent & SC_FOLDLEVELNUMBERMASK) > levelAfterComments)
324 				skipLevel = levelBeforeComments;
325 
326 			int whiteFlag = skipLineIndent & SC_FOLDLEVELWHITEFLAG;
327 
328 			styler.SetLevel(skipLine, skipLevel | whiteFlag);
329 		}
330 
331 		// Set fold header on non-comment line
332 		if (!comment && !(indentCurrent & SC_FOLDLEVELWHITEFLAG) ) {
333 			if ((indentCurrent & SC_FOLDLEVELNUMBERMASK) < (indentNext & SC_FOLDLEVELNUMBERMASK))
334 				lev |= SC_FOLDLEVELHEADERFLAG;
335 		}
336 
337 		// Keep track of block comment state of previous line
338 		prevComment = comment_start || comment_continue;
339 
340 		// Set fold level for this line and move to next line
341 		styler.SetLevel(lineCurrent, lev);
342 		indentCurrent = indentNext;
343 		lineCurrent = lineNext;
344 	}
345 
346 	// NOTE: Cannot set level of last line here because indentCurrent doesn't have
347 	// header flag set; the loop above is crafted to take care of this case!
348 	//styler.SetLevel(lineCurrent, indentCurrent);
349 }
350 
351 LexerModule lmYAML(SCLEX_YAML, ColouriseYAMLDoc, "yaml", FoldYAMLDoc, yamlWordListDesc);
352