1 /******************************************************************
2 * LexMarkdown.cxx
3 *
4 * A simple Markdown lexer for scintilla.
5 *
6 * Includes highlighting for some extra features from the
7 * Pandoc implementation; strikeout, using '#.' as a default
8 * ordered list item marker, and delimited code blocks.
9 *
10 * Limitations:
11 *
12 * Standard indented code blocks are not highlighted at all,
13 * as it would conflict with other indentation schemes. Use
14 * delimited code blocks for blanket highlighting of an
15 * entire code block. Embedded HTML is not highlighted either.
16 * Blanket HTML highlighting has issues, because some Markdown
17 * implementations allow Markdown markup inside of the HTML. Also,
18 * there is a following blank line issue that can't be ignored,
19 * explained in the next paragraph. Embedded HTML and code
20 * blocks would be better supported with language specific
21 * highlighting.
22 *
23 * The highlighting aims to accurately reflect correct syntax,
24 * but a few restrictions are relaxed. Delimited code blocks are
25 * highlighted, even if the line following the code block is not blank.
26 * Requiring a blank line after a block breaks the highlighting
27 * in certain cases, because of the way Scintilla ends up calling
28 * the lexer.
29 *
30 * Written by Jon Strait - jstrait@moonloop.net
31 *
32 * The License.txt file describes the conditions under which this
33 * software may be distributed.
34 *
35 *****************************************************************/
36
37 #include <stdlib.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdarg.h>
41 #include <assert.h>
42
43 #include "ILexer.h"
44 #include "Scintilla.h"
45 #include "SciLexer.h"
46
47 #include "WordList.h"
48 #include "LexAccessor.h"
49 #include "Accessor.h"
50 #include "StyleContext.h"
51 #include "CharacterSet.h"
52 #include "LexerModule.h"
53
54 #ifdef SCI_NAMESPACE
55 using namespace Scintilla;
56 #endif
57
// Reports whether ch is a line feed or a carriage return.
static inline bool IsNewline(const int ch) {
    switch (ch) {
    case '\n':
    case '\r':
        return true;
    default:
        return false;
    }
}
61
62 // True if can follow ch down to the end with possibly trailing whitespace
FollowToLineEnd(const int ch,const int state,const unsigned int endPos,StyleContext & sc)63 static bool FollowToLineEnd(const int ch, const int state, const unsigned int endPos, StyleContext &sc) {
64 unsigned int i = 0;
65 while (sc.GetRelative(++i) == ch)
66 ;
67 // Skip over whitespace
68 while (IsASpaceOrTab(sc.GetRelative(i)) && sc.currentPos + i < endPos)
69 ++i;
70 if (IsNewline(sc.GetRelative(i)) || sc.currentPos + i == endPos) {
71 sc.Forward(i);
72 sc.ChangeState(state);
73 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
74 return true;
75 }
76 else return false;
77 }
78
79 // Set the state on text section from current to length characters,
80 // then set the rest until the newline to default, except for any characters matching token
SetStateAndZoom(const int state,const int length,const int token,StyleContext & sc)81 static void SetStateAndZoom(const int state, const int length, const int token, StyleContext &sc) {
82 sc.SetState(state);
83 sc.Forward(length);
84 sc.SetState(SCE_MARKDOWN_DEFAULT);
85 sc.Forward();
86 bool started = false;
87 while (sc.More() && !IsNewline(sc.ch)) {
88 if (sc.ch == token && !started) {
89 sc.SetState(state);
90 started = true;
91 }
92 else if (sc.ch != token) {
93 sc.SetState(SCE_MARKDOWN_DEFAULT);
94 started = false;
95 }
96 sc.Forward();
97 }
98 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
99 }
100
101 // Does the previous line have more than spaces and tabs?
HasPrevLineContent(StyleContext & sc)102 static bool HasPrevLineContent(StyleContext &sc) {
103 int i = 0;
104 // Go back to the previous newline
105 while ((--i + (int)sc.currentPos) >= 0 && !IsNewline(sc.GetRelative(i)))
106 ;
107 while ((--i + (int)sc.currentPos) >= 0) {
108 if (IsNewline(sc.GetRelative(i)))
109 break;
110 if (!IsASpaceOrTab(sc.GetRelative(i)))
111 return true;
112 }
113 return false;
114 }
115
AtTermStart(StyleContext & sc)116 static bool AtTermStart(StyleContext &sc) {
117 return sc.currentPos == 0 || isspacechar(sc.chPrev);
118 }
119
IsValidHrule(const unsigned int endPos,StyleContext & sc)120 static bool IsValidHrule(const unsigned int endPos, StyleContext &sc) {
121 int count = 1;
122 unsigned int i = 0;
123 for (;;) {
124 ++i;
125 int c = sc.GetRelative(i);
126 if (c == sc.ch)
127 ++count;
128 // hit a terminating character
129 else if (!IsASpaceOrTab(c) || sc.currentPos + i == endPos) {
130 // Are we a valid HRULE
131 if ((IsNewline(c) || sc.currentPos + i == endPos) &&
132 count >= 3 && !HasPrevLineContent(sc)) {
133 sc.SetState(SCE_MARKDOWN_HRULE);
134 sc.Forward(i);
135 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
136 return true;
137 }
138 else {
139 sc.SetState(SCE_MARKDOWN_DEFAULT);
140 return false;
141 }
142 }
143 }
144 }
145
ColorizeMarkdownDoc(unsigned int startPos,int length,int initStyle,WordList **,Accessor & styler)146 static void ColorizeMarkdownDoc(unsigned int startPos, int length, int initStyle,
147 WordList **, Accessor &styler) {
148 unsigned int endPos = startPos + length;
149 int precharCount = 0;
150 // Don't advance on a new loop iteration and retry at the same position.
151 // Useful in the corner case of having to start at the beginning file position
152 // in the default state.
153 bool freezeCursor = false;
154
155 StyleContext sc(startPos, length, initStyle, styler);
156
157 while (sc.More()) {
158 // Skip past escaped characters
159 if (sc.ch == '\\') {
160 sc.Forward();
161 continue;
162 }
163
164 // A blockquotes resets the line semantics
165 if (sc.state == SCE_MARKDOWN_BLOCKQUOTE)
166 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
167
168 // Conditional state-based actions
169 if (sc.state == SCE_MARKDOWN_CODE2) {
170 if (sc.Match("``") && sc.GetRelative(-2) != ' ') {
171 sc.Forward(2);
172 sc.SetState(SCE_MARKDOWN_DEFAULT);
173 }
174 }
175 else if (sc.state == SCE_MARKDOWN_CODE) {
176 if (sc.ch == '`' && sc.chPrev != ' ')
177 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
178 }
179 /* De-activated because it gets in the way of other valid indentation
180 * schemes, for example multiple paragraphs inside a list item.
181 // Code block
182 else if (sc.state == SCE_MARKDOWN_CODEBK) {
183 bool d = true;
184 if (IsNewline(sc.ch)) {
185 if (sc.chNext != '\t') {
186 for (int c = 1; c < 5; ++c) {
187 if (sc.GetRelative(c) != ' ')
188 d = false;
189 }
190 }
191 }
192 else if (sc.atLineStart) {
193 if (sc.ch != '\t' ) {
194 for (int i = 0; i < 4; ++i) {
195 if (sc.GetRelative(i) != ' ')
196 d = false;
197 }
198 }
199 }
200 if (!d)
201 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
202 }
203 */
204 // Strong
205 else if (sc.state == SCE_MARKDOWN_STRONG1) {
206 if (sc.Match("**") && sc.chPrev != ' ') {
207 sc.Forward(2);
208 sc.SetState(SCE_MARKDOWN_DEFAULT);
209 }
210 }
211 else if (sc.state == SCE_MARKDOWN_STRONG2) {
212 if (sc.Match("__") && sc.chPrev != ' ') {
213 sc.Forward(2);
214 sc.SetState(SCE_MARKDOWN_DEFAULT);
215 }
216 }
217 // Emphasis
218 else if (sc.state == SCE_MARKDOWN_EM1) {
219 if (sc.ch == '*' && sc.chPrev != ' ')
220 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
221 }
222 else if (sc.state == SCE_MARKDOWN_EM2) {
223 if (sc.ch == '_' && sc.chPrev != ' ')
224 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
225 }
226 else if (sc.state == SCE_MARKDOWN_CODEBK) {
227 if (sc.atLineStart && sc.Match("~~~")) {
228 int i = 1;
229 while (!IsNewline(sc.GetRelative(i)) && sc.currentPos + i < endPos)
230 i++;
231 sc.Forward(i);
232 sc.SetState(SCE_MARKDOWN_DEFAULT);
233 }
234 }
235 else if (sc.state == SCE_MARKDOWN_STRIKEOUT) {
236 if (sc.Match("~~") && sc.chPrev != ' ') {
237 sc.Forward(2);
238 sc.SetState(SCE_MARKDOWN_DEFAULT);
239 }
240 }
241 else if (sc.state == SCE_MARKDOWN_LINE_BEGIN) {
242 // Header
243 if (sc.Match("######"))
244 SetStateAndZoom(SCE_MARKDOWN_HEADER6, 6, '#', sc);
245 else if (sc.Match("#####"))
246 SetStateAndZoom(SCE_MARKDOWN_HEADER5, 5, '#', sc);
247 else if (sc.Match("####"))
248 SetStateAndZoom(SCE_MARKDOWN_HEADER4, 4, '#', sc);
249 else if (sc.Match("###"))
250 SetStateAndZoom(SCE_MARKDOWN_HEADER3, 3, '#', sc);
251 else if (sc.Match("##"))
252 SetStateAndZoom(SCE_MARKDOWN_HEADER2, 2, '#', sc);
253 else if (sc.Match("#")) {
254 // Catch the special case of an unordered list
255 if (sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
256 precharCount = 0;
257 sc.SetState(SCE_MARKDOWN_PRECHAR);
258 }
259 else
260 SetStateAndZoom(SCE_MARKDOWN_HEADER1, 1, '#', sc);
261 }
262 // Code block
263 else if (sc.Match("~~~")) {
264 if (!HasPrevLineContent(sc))
265 sc.SetState(SCE_MARKDOWN_CODEBK);
266 else
267 sc.SetState(SCE_MARKDOWN_DEFAULT);
268 }
269 else if (sc.ch == '=') {
270 if (HasPrevLineContent(sc) && FollowToLineEnd('=', SCE_MARKDOWN_HEADER1, endPos, sc))
271 ;
272 else
273 sc.SetState(SCE_MARKDOWN_DEFAULT);
274 }
275 else if (sc.ch == '-') {
276 if (HasPrevLineContent(sc) && FollowToLineEnd('-', SCE_MARKDOWN_HEADER2, endPos, sc))
277 ;
278 else {
279 precharCount = 0;
280 sc.SetState(SCE_MARKDOWN_PRECHAR);
281 }
282 }
283 else if (IsNewline(sc.ch))
284 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
285 else {
286 precharCount = 0;
287 sc.SetState(SCE_MARKDOWN_PRECHAR);
288 }
289 }
290
291 // The header lasts until the newline
292 else if (sc.state == SCE_MARKDOWN_HEADER1 || sc.state == SCE_MARKDOWN_HEADER2 ||
293 sc.state == SCE_MARKDOWN_HEADER3 || sc.state == SCE_MARKDOWN_HEADER4 ||
294 sc.state == SCE_MARKDOWN_HEADER5 || sc.state == SCE_MARKDOWN_HEADER6) {
295 if (IsNewline(sc.ch))
296 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
297 }
298
299 // New state only within the initial whitespace
300 if (sc.state == SCE_MARKDOWN_PRECHAR) {
301 // Blockquote
302 if (sc.ch == '>' && precharCount < 5)
303 sc.SetState(SCE_MARKDOWN_BLOCKQUOTE);
304 /*
305 // Begin of code block
306 else if (!HasPrevLineContent(sc) && (sc.chPrev == '\t' || precharCount >= 4))
307 sc.SetState(SCE_MARKDOWN_CODEBK);
308 */
309 // HRule - Total of three or more hyphens, asterisks, or underscores
310 // on a line by themselves
311 else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '_') && IsValidHrule(endPos, sc))
312 ;
313 // Unordered list
314 else if ((sc.ch == '-' || sc.ch == '*' || sc.ch == '+') && IsASpaceOrTab(sc.chNext)) {
315 sc.SetState(SCE_MARKDOWN_ULIST_ITEM);
316 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
317 }
318 // Ordered list
319 else if (IsADigit(sc.ch)) {
320 int digitCount = 0;
321 while (IsADigit(sc.GetRelative(++digitCount)))
322 ;
323 if (sc.GetRelative(digitCount) == '.' &&
324 IsASpaceOrTab(sc.GetRelative(digitCount + 1))) {
325 sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
326 sc.Forward(digitCount + 1);
327 sc.SetState(SCE_MARKDOWN_DEFAULT);
328 }
329 }
330 // Alternate Ordered list
331 else if (sc.ch == '#' && sc.chNext == '.' && IsASpaceOrTab(sc.GetRelative(2))) {
332 sc.SetState(SCE_MARKDOWN_OLIST_ITEM);
333 sc.Forward(2);
334 sc.SetState(SCE_MARKDOWN_DEFAULT);
335 }
336 else if (sc.ch != ' ' || precharCount > 2)
337 sc.SetState(SCE_MARKDOWN_DEFAULT);
338 else
339 ++precharCount;
340 }
341
342 // New state anywhere in doc
343 if (sc.state == SCE_MARKDOWN_DEFAULT) {
344 if (sc.atLineStart && sc.ch == '#') {
345 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
346 freezeCursor = true;
347 }
348 // Links and Images
349 if (sc.Match("![") || sc.ch == '[') {
350 int i = 0, j = 0, k = 0;
351 int len = endPos - sc.currentPos;
352 while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
353 ;
354 if (sc.GetRelative(i) == ']') {
355 j = i;
356 if (sc.GetRelative(++i) == '(') {
357 while (i < len && (sc.GetRelative(++i) != ')' || sc.GetRelative(i - 1) == '\\'))
358 ;
359 if (sc.GetRelative(i) == ')')
360 k = i;
361 }
362 else if (sc.GetRelative(i) == '[' || sc.GetRelative(++i) == '[') {
363 while (i < len && (sc.GetRelative(++i) != ']' || sc.GetRelative(i - 1) == '\\'))
364 ;
365 if (sc.GetRelative(i) == ']')
366 k = i;
367 }
368 }
369 // At least a link text
370 if (j) {
371 sc.SetState(SCE_MARKDOWN_LINK);
372 sc.Forward(j);
373 // Also has a URL or reference portion
374 if (k)
375 sc.Forward(k - j);
376 sc.ForwardSetState(SCE_MARKDOWN_DEFAULT);
377 }
378 }
379 // Code - also a special case for alternate inside spacing
380 if (sc.Match("``") && sc.GetRelative(3) != ' ' && AtTermStart(sc)) {
381 sc.SetState(SCE_MARKDOWN_CODE2);
382 sc.Forward();
383 }
384 else if (sc.ch == '`' && sc.chNext != ' ' && AtTermStart(sc)) {
385 sc.SetState(SCE_MARKDOWN_CODE);
386 }
387 // Strong
388 else if (sc.Match("**") && sc.GetRelative(2) != ' ' && AtTermStart(sc)) {
389 sc.SetState(SCE_MARKDOWN_STRONG1);
390 sc.Forward();
391 }
392 else if (sc.Match("__") && sc.GetRelative(2) != ' ' && AtTermStart(sc)) {
393 sc.SetState(SCE_MARKDOWN_STRONG2);
394 sc.Forward();
395 }
396 // Emphasis
397 else if (sc.ch == '*' && sc.chNext != ' ' && AtTermStart(sc)) {
398 sc.SetState(SCE_MARKDOWN_EM1);
399 }
400 else if (sc.ch == '_' && sc.chNext != ' ' && AtTermStart(sc)) {
401 sc.SetState(SCE_MARKDOWN_EM2);
402 }
403 // Strikeout
404 else if (sc.Match("~~") && sc.GetRelative(2) != ' ' && AtTermStart(sc)) {
405 sc.SetState(SCE_MARKDOWN_STRIKEOUT);
406 sc.Forward();
407 }
408 // Beginning of line
409 else if (IsNewline(sc.ch)) {
410 sc.SetState(SCE_MARKDOWN_LINE_BEGIN);
411 }
412 }
413 // Advance if not holding back the cursor for this iteration.
414 if (!freezeCursor)
415 sc.Forward();
416 freezeCursor = false;
417 }
418 sc.Complete();
419 }
420
// Lexer module object for Markdown, constructed from the SCLEX_MARKDOWN
// identifier, the ColorizeMarkdownDoc function and the name "markdown".
421 LexerModule lmMarkdown(SCLEX_MARKDOWN, ColorizeMarkdownDoc, "markdown");
422