1%{
2/**********************************************************************
3
4  markdown_parser.leg - markdown parser in C using a PEG grammar.
5  (c) 2008 John MacFarlane (jgm at berkeley dot edu).
6
7  This program is free software; you can redistribute it and/or modify
8  it under the terms of the GNU General Public License or the MIT
9  license.  See LICENSE for details.
10
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  GNU General Public License for more details.
15
16 ***********************************************************************/
17
18#include <stdbool.h>
19#include <assert.h>
20#include "markdown_peg.h"
21#include "utility_functions.h"
22
23
24
/**********************************************************************

  Definitions for the leg parser generator.
  YY_INPUT is the macro the generated parser invokes to get new input.
  All new input is taken from the (static) charbuf pointer.

 ***********************************************************************/
32
33
34
35# define YYSTYPE element *
36#ifdef __DEBUG__
37# define YY_DEBUG 1
38#endif
39
/*
 * YY_INPUT - input hook used by the generated parser.
 *
 * Stores at most one byte from the cursor `charbuf` into `buf`, advancing the
 * cursor, and sets `result` to the number of bytes stored: 1, or 0 once
 * `charbuf` is NULL or points at the terminating '\0'.  `max_size` is
 * accepted but never consulted.
 *
 * The byte is read through `unsigned char` so that an input byte 0xFF (which
 * compares equal to EOF where plain char is signed) is delivered as data
 * instead of being mistaken for end of input.
 */
#define YY_INPUT(buf, result, max_size)                       \
{                                                             \
    int yyc;                                                  \
    if (charbuf && *charbuf != '\0') {                        \
        yyc = (unsigned char) *charbuf++;                     \
    } else {                                                  \
        yyc = EOF;                                            \
    }                                                         \
    result = (EOF == yyc) ? 0 : (*(buf) = (char) yyc, 1);     \
}
50
51#define YY_RULE(T)	T
52
53
54/**********************************************************************
55
56  PEG grammar and parser actions for markdown syntax.
57
58 ***********************************************************************/
59
60%}
61
# Document root: an optional byte-order mark followed by any number of blocks.
# Blocks are consed onto the front of the list, which is therefore reversed
# before being stored in parse_result.
Doc = BOM? blocks:StartList
      ( Block { blocks = cons($$, blocks); } )*
      { parse_result = reverse(blocks); }
64
# A block: leading blank lines are consumed, then the alternatives are tried
# in the order written and the first one that matches is taken.
Block = BlankLine*
        ( BlockQuote | Verbatim | Note | Reference
        | HorizontalRule | Heading
        | OrderedList | BulletList
        | HtmlBlock | StyleBlock
        | Para | Plain )
78
# Inline content terminated by at least one blank line, tagged PARA.
Para =  NonindentSpace inl:Inlines BlankLine+
        { $$ = inl;
          $$->key = PARA; }

# Bare inline content, tagged PLAIN.
Plain = inl:Inlines
        { $$ = inl;
          $$->key = PLAIN; }
84
# One inline inside an ATX heading.  It must not start at a newline nor at an
# optional run of '#' that closes the line.
AtxInline = !Newline !( Sp? '#'* Sp Newline ) Inline

# One to six '#' characters; the count selects the key H1 + (count - 1).
AtxStart = < ( "######" | "#####" | "####" | "###" | "##" | "#" ) >
           { $$ = mk_element(H1 + (strlen(yytext) - 1)); }

# The AtxStart element is only needed for its key, so it is freed as soon as
# the key has been handed to mk_list.
AtxHeading = start:AtxStart Sp? inl:StartList
             ( AtxInline { inl = cons($$, inl); } )+
             ( Sp? '#'* Sp )? Newline
             { $$ = mk_list(start->key, inl);
               free(start); }
93
SetextHeading = SetextHeading1 | SetextHeading2

# Underlines: a run of '=' (level 1) or of '-' (level 2) ending the line.
SetextBottom1 = '='+ Newline
SetextBottom2 = '-'+ Newline

# The leading lookahead checks that one raw line is followed by the matching
# underline before any inline content is parsed.
SetextHeading1 = &( RawLine SetextBottom1 )
                 inl:StartList
                 ( !Endline Inline { inl = cons($$, inl); } )+
                 Sp? Newline SetextBottom1
                 { $$ = mk_list(H1, inl); }

SetextHeading2 = &( RawLine SetextBottom2 )
                 inl:StartList
                 ( !Endline Inline { inl = cons($$, inl); } )+
                 Sp? Newline SetextBottom2
                 { $$ = mk_list(H2, inl); }

Heading = SetextHeading | AtxHeading
109
# A block quote wraps the raw quoted text as the child of a BLOCKQUOTE element.
BlockQuote = raw:BlockQuoteRaw
             { $$ = mk_element(BLOCKQUOTE);
               $$->children = raw; }

# Collects the quoted lines as one RAW string.  Each group is a '>' line, then
# any non-blank lines that do not start with '>', then blank lines (each kept
# as "\n").
BlockQuoteRaw = lines:StartList
                ( ( '>' ' '? Line { lines = cons($$, lines); } )
                  ( !'>' !BlankLine Line { lines = cons($$, lines); } )*
                  ( BlankLine { lines = cons(mk_str("\n"), lines); } )*
                )+
                { $$ = mk_str_from_list(lines, true);
                  $$->key = RAW; }
123
NonblankIndentedLine = !BlankLine IndentedLine

# A verbatim chunk: optional blank lines (each kept as "\n") followed by one
# or more indented, non-blank lines.
VerbatimChunk = lines:StartList
                ( BlankLine { lines = cons(mk_str("\n"), lines); } )*
                ( NonblankIndentedLine { lines = cons($$, lines); } )+
                { $$ = mk_str_from_list(lines, false); }

# One or more chunks joined into a single VERBATIM string.
Verbatim = chunks:StartList
           ( VerbatimChunk { chunks = cons($$, chunks); } )+
           { $$ = mk_str_from_list(chunks, false);
             $$->key = VERBATIM; }
134
# Three or more '*', '-' or '_' (all the same character), optionally separated
# by spaces, alone on a line that is followed by at least one blank line.
HorizontalRule = NonindentSpace
                 ( '*' Sp '*' Sp '*' ( Sp '*' )*
                 | '-' Sp '-' Sp '-' ( Sp '-' )*
                 | '_' Sp '_' Sp '_' ( Sp '_' )* )
                 Sp Newline BlankLine+
                 { $$ = mk_element(HRULE); }

# A bullet marker; the lookahead keeps horizontal rules from being read as bullets.
Bullet = !HorizontalRule NonindentSpace ( '+' | '*' | '-' ) Spacechar+

# A tight or loose list that starts with a bullet, re-tagged as BULLETLIST.
BulletList = &Bullet ( ListTight | ListLoose )
             { $$->key = BULLETLIST; }
146
# A tight list: one or more tight items, then optional blank lines that must
# not be followed by another bullet or enumerator.
ListTight = items:StartList
            ( ListItemTight { items = cons($$, items); } )+
            BlankLine* !( Bullet | Enumerator )
            { $$ = mk_list(LIST, items); }
151
# A loose list: one or more list items, each optionally followed by blank
# lines.  "\n\n" is appended to the raw text of every item.
ListLoose = a:StartList
            ( b:ListItem BlankLine*
              {   element *li;
                  char *grown;
                  li = b->children;
                  /* Grow through a temporary: assigning the realloc result
                     straight back would lose the buffer and make strcat write
                     through NULL on failure.  If the allocation fails, the
                     item text is left unchanged. */
                  grown = realloc(li->contents.str, strlen(li->contents.str) + 3);
                  if (grown != NULL) {
                      strcat(grown, "\n\n");  /* In loose list, \n\n added to end of each element */
                      li->contents.str = grown;
                  }
                  a = cons(b, a);
              } )+
            { $$ = mk_list(LIST, a); }
161
# A list item: a marker, a first block and any number of continuation blocks.
# The text is kept as one RAW string hanging off a LISTITEM element.
ListItem = ( Bullet | Enumerator )
           chunks:StartList
           ListBlock { chunks = cons($$, chunks); }
           ( ListContinuationBlock { chunks = cons($$, chunks); } )*
           { element *text = mk_str_from_list(chunks, false);
             text->key = RAW;
             $$ = mk_element(LISTITEM);
             $$->children = text; }

# Like ListItem, but a continuation block may not begin at a blank line, and
# no further continuation block may follow the item.
ListItemTight = ( Bullet | Enumerator )
                chunks:StartList
                ListBlock { chunks = cons($$, chunks); }
                ( !BlankLine
                  ListContinuationBlock { chunks = cons($$, chunks); } )*
                !ListContinuationBlock
                { element *text = mk_str_from_list(chunks, false);
                  text->key = RAW;
                  $$ = mk_element(LISTITEM);
                  $$->children = text; }
186
# The first non-blank line of a list block plus the lines that continue it.
ListBlock = lines:StartList
            !BlankLine Line { lines = cons($$, lines); }
            ( ListBlockLine { lines = cons($$, lines); } )*
            { $$ = mk_str_from_list(lines, false); }

# Indented blocks that continue a list item.  The blank lines in front of them
# are kept as text; when there are none, "\001" is stored instead.
ListContinuationBlock = parts:StartList
                        ( < BlankLine* >
                          {   /* "\001" is the block separator */
                              parts = cons(mk_str(yytext[0] == '\0' ? "\001" : yytext), parts); } )
                        ( Indent ListBlock { parts = cons($$, parts); } )+
                        { $$ = mk_str_from_list(parts, false); }
200
# An ordered-list marker: digits, a period and at least one space or tab.
Enumerator = NonindentSpace [0-9]+ '.' Spacechar+

# A tight or loose list that starts with an enumerator, re-tagged as ORDEREDLIST.
OrderedList = &Enumerator ( ListTight | ListLoose )
              { $$->key = ORDEREDLIST; }

# A line that continues a list block: not blank, not a new list marker
# (indented or not) and not a horizontal rule.
ListBlockLine = !BlankLine
                !( Indent? ( Bullet | Enumerator ) )
                !HorizontalRule
                OptionallyIndentedLine
210
# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.
#
# Most elements below repeat the three-rule pattern shown here for <address>:
#   HtmlBlockOpenX   the opening tag, name all-lowercase or all-uppercase,
#                    with optional attributes
#   HtmlBlockCloseX  the matching closing tag
#   HtmlBlockX       an opening tag, then nested blocks of the same element or
#                    any other character that does not start a closing tag,
#                    then the closing tag

HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'
HtmlBlockAddress = HtmlBlockOpenAddress (HtmlBlockAddress | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress
217
218HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
219HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'
220HtmlBlockBlockquote = HtmlBlockOpenBlockquote (HtmlBlockBlockquote | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote
221
222HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
223HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>'
224HtmlBlockCenter = HtmlBlockOpenCenter (HtmlBlockCenter | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter
225
226HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
227HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>'
228HtmlBlockDir = HtmlBlockOpenDir (HtmlBlockDir | !HtmlBlockCloseDir .)* HtmlBlockCloseDir
229
230HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
231HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>'
232HtmlBlockDiv = HtmlBlockOpenDiv (HtmlBlockDiv | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv
233
234HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
235HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>'
236HtmlBlockDl = HtmlBlockOpenDl (HtmlBlockDl | !HtmlBlockCloseDl .)* HtmlBlockCloseDl
237
238HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
239HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'
240HtmlBlockFieldset = HtmlBlockOpenFieldset (HtmlBlockFieldset | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset
241
242HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
243HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>'
244HtmlBlockForm = HtmlBlockOpenForm (HtmlBlockForm | !HtmlBlockCloseForm .)* HtmlBlockCloseForm
245
246HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
247HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>'
248HtmlBlockH1 = HtmlBlockOpenH1 (HtmlBlockH1 | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1
249
250HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
251HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>'
252HtmlBlockH2 = HtmlBlockOpenH2 (HtmlBlockH2 | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2
253
254HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
255HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>'
256HtmlBlockH3 = HtmlBlockOpenH3 (HtmlBlockH3 | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3
257
258HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
259HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>'
260HtmlBlockH4 = HtmlBlockOpenH4 (HtmlBlockH4 | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4
261
262HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
263HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>'
264HtmlBlockH5 = HtmlBlockOpenH5 (HtmlBlockH5 | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5
265
266HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
267HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>'
268HtmlBlockH6 = HtmlBlockOpenH6 (HtmlBlockH6 | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6
269
270HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
271HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>'
272HtmlBlockMenu = HtmlBlockOpenMenu (HtmlBlockMenu | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu
273
274HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
275HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'
276HtmlBlockNoframes = HtmlBlockOpenNoframes (HtmlBlockNoframes | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes
277
278HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
279HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'
280HtmlBlockNoscript = HtmlBlockOpenNoscript (HtmlBlockNoscript | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript
281
282HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
283HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>'
284HtmlBlockOl = HtmlBlockOpenOl (HtmlBlockOl | !HtmlBlockCloseOl .)* HtmlBlockCloseOl
285
286HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
287HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>'
288HtmlBlockP = HtmlBlockOpenP (HtmlBlockP | !HtmlBlockCloseP .)* HtmlBlockCloseP
289
290HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
291HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>'
292HtmlBlockPre = HtmlBlockOpenPre (HtmlBlockPre | !HtmlBlockClosePre .)* HtmlBlockClosePre
293
294HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
295HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>'
296HtmlBlockTable = HtmlBlockOpenTable (HtmlBlockTable | !HtmlBlockCloseTable .)* HtmlBlockCloseTable
297
298HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
299HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>'
300HtmlBlockUl = HtmlBlockOpenUl (HtmlBlockUl | !HtmlBlockCloseUl .)* HtmlBlockCloseUl
301
302HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
303HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>'
304HtmlBlockDd = HtmlBlockOpenDd (HtmlBlockDd | !HtmlBlockCloseDd .)* HtmlBlockCloseDd
305
306HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
307HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>'
308HtmlBlockDt = HtmlBlockOpenDt (HtmlBlockDt | !HtmlBlockCloseDt .)* HtmlBlockCloseDt
309
310HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
311HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'
312HtmlBlockFrameset = HtmlBlockOpenFrameset (HtmlBlockFrameset | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset
313
314HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
315HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>'
316HtmlBlockLi = HtmlBlockOpenLi (HtmlBlockLi | !HtmlBlockCloseLi .)* HtmlBlockCloseLi
317
318HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
319HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'
320HtmlBlockTbody = HtmlBlockOpenTbody (HtmlBlockTbody | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody
321
322HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
323HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>'
324HtmlBlockTd = HtmlBlockOpenTd (HtmlBlockTd | !HtmlBlockCloseTd .)* HtmlBlockCloseTd
325
326HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
327HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'
328HtmlBlockTfoot = HtmlBlockOpenTfoot (HtmlBlockTfoot | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot
329
330HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
331HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>'
332HtmlBlockTh = HtmlBlockOpenTh (HtmlBlockTh | !HtmlBlockCloseTh .)* HtmlBlockCloseTh
333
334HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
335HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'
336HtmlBlockThead = HtmlBlockOpenThead (HtmlBlockThead | !HtmlBlockCloseThead .)* HtmlBlockCloseThead
337
338HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
339HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'
340HtmlBlockTr = HtmlBlockOpenTr (HtmlBlockTr | !HtmlBlockCloseTr .)* HtmlBlockCloseTr
341
342HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
343HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'
344HtmlBlockScript = HtmlBlockOpenScript (!HtmlBlockCloseScript .)* HtmlBlockCloseScript
345
346HtmlBlockOpenHead = '<' Spnl ("head" | "HEAD") Spnl HtmlAttribute* '>'
347HtmlBlockCloseHead = '<' Spnl '/' ("head" | "HEAD") Spnl '>'
348HtmlBlockHead = HtmlBlockOpenHead (!HtmlBlockCloseHead .)* HtmlBlockCloseHead
349
350HtmlBlockInTags = HtmlBlockAddress
351                | HtmlBlockBlockquote
352                | HtmlBlockCenter
353                | HtmlBlockDir
354                | HtmlBlockDiv
355                | HtmlBlockDl
356                | HtmlBlockFieldset
357                | HtmlBlockForm
358                | HtmlBlockH1
359                | HtmlBlockH2
360                | HtmlBlockH3
361                | HtmlBlockH4
362                | HtmlBlockH5
363                | HtmlBlockH6
364                | HtmlBlockMenu
365                | HtmlBlockNoframes
366                | HtmlBlockNoscript
367                | HtmlBlockOl
368                | HtmlBlockP
369                | HtmlBlockPre
370                | HtmlBlockTable
371                | HtmlBlockUl
372                | HtmlBlockDd
373                | HtmlBlockDt
374                | HtmlBlockFrameset
375                | HtmlBlockLi
376                | HtmlBlockTbody
377                | HtmlBlockTd
378                | HtmlBlockTfoot
379                | HtmlBlockTh
380                | HtmlBlockThead
381                | HtmlBlockTr
382                | HtmlBlockScript
383                | HtmlBlockHead
384
# A block of raw HTML followed by at least one blank line.  With
# EXT_FILTER_HTML the block becomes an empty LIST; otherwise the captured text
# is kept as an HTMLBLOCK string.
HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
            BlankLine+
            { if (!extension(EXT_FILTER_HTML)) {
                  $$ = mk_str(yytext);
                  $$->key = HTMLBLOCK;
              } else {
                  $$ = mk_list(LIST, NULL);
              } }

# A block-level tag that closes itself with '/'.
HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'
396
397HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
398                "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
399                "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
400                "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
401                "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
402                "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"
403
StyleOpen =   '<' Spnl ( "style" | "STYLE" ) Spnl HtmlAttribute* '>'
StyleClose =  '<' Spnl '/' ( "style" | "STYLE" ) Spnl '>'
InStyleTags = StyleOpen ( !StyleClose . )* StyleClose

# A style element and any blank lines after it.  With EXT_FILTER_STYLES the
# block becomes an empty LIST; otherwise the captured text is kept as an
# HTMLBLOCK string.
StyleBlock =  < InStyleTags >
              BlankLine*
              { if (!extension(EXT_FILTER_STYLES)) {
                    $$ = mk_str(yytext);
                    $$->key = HTMLBLOCK;
                } else {
                    $$ = mk_list(LIST, NULL);
                } }
416
# A run of inlines.  A line ending is kept only when another inline follows
# it; one trailing line ending is consumed without being stored.
Inlines = inl:StartList
          ( !Endline Inline { inl = cons($$, inl); }
          | brk:Endline &Inline { inl = cons(brk, inl); } )+
          Endline?
          { $$ = mk_list(LIST, inl); }

# Alternatives are tried in the order written.
Inline = Str | Endline | UlOrStarLine | Space
       | Strong | Emph
       | Image | Link
       | NoteReference | InlineNote
       | Code | RawHtml | Entity | EscapedChar
       | Smart | Symbol
437
# Any run of spaces and tabs becomes a single " " tagged SPACE.
Space = Spacechar+
        { $$ = mk_str(" ");
          $$->key = SPACE; }

# A run of normal characters plus any following chunks.  A lone piece is
# returned as is; several pieces are wrapped in a LIST.
Str = parts:StartList
      < NormalChar+ > { parts = cons(mk_str(yytext), parts); }
      ( StrChunk { parts = cons($$, parts); } )*
      { $$ = (parts->next == NULL) ? parts : mk_list(LIST, parts); }

StrChunk = < ( NormalChar | '_'+ &Alphanumeric )+ > { $$ = mk_str(yytext); }
         | AposChunk

# With EXT_SMART, a quote directly before an alphanumeric is an apostrophe.
AposChunk = &{ extension(EXT_SMART) } '\'' &Alphanumeric
            { $$ = mk_element(APOSTROPHE); }

# A backslash followed by one of the listed characters yields that character.
EscapedChar = '\\' !Newline < [-\\`|*_{}[\]()#+.!><] >
              { $$ = mk_str(yytext); }

# An entity is kept verbatim and tagged HTML.
Entity = ( HexEntity | DecEntity | CharEntity )
         { $$ = mk_str(yytext);
           $$->key = HTML; }
457
Endline = LineBreak | TerminalEndline | NormalEndline

# A newline inside a paragraph: the next line must not be blank, start with
# '>' or an ATX heading marker, or be a line followed by a run of '=' or '-'.
NormalEndline = Sp Newline
                !BlankLine !'>' !AtxStart
                !( Line ( '='+ | '-'+ ) Newline )
                { $$ = mk_str("\n");
                  $$->key = SPACE; }

# The newline that ends the input produces no element.
TerminalEndline = Sp Newline Eof
                  { $$ = NULL; }

# Two spaces before a normal line ending make a hard line break.
LineBreak = "  " NormalEndline
            { $$ = mk_element(LINEBREAK); }

Symbol = < SpecialChar >
         { $$ = mk_str(yytext); }
473
# Long runs of '*' or '_', and runs of them with a space on each side, are
# taken as plain text so the parser does not get bogged down on them.
UlOrStarLine = ( UlLine | StarLine )
               { $$ = mk_str(yytext); }
StarLine = < "****" '*'* >
         | < Spacechar '*'+ &Spacechar >
UlLine   = < "____" '_'* >
         | < Spacechar '_'+ &Spacechar >
479
Emph = EmphStar | EmphUl

Whitespace = Spacechar | Newline

# Emphasis: a single delimiter not followed by whitespace, then inlines or
# nested strong spans, then the closing delimiter.
EmphStar = '*' !Whitespace
           inl:StartList
           ( !'*' item:Inline { inl = cons(item, inl); }
           | item:StrongStar  { inl = cons(item, inl); }
           )+
           '*'
           { $$ = mk_list(EMPH, inl); }

EmphUl = '_' !Whitespace
         inl:StartList
         ( !'_' item:Inline { inl = cons(item, inl); }
         | item:StrongUl    { inl = cons(item, inl); }
         )+
         '_'
         { $$ = mk_list(EMPH, inl); }

Strong = StrongStar | StrongUl

# Strong emphasis: a doubled delimiter not followed by whitespace, then
# inlines, then the closing doubled delimiter.
StrongStar = "**" !Whitespace
             inl:StartList
             ( !"**" item:Inline { inl = cons(item, inl); } )+
             "**"
             { $$ = mk_list(STRONG, inl); }

StrongUl = "__" !Whitespace
           inl:StartList
           ( !"__" item:Inline { inl = cons(item, inl); } )+
           "__"
           { $$ = mk_list(STRONG, inl); }
513
# '!' in front of a link.  A LINK result is re-tagged IMAGE; any other result
# gets a literal "!" put in front of its children.
Image = '!' ( ExplicitLink | ReferenceLink )
        { if ($$->key != LINK) {
              $$->children = cons(mk_str("!"), $$->children);
          } else {
              $$->key = IMAGE;
          } }

Link = ExplicitLink | ReferenceLink | AutoLink

ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle
526
# "[text] [ref]".  A known reference yields a link; an unknown one is rebuilt
# as a LIST of "[", text, "]", the captured spacing, "[", ref, "]".
ReferenceLinkDouble = text:Label < Spnl > !"[]" ref:Label
                      { link found;
                        if (!find_reference(&found, ref->children)) {
                            element *tail = cons(ref, mk_str("]"));
                            tail = cons(mk_str("["), tail);
                            tail = cons(mk_str(yytext), tail);
                            tail = cons(mk_str("]"), tail);
                            tail = cons(text, tail);
                            $$ = mk_element(LIST);
                            $$->children = cons(mk_str("["), tail);
                        } else {
                            $$ = mk_link(text->children, found.url, found.title);
                            free(text);
                            free_element_list(ref);
                        }
                      }

# "[text]" or "[text][]".  A known reference yields a link; an unknown one is
# rebuilt as a LIST of "[", text, "]" and the captured trailing text.
ReferenceLinkSingle = text:Label < ( Spnl "[]" )? >
                      { link found;
                        if (!find_reference(&found, text->children)) {
                            element *tail = cons(mk_str("]"), mk_str(yytext));
                            tail = cons(text, tail);
                            $$ = mk_element(LIST);
                            $$->children = cons(mk_str("["), tail);
                        } else {
                            $$ = mk_link(text->children, found.url, found.title);
                            free(text);
                        }
                      }
555
# "[label](source title)".  The temporary source and title elements and the
# label's list node are released after mk_link has been called.
ExplicitLink = lbl:Label '(' Sp src:Source Spnl ttl:Title Sp ')'
               { $$ = mk_link(lbl->children, src->contents.str, ttl->contents.str);
                 free_element(src);
                 free_element(ttl);
                 free(lbl); }

# The link target, with or without surrounding angle brackets.
Source = ( '<' < SourceContents > '>'
         | < SourceContents > )
         { $$ = mk_str(yytext); }

# Non-space characters; parentheses are accepted only as balanced pairs.
SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+
                 | '(' SourceContents ')' )*

# A quoted title, or the empty string when there is none.
Title = ( TitleSingle | TitleDouble | < "" > )
        { $$ = mk_str(yytext); }

TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\''

TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"'
573
AutoLink = AutoLinkUrl | AutoLinkEmail

# "<scheme://...>": the captured text is used as both label and target.
AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
              { $$ = mk_link(mk_str(yytext), yytext, ""); }

# "<address>" or "<mailto:address>": the label is the bare address and the
# target is the address prefixed with "mailto:".
AutoLinkEmail = '<' ( "mailto:" )? < [-A-Za-z0-9+_./!%~$]+ '@' ( !Newline !'>' . )+ > '>'
                { /* sizeof "mailto:" counts the seven characters plus the NUL */
                  char *target = malloc(strlen(yytext) + sizeof "mailto:");
                  sprintf(target, "mailto:%s", yytext);
                  $$ = mk_link(mk_str(yytext), target, "");
                  free(target); }
585
# A reference definition, "[label]: source title", followed by at least one
# blank line.  It is built as a link and then re-tagged REFERENCE.
Reference = NonindentSpace !"[]" lbl:Label ':' Spnl src:RefSrc ttl:RefTitle BlankLine+
            { $$ = mk_link(lbl->children, src->contents.str, ttl->contents.str);
              $$->key = REFERENCE;
              free_element(src);
              free_element(ttl);
              free(lbl); }

# Bracketed inline content.  With EXT_NOTES enabled the label may not start
# with '^'.
Label = '[' ( !'^' &{ extension(EXT_NOTES) }
            | &. &{ !extension(EXT_NOTES) } )
        inl:StartList
        ( !']' Inline { inl = cons($$, inl); } )*
        ']'
        { $$ = mk_list(LIST, inl); }
598
# The target of a reference definition: a run of non-space characters.
RefSrc = < Nonspacechar+ >
         { $$ = mk_str(yytext);
           $$->key = HTML; }

# The title of a reference definition, or the empty string when there is none.
RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
           { $$ = mk_str(yytext); }

EmptyTitle = < "" >

RefTitleSingle = Spnl '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''

RefTitleDouble = Spnl '"' < ( !('"' Sp Newline | Newline) . )* > '"'

RefTitleParens = Spnl '(' < ( !(')' Sp Newline | Newline) . )* > ')'

# Collects every reference definition, skipping all other blocks, and stores
# them in source order in `references`.
References = refs:StartList
             ( r:Reference { refs = cons(r, refs); }
             | SkipBlock )*
             { references = reverse(refs); }
617
# A run of exactly N backticks.
Ticks1 = "`" !'`'
Ticks2 = "``" !'`'
Ticks3 = "```" !'`'
Ticks4 = "````" !'`'
Ticks5 = "`````" !'`'

# A code span opened and closed by the same number of backticks.  The content
# may hold backtick runs of other lengths and single newlines, but no blank
# line; spaces next to the delimiters are left out of the capture.
Code = ( Ticks1 Sp
         < ( ( !'`' Nonspacechar )+
           | !Ticks1 '`'+
           | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ >
         Sp Ticks1
       | Ticks2 Sp
         < ( ( !'`' Nonspacechar )+
           | !Ticks2 '`'+
           | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ >
         Sp Ticks2
       | Ticks3 Sp
         < ( ( !'`' Nonspacechar )+
           | !Ticks3 '`'+
           | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ >
         Sp Ticks3
       | Ticks4 Sp
         < ( ( !'`' Nonspacechar )+
           | !Ticks4 '`'+
           | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ >
         Sp Ticks4
       | Ticks5 Sp
         < ( ( !'`' Nonspacechar )+
           | !Ticks5 '`'+
           | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ >
         Sp Ticks5
       )
       { $$ = mk_str(yytext);
         $$->key = CODE; }
631
# Inline raw HTML: a comment, a script element or a single tag.  With
# EXT_FILTER_HTML it becomes an empty LIST; otherwise the captured text is
# kept as an HTML string.
RawHtml = < ( HtmlComment | HtmlBlockScript | HtmlTag ) >
          { if (!extension(EXT_FILTER_HTML)) {
                $$ = mk_str(yytext);
                $$->key = HTML;
            } else {
                $$ = mk_list(LIST, NULL);
            } }
640
# A line holding nothing but spaces and tabs.
BlankLine = Sp Newline

# Pieces of HTML syntax.
Quoted = '"' ( !'"' . )* '"'
       | '\'' ( !'\'' . )* '\''
HtmlAttribute = ( AlphanumericAscii | '-' )+ Spnl
                ( '=' Spnl ( Quoted | ( !'>' Nonspacechar )+ ) )? Spnl
HtmlComment = "<!--" ( !"-->" . )* "-->"
HtmlTag = '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>'

# Character-level building blocks.
Eof = !.
Spacechar = ' ' | '\t'
Nonspacechar = !Spacechar !Newline .
Newline = '\n' | '\r' '\n'?
Sp = Spacechar*
# Optional spaces that may contain at most one newline.
Spnl = Sp ( Newline Sp )?
SpecialChar = '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '\'' | '"' | ExtendedSpecialChar
NormalChar = !( SpecialChar | Spacechar | Newline ) .
655Alphanumeric = [0-9A-Za-z] | '\200' | '\201' | '\202' | '\203' | '\204' | '\205' | '\206' | '\207' | '\210' | '\211' | '\212' | '\213' | '\214' | '\215' | '\216' | '\217' | '\220' | '\221' | '\222' | '\223' | '\224' | '\225' | '\226' | '\227' | '\230' | '\231' | '\232' | '\233' | '\234' | '\235' | '\236' | '\237' | '\240' | '\241' | '\242' | '\243' | '\244' | '\245' | '\246' | '\247' | '\250' | '\251' | '\252' | '\253' | '\254' | '\255' | '\256' | '\257' | '\260' | '\261' | '\262' | '\263' | '\264' | '\265' | '\266' | '\267' | '\270' | '\271' | '\272' | '\273' | '\274' | '\275' | '\276' | '\277' | '\300' | '\301' | '\302' | '\303' | '\304' | '\305' | '\306' | '\307' | '\310' | '\311' | '\312' | '\313' | '\314' | '\315' | '\316' | '\317' | '\320' | '\321' | '\322' | '\323' | '\324' | '\325' | '\326' | '\327' | '\330' | '\331' | '\332' | '\333' | '\334' | '\335' | '\336' | '\337' | '\340' | '\341' | '\342' | '\343' | '\344' | '\345' | '\346' | '\347' | '\350' | '\351' | '\352' | '\353' | '\354' | '\355' | '\356' | '\357' | '\360' | '\361' | '\362' | '\363' | '\364' | '\365' | '\366' | '\367' | '\370' | '\371' | '\372' | '\373' | '\374' | '\375' | '\376' | '\377'
656AlphanumericAscii = [A-Za-z0-9]
657Digit = [0-9]
658BOM = "\357\273\277"
659
# Character references.  Each rule captures its complete text, from '&'
# through the closing ';', so that the action of a referencing rule can read
# it from yytext.  DecEntity used to carry a stray '>' before the ';'; it now
# has one balanced capture like the other two.
HexEntity =     < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
DecEntity =     < '&' '#' [0-9]+ ';' >
CharEntity =    < '&' [A-Za-z0-9]+ ';' >
663
# Zero to three spaces.
NonindentSpace = "   " | "  " | " " | ""
# One tab or four spaces.
Indent = "\t" | "    "
IndentedLine = Indent Line
OptionallyIndentedLine = Indent? Line

# StartList starts a list data structure that can be added to with cons.
# It only succeeds when at least one more character of input is available.
StartList = &.
            { $$ = NULL; }

# A raw line turned into a string element.
Line = RawLine
       { $$ = mk_str(yytext); }

# Text up to and including a newline, or all remaining text at end of input.
RawLine = ( < ( !'\r' !'\n' . )* Newline >
          | < .+ > Eof )

# Input passed over without building elements.
SkipBlock = HtmlBlock
          | ( !'#' !SetextBottom1 !SetextBottom2 !BlankLine RawLine )+ BlankLine*
          | BlankLine+
          | RawLine
681
682# Syntax extensions
683
# Characters that count as special only while an extension is enabled.
ExtendedSpecialChar = &{ extension(EXT_SMART) } ( '.' | '-' | '\'' | '"' )
                    | &{ extension(EXT_NOTES) } ( '^' )

# Smart punctuation, tried only with EXT_SMART enabled.
Smart = &{ extension(EXT_SMART) }
        ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )

Apostrophe = '\''
             { $$ = mk_element(APOSTROPHE); }

Ellipsis = ( "..." | ". . ." )
           { $$ = mk_element(ELLIPSIS); }

Dash = EmDash | EnDash

# A single hyphen directly before a digit.
EnDash = '-' &Digit
         { $$ = mk_element(ENDASH); }

# Two or three hyphens.
EmDash = ( "---" | "--" )
         { $$ = mk_element(EMDASH); }

# An opening single quote may not be followed by whitespace; a closing one may
# not be followed by an alphanumeric.
SingleQuoteStart = '\'' !( Spacechar | Newline )

SingleQuoteEnd = '\'' !Alphanumeric

SingleQuoted = SingleQuoteStart
               inl:StartList
               ( !SingleQuoteEnd item:Inline { inl = cons(item, inl); } )+
               SingleQuoteEnd
               { $$ = mk_list(SINGLEQUOTED, inl); }

DoubleQuoteStart = '"'

DoubleQuoteEnd = '"'

DoubleQuoted = DoubleQuoteStart
               inl:StartList
               ( !DoubleQuoteEnd item:Inline { inl = cons(item, inl); } )+
               DoubleQuoteEnd
               { $$ = mk_list(DOUBLEQUOTED, inl); }
723
# "[^id]" with EXT_NOTES enabled.  A known id yields a NOTE element that
# takes over the children of the note found; an unknown id is emitted as the
# literal text "[^id]".
NoteReference = &{ extension(EXT_NOTES) }
                id:RawNoteReference
                { element *note;
                  if (!find_note(&note, id->contents.str)) {
                      /* room for the two-character prefix, the closing bracket and the NUL */
                      char *literal = malloc(strlen(id->contents.str) + 4);
                      sprintf(literal, "[^%s]", id->contents.str);
                      $$ = mk_str(literal);
                      free(literal);
                  } else {
                      $$ = mk_element(NOTE);
                      assert(note->children != NULL);
                      $$->children = note->children;
                      $$->contents.str = 0;
                  }
                }

# Captures the id between "[^" and "]"; it may not contain a newline.
RawNoteReference = "[^" < ( !Newline !']' . )+ > ']'
                   { $$ = mk_str(yytext); }
743
# A note definition, "[^id]: text", with any number of indented continuation
# blocks.  The id is copied into contents.str of the NOTE list.
Note = &{ extension(EXT_NOTES) }
       NonindentSpace id:RawNoteReference ':' Sp
       body:StartList
       ( RawNoteBlock { body = cons($$, body); } )
       ( &Indent RawNoteBlock { body = cons($$, body); } )*
       { $$ = mk_list(NOTE, body);
         $$->contents.str = strdup(id->contents.str); }

# An inline note, "^[text]", which carries no id.
InlineNote = &{ extension(EXT_NOTES) }
             "^["
             inl:StartList
             ( !']' Inline { inl = cons($$, inl); } )+
             ']'
             { $$ = mk_list(NOTE, inl);
               $$->contents.str = 0; }

# Collects every note definition, skipping all other blocks, and stores them
# in source order in `notes`.
Notes = acc:StartList
        ( n:Note { acc = cons(n, acc); }
        | SkipBlock )*
        { notes = reverse(acc); }

# One or more non-blank lines plus the blank lines after them, joined into a
# single RAW string.
RawNoteBlock = lines:StartList
               ( !BlankLine OptionallyIndentedLine { lines = cons($$, lines); } )+
               ( < BlankLine* > { lines = cons(mk_str(yytext), lines); } )
               { $$ = mk_str_from_list(lines, true);
                 $$->key = RAW; }
771
772%%
773
774
775