%{
/**********************************************************************

  markdown_parser.leg - markdown parser in C using a PEG grammar.
  (c) 2008 John MacFarlane (jgm at berkeley dot edu).

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License or the MIT
  license.  See LICENSE for details.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

 ***********************************************************************/

#include <stdbool.h>
#include <assert.h>
#include "markdown_peg.h"
#include "utility_functions.h"



/**********************************************************************

  Definitions for leg parser generator.
  YY_INPUT is the function the parser calls to get new input.
  We take all new input from (static) charbuf.

  YY_INPUT hands the parser one byte per call and reports 0 bytes once
  charbuf is NULL or has reached its terminating NUL.  The byte is read
  as unsigned char: read as plain (possibly signed) char, the byte 0xFF
  would sign-extend to -1, compare equal to EOF, and silently end the
  input in the middle of the document.

 ***********************************************************************/



# define YYSTYPE element *
#ifdef __DEBUG__
# define YY_DEBUG 1
#endif

#define YY_INPUT(buf, result, max_size)                      \
{                                                            \
    int yyc;                                                 \
    if (charbuf && *charbuf != '\0') {                       \
        yyc= (unsigned char) *charbuf++;                     \
    } else {                                                 \
        yyc= EOF;                                            \
    }                                                        \
    result= (EOF == yyc) ? 0 : (*(buf)= (char) yyc, 1);      \
}

#define YY_RULE(T) T


/**********************************************************************

  PEG grammar and parser actions for markdown syntax.

 ***********************************************************************/

%}

# Top-level rule: an optional byte-order mark followed by any number of
# blocks.  Blocks are consed onto the front of the list, so the list is
# reversed before being stored in parse_result.
Doc =       BOM? a:StartList ( Block { a = cons($$, a); } )*
            { parse_result = reverse(a); }

# A block is any amount of leading blank lines followed by the first
# alternative that matches.  Order matters (PEG ordered choice): Para and
# Plain are the fallbacks and must stay last.
Block =     BlankLine*
            ( BlockQuote
            | Verbatim
            | Note
            | Reference
            | HorizontalRule
            | Heading
            | OrderedList
            | BulletList
            | HtmlBlock
            | StyleBlock
            | Para
            | Plain )

# Para requires at least one trailing blank line; Plain is the same inline
# content without that requirement.
Para =      NonindentSpace a:Inlines BlankLine+
            { $$ = a; $$->key = PARA; }

Plain =     a:Inlines
            { $$ = a; $$->key = PLAIN; }

# ATX headings ("# Title #").  AtxInline stops before an optional closing
# run of '#' at the end of the line.
AtxInline = !Newline !(Sp? '#'* Sp Newline) Inline

# The heading level is derived from the number of '#' characters matched:
# one '#' gives H1, two give H1 + 1, and so on.
AtxStart =  < ( "######" | "#####" | "####" | "###" | "##" | "#" ) >
            { $$ = mk_element(H1 + (strlen(yytext) - 1)); }

# The AtxStart element only carries the heading key; it is freed once the
# key has been copied into the heading list.
AtxHeading = s:AtxStart Sp? a:StartList ( AtxInline { a = cons($$, a); } )+ (Sp? '#'* Sp)?  Newline
            { $$ = mk_list(s->key, a);
              free(s); }

# Setext headings: a text line underlined with '=' (H1) or '-' (H2).
SetextHeading = SetextHeading1 | SetextHeading2

SetextBottom1 = '='+ Newline

SetextBottom2 = '-'+ Newline

# The lookahead checks for the underline before the title line is parsed
# as inlines.
SetextHeading1 =  &(RawLine SetextBottom1)
                  a:StartList ( !Endline Inline { a = cons($$, a); } )+ Sp? Newline
                  SetextBottom1 { $$ = mk_list(H1, a); }

SetextHeading2 =  &(RawLine SetextBottom2)
                  a:StartList ( !Endline Inline { a = cons($$, a); } )+ Sp? Newline
                  SetextBottom2 { $$ = mk_list(H2, a); }

Heading = SetextHeading | AtxHeading

# Block quotes.  The quoted text is collected as a single RAW string
# element and attached as the only child of the BLOCKQUOTE element.
BlockQuote = a:BlockQuoteRaw
             { $$ = mk_element(BLOCKQUOTE);
               $$->children = a;
             }

# One or more groups of: a line starting with '>' (one optional space
# after it is dropped), following non-blank lines that do not start with
# '>', then any blank lines (each recorded as "\n").
BlockQuoteRaw =  a:StartList
                 (( '>' ' '? Line { a = cons($$, a); } )
                  ( !'>' !BlankLine Line { a = cons($$, a); } )*
                  ( BlankLine { a = cons(mk_str("\n"), a); } )*
                 )+
                 { $$ = mk_str_from_list(a, true);
                   $$->key = RAW;
                 }

# Verbatim (indented code) blocks.
NonblankIndentedLine = !BlankLine IndentedLine

# A chunk is optional blank lines (each kept as "\n") followed by at least
# one indented non-blank line.
VerbatimChunk = a:StartList
                ( BlankLine { a = cons(mk_str("\n"), a); } )*
                ( NonblankIndentedLine { a = cons($$, a); } )+
                { $$ = mk_str_from_list(a, false); }

Verbatim =     a:StartList ( VerbatimChunk { a = cons($$, a); } )+
               { $$ = mk_str_from_list(a, false);
                 $$->key = VERBATIM; }

# Horizontal rule: three or more of the same character out of '*', '-',
# '_', optionally separated by spaces, followed by at least one blank line.
HorizontalRule = NonindentSpace
                 ( '*' Sp '*' Sp '*' (Sp '*')*
                 | '-' Sp '-' Sp '-' (Sp '-')*
                 | '_' Sp '_' Sp '_' (Sp '_')*)
                 Sp Newline BlankLine+
                 { $$ = mk_element(HRULE); }

# Lists.  A bullet must not be the start of a horizontal rule.
Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+

# BulletList and OrderedList share ListTight / ListLoose and only rewrite
# the key of the resulting element.
BulletList = &Bullet (ListTight | ListLoose)
             { $$->key = BULLETLIST; }

# Tight list: items are not separated by blank lines, and the list must
# not be directly followed by another list marker.
ListTight = a:StartList
            ( ListItemTight { a = cons($$, a); } )+
            BlankLine* !(Bullet | Enumerator)
            { $$ = mk_list(LIST, a); }

# Loose list: items may be separated by blank lines.  Each item's raw text
# gets "\n\n" appended.  The string is grown through a temporary pointer so
# that a failed realloc neither leaks the original buffer nor leaves a NULL
# behind for strcat; in that case the item keeps its text unchanged.
ListLoose = a:StartList
            ( b:ListItem BlankLine*
              { element *li = b->children;
                char *grown = realloc(li->contents.str, strlen(li->contents.str) + 3);
                if (grown != NULL) {
                    strcat(grown, "\n\n");  /* In loose list, \n\n added to end of each element */
                    li->contents.str = grown;
                }
                a = cons(b, a);
              } )+
            { $$ = mk_list(LIST, a); }

# A list item is a marker, a first block, and any number of continuation
# blocks.  The text is stored as one RAW child of a LISTITEM element.
ListItem =  ( Bullet | Enumerator )
            a:StartList
            ListBlock { a = cons($$, a); }
            ( ListContinuationBlock { a = cons($$, a); } )*
            { element *raw;
              raw = mk_str_from_list(a, false);
              raw->key = RAW;
              $$ = mk_element(LISTITEM);
              $$->children = raw;
            }

# Same as ListItem, except that continuation blocks may not be preceded by
# a blank line and no continuation block may follow the item.
ListItemTight =
            ( Bullet | Enumerator )
            a:StartList
            ListBlock { a = cons($$, a); }
            ( !BlankLine
              ListContinuationBlock { a = cons($$, a); } )*
            !ListContinuationBlock
            { element *raw;
              raw = mk_str_from_list(a, false);
              raw->key = RAW;
              $$ = mk_element(LISTITEM);
              $$->children = raw;
            }

ListBlock = a:StartList
            !BlankLine Line { a = cons($$, a); }
            ( ListBlockLine { a = cons($$, a); } )*
            { $$ = mk_str_from_list(a, false); }

# The blank lines in front of a continuation block are kept verbatim; when
# there are none, the byte \001 is inserted as a block separator instead.
ListContinuationBlock = a:StartList
                        ( < BlankLine* >
                          { if (strlen(yytext) == 0)
                                   a = cons(mk_str("\001"), a); /* block separator */
                            else
                                   a = cons(mk_str(yytext), a); } )
                        ( Indent ListBlock { a = cons($$, a); } )+
                        { $$ = mk_str_from_list(a, false); }

Enumerator = NonindentSpace [0-9]+ '.' Spacechar+

OrderedList = &Enumerator (ListTight | ListLoose)
              { $$->key = ORDEREDLIST; }

# A line continues the current list block unless it is blank, starts a new
# (optionally indented) list item, or is a horizontal rule.
ListBlockLine = !BlankLine
                !( Indent? (Bullet | Enumerator) )
                !HorizontalRule
                OptionallyIndentedLine

# Parsers for different kinds of block-level HTML content.
# This is repetitive due to constraints of PEG grammar.

HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>'
HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>'
HtmlBlockAddress = HtmlBlockOpenAddress (HtmlBlockAddress | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress

HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>'
HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>'
HtmlBlockBlockquote = HtmlBlockOpenBlockquote (HtmlBlockBlockquote | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote

HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>'
HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>'
HtmlBlockCenter = HtmlBlockOpenCenter (HtmlBlockCenter | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter

HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>'
HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>'
HtmlBlockDir = HtmlBlockOpenDir (HtmlBlockDir | !HtmlBlockCloseDir .)* HtmlBlockCloseDir

HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>'
HtmlBlockCloseDiv      = '<' Spnl '/' ("div" | "DIV") Spnl '>'
HtmlBlockDiv           = HtmlBlockOpenDiv (HtmlBlockDiv | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv

# Every element below follows the same three-rule pattern:
#   HtmlBlockOpenX   - opening tag, name all lower-case or all upper-case,
#                      with optional attributes;
#   HtmlBlockCloseX  - matching closing tag;
#   HtmlBlockX       - opening tag, then nested blocks of the same element
#                      or any other character, up to the closing tag.

HtmlBlockOpenDl        = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>'
HtmlBlockCloseDl       = '<' Spnl '/' ("dl" | "DL") Spnl '>'
HtmlBlockDl            = HtmlBlockOpenDl (HtmlBlockDl | !HtmlBlockCloseDl .)* HtmlBlockCloseDl

HtmlBlockOpenFieldset  = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>'
HtmlBlockFieldset      = HtmlBlockOpenFieldset (HtmlBlockFieldset | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset

HtmlBlockOpenForm      = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>'
HtmlBlockCloseForm     = '<' Spnl '/' ("form" | "FORM") Spnl '>'
HtmlBlockForm          = HtmlBlockOpenForm (HtmlBlockForm | !HtmlBlockCloseForm .)* HtmlBlockCloseForm

# Headings h1 - h6.

HtmlBlockOpenH1        = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>'
HtmlBlockCloseH1       = '<' Spnl '/' ("h1" | "H1") Spnl '>'
HtmlBlockH1            = HtmlBlockOpenH1 (HtmlBlockH1 | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1

HtmlBlockOpenH2        = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>'
HtmlBlockCloseH2       = '<' Spnl '/' ("h2" | "H2") Spnl '>'
HtmlBlockH2            = HtmlBlockOpenH2 (HtmlBlockH2 | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2

HtmlBlockOpenH3        = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>'
HtmlBlockCloseH3       = '<' Spnl '/' ("h3" | "H3") Spnl '>'
HtmlBlockH3            = HtmlBlockOpenH3 (HtmlBlockH3 | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3

HtmlBlockOpenH4        = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>'
HtmlBlockCloseH4       = '<' Spnl '/' ("h4" | "H4") Spnl '>'
HtmlBlockH4            = HtmlBlockOpenH4 (HtmlBlockH4 | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4

HtmlBlockOpenH5        = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>'
HtmlBlockCloseH5       = '<' Spnl '/' ("h5" | "H5") Spnl '>'
HtmlBlockH5            = HtmlBlockOpenH5 (HtmlBlockH5 | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5

HtmlBlockOpenH6        = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>'
HtmlBlockCloseH6       = '<' Spnl '/' ("h6" | "H6") Spnl '>'
HtmlBlockH6            = HtmlBlockOpenH6 (HtmlBlockH6 | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6

HtmlBlockOpenMenu      = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>'
HtmlBlockCloseMenu     = '<' Spnl '/' ("menu" | "MENU") Spnl '>'
HtmlBlockMenu          = HtmlBlockOpenMenu (HtmlBlockMenu | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu

HtmlBlockOpenNoframes  = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>'
HtmlBlockNoframes      = HtmlBlockOpenNoframes (HtmlBlockNoframes | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes

HtmlBlockOpenNoscript  = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>'
HtmlBlockNoscript      = HtmlBlockOpenNoscript (HtmlBlockNoscript | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript

HtmlBlockOpenOl        = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>'
HtmlBlockCloseOl       = '<' Spnl '/' ("ol" | "OL") Spnl '>'
HtmlBlockOl            = HtmlBlockOpenOl (HtmlBlockOl | !HtmlBlockCloseOl .)* HtmlBlockCloseOl

HtmlBlockOpenP         = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>'
HtmlBlockCloseP        = '<' Spnl '/' ("p" | "P") Spnl '>'
HtmlBlockP             = HtmlBlockOpenP (HtmlBlockP | !HtmlBlockCloseP .)* HtmlBlockCloseP

HtmlBlockOpenPre       = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>'
HtmlBlockClosePre      = '<' Spnl '/' ("pre" | "PRE") Spnl '>'
HtmlBlockPre           = HtmlBlockOpenPre (HtmlBlockPre | !HtmlBlockClosePre .)* HtmlBlockClosePre

HtmlBlockOpenTable     = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>'
HtmlBlockCloseTable    = '<' Spnl '/' ("table" | "TABLE") Spnl '>'
HtmlBlockTable         = HtmlBlockOpenTable (HtmlBlockTable | !HtmlBlockCloseTable .)* HtmlBlockCloseTable

HtmlBlockOpenUl        = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>'
HtmlBlockCloseUl       = '<' Spnl '/' ("ul" | "UL") Spnl '>'
HtmlBlockUl            = HtmlBlockOpenUl (HtmlBlockUl | !HtmlBlockCloseUl .)* HtmlBlockCloseUl

HtmlBlockOpenDd        = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>'
HtmlBlockCloseDd       = '<' Spnl '/' ("dd" | "DD") Spnl '>'
HtmlBlockDd            = HtmlBlockOpenDd (HtmlBlockDd | !HtmlBlockCloseDd .)* HtmlBlockCloseDd

HtmlBlockOpenDt        = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>'
HtmlBlockCloseDt       = '<' Spnl '/' ("dt" | "DT") Spnl '>'
HtmlBlockDt            = HtmlBlockOpenDt (HtmlBlockDt | !HtmlBlockCloseDt .)* HtmlBlockCloseDt

HtmlBlockOpenFrameset  = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>'
HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>'
HtmlBlockFrameset      = HtmlBlockOpenFrameset (HtmlBlockFrameset | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset

HtmlBlockOpenLi        = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>'
HtmlBlockCloseLi       = '<' Spnl '/' ("li" | "LI") Spnl '>'
HtmlBlockLi            = HtmlBlockOpenLi (HtmlBlockLi | !HtmlBlockCloseLi .)* HtmlBlockCloseLi

# Table parts.

HtmlBlockOpenTbody     = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>'
HtmlBlockCloseTbody    = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>'
HtmlBlockTbody         = HtmlBlockOpenTbody (HtmlBlockTbody | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody

HtmlBlockOpenTd        = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>'
HtmlBlockCloseTd       = '<' Spnl '/' ("td" | "TD") Spnl '>'
HtmlBlockTd            = HtmlBlockOpenTd (HtmlBlockTd | !HtmlBlockCloseTd .)* HtmlBlockCloseTd

HtmlBlockOpenTfoot     = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>'
HtmlBlockCloseTfoot    = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>'
HtmlBlockTfoot         = HtmlBlockOpenTfoot (HtmlBlockTfoot | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot

HtmlBlockOpenTh        = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>'
HtmlBlockCloseTh       = '<' Spnl '/' ("th" | "TH") Spnl '>'
HtmlBlockTh            = HtmlBlockOpenTh (HtmlBlockTh | !HtmlBlockCloseTh .)* HtmlBlockCloseTh

HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>'
HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>'
HtmlBlockThead = HtmlBlockOpenThead (HtmlBlockThead | !HtmlBlockCloseThead .)* HtmlBlockCloseThead

HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>'
HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>'
HtmlBlockTr = HtmlBlockOpenTr (HtmlBlockTr | !HtmlBlockCloseTr .)* HtmlBlockCloseTr

# Unlike the other HTML block rules, script and head do not recurse into
# nested elements of the same name: everything up to the first closing tag
# is taken as content.
HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>'
HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>'
HtmlBlockScript = HtmlBlockOpenScript (!HtmlBlockCloseScript .)* HtmlBlockCloseScript

HtmlBlockOpenHead = '<' Spnl ("head" | "HEAD") Spnl HtmlAttribute* '>'
HtmlBlockCloseHead = '<' Spnl '/' ("head" | "HEAD") Spnl '>'
HtmlBlockHead = HtmlBlockOpenHead (!HtmlBlockCloseHead .)* HtmlBlockCloseHead

HtmlBlockInTags = HtmlBlockAddress
                | HtmlBlockBlockquote
                | HtmlBlockCenter
                | HtmlBlockDir
                | HtmlBlockDiv
                | HtmlBlockDl
                | HtmlBlockFieldset
                | HtmlBlockForm
                | HtmlBlockH1
                | HtmlBlockH2
                | HtmlBlockH3
                | HtmlBlockH4
                | HtmlBlockH5
                | HtmlBlockH6
                | HtmlBlockMenu
                | HtmlBlockNoframes
                | HtmlBlockNoscript
                | HtmlBlockOl
                | HtmlBlockP
                | HtmlBlockPre
                | HtmlBlockTable
                | HtmlBlockUl
                | HtmlBlockDd
                | HtmlBlockDt
                | HtmlBlockFrameset
                | HtmlBlockLi
                | HtmlBlockTbody
                | HtmlBlockTd
                | HtmlBlockTfoot
                | HtmlBlockTh
                | HtmlBlockThead
                | HtmlBlockTr
                | HtmlBlockScript
                | HtmlBlockHead

# A block of raw HTML followed by at least one blank line.  With
# EXT_FILTER_HTML enabled the block is replaced by an empty LIST element;
# otherwise the matched text is kept as an HTMLBLOCK string.
HtmlBlock = < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) >
            BlankLine+
            {   if (extension(EXT_FILTER_HTML)) {
                    $$ = mk_list(LIST, NULL);
                } else {
                    $$ = mk_str(yytext);
                    $$->key = HTMLBLOCK;
                }
            }

HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>'

HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" |
                "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table" |
                "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" |
                "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" |
                "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE" | "TABLE" |
                "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT"

# <style> blocks are handled separately from other HTML so that they can be
# filtered independently through EXT_FILTER_STYLES.
StyleOpen =     '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>'
StyleClose =    '<' Spnl '/' ("style" | "STYLE") Spnl '>'
InStyleTags =   StyleOpen (!StyleClose .)* StyleClose
StyleBlock =    < InStyleTags >
                BlankLine*
                {   if (extension(EXT_FILTER_STYLES)) {
                        $$ = mk_list(LIST, NULL);
                    } else {
                        $$ = mk_str(yytext);
                        $$->key = HTMLBLOCK;
                    }
                }

# A run of inline elements.  An Endline is only kept when another inline
# follows it; one trailing Endline is consumed and discarded.
Inlines  =  a:StartList ( !Endline Inline { a = cons($$, a); }
                        | c:Endline &Inline { a = cons(c, a); } )+ Endline?
            { $$ = mk_list(LIST, a); }

# Ordered choice: Symbol is the catch-all for a single special character
# and must stay last.
Inline  = Str
        | Endline
        | UlOrStarLine
        | Space
        | Strong
        | Emph
        | Image
        | Link
        | NoteReference
        | InlineNote
        | Code
        | RawHtml
        | Entity
        | EscapedChar
        | Smart
        | Symbol

# Any run of spaces and tabs becomes a single SPACE element.
Space = Spacechar+
        { $$ = mk_str(" ");
          $$->key = SPACE; }

# A single chunk is returned as is; several chunks are wrapped in a LIST.
Str = a:StartList < NormalChar+ > { a = cons(mk_str(yytext), a); }
      ( StrChunk { a = cons($$, a); } )*
      { if (a->next == NULL) { $$ = a; } else { $$ = mk_list(LIST, a); } }

# Underscores inside a word (followed by an alphanumeric) stay part of the
# string instead of starting emphasis.
StrChunk = < (NormalChar | '_'+ &Alphanumeric)+ > { $$ = mk_str(yytext); } |
           AposChunk

AposChunk = &{ extension(EXT_SMART) } '\'' &Alphanumeric
      { $$ = mk_element(APOSTROPHE); }

EscapedChar =   '\\' !Newline < [-\\`|*_{}[\]()#+.!><] >
                { $$ = mk_str(yytext); }

Entity =    ( HexEntity | DecEntity | CharEntity )
            { $$ = mk_str(yytext); $$->key = HTML; }

Endline =   LineBreak | TerminalEndline | NormalEndline

# A newline inside a paragraph: not followed by a blank line, a block
# quote marker, an ATX heading, or a setext underline two lines down.
NormalEndline =   Sp Newline !BlankLine !'>' !AtxStart
                  !(Line ('='+ | '-'+) Newline)
                  { $$ = mk_str("\n");
                    $$->key = SPACE; }

TerminalEndline = Sp Newline Eof
                  { $$ = NULL; }

# Hard line break: two spaces before the end of the line.  The literal is
# exactly two spaces; it must not be collapsed to one, or every line ending
# in a single space would turn into a line break.
LineBreak = "  " NormalEndline
            { $$ = mk_element(LINEBREAK); }

Symbol =    < SpecialChar >
            { $$ = mk_str(yytext); }

# This keeps the parser from getting bogged down on long strings of '*' or '_',
# or strings of '*' or '_' with space on each side:
UlOrStarLine =  (UlLine | StarLine) { $$ = mk_str(yytext); }
StarLine =      < "****" '*'* > | < Spacechar '*'+ &Spacechar >
UlLine   =      < "____" '_'* > | < Spacechar '_'+ &Spacechar >

Emph =      EmphStar | EmphUl

Whitespace = Spacechar | Newline

# Emphasis may contain strong emphasis using the same delimiter character.
EmphStar =  '*' !Whitespace
            a:StartList
            ( !'*' b:Inline { a = cons(b, a); }
            | b:StrongStar  { a = cons(b, a); }
            )+
            '*'
            { $$ = mk_list(EMPH, a); }

EmphUl =    '_' !Whitespace
            a:StartList
            ( !'_' b:Inline { a = cons(b, a); }
            | b:StrongUl    { a = cons(b, a); }
            )+
            '_'
            { $$ = mk_list(EMPH, a); }

Strong = StrongStar | StrongUl

StrongStar =    "**" !Whitespace
                a:StartList
                ( !"**" b:Inline { a = cons(b, a); })+
                "**"
                { $$ = mk_list(STRONG, a); }

StrongUl   =    "__" !Whitespace
                a:StartList
                ( !"__" b:Inline { a = cons(b, a); })+
                "__"
                { $$ = mk_list(STRONG, a); }

# An image is a link preceded by '!'.  If the link could not be resolved
# (the result is not a LINK element), the '!' is put back as literal text
# in front of the children.
Image = '!' ( ExplicitLink | ReferenceLink )
        { if ($$->key == LINK) {
              $$->key = IMAGE;
          } else {
              element *result;
              result = $$;
              $$->children = cons(mk_str("!"), result->children);
          } }

Link =  ExplicitLink | ReferenceLink | AutoLink

ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle

# [text][label].  When the label is not a known reference, the original
# source text is rebuilt as a LIST of literal pieces, including whatever
# whitespace was captured between the two labels.
ReferenceLinkDouble =  a:Label < Spnl > !"[]" b:Label
                       { link match;
                         if (find_reference(&match, b->children)) {
                             $$ = mk_link(a->children, match.url, match.title);
                             free(a);
                             free_element_list(b);
                         } else {
                             element *result;
                             result = mk_element(LIST);
                             result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), cons(mk_str(yytext),
                                                 cons(mk_str("["), cons(b, mk_str("]")))))));
                             $$ = result;
                         }
                       }

# [label] or [label][].  Falls back to literal text like the rule above.
ReferenceLinkSingle =  a:Label < (Spnl "[]")? >
                       { link match;
                         if (find_reference(&match, a->children)) {
                             $$ = mk_link(a->children, match.url, match.title);
                             free(a);
                         }
                         else {
                             element *result;
                             result = mk_element(LIST);
                             result->children = cons(mk_str("["), cons(a, cons(mk_str("]"), mk_str(yytext))));
                             $$ = result;
                         }
                       }

# [text](url "title").  The temporary Source and Title elements and the
# Label wrapper are released after the link has been built.
ExplicitLink =  l:Label '(' Sp s:Source Spnl t:Title Sp ')'
                { $$ = mk_link(l->children, s->contents.str, t->contents.str);
                  free_element(s);
                  free_element(t);
                  free(l); }

Source  = ( '<' < SourceContents > '>' | < SourceContents > )
          { $$ = mk_str(yytext); }

# Balanced parentheses are allowed inside a link source.
SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')*

Title = ( TitleSingle | TitleDouble | < "" > )
        { $$ = mk_str(yytext); }

TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\''

TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"'

AutoLink = AutoLinkUrl | AutoLinkEmail

AutoLinkUrl =   '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>'
                {   $$ = mk_link(mk_str(yytext), yytext, ""); }

# The buffer holds "mailto:" (7 bytes), the address, and the terminating
# NUL, hence strlen + 8.
AutoLinkEmail = '<' ( "mailto:" )? < [-A-Za-z0-9+_./!%~$]+ '@' ( !Newline !'>' . )+ > '>'
                {   char *mailto = malloc(strlen(yytext) + 8);
                    sprintf(mailto, "mailto:%s", yytext);
                    $$ = mk_link(mk_str(yytext), mailto, "");
                    free(mailto);
                }

# Link reference definition: [label]: url "title"
Reference = NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc t:RefTitle BlankLine+
            { $$ = mk_link(l->children, s->contents.str, t->contents.str);
              free_element(s);
              free_element(t);
              free(l);
              $$->key = REFERENCE; }

# With EXT_NOTES enabled a label may not start with '^', so that note
# references are not mistaken for link labels.
Label = '[' ( !'^' &{ extension(EXT_NOTES) } | &. &{ !extension(EXT_NOTES) } )
        a:StartList
        ( !']' Inline { a = cons($$, a); } )*
        ']'
        { $$ = mk_list(LIST, a); }

RefSrc = < Nonspacechar+ >
         { $$ = mk_str(yytext);
           $$->key = HTML; }

RefTitle =  ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle )
            { $$ = mk_str(yytext); }

EmptyTitle = < "" >

RefTitleSingle = Spnl '\'' < ( !( '\'' Sp Newline | Newline ) . )* > '\''

RefTitleDouble = Spnl '"' < ( !('"' Sp Newline | Newline) . )* > '"'

RefTitleParens = Spnl '(' < ( !(')' Sp Newline | Newline) . )* > ')'

# Collects every reference definition in the input, skipping all other
# blocks, and stores the list in references.
References = a:StartList
             ( b:Reference { a = cons(b, a); } | SkipBlock )*
             { references = reverse(a); }

# A run of exactly N backticks (not followed by another backtick).
Ticks1 = "`" !'`'
Ticks2 = "``" !'`'
Ticks3 = "```" !'`'
Ticks4 = "````" !'`'
Ticks5 = "`````" !'`'

# Inline code delimited by one to five backticks.  The content may hold
# backtick runs of a different length and single newlines, but no blank
# lines.
Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1
       | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2
       | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3
       | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4
       | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5
       )
       { $$ = mk_str(yytext); $$->key = CODE; }

RawHtml =   < (HtmlComment | HtmlBlockScript | HtmlTag) >
            {   if (extension(EXT_FILTER_HTML)) {
                    $$ = mk_list(LIST, NULL);
                } else {
                    $$ = mk_str(yytext);
                    $$->key = HTML;
                }
            }

# Lexical building blocks.

BlankLine =     Sp Newline

Quoted =        '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\''
HtmlAttribute = (AlphanumericAscii | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl
HtmlComment =   "<!--" (!"-->" .)* "-->"
HtmlTag =       '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>'
Eof =           !.
Spacechar =     ' ' | '\t'
Nonspacechar =  !Spacechar !Newline .
Newline =       '\n' | '\r' '\n'?
Sp =            Spacechar*
Spnl =          Sp (Newline Sp)?
SpecialChar =   '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '\'' | '"' | ExtendedSpecialChar
NormalChar =    !( SpecialChar | Spacechar | Newline ) .

# ASCII letters and digits plus every byte from \200 to \377, so that
# bytes outside the ASCII range count as word characters.
Alphanumeric = [0-9A-Za-z]
             | '\200' | '\201' | '\202' | '\203' | '\204' | '\205' | '\206' | '\207'
             | '\210' | '\211' | '\212' | '\213' | '\214' | '\215' | '\216' | '\217'
             | '\220' | '\221' | '\222' | '\223' | '\224' | '\225' | '\226' | '\227'
             | '\230' | '\231' | '\232' | '\233' | '\234' | '\235' | '\236' | '\237'
             | '\240' | '\241' | '\242' | '\243' | '\244' | '\245' | '\246' | '\247'
             | '\250' | '\251' | '\252' | '\253' | '\254' | '\255' | '\256' | '\257'
             | '\260' | '\261' | '\262' | '\263' | '\264' | '\265' | '\266' | '\267'
             | '\270' | '\271' | '\272' | '\273' | '\274' | '\275' | '\276' | '\277'
             | '\300' | '\301' | '\302' | '\303' | '\304' | '\305' | '\306' | '\307'
             | '\310' | '\311' | '\312' | '\313' | '\314' | '\315' | '\316' | '\317'
             | '\320' | '\321' | '\322' | '\323' | '\324' | '\325' | '\326' | '\327'
             | '\330' | '\331' | '\332' | '\333' | '\334' | '\335' | '\336' | '\337'
             | '\340' | '\341' | '\342' | '\343' | '\344' | '\345' | '\346' | '\347'
             | '\350' | '\351' | '\352' | '\353' | '\354' | '\355' | '\356' | '\357'
             | '\360' | '\361' | '\362' | '\363' | '\364' | '\365' | '\366' | '\367'
             | '\370' | '\371' | '\372' | '\373' | '\374' | '\375' | '\376' | '\377'
AlphanumericAscii = [A-Za-z0-9]
Digit = [0-9]
BOM = "\357\273\277"

# Each entity rule captures the complete entity text, from '&' through the
# terminating ';', with one balanced pair of capture markers.
HexEntity =     < '&' '#' [Xx] [0-9a-fA-F]+ ';' >
DecEntity =     < '&' '#' [0-9]+ ';' >
CharEntity =    < '&' [A-Za-z0-9]+ ';' >

# The string literals in the next two rules are whitespace-sensitive.
# NonindentSpace matches at most three leading spaces, trying the longest
# alternative first (three, two, one, none).  Indent is one tab or exactly
# four spaces.
NonindentSpace =    "   " | "  " | " " | ""
Indent =            "\t" | "    "
IndentedLine =      Indent Line
OptionallyIndentedLine = Indent? Line

# StartList starts a list data structure that can be added to with cons:
StartList = &.
            { $$ = NULL; }

Line =  RawLine
        { $$ = mk_str(yytext); }
# A line including its terminating newline, or the non-empty remainder of
# the input when the last line has no newline.
RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof )

# Used by References and Notes to step over everything that is not a
# definition.
SkipBlock = HtmlBlock
          | ( !'#' !SetextBottom1 !SetextBottom2 !BlankLine RawLine )+ BlankLine*
          | BlankLine+
          | RawLine

# Syntax extensions

ExtendedSpecialChar = &{ extension(EXT_SMART) } ('.' | '-' | '\'' | '"')
                    | &{ extension(EXT_NOTES) } ( '^' )

# Smart punctuation, only active with EXT_SMART.
Smart = &{ extension(EXT_SMART) }
        ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | Apostrophe )

Apostrophe = '\''
             { $$ = mk_element(APOSTROPHE); }

Ellipsis = ("..." | ". . .")
           { $$ = mk_element(ELLIPSIS); }

Dash = EmDash | EnDash

# A single '-' directly before a digit.
EnDash = '-' &Digit
         { $$ = mk_element(ENDASH); }

EmDash = ("---" | "--")
         { $$ = mk_element(EMDASH); }

SingleQuoteStart = '\'' !(Spacechar | Newline)

SingleQuoteEnd = '\'' !Alphanumeric

SingleQuoted = SingleQuoteStart
               a:StartList
               ( !SingleQuoteEnd b:Inline { a = cons(b, a); } )+
               SingleQuoteEnd
               { $$ = mk_list(SINGLEQUOTED, a); }

DoubleQuoteStart = '"'

DoubleQuoteEnd = '"'

DoubleQuoted =  DoubleQuoteStart
                a:StartList
                ( !DoubleQuoteEnd b:Inline { a = cons(b, a); } )+
                DoubleQuoteEnd
                { $$ = mk_list(DOUBLEQUOTED, a); }

# Footnote reference [^label], only active with EXT_NOTES.  A known label
# yields a NOTE element that shares the children of the matching note
# definition; an unknown label is put back as literal text.  The buffer
# holds "[^", the label, "]" and the terminating NUL, hence strlen + 4.
NoteReference = &{ extension(EXT_NOTES) }
                ref:RawNoteReference
                { element *match;
                  if (find_note(&match, ref->contents.str)) {
                      $$ = mk_element(NOTE);
                      assert(match->children != NULL);
                      $$->children = match->children;
                      $$->contents.str = 0;
                  } else {
                      char *s;
                      s = malloc(strlen(ref->contents.str) + 4);
                      sprintf(s, "[^%s]", ref->contents.str);
                      $$ = mk_str(s);
                      free(s);
                  }
                }

RawNoteReference = "[^" < ( !Newline !']' . )+ > ']'
                   { $$ = mk_str(yytext); }

# Footnote definition: [^label]: text, followed by any number of indented
# continuation blocks.  The label is copied into contents.str.
Note =          &{ extension(EXT_NOTES) }
                NonindentSpace ref:RawNoteReference ':' Sp
                a:StartList
                ( RawNoteBlock { a = cons($$, a); } )
                ( &Indent RawNoteBlock { a = cons($$, a); } )*
                {   $$ = mk_list(NOTE, a);
                    $$->contents.str = strdup(ref->contents.str);
                }

# Inline footnote: ^[text]
InlineNote =    &{ extension(EXT_NOTES) }
                "^["
                a:StartList
                ( !']' Inline { a = cons($$, a); } )+
                ']'
                { $$ = mk_list(NOTE, a);
                  $$->contents.str = 0; }

# Collects every note definition in the input, skipping all other blocks,
# and stores the list in notes.
Notes =         a:StartList
                ( b:Note { a = cons(b, a); } | SkipBlock )*
                { notes = reverse(a); }

# One or more non-blank lines plus the blank lines that follow them,
# returned as a single RAW string.
RawNoteBlock =  a:StartList
                    ( !BlankLine OptionallyIndentedLine { a = cons($$, a); } )+
                ( < BlankLine* > { a = cons(mk_str(yytext), a); } )
                {   $$ = mk_str_from_list(a, true);
                    $$->key = RAW;
                }

%%

