// Package mmark is a package for parsing and processing markdown text.
// It translates plain text with simple formatting rules into HTML or XML.
package mmark

import (
	"bytes"
	"path"
	"unicode/utf8"
)

// Version is the version string of this package.
const Version = "1.3.6"

var test = false

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// The first iota value is discarded, so the first named extension has the
// value 1 << 1.
const (
	_ = 1 << iota
	EXTENSION_ABBREVIATIONS // Render abbreviations `*[HTML]: Hyper Text Markup Language`
	EXTENSION_AUTO_HEADER_IDS // Create the header ID from the text
	EXTENSION_AUTOLINK // Detect embedded URLs that are not explicitly marked
	EXTENSION_CITATION // Support citations via the link syntax
	EXTENSION_EXAMPLE_LISTS // Render '(@tag) ' example lists
	EXTENSION_FENCED_CODE // Render fenced code blocks
	EXTENSION_FOOTNOTES // Pandoc-style footnotes
	EXTENSION_HARD_LINE_BREAK // Translate newlines into line breaks
	EXTENSION_HEADER_IDS // Specify header IDs with {#id}
	EXTENSION_INCLUDE // Include file with {{ syntax
	EXTENSION_INLINE_ATTR // Detect CommonMark's IAL syntax
	EXTENSION_LAX_HTML_BLOCKS // Loosen up HTML block parsing rules
	EXTENSION_MATH // Detect $$...$$ and parse as math
	EXTENSION_MATTER // Use {frontmatter} {mainmatter} {backmatter} (TODO(miek): not actually used)
	EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	EXTENSION_PARTS // Detect part headers (-#)
	EXTENSION_QUOTES // Allow A> as asides
	EXTENSION_SHORT_REF // (#id) will be a cross reference
	EXTENSION_SPACE_HEADERS // Be strict about prefix header rules
	EXTENSION_TABLES // Render tables
	EXTENSION_TITLEBLOCK_TOML // Titleblock in TOML
	EXTENSION_UNIQUE_HEADER_IDS // When detecting identical anchors add a sequence number -1, -2 etc
	EXTENSION_BACKSLASH_LINE_BREAK // Translate trailing backslashes into line breaks
	EXTENSION_RFC7328 // Parse RFC 7328 markdown. Depends on FOOTNOTES extension.
	EXTENSION_DEFINITION_LISTS // Render definition lists

	// commonHtmlFlags is a preset of HTML renderer flags (the HTML_* values
	// are declared elsewhere in the package).
	commonHtmlFlags = 0 |
		HTML_USE_SMARTYPANTS |
		HTML_SMARTYPANTS_FRACTIONS |
		HTML_SMARTYPANTS_DASHES |
		HTML_SMARTYPANTS_LATEX_DASHES

	// commonExtensions is the base preset of parser extensions.
	commonExtensions = 0 |
		EXTENSION_TABLES |
		EXTENSION_FENCED_CODE |
		EXTENSION_AUTOLINK |
		EXTENSION_SPACE_HEADERS |
		EXTENSION_HEADER_IDS |
		EXTENSION_ABBREVIATIONS |
		EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK | // CommonMark
		EXTENSION_BACKSLASH_LINE_BREAK | // CommonMark
		EXTENSION_DEFINITION_LISTS

	// commonXmlExtensions is commonExtensions plus the extensions listed
	// below. EXTENSION_DEFINITION_LISTS is already part of commonExtensions;
	// ORing it in again has no effect.
	commonXmlExtensions = commonExtensions |
		EXTENSION_UNIQUE_HEADER_IDS |
		EXTENSION_AUTO_HEADER_IDS |
		EXTENSION_INLINE_ATTR |
		EXTENSION_QUOTES |
		EXTENSION_MATTER |
		EXTENSION_CITATION |
		EXTENSION_EXAMPLE_LISTS |
		EXTENSION_SHORT_REF |
		EXTENSION_DEFINITION_LISTS
)

// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	_LINK_TYPE_NOT_AUTOLINK = iota
	_LINK_TYPE_NORMAL
	_LINK_TYPE_EMAIL
)

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	_LIST_TYPE_ORDERED = 1 << iota
	_LIST_TYPE_ORDERED_ROMAN_UPPER
	_LIST_TYPE_ORDERED_ROMAN_LOWER
	_LIST_TYPE_ORDERED_ALPHA_UPPER
	_LIST_TYPE_ORDERED_ALPHA_LOWER
	_LIST_TYPE_ORDERED_GROUP
	_LIST_TYPE_DEFINITION
	_LIST_TYPE_TERM
	_LIST_ITEM_CONTAINS_BLOCK
	_LIST_ITEM_BEGINNING_OF_LIST
	_LIST_ITEM_END_OF_LIST
	_LIST_INSIDE_LIST
	_INSIDE_FIGURE
)

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	_TABLE_ALIGNMENT_LEFT = 1 << iota
	_TABLE_ALIGNMENT_RIGHT
	// Center is encoded as both the left and the right bit set.
	_TABLE_ALIGNMENT_CENTER = (_TABLE_ALIGNMENT_LEFT | _TABLE_ALIGNMENT_RIGHT)
)

// The size of a tab stop.
const _TAB_SIZE_DEFAULT = 4

// Document divisions and special headers. Values start at 1, so the zero
// value of an int matches none of them.
const (
	_DOC_FRONT_MATTER = iota + 1 // Different divisions of the document
	_DOC_MAIN_MATTER
	_DOC_BACK_MATTER
	_ABSTRACT // Special headers, keep track if there are open
	_NOTE // Special Note headers, keep track if there are open
	_PREFACE
	_COLOPHON
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del": {},
	"div": {},
	"dl": {},
	"fieldset": {},
	"form": {},
	"h1": {},
	"h2": {},
	"h3": {},
	"h4": {},
	"h5": {},
	"h6": {},
	"iframe": {},
	"ins": {},
	"math": {},
	"noscript": {},
	"ol": {},
	"pre": {},
	"p": {},
	"script": {},
	"style": {},
	"table": {},
	"ul": {},

	// HTML5
	"article": {},
	"aside": {},
	"canvas": {},
	"figcaption": {},
	"figure": {},
	"footer": {},
	"header": {},
	"hgroup": {},
	"output": {},
	"progress": {},
	"section": {},
	"video": {},
}

// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently Html, XML2RFC v3 and XML2RFC v2 implementations are provided.
type Renderer interface {
	// Block-level callbacks
	BlockCode(out *bytes.Buffer, text []byte, lang string, caption []byte, subfigure bool, callouts bool)
	BlockQuote(out *bytes.Buffer, text []byte, attribution []byte)
	BlockHtml(out *bytes.Buffer, text []byte)
	CommentHtml(out *bytes.Buffer, text []byte)
	// SpecialHeader is used for Abstract and Preface. The what argument contains abstract or preface.
	SpecialHeader(out *bytes.Buffer, what []byte, text func() bool, id string)
	// Note is used for typesetting notes.
	Note(out *bytes.Buffer, text func() bool, id string)
	Part(out *bytes.Buffer, text func() bool, id string)
	Header(out *bytes.Buffer, text func() bool, level int, id string)
	HRule(out *bytes.Buffer)
	List(out *bytes.Buffer, text func() bool, flags, start int, group []byte)
	ListItem(out *bytes.Buffer, text []byte, flags int)
	Paragraph(out *bytes.Buffer, text func() bool, flags int)

	Table(out *bytes.Buffer, header []byte, body []byte, footer []byte, columnData []int, caption []byte)
	TableRow(out *bytes.Buffer, text []byte)
	TableHeaderCell(out *bytes.Buffer, text []byte, flags, colspan int)
	TableCell(out *bytes.Buffer, text []byte, flags, colspan int)

	Footnotes(out *bytes.Buffer, text func() bool)
	FootnoteItem(out *bytes.Buffer, name, text []byte, flags int)
	TitleBlockTOML(out *bytes.Buffer, data *title)
	Aside(out *bytes.Buffer, text []byte)
	Figure(out *bytes.Buffer, text []byte, caption []byte)

	// Span-level callbacks
	AutoLink(out *bytes.Buffer, link []byte, kind int)
	CodeSpan(out *bytes.Buffer, text []byte)
	// CalloutText is called when a callout is seen in the text. Id is the text
	// seen between < and > and ids references the callout counter(s) in the code.
	CalloutText(out *bytes.Buffer, id string, ids []string)
	// CalloutCode is called when a callout is seen in a code block. Index is
	// the callout counter, id is the number seen between < and >.
	CalloutCode(out *bytes.Buffer, index, id string)
	DoubleEmphasis(out *bytes.Buffer, text []byte)
	Emphasis(out *bytes.Buffer, text []byte)
	Subscript(out *bytes.Buffer, text []byte)
	Superscript(out *bytes.Buffer, text []byte)
	Image(out *bytes.Buffer, link []byte, title []byte, alt []byte, subfigure bool)
	LineBreak(out *bytes.Buffer)
	Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
	RawHtmlTag(out *bytes.Buffer, tag []byte)
	TripleEmphasis(out *bytes.Buffer, text []byte)
	StrikeThrough(out *bytes.Buffer, text []byte)
	FootnoteRef(out *bytes.Buffer, ref []byte, id int)
	Index(out *bytes.Buffer, primary, secondary []byte, prim bool)
	Citation(out *bytes.Buffer, link, title []byte)
	Abbreviation(out *bytes.Buffer, abbr, title []byte)
	Example(out *bytes.Buffer, index int)
	Math(out *bytes.Buffer, text []byte, display bool)

	// Low-level callbacks
	Entity(out *bytes.Buffer, entity []byte)
	NormalText(out *bytes.Buffer, text []byte)

	// Header and footer
	DocumentHeader(out *bytes.Buffer, start bool)
	DocumentFooter(out *bytes.Buffer, start bool)

	// Frontmatter, mainmatter or backmatter
	DocumentMatter(out *bytes.Buffer, matter int)
	References(out *bytes.Buffer, citations map[string]*citation)

	// Helper functions
	Flags() int

	// Attr returns the inline attribute.
	Attr() *inlineAttr
	// SetAttr sets the inline attribute.
	SetAttr(*inlineAttr)
	// AttrString returns the string representation of this inline attribute.
	AttrString(*inlineAttr) string
}

// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
// NOTE(review): the int result is presumably the number of bytes consumed,
// as in the scan helpers of this file; confirm against the inline parser.
type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int

// Parser holds runtime state used by the parser.
// This is constructed by the Parse function.
type parser struct {
	r              Renderer
	refs           map[string]*reference    // link references and footnotes, keyed by lower-cased id
	citations      map[string]*citation     // only allocated when EXTENSION_CITATION is set
	abbreviations  map[string]*abbreviation // keyed by the abbreviation text
	examples       map[string]int
	callouts       map[string][]string
	codeBlock      int // count codeblock for callout ID generation
	inlineCallback [256]inlineParser // inline handler per trigger byte; nil when the byte has none
	flags          int // the EXTENSION_* bit set this parser runs with
	nesting        int
	maxNesting     int
	insideLink     bool
	insideDefinitionList bool // when in def. list ... TODO(miek):doc
	insideList     int  // list in list counter
	insideFigure   bool // when inside a F> paragraph
	displayMath    bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes []*reference

	appendix   bool // have we seen a {backmatter}?
	titleblock bool // have we seen a titleblock
	headerLen  int  // if a header is written what is length

	partCount    int // TODO, keep track of part counts (-#)
	chapterCount int // TODO, keep track of chapter count (#)

	// Placeholder IAL that can be added to blocklevel elements.
	ial *inlineAttr

	// Prevent identical header anchors by appending -<sequence_number> starting
	// with -1, this is the same thing that pandoc does.
	anchors map[string]int
}

// Markdown is an io.Writer. Writing a buffer with markdown text will be converted to
// the output format the renderer outputs. Note that the conversion only takes place
// when String() or Bytes() is called.
302type Markdown struct { 303 renderer Renderer 304 extensions int 305 in *bytes.Buffer 306 out *bytes.Buffer 307 308 renderedSinceLastWrite bool 309} 310 311func NewMarkdown(renderer Renderer, extensions int) *Markdown { 312 return &Markdown{renderer, extensions, &bytes.Buffer{}, &bytes.Buffer{}, false} 313} 314 315func (m *Markdown) Write(p []byte) (n int, err error) { 316 m.renderedSinceLastWrite = false 317 return m.in.Write(p) 318} 319 320func (m *Markdown) String() string { m.render(); return m.out.String() } 321func (m *Markdown) Bytes() []byte { m.render(); return m.out.Bytes() } 322 323func (m *Markdown) render() { 324 if m.renderer == nil { 325 // default to Html renderer 326 } 327 if m.renderedSinceLastWrite { 328 return 329 } 330 m.out = Parse(m.in.Bytes(), m.renderer, m.extensions) 331 m.renderedSinceLastWrite = true 332} 333 334// Parse is the main rendering function. 335// It parses and renders a block of markdown-encoded text. 336// The supplied Renderer is used to format the output, and extensions dictates 337// which non-standard extensions are enabled. 338// 339// To use the supplied Html or XML renderers, see HtmlRenderer, XmlRenderer and 340// Xml2Renderer, respectively. 
341func Parse(input []byte, renderer Renderer, extensions int) *bytes.Buffer { 342 // no point in parsing if we can't render 343 if renderer == nil { 344 return nil 345 } 346 347 // fill in the render structure 348 p := new(parser) 349 p.r = renderer 350 p.flags = extensions 351 p.refs = make(map[string]*reference) 352 p.abbreviations = make(map[string]*abbreviation) 353 p.anchors = make(map[string]int) 354 p.examples = make(map[string]int) 355 // newly created in 'callouts' 356 p.maxNesting = 16 357 p.insideLink = false 358 359 // register inline parsers 360 p.inlineCallback['*'] = emphasis 361 p.inlineCallback['_'] = emphasis 362 p.inlineCallback['~'] = emphasis 363 p.inlineCallback['`'] = codeSpan 364 p.inlineCallback['\n'] = lineBreak 365 p.inlineCallback['['] = link 366 p.inlineCallback['<'] = leftAngle 367 p.inlineCallback['\\'] = escape 368 p.inlineCallback['&'] = entity 369 p.inlineCallback['{'] = leftBrace 370 p.inlineCallback['^'] = superscript // subscript is handled in emphasis 371 p.inlineCallback['('] = index // also find example list references and cross references 372 p.inlineCallback['$'] = math 373 374 if extensions&EXTENSION_AUTOLINK != 0 { 375 p.inlineCallback[':'] = autoLink 376 } 377 378 if extensions&EXTENSION_FOOTNOTES != 0 { 379 p.notes = make([]*reference, 0) 380 } 381 382 if extensions&EXTENSION_CITATION != 0 { 383 p.inlineCallback['@'] = citationReference // @ref, short form of citations 384 p.citations = make(map[string]*citation) 385 } 386 387 first := firstPass(p, input, 0) 388 second := secondPass(p, first.Bytes(), 0) 389 return second 390} 391 392// first pass: 393// - extract references 394// - extract abbreviations 395// - expand tabs 396// - normalize newlines 397// - copy everything else 398// - include includes 399func firstPass(p *parser, input []byte, depth int) *bytes.Buffer { 400 var out bytes.Buffer 401 if depth > 8 { 402 printf(p, "nested includes depth > 8") 403 out.WriteByte('\n') 404 return &out 405 } 406 407 tabSize 
:= _TAB_SIZE_DEFAULT 408 beg, end := 0, 0 409 lastFencedCodeBlockEnd := 0 410 for beg < len(input) { // iterate over lines 411 if beg >= lastFencedCodeBlockEnd { // don't parse inside fenced code blocks 412 if end = isReference(p, input[beg:], tabSize); end > 0 { 413 beg += end 414 continue 415 } 416 } 417 // skip to the next line 418 end = beg 419 for end < len(input) && input[end] != '\n' && input[end] != '\r' { 420 end++ 421 } 422 423 if p.flags&EXTENSION_FENCED_CODE != 0 { 424 // track fenced code block boundaries to suppress tab expansion 425 // inside them: 426 if beg >= lastFencedCodeBlockEnd { 427 if i := p.fencedCode(&out, append(input[beg:], '\n'), false); i > 0 { 428 lastFencedCodeBlockEnd = beg + i 429 } 430 } 431 } 432 433 // add the line body if present 434 if end > beg { 435 if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. 436 out.Write(input[beg:end]) 437 } else { 438 if p.flags&EXTENSION_INCLUDE != 0 && beg+1 < len(input) && input[beg] == '{' && input[beg+1] == '{' { 439 if beg == 0 || (beg > 0 && input[beg-1] == '\n') { 440 if j := p.include(&out, input[beg:end], depth); j > 0 { 441 beg += j 442 } 443 } 444 } 445 expandTabs(&out, input[beg:end], tabSize) 446 } 447 } 448 out.WriteByte('\n') 449 450 if end < len(input) && input[end] == '\r' { 451 end++ 452 } 453 if end < len(input) && input[end] == '\n' { 454 end++ 455 } 456 beg = end 457 } 458 459 // empty input? 
460 if out.Len() == 0 { 461 out.WriteByte('\n') 462 } 463 464 return &out 465} 466 467// second pass: actual rendering 468func secondPass(p *parser, input []byte, depth int) *bytes.Buffer { 469 var output bytes.Buffer 470 471 p.r.DocumentHeader(&output, depth == 0) 472 p.headerLen = output.Len() 473 p.block(&output, input) 474 475 if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 { 476 p.r.Footnotes(&output, func() bool { 477 flags := _LIST_ITEM_BEGINNING_OF_LIST 478 for i := 0; i < len(p.notes); i += 1 { 479 var buf bytes.Buffer 480 ref := p.notes[i] 481 if ref.hasBlock { 482 flags |= _LIST_ITEM_CONTAINS_BLOCK 483 p.block(&buf, ref.title) 484 } else { 485 p.inline(&buf, ref.title) 486 } 487 p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags) 488 flags &^= _LIST_ITEM_BEGINNING_OF_LIST | _LIST_ITEM_CONTAINS_BLOCK 489 } 490 491 return true 492 }) 493 } 494 if !p.appendix { 495 if len(p.citations) > 0 { 496 // appendix not started in doc, start it now and output references 497 p.r.DocumentMatter(&output, _DOC_BACK_MATTER) 498 p.r.References(&output, p.citations) 499 } 500 p.appendix = true 501 } 502 p.r.DocumentFooter(&output, depth == 0) 503 504 if p.nesting != 0 { 505 panic("Nesting level did not end at zero") 506 } 507 508 return &output 509} 510 511// 512// Link references 513// 514// This section implements support for references that (usually) appear 515// as footnotes in a document, and can be referenced anywhere in the document. 516// The basic format is: 517// 518// [1]: http://www.google.com/ "Google" 519// [2]: http://www.github.com/ "Github" 520// 521// Anywhere in the document, the reference can be linked by referring to its 522// label, i.e., 1 and 2 in this example, as in: 523// 524// This library is hosted on [Github][2], a git hosting site. 525// 526// Actual footnotes as specified in Pandoc and supported by some other Markdown 527// libraries such as php-markdown are also taken care of. 
They look like this: 528// 529// This sentence needs a bit of further explanation.[^note] 530// 531// [^note]: This is the explanation. 532// 533// Footnotes should be placed at the end of the document in an ordered list. 534// Inline footnotes such as: 535// 536// Inline footnotes^[Not supported.] also exist. 537// 538// are not yet supported. 539 540// References are parsed and stored in this struct. 541type reference struct { 542 link []byte 543 title []byte 544 noteId int // 0 if not a footnote ref 545 hasBlock bool 546} 547 548// abbreviations are parsed and stored in this struct. 549type abbreviation struct { 550 title []byte 551} 552 553// citations are parsed and stored in this struct. 554type citation struct { 555 link []byte 556 title []byte 557 xml []byte // raw include of reference XML 558 typ byte // 'i' for informal, 'n' normative (default = 'i') 559 seq int // sequence number for I-Ds 560} 561 562// Check whether or not data starts with a reference link. 563// If so, it is parsed and stored in the list of references 564// (in the render struct). 565// Returns the number of bytes to skip to move past it, 566// or zero if the first line is not a reference. 567func isReference(p *parser, data []byte, tabSize int) int { 568 // up to 3 optional leading spaces 569 if len(data) < 4 { 570 return 0 571 } 572 i := 0 573 for i < 3 && data[i] == ' ' { 574 i++ 575 } 576 577 noteId := 0 578 abbrId := "" 579 580 // id part: anything but a newline between brackets 581 // abbreviations start with *[ 582 if data[i] != '[' && data[i] != '*' { 583 return 0 584 } 585 if data[i] == '*' && (i < len(data)-1 && data[i+1] != '[') { 586 return 0 587 } 588 if data[i] == '*' && p.flags&EXTENSION_ABBREVIATIONS != 0 { 589 abbrId = "yes" // any non empty 590 } 591 i++ 592 if p.flags&EXTENSION_FOOTNOTES != 0 { 593 if data[i] == '^' { 594 // we can set it to anything here because the proper noteIds will 595 // be assigned later during the second pass. 
It just has to be != 0 596 noteId = 1 597 i++ 598 } 599 } 600 idOffset := i 601 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { 602 if data[i] == '\\' { 603 i++ 604 } 605 i++ 606 } 607 if i >= len(data) || data[i] != ']' { 608 return 0 609 } 610 idEnd := i 611 if abbrId != "" { 612 abbrId = string(data[idOffset+1 : idEnd]) 613 } 614 615 // spacer: colon (space | tab)* newline? (space | tab)* 616 i++ 617 if i >= len(data) || data[i] != ':' { 618 return 0 619 } 620 i++ 621 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 622 i++ 623 } 624 if i < len(data) && (data[i] == '\n' || data[i] == '\r') { 625 i++ 626 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { 627 i++ 628 } 629 } 630 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 631 i++ 632 } 633 if i >= len(data) { 634 return 0 635 } 636 637 var ( 638 linkOffset, linkEnd int 639 titleOffset, titleEnd int 640 lineEnd int 641 raw []byte 642 hasBlock bool 643 ) 644 645 if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 { 646 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) 647 lineEnd = linkEnd 648 } else if abbrId != "" { 649 titleOffset, titleEnd, lineEnd = scanAbbreviation(p, data, idEnd) 650 p.abbreviations[abbrId] = &abbreviation{title: data[titleOffset:titleEnd]} 651 return lineEnd 652 } else { 653 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) 654 } 655 if lineEnd == 0 { 656 return 0 657 } 658 659 // a valid ref has been found 660 ref := &reference{ 661 noteId: noteId, 662 hasBlock: hasBlock, 663 } 664 665 if noteId > 0 { 666 // reusing the link field for the id since footnotes don't have links 667 ref.link = data[idOffset:idEnd] 668 // if footnote, it's not really a title, it's the contained text 669 ref.title = raw 670 } else { 671 ref.link = data[linkOffset:linkEnd] 672 ref.title = data[titleOffset:titleEnd] 673 } 674 675 // id matches are case-insensitive 676 id := 
string(bytes.ToLower(data[idOffset:idEnd])) 677 678 // CommonMark don't overwrite newly found references 679 if _, ok := p.refs[id]; !ok { 680 p.refs[id] = ref 681 } 682 683 return lineEnd 684} 685 686func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { 687 // link: whitespace-free sequence, optionally between angle brackets 688 if data[i] == '<' { 689 i++ 690 } 691 linkOffset = i 692 if i == len(data) { 693 return 694 } 695 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { 696 i++ 697 } 698 linkEnd = i 699 if data[linkOffset] == '<' && data[linkEnd-1] == '>' { 700 linkOffset++ 701 linkEnd-- 702 } 703 704 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) 705 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 706 i++ 707 } 708 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { 709 return 710 } 711 712 // compute end-of-line 713 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { 714 lineEnd = i 715 } 716 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { 717 lineEnd++ 718 } 719 720 // optional (space|tab)* spacer after a newline 721 if lineEnd > 0 { 722 i = lineEnd + 1 723 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 724 i++ 725 } 726 } 727 728 // optional title: any non-newline sequence enclosed in '"() alone on its line 729 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { 730 i++ 731 titleOffset = i 732 733 // look for EOL 734 for i < len(data) && data[i] != '\n' && data[i] != '\r' { 735 i++ 736 } 737 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { 738 titleEnd = i + 1 739 } else { 740 titleEnd = i 741 } 742 743 // step back 744 i-- 745 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { 746 i-- 747 } 748 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { 749 lineEnd = titleEnd 
750 titleEnd = i 751 } 752 } 753 754 return 755} 756 757// The first bit of this logic is the same as (*parser).listItem, but the rest 758// is much simpler. This function simply finds the entire block and shifts it 759// over by one tab if it is indeed a block (just returns the line if it's not). 760// blockEnd is the end of the section in the input buffer, and contents is the 761// extracted text that was shifted over one tab. It will need to be rendered at 762// the end of the document. 763func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { 764 if i == 0 || len(data) == 0 { 765 return 766 } 767 768 // skip leading whitespace on first line 769 for i < len(data) && data[i] == ' ' { 770 i++ 771 } 772 773 blockStart = i 774 775 // find the end of the line 776 blockEnd = i 777 for i < len(data) && data[i-1] != '\n' { 778 i++ 779 } 780 781 // get working buffer 782 var raw bytes.Buffer 783 784 // put the first line into the working buffer 785 raw.Write(data[blockEnd:i]) 786 blockEnd = i 787 788 // process the following lines 789 containsBlankLine := false 790 791gatherLines: 792 for blockEnd < len(data) { 793 i++ 794 795 // find the end of this line 796 for i < len(data) && data[i-1] != '\n' { 797 i++ 798 } 799 800 // if it is an empty line, guess that it is part of this item 801 // and move on to the next line 802 if p.isEmpty(data[blockEnd:i]) > 0 { 803 containsBlankLine = true 804 blockEnd = i 805 continue 806 } 807 808 n := 0 809 if n = isIndented(data[blockEnd:i], indentSize); n == 0 { 810 // this is the end of the block. 811 // we don't want to include this last line in the index. 
812 break gatherLines 813 } 814 815 // if there were blank lines before this one, insert a new one now 816 if containsBlankLine { 817 raw.WriteByte('\n') 818 containsBlankLine = false 819 } 820 821 // get rid of that first tab, write to buffer 822 raw.Write(data[blockEnd+n : i]) 823 hasBlock = true 824 825 blockEnd = i 826 } 827 828 if data[blockEnd-1] != '\n' { 829 raw.WriteByte('\n') 830 } 831 832 contents = raw.Bytes() 833 834 return 835} 836 837func scanAbbreviation(p *parser, data []byte, i int) (titleOffset, titleEnd, lineEnd int) { 838 lineEnd = i 839 for lineEnd < len(data) && data[lineEnd] != '\n' { 840 lineEnd++ 841 } 842 843 if len(data[i+2:lineEnd]) == 0 || p.isEmpty(data[i+2:lineEnd]) > 0 { 844 return i + 2, i + 2, lineEnd 845 } 846 847 titleOffset = i + 2 848 for data[titleOffset] == ' ' { 849 titleOffset++ 850 } 851 titleEnd = lineEnd 852 for data[titleEnd-1] == ' ' { 853 titleEnd-- 854 } 855 856 return 857} 858 859// Miscellaneous helper functions 860// Test if a character is a whitespace character. 861func isspace(c byte) bool { 862 return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == '\v' 863} 864func ispunct(c byte) bool { 865 for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") { 866 if c == r { 867 return true 868 } 869 } 870 return false 871} 872 873func isupper(c byte) bool { return (c >= 'A' && c <= 'Z') } 874func isletter(c byte) bool { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') } 875 876// Test if a character is a letter or a digit. 877// TODO: check when this is looking for ASCII alnum and when it should use unicode 878func isalnum(c byte) bool { return (c >= '0' && c <= '9') || isletter(c) } 879func isnum(c byte) bool { return (c >= '0' && c <= '9') } 880 881// check if the string only contains, i, v, x, c and l. If uppercase is true, check uppercase version. 
882func isroman(digit byte, uppercase bool) bool { 883 if !uppercase { 884 if digit == 'i' || digit == 'v' || digit == 'x' || digit == 'c' || digit == 'l' { 885 return true 886 } 887 return false 888 } 889 if digit == 'I' || digit == 'V' || digit == 'X' || digit == 'C' || digit == 'L' { 890 return true 891 } 892 return false 893} 894 895// replace {{file.md}} with the contents of the file. 896func (p *parser) include(out *bytes.Buffer, data []byte, depth int) int { 897 i := 0 898 if len(data) < 3 { 899 return 0 900 } 901 if data[i] != '{' && data[i+1] != '{' { 902 return 0 903 } 904 905 // find the end delimiter 906 end, j := 0, 0 907 for end = i; end < len(data) && j < 2; end++ { 908 if data[end] == '}' { 909 j++ 910 } else { 911 j = 0 912 } 913 } 914 if j < 2 && end >= len(data) { 915 return 0 916 } 917 filename := data[i+2 : end-2] 918 919 // Now a possible address in blockquotes 920 var address []byte 921 if end < len(data) && data[end] == '[' { 922 j = end 923 for j < len(data) && data[j] != ']' { 924 j++ 925 } 926 if j == len(data) { 927 // assuming no address 928 address = nil 929 } else { 930 address = data[end+1 : j] 931 end = j + 1 932 } 933 } 934 935 input := parseAddress(address, filename) 936 if input == nil { 937 return end 938 } 939 if input[len(input)-1] != '\n' { 940 input = append(input, '\n') 941 } 942 first := firstPass(p, input, depth+1) 943 out.Write(first.Bytes()) 944 return end 945} 946 947// replace <{{file.go}}[address] with the contents of the file. Pay attention to the indentation of the 948// include and prefix the code with that number of spaces + 4, it returns the new bytes and a boolean 949// indicating we've detected a code include. 
950func (p *parser) codeInclude(out *bytes.Buffer, data []byte) int { 951 // TODO: this is not an inline element 952 i := 0 953 l := len(data) 954 if l < 3 { 955 return 0 956 } 957 if data[i] != '<' && data[i+1] != '{' && data[i+2] != '{' { 958 return 0 959 } 960 961 // find the end delimiter 962 end, j := 0, 0 963 for end = i; end < l && j < 2; end++ { 964 if data[end] == '}' { 965 j++ 966 } else { 967 j = 0 968 } 969 } 970 if j < 2 && end >= l { 971 return 0 972 } 973 974 lang := "" 975 // found <{{filename}} 976 // this could be the end, or we could have an option [address] -block attached 977 filename := data[i+3 : end-2] 978 // get the extension of the filename, if it is a member of a predefined set a 979 // language we use it as the lang (and we will emit <sourcecode>) 980 if x := path.Ext(string(filename)); x != "" { 981 // x includes the dot 982 if _, ok := SourceCodeTypes[x[1:]]; ok { 983 lang = x[1:] 984 } 985 } 986 987 // Now a possible address in blockquotes 988 var address []byte 989 if end < l && data[end] == '[' { 990 j = end 991 for j < l && data[j] != ']' { 992 j++ 993 } 994 if j == l { 995 // assuming no address 996 address = nil 997 end = l 998 } else { 999 address = data[end+1 : j] 1000 end = j + 1 1001 } 1002 } 1003 1004 code := parseAddress(address, filename) 1005 1006 if len(code) == 0 { 1007 code = []byte{'\n'} 1008 } 1009 if code[len(code)-1] != '\n' { 1010 code = append(code, '\n') 1011 } 1012 1013 // if the next line starts with Figure: we consider that a caption 1014 var caption bytes.Buffer 1015 if end < l-1 && bytes.HasPrefix(data[end+1:], []byte("Figure: ")) { 1016 line := end + 1 1017 j := end + 1 1018 for line < l { 1019 j++ 1020 // find the end of this line 1021 for j <= l && data[j-1] != '\n' { 1022 j++ 1023 } 1024 if p.isEmpty(data[line:j]) > 0 { 1025 break 1026 } 1027 line = j 1028 } 1029 p.inline(&caption, data[end+1+8:j-1]) // +8 for 'Figure: ' 1030 end = j - 1 1031 } 1032 1033 co := "" 1034 if p.ial != nil { 1035 co = 
p.ial.Value("callout") 1036 p.ial.DropAttr("callout") 1037 } 1038 1039 p.r.SetAttr(p.ial) 1040 p.ial = nil 1041 1042 if co != "" { 1043 var callout bytes.Buffer 1044 callouts(p, &callout, code, 0, co) 1045 p.r.BlockCode(out, callout.Bytes(), lang, caption.Bytes(), p.insideFigure, true) 1046 } else { 1047 p.callouts = nil 1048 p.r.BlockCode(out, code, lang, caption.Bytes(), p.insideFigure, false) 1049 } 1050 p.r.SetAttr(nil) // reset it again. TODO(miek): double check 1051 1052 return end 1053} 1054 1055// replace tab characters with spaces, aligning to the next tab_size column. 1056// always ends output with a newline 1057func expandTabs(out *bytes.Buffer, line []byte, tabSize int) { 1058 // first, check for common cases: no tabs, or only tabs at beginning of line 1059 i, prefix := 0, 0 1060 slowcase := false 1061 for i = 0; i < len(line); i++ { 1062 if line[i] == '\t' { 1063 if prefix == i { 1064 prefix++ 1065 } else { 1066 slowcase = true 1067 break 1068 } 1069 } 1070 } 1071 1072 // no need to decode runes if all tabs are at the beginning of the line 1073 if !slowcase { 1074 for i = 0; i < prefix*tabSize; i++ { 1075 out.WriteByte(' ') 1076 } 1077 out.Write(line[prefix:]) 1078 return 1079 } 1080 1081 // the slow case: we need to count runes to figure out how 1082 // many spaces to insert for each tab 1083 column := 0 1084 i = 0 1085 for i < len(line) { 1086 start := i 1087 for i < len(line) && line[i] != '\t' { 1088 _, size := utf8.DecodeRune(line[i:]) 1089 i += size 1090 column++ 1091 } 1092 1093 if i > start { 1094 out.Write(line[start:i]) 1095 } 1096 1097 if i >= len(line) { 1098 break 1099 } 1100 1101 for { 1102 out.WriteByte(' ') 1103 column++ 1104 if column%tabSize == 0 { 1105 break 1106 } 1107 } 1108 1109 i++ 1110 } 1111} 1112 1113// Find if a line counts as indented or not. 1114// Returns number of characters the indent is (0 = not indented). 
1115func isIndented(data []byte, indentSize int) int { 1116 if len(data) == 0 { 1117 return 0 1118 } 1119 if data[0] == '\t' { 1120 return 1 1121 } 1122 if len(data) < indentSize { 1123 return 0 1124 } 1125 for i := 0; i < indentSize; i++ { 1126 if data[i] != ' ' { 1127 return 0 1128 } 1129 } 1130 return indentSize 1131} 1132 1133// Create a url-safe slug for fragments 1134func slugify(in []byte) []byte { 1135 if len(in) == 0 { 1136 return in 1137 } 1138 out := make([]byte, 0, len(in)) 1139 sym := false 1140 1141 for _, ch := range in { 1142 if isalnum(ch) { 1143 sym = false 1144 out = append(out, ch) 1145 } else if sym { 1146 continue 1147 } else { 1148 out = append(out, '-') 1149 sym = true 1150 } 1151 } 1152 var a, b int 1153 var ch byte 1154 for a, ch = range out { 1155 if ch != '-' { 1156 break 1157 } 1158 } 1159 for b = len(out) - 1; b > 0; b-- { 1160 if out[b] != '-' { 1161 break 1162 } 1163 } 1164 return out[a : b+1] 1165} 1166