//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// Markdown parsing and processing
//
//

package blackfriday

import (
	"bytes"
	"fmt"
	"strings"
	"unicode/utf8"
)

// VERSION is the release version of this copy of the library.
const VERSION = "1.5"

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
const (
	EXTENSION_NO_INTRA_EMPHASIS          = 1 << iota // ignore emphasis markers inside words
	EXTENSION_TABLES                                 // render tables
	EXTENSION_FENCED_CODE                            // render fenced code blocks
	EXTENSION_AUTOLINK                               // detect embedded URLs that are not explicitly marked
	EXTENSION_STRIKETHROUGH                          // strikethrough text using ~~test~~
	EXTENSION_LAX_HTML_BLOCKS                        // loosen up HTML block parsing rules
	EXTENSION_SPACE_HEADERS                          // be strict about prefix header rules
	EXTENSION_HARD_LINE_BREAK                        // translate newlines into line breaks
	EXTENSION_TAB_SIZE_EIGHT                         // expand tabs to eight spaces instead of four
	EXTENSION_FOOTNOTES                              // Pandoc-style footnotes
	EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK             // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	EXTENSION_HEADER_IDS                             // specify header IDs with {#id}
	EXTENSION_TITLEBLOCK                             // Titleblock ala pandoc
	EXTENSION_AUTO_HEADER_IDS                        // Create the header ID from the text
	EXTENSION_BACKSLASH_LINE_BREAK                   // translate trailing backslashes into line breaks
	EXTENSION_DEFINITION_LISTS                       // render definition lists
	EXTENSION_JOIN_LINES                             // delete newline and join lines

	// commonHtmlFlags is the HTML renderer flag set used by MarkdownCommon.
	commonHtmlFlags = 0 |
		HTML_USE_XHTML |
		HTML_USE_SMARTYPANTS |
		HTML_SMARTYPANTS_FRACTIONS |
		HTML_SMARTYPANTS_DASHES |
		HTML_SMARTYPANTS_LATEX_DASHES

	// commonExtensions is the extension flag set used by MarkdownCommon.
	commonExtensions = 0 |
		EXTENSION_NO_INTRA_EMPHASIS |
		EXTENSION_TABLES |
		EXTENSION_FENCED_CODE |
		EXTENSION_AUTOLINK |
		EXTENSION_STRIKETHROUGH |
		EXTENSION_SPACE_HEADERS |
		EXTENSION_HEADER_IDS |
		EXTENSION_BACKSLASH_LINE_BREAK |
		EXTENSION_DEFINITION_LISTS
)

// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	LINK_TYPE_NOT_AUTOLINK = iota
	LINK_TYPE_NORMAL
	LINK_TYPE_EMAIL
)

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	LIST_TYPE_ORDERED = 1 << iota
	LIST_TYPE_DEFINITION
	LIST_TYPE_TERM
	LIST_ITEM_CONTAINS_BLOCK
	LIST_ITEM_BEGINNING_OF_LIST
	LIST_ITEM_END_OF_LIST
)

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	TABLE_ALIGNMENT_LEFT = 1 << iota
	TABLE_ALIGNMENT_RIGHT
	TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT)
)

// The size of a tab stop.
const (
	TAB_SIZE_DEFAULT = 4
	TAB_SIZE_EIGHT   = 8
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}

// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently Html and Latex implementations are provided
type Renderer interface {
	// block-level callbacks
	BlockCode(out *bytes.Buffer, text []byte, infoString string)
	BlockQuote(out *bytes.Buffer, text []byte)
	BlockHtml(out *bytes.Buffer, text []byte)
	Header(out *bytes.Buffer, text func() bool, level int, id string)
	HRule(out *bytes.Buffer)
	List(out *bytes.Buffer, text func() bool, flags int)
	ListItem(out *bytes.Buffer, text []byte, flags int)
	Paragraph(out *bytes.Buffer, text func() bool)
	Table(out *bytes.Buffer, header []byte, body []byte, columnData []int)
	TableRow(out *bytes.Buffer, text []byte)
	TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
	TableCell(out *bytes.Buffer, text []byte, flags int)
	Footnotes(out *bytes.Buffer, text func() bool)
	FootnoteItem(out *bytes.Buffer, name, text []byte, flags int)
	TitleBlock(out *bytes.Buffer, text []byte)

	// Span-level callbacks
	AutoLink(out *bytes.Buffer, link []byte, kind int)
	CodeSpan(out *bytes.Buffer, text []byte)
	DoubleEmphasis(out *bytes.Buffer, text []byte)
	Emphasis(out *bytes.Buffer, text []byte)
	Image(out *bytes.Buffer, link []byte, title []byte, alt []byte)
	LineBreak(out *bytes.Buffer)
	Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
	RawHtmlTag(out *bytes.Buffer, tag []byte)
	TripleEmphasis(out *bytes.Buffer, text []byte)
	StrikeThrough(out *bytes.Buffer, text []byte)
	FootnoteRef(out *bytes.Buffer, ref []byte, id int)

	// Low-level callbacks
	Entity(out *bytes.Buffer, entity []byte)
	NormalText(out *bytes.Buffer, text []byte)

	// Header and footer
	DocumentHeader(out *bytes.Buffer)
	DocumentFooter(out *bytes.Buffer)

	// GetFlags returns the renderer's configured flag set.
	GetFlags() int
}

// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int

// Parser holds runtime state used by the parser.
// This is constructed by the Markdown function.
type parser struct {
	r              Renderer
	refOverride    ReferenceOverrideFunc
	refs           map[string]*reference
	inlineCallback [256]inlineParser
	flags          int
	nesting        int
	maxNesting     int
	insideLink     bool

	// Footnotes need to be ordered as well as available to quickly check for
	// presence. If a ref is also a footnote, it's stored both in refs and here
	// in notes. Slice is nil if footnotes not enabled.
	notes       []*reference
	notesRecord map[string]struct{}
}

// getRef resolves refid to a stored reference. The optional refOverride
// callback is consulted first; if it declines, the references collected
// during the first pass are used. Lookups into refs are case-insensitive.
func (p *parser) getRef(refid string) (ref *reference, found bool) {
	if p.refOverride != nil {
		r, overridden := p.refOverride(refid)
		if overridden {
			if r == nil {
				// The override explicitly suppressed this reference.
				return nil, false
			}
			return &reference{
				link:     []byte(r.Link),
				title:    []byte(r.Title),
				noteId:   0,
				hasBlock: false,
				text:     []byte(r.Text)}, true
		}
	}
	// refs are case insensitive
	ref, found = p.refs[strings.ToLower(refid)]
	return ref, found
}

// isFootnote reports whether ref was recorded as a footnote definition
// (footnote ids are stored in ref.link; see isReference).
func (p *parser) isFootnote(ref *reference) bool {
	_, ok := p.notesRecord[string(ref.link)]
	return ok
}

//
//
// Public interface
//
//

// Reference represents the details of a link.
// See the documentation in Options for more details on use-case.
type Reference struct {
	// Link is usually the URL the reference points to.
	Link string
	// Title is the alternate text describing the link in more detail.
	Title string
	// Text is the optional text to override the ref with if the syntax used was
	// [refid][]
	Text string
}

// ReferenceOverrideFunc is expected to be called with a reference string and
// return either a valid Reference type that the reference string maps to or
// nil.
// If overridden is false, the default reference logic will be executed.
// See the documentation in Options for more details on use-case.
type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool)

// Options represents configurable overrides and callbacks (in addition to the
// extension flag set) for configuring a Markdown parse.
type Options struct {
	// Extensions is a flag set of bit-wise ORed extension bits. See the
	// EXTENSION_* flags defined in this package.
	Extensions int

	// ReferenceOverride is an optional function callback that is called every
	// time a reference is resolved.
	//
	// In Markdown, the link reference syntax can be made to resolve a link to
	// a reference instead of an inline URL, in one of the following ways:
	//
	// * [link text][refid]
	// * [refid][]
	//
	// Usually, the refid is defined at the bottom of the Markdown document. If
	// this override function is provided, the refid is passed to the override
	// function first, before consulting the defined refids at the bottom. If
	// the override function indicates an override did not occur, the refids at
	// the bottom will be used to fill in the link details.
	ReferenceOverride ReferenceOverrideFunc
}

// MarkdownBasic is a convenience function for simple rendering.
// It processes markdown input with no extensions enabled.
func MarkdownBasic(input []byte) []byte {
	// set up the HTML renderer
	htmlFlags := HTML_USE_XHTML
	renderer := HtmlRenderer(htmlFlags, "", "")

	// set up the parser
	return MarkdownOptions(input, renderer, Options{Extensions: 0})
}

// Call Markdown with most useful extensions enabled
// MarkdownCommon is a convenience function for simple rendering.
311// It processes markdown input with common extensions enabled, including: 312// 313// * Smartypants processing with smart fractions and LaTeX dashes 314// 315// * Intra-word emphasis suppression 316// 317// * Tables 318// 319// * Fenced code blocks 320// 321// * Autolinking 322// 323// * Strikethrough support 324// 325// * Strict header parsing 326// 327// * Custom Header IDs 328func MarkdownCommon(input []byte) []byte { 329 // set up the HTML renderer 330 renderer := HtmlRenderer(commonHtmlFlags, "", "") 331 return MarkdownOptions(input, renderer, Options{ 332 Extensions: commonExtensions}) 333} 334 335// Markdown is the main rendering function. 336// It parses and renders a block of markdown-encoded text. 337// The supplied Renderer is used to format the output, and extensions dictates 338// which non-standard extensions are enabled. 339// 340// To use the supplied Html or LaTeX renderers, see HtmlRenderer and 341// LatexRenderer, respectively. 342func Markdown(input []byte, renderer Renderer, extensions int) []byte { 343 return MarkdownOptions(input, renderer, Options{ 344 Extensions: extensions}) 345} 346 347// MarkdownOptions is just like Markdown but takes additional options through 348// the Options struct. 
func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte {
	// no point in parsing if we can't render
	if renderer == nil {
		return nil
	}

	extensions := opts.Extensions

	// fill in the render structure
	p := new(parser)
	p.r = renderer
	p.flags = extensions
	p.refOverride = opts.ReferenceOverride
	p.refs = make(map[string]*reference)
	p.maxNesting = 16
	p.insideLink = false

	// register inline parsers, one per trigger character
	p.inlineCallback['*'] = emphasis
	p.inlineCallback['_'] = emphasis
	if extensions&EXTENSION_STRIKETHROUGH != 0 {
		p.inlineCallback['~'] = emphasis
	}
	p.inlineCallback['`'] = codeSpan
	p.inlineCallback['\n'] = lineBreak
	p.inlineCallback['['] = link
	p.inlineCallback['<'] = leftAngle
	p.inlineCallback['\\'] = escape
	p.inlineCallback['&'] = entity

	if extensions&EXTENSION_AUTOLINK != 0 {
		p.inlineCallback[':'] = autoLink
	}

	if extensions&EXTENSION_FOOTNOTES != 0 {
		p.notes = make([]*reference, 0)
		p.notesRecord = make(map[string]struct{})
	}

	first := firstPass(p, input)
	second := secondPass(p, first)
	return second
}

// first pass:
// - normalize newlines
// - extract references (outside of fenced code blocks)
// - expand tabs (outside of fenced code blocks)
// - copy everything else
func firstPass(p *parser, input []byte) []byte {
	var out bytes.Buffer
	tabSize := TAB_SIZE_DEFAULT
	if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 {
		tabSize = TAB_SIZE_EIGHT
	}
	beg := 0
	lastFencedCodeBlockEnd := 0
	for beg < len(input) {
		// Find end of this line, then process the line.
		end := beg
		for end < len(input) && input[end] != '\n' && input[end] != '\r' {
			end++
		}

		if p.flags&EXTENSION_FENCED_CODE != 0 {
			// track fenced code block boundaries to suppress tab expansion
			// and reference extraction inside them:
			if beg >= lastFencedCodeBlockEnd {
				if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 {
					lastFencedCodeBlockEnd = beg + i
				}
			}
		}

		// add the line body if present
		if end > beg {
			if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks.
				out.Write(input[beg:end])
			} else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 {
				// reference definitions are swallowed here and re-emitted
				// during the second pass via p.refs
				beg += refEnd
				continue
			} else {
				expandTabs(&out, input[beg:end], tabSize)
			}
		}

		// consume the line terminator: lone '\r', lone '\n', or '\r\n'
		if end < len(input) && input[end] == '\r' {
			end++
		}
		if end < len(input) && input[end] == '\n' {
			end++
		}
		out.WriteByte('\n')

		beg = end
	}

	// empty input?
	if out.Len() == 0 {
		out.WriteByte('\n')
	}

	return out.Bytes()
}

// second pass: actual rendering
func secondPass(p *parser, input []byte) []byte {
	var output bytes.Buffer

	p.r.DocumentHeader(&output)
	p.block(&output, input)

	if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 {
		p.r.Footnotes(&output, func() bool {
			flags := LIST_ITEM_BEGINNING_OF_LIST
			for i := 0; i < len(p.notes); i += 1 {
				ref := p.notes[i]
				var buf bytes.Buffer
				if ref.hasBlock {
					// multi-line footnote bodies get full block parsing
					flags |= LIST_ITEM_CONTAINS_BLOCK
					p.block(&buf, ref.title)
				} else {
					p.inline(&buf, ref.title)
				}
				p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags)
				// only the first item carries the BEGINNING_OF_LIST flag
				flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK
			}

			return true
		})
	}

	p.r.DocumentFooter(&output)

	if p.nesting != 0 {
		panic("Nesting level did not end at zero")
	}

	return output.Bytes()
}

//
// Link references
//
//
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Inline footnotes such as:
//
//    Inline footnotes^[Not supported.] also exist.
//
// are not yet supported.

// References are parsed and stored in this struct.
type reference struct {
	link     []byte // link destination; for footnotes, the footnote id
	title    []byte // link title; for footnotes, the footnote body text
	noteId   int    // 0 if not a footnote ref
	hasBlock bool   // footnote body spans an indented block (see scanFootnote)
	text     []byte // optional override text (see Reference.Text)
}

// String renders the reference for debugging output.
func (r *reference) String() string {
	return fmt.Sprintf("{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}",
		r.link, r.title, r.text, r.noteId, r.hasBlock)
}

// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (in the render struct).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
538func isReference(p *parser, data []byte, tabSize int) int { 539 // up to 3 optional leading spaces 540 if len(data) < 4 { 541 return 0 542 } 543 i := 0 544 for i < 3 && data[i] == ' ' { 545 i++ 546 } 547 548 noteId := 0 549 550 // id part: anything but a newline between brackets 551 if data[i] != '[' { 552 return 0 553 } 554 i++ 555 if p.flags&EXTENSION_FOOTNOTES != 0 { 556 if i < len(data) && data[i] == '^' { 557 // we can set it to anything here because the proper noteIds will 558 // be assigned later during the second pass. It just has to be != 0 559 noteId = 1 560 i++ 561 } 562 } 563 idOffset := i 564 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { 565 i++ 566 } 567 if i >= len(data) || data[i] != ']' { 568 return 0 569 } 570 idEnd := i 571 572 // spacer: colon (space | tab)* newline? (space | tab)* 573 i++ 574 if i >= len(data) || data[i] != ':' { 575 return 0 576 } 577 i++ 578 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 579 i++ 580 } 581 if i < len(data) && (data[i] == '\n' || data[i] == '\r') { 582 i++ 583 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { 584 i++ 585 } 586 } 587 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 588 i++ 589 } 590 if i >= len(data) { 591 return 0 592 } 593 594 var ( 595 linkOffset, linkEnd int 596 titleOffset, titleEnd int 597 lineEnd int 598 raw []byte 599 hasBlock bool 600 ) 601 602 if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 { 603 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) 604 lineEnd = linkEnd 605 } else { 606 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) 607 } 608 if lineEnd == 0 { 609 return 0 610 } 611 612 // a valid ref has been found 613 614 ref := &reference{ 615 noteId: noteId, 616 hasBlock: hasBlock, 617 } 618 619 if noteId > 0 { 620 // reusing the link field for the id since footnotes don't have links 621 ref.link = data[idOffset:idEnd] 622 // if footnote, it's not really a title, it's 
the contained text 623 ref.title = raw 624 } else { 625 ref.link = data[linkOffset:linkEnd] 626 ref.title = data[titleOffset:titleEnd] 627 } 628 629 // id matches are case-insensitive 630 id := string(bytes.ToLower(data[idOffset:idEnd])) 631 632 p.refs[id] = ref 633 634 return lineEnd 635} 636 637func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { 638 // link: whitespace-free sequence, optionally between angle brackets 639 if data[i] == '<' { 640 i++ 641 } 642 linkOffset = i 643 if i == len(data) { 644 return 645 } 646 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { 647 i++ 648 } 649 linkEnd = i 650 if data[linkOffset] == '<' && data[linkEnd-1] == '>' { 651 linkOffset++ 652 linkEnd-- 653 } 654 655 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) 656 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 657 i++ 658 } 659 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { 660 return 661 } 662 663 // compute end-of-line 664 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { 665 lineEnd = i 666 } 667 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { 668 lineEnd++ 669 } 670 671 // optional (space|tab)* spacer after a newline 672 if lineEnd > 0 { 673 i = lineEnd + 1 674 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 675 i++ 676 } 677 } 678 679 // optional title: any non-newline sequence enclosed in '"() alone on its line 680 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { 681 i++ 682 titleOffset = i 683 684 // look for EOL 685 for i < len(data) && data[i] != '\n' && data[i] != '\r' { 686 i++ 687 } 688 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { 689 titleEnd = i + 1 690 } else { 691 titleEnd = i 692 } 693 694 // step back 695 i-- 696 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { 697 i-- 698 } 699 if 
i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { 700 lineEnd = titleEnd 701 titleEnd = i 702 } 703 } 704 705 return 706} 707 708// The first bit of this logic is the same as (*parser).listItem, but the rest 709// is much simpler. This function simply finds the entire block and shifts it 710// over by one tab if it is indeed a block (just returns the line if it's not). 711// blockEnd is the end of the section in the input buffer, and contents is the 712// extracted text that was shifted over one tab. It will need to be rendered at 713// the end of the document. 714func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { 715 if i == 0 || len(data) == 0 { 716 return 717 } 718 719 // skip leading whitespace on first line 720 for i < len(data) && data[i] == ' ' { 721 i++ 722 } 723 724 blockStart = i 725 726 // find the end of the line 727 blockEnd = i 728 for i < len(data) && data[i-1] != '\n' { 729 i++ 730 } 731 732 // get working buffer 733 var raw bytes.Buffer 734 735 // put the first line into the working buffer 736 raw.Write(data[blockEnd:i]) 737 blockEnd = i 738 739 // process the following lines 740 containsBlankLine := false 741 742gatherLines: 743 for blockEnd < len(data) { 744 i++ 745 746 // find the end of this line 747 for i < len(data) && data[i-1] != '\n' { 748 i++ 749 } 750 751 // if it is an empty line, guess that it is part of this item 752 // and move on to the next line 753 if p.isEmpty(data[blockEnd:i]) > 0 { 754 containsBlankLine = true 755 blockEnd = i 756 continue 757 } 758 759 n := 0 760 if n = isIndented(data[blockEnd:i], indentSize); n == 0 { 761 // this is the end of the block. 762 // we don't want to include this last line in the index. 
763 break gatherLines 764 } 765 766 // if there were blank lines before this one, insert a new one now 767 if containsBlankLine { 768 raw.WriteByte('\n') 769 containsBlankLine = false 770 } 771 772 // get rid of that first tab, write to buffer 773 raw.Write(data[blockEnd+n : i]) 774 hasBlock = true 775 776 blockEnd = i 777 } 778 779 if data[blockEnd-1] != '\n' { 780 raw.WriteByte('\n') 781 } 782 783 contents = raw.Bytes() 784 785 return 786} 787 788// 789// 790// Miscellaneous helper functions 791// 792// 793 794// Test if a character is a punctuation symbol. 795// Taken from a private function in regexp in the stdlib. 796func ispunct(c byte) bool { 797 for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") { 798 if c == r { 799 return true 800 } 801 } 802 return false 803} 804 805// Test if a character is a whitespace character. 806func isspace(c byte) bool { 807 return ishorizontalspace(c) || isverticalspace(c) 808} 809 810// Test if a character is a horizontal whitespace character. 811func ishorizontalspace(c byte) bool { 812 return c == ' ' || c == '\t' 813} 814 815// Test if a character is a vertical whitespace character. 816func isverticalspace(c byte) bool { 817 return c == '\n' || c == '\r' || c == '\f' || c == '\v' 818} 819 820// Test if a character is letter. 821func isletter(c byte) bool { 822 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 823} 824 825// Test if a character is a letter or a digit. 826// TODO: check when this is looking for ASCII alnum and when it should use unicode 827func isalnum(c byte) bool { 828 return (c >= '0' && c <= '9') || isletter(c) 829} 830 831// Replace tab characters with spaces, aligning to the next TAB_SIZE column. 
832// always ends output with a newline 833func expandTabs(out *bytes.Buffer, line []byte, tabSize int) { 834 // first, check for common cases: no tabs, or only tabs at beginning of line 835 i, prefix := 0, 0 836 slowcase := false 837 for i = 0; i < len(line); i++ { 838 if line[i] == '\t' { 839 if prefix == i { 840 prefix++ 841 } else { 842 slowcase = true 843 break 844 } 845 } 846 } 847 848 // no need to decode runes if all tabs are at the beginning of the line 849 if !slowcase { 850 for i = 0; i < prefix*tabSize; i++ { 851 out.WriteByte(' ') 852 } 853 out.Write(line[prefix:]) 854 return 855 } 856 857 // the slow case: we need to count runes to figure out how 858 // many spaces to insert for each tab 859 column := 0 860 i = 0 861 for i < len(line) { 862 start := i 863 for i < len(line) && line[i] != '\t' { 864 _, size := utf8.DecodeRune(line[i:]) 865 i += size 866 column++ 867 } 868 869 if i > start { 870 out.Write(line[start:i]) 871 } 872 873 if i >= len(line) { 874 break 875 } 876 877 for { 878 out.WriteByte(' ') 879 column++ 880 if column%tabSize == 0 { 881 break 882 } 883 } 884 885 i++ 886 } 887} 888 889// Find if a line counts as indented or not. 890// Returns number of characters the indent is (0 = not indented). 
func isIndented(data []byte, indentSize int) int {
	switch {
	case len(data) == 0:
		return 0
	case data[0] == '\t':
		// a single tab always counts as one full indent
		return 1
	case len(data) < indentSize:
		return 0
	}
	for _, b := range data[:indentSize] {
		if b != ' ' {
			return 0
		}
	}
	return indentSize
}

// slugify builds a url-safe slug for fragments: alphanumeric bytes are kept,
// each run of any other bytes collapses to a single '-', and a leading or
// trailing dash is trimmed from the result (at least one byte is kept).
func slugify(in []byte) []byte {
	if len(in) == 0 {
		return in
	}
	// local mirror of the package's isalnum helper (ASCII only)
	alnum := func(c byte) bool {
		return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
	}
	slug := make([]byte, 0, len(in))
	dashed := false
	for _, c := range in {
		switch {
		case alnum(c):
			slug = append(slug, c)
			dashed = false
		case dashed:
			// a dash was already emitted for this run of symbols
		default:
			slug = append(slug, '-')
			dashed = true
		}
	}
	// trim at most one dash from each end, never returning an empty slice
	// when the slug is a lone dash
	lo := 0
	for lo < len(slug)-1 && slug[lo] == '-' {
		lo++
	}
	hi := len(slug)
	for hi > lo+1 && slug[hi-1] == '-' {
		hi--
	}
	return slug[lo:hi]
}