//
// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.
//

//
//
// Markdown parsing and processing
//
//

package blackfriday

import (
	"bytes"
	"fmt"
	"strings"
	"unicode/utf8"
)

// VERSION is the version string of this package.
const VERSION = "1.5"

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
const (
	EXTENSION_NO_INTRA_EMPHASIS          = 1 << iota // ignore emphasis markers inside words
	EXTENSION_TABLES                                 // render tables
	EXTENSION_FENCED_CODE                            // render fenced code blocks
	EXTENSION_AUTOLINK                               // detect embedded URLs that are not explicitly marked
	EXTENSION_STRIKETHROUGH                          // strikethrough text using ~~test~~
	EXTENSION_LAX_HTML_BLOCKS                        // loosen up HTML block parsing rules
	EXTENSION_SPACE_HEADERS                          // be strict about prefix header rules
	EXTENSION_HARD_LINE_BREAK                        // translate newlines into line breaks
	EXTENSION_TAB_SIZE_EIGHT                         // expand tabs to eight spaces instead of four
	EXTENSION_FOOTNOTES                              // Pandoc-style footnotes
	EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK             // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	EXTENSION_HEADER_IDS                             // specify header IDs with {#id}
	EXTENSION_TITLEBLOCK                             // Titleblock ala pandoc
	EXTENSION_AUTO_HEADER_IDS                        // Create the header ID from the text
	EXTENSION_BACKSLASH_LINE_BREAK                   // translate trailing backslashes into line breaks
	EXTENSION_DEFINITION_LISTS                       // render definition lists
	EXTENSION_JOIN_LINES                             // delete newline and join lines

	// commonHtmlFlags is the HTML renderer flag set that MarkdownCommon
	// passes to HtmlRenderer.
	commonHtmlFlags = 0 |
		HTML_USE_XHTML |
		HTML_USE_SMARTYPANTS |
		HTML_SMARTYPANTS_FRACTIONS |
		HTML_SMARTYPANTS_DASHES |
		HTML_SMARTYPANTS_LATEX_DASHES

	// commonExtensions is the parser extension set that MarkdownCommon
	// enables.
	commonExtensions = 0 |
		EXTENSION_NO_INTRA_EMPHASIS |
		EXTENSION_TABLES |
		EXTENSION_FENCED_CODE |
		EXTENSION_AUTOLINK |
		EXTENSION_STRIKETHROUGH |
		EXTENSION_SPACE_HEADERS |
		EXTENSION_HEADER_IDS |
		EXTENSION_BACKSLASH_LINE_BREAK |
		EXTENSION_DEFINITION_LISTS
)

// These are the possible flag values for the link renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	LINK_TYPE_NOT_AUTOLINK = iota
	LINK_TYPE_NORMAL
	LINK_TYPE_EMAIL
)

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	LIST_TYPE_ORDERED = 1 << iota
	LIST_TYPE_DEFINITION
	LIST_TYPE_TERM
	LIST_ITEM_CONTAINS_BLOCK
	LIST_ITEM_BEGINNING_OF_LIST
	LIST_ITEM_END_OF_LIST
)

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
// Note that TABLE_ALIGNMENT_CENTER is defined as the union of the LEFT and
// RIGHT bits.
const (
	TABLE_ALIGNMENT_LEFT = 1 << iota
	TABLE_ALIGNMENT_RIGHT
	TABLE_ALIGNMENT_CENTER = (TABLE_ALIGNMENT_LEFT | TABLE_ALIGNMENT_RIGHT)
)

// The size of a tab stop.
// TAB_SIZE_EIGHT is selected by EXTENSION_TAB_SIZE_EIGHT; otherwise
// TAB_SIZE_DEFAULT is used.
const (
	TAB_SIZE_DEFAULT = 4
	TAB_SIZE_EIGHT   = 8
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	// Block-level tags that predate HTML5.
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"details":    {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"summary":    {},
	"video":      {},
}

// Renderer is the rendering interface.
// This is mostly of interest if you are implementing a new rendering format.
//
// When a byte slice is provided, it contains the (rendered) contents of the
// element.
//
// When a callback is provided instead, it will write the contents of the
// respective element directly to the output buffer and return true on success.
// If the callback returns false, the rendering function should reset the
// output buffer as though it had never been called.
//
// Currently Html and Latex implementations are provided.
type Renderer interface {
	// block-level callbacks
	BlockCode(out *bytes.Buffer, text []byte, infoString string)
	BlockQuote(out *bytes.Buffer, text []byte)
	BlockHtml(out *bytes.Buffer, text []byte)
	Header(out *bytes.Buffer, text func() bool, level int, id string)
	HRule(out *bytes.Buffer)
	List(out *bytes.Buffer, text func() bool, flags int)
	ListItem(out *bytes.Buffer, text []byte, flags int)
	Paragraph(out *bytes.Buffer, text func() bool)
	Table(out *bytes.Buffer, header []byte, body []byte, columnData []int)
	TableRow(out *bytes.Buffer, text []byte)
	TableHeaderCell(out *bytes.Buffer, text []byte, flags int)
	TableCell(out *bytes.Buffer, text []byte, flags int)
	Footnotes(out *bytes.Buffer, text func() bool)
	FootnoteItem(out *bytes.Buffer, name, text []byte, flags int)
	TitleBlock(out *bytes.Buffer, text []byte)

	// Span-level callbacks
	AutoLink(out *bytes.Buffer, link []byte, kind int)
	CodeSpan(out *bytes.Buffer, text []byte)
	DoubleEmphasis(out *bytes.Buffer, text []byte)
	Emphasis(out *bytes.Buffer, text []byte)
	Image(out *bytes.Buffer, link []byte, title []byte, alt []byte)
	LineBreak(out *bytes.Buffer)
	Link(out *bytes.Buffer, link []byte, title []byte, content []byte)
	RawHtmlTag(out *bytes.Buffer, tag []byte)
	TripleEmphasis(out *bytes.Buffer, text []byte)
	StrikeThrough(out *bytes.Buffer, text []byte)
	FootnoteRef(out *bytes.Buffer, ref []byte, id int)

	// Low-level callbacks
	Entity(out *bytes.Buffer, entity []byte)
	NormalText(out *bytes.Buffer, text []byte)

	// Header and footer; the second pass calls DocumentHeader before any
	// block is rendered and DocumentFooter after the footnotes.
	DocumentHeader(out *bytes.Buffer)
	DocumentFooter(out *bytes.Buffer)

	GetFlags() int
}

// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
206type inlineParser func(p *parser, out *bytes.Buffer, data []byte, offset int) int 207 208// Parser holds runtime state used by the parser. 209// This is constructed by the Markdown function. 210type parser struct { 211 r Renderer 212 refOverride ReferenceOverrideFunc 213 refs map[string]*reference 214 inlineCallback [256]inlineParser 215 flags int 216 nesting int 217 maxNesting int 218 insideLink bool 219 220 // Footnotes need to be ordered as well as available to quickly check for 221 // presence. If a ref is also a footnote, it's stored both in refs and here 222 // in notes. Slice is nil if footnotes not enabled. 223 notes []*reference 224 notesRecord map[string]struct{} 225} 226 227func (p *parser) getRef(refid string) (ref *reference, found bool) { 228 if p.refOverride != nil { 229 r, overridden := p.refOverride(refid) 230 if overridden { 231 if r == nil { 232 return nil, false 233 } 234 return &reference{ 235 link: []byte(r.Link), 236 title: []byte(r.Title), 237 noteId: 0, 238 hasBlock: false, 239 text: []byte(r.Text)}, true 240 } 241 } 242 // refs are case insensitive 243 ref, found = p.refs[strings.ToLower(refid)] 244 return ref, found 245} 246 247func (p *parser) isFootnote(ref *reference) bool { 248 _, ok := p.notesRecord[string(ref.link)] 249 return ok 250} 251 252// 253// 254// Public interface 255// 256// 257 258// Reference represents the details of a link. 259// See the documentation in Options for more details on use-case. 260type Reference struct { 261 // Link is usually the URL the reference points to. 262 Link string 263 // Title is the alternate text describing the link in more detail. 264 Title string 265 // Text is the optional text to override the ref with if the syntax used was 266 // [refid][] 267 Text string 268} 269 270// ReferenceOverrideFunc is expected to be called with a reference string and 271// return either a valid Reference type that the reference string maps to or 272// nil. 
If overridden is false, the default reference logic will be executed. 273// See the documentation in Options for more details on use-case. 274type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) 275 276// Options represents configurable overrides and callbacks (in addition to the 277// extension flag set) for configuring a Markdown parse. 278type Options struct { 279 // Extensions is a flag set of bit-wise ORed extension bits. See the 280 // EXTENSION_* flags defined in this package. 281 Extensions int 282 283 // ReferenceOverride is an optional function callback that is called every 284 // time a reference is resolved. 285 // 286 // In Markdown, the link reference syntax can be made to resolve a link to 287 // a reference instead of an inline URL, in one of the following ways: 288 // 289 // * [link text][refid] 290 // * [refid][] 291 // 292 // Usually, the refid is defined at the bottom of the Markdown document. If 293 // this override function is provided, the refid is passed to the override 294 // function first, before consulting the defined refids at the bottom. If 295 // the override function indicates an override did not occur, the refids at 296 // the bottom will be used to fill in the link details. 297 ReferenceOverride ReferenceOverrideFunc 298} 299 300// MarkdownBasic is a convenience function for simple rendering. 301// It processes markdown input with no extensions enabled. 302func MarkdownBasic(input []byte) []byte { 303 // set up the HTML renderer 304 htmlFlags := HTML_USE_XHTML 305 renderer := HtmlRenderer(htmlFlags, "", "") 306 307 // set up the parser 308 return MarkdownOptions(input, renderer, Options{Extensions: 0}) 309} 310 311// Call Markdown with most useful extensions enabled 312// MarkdownCommon is a convenience function for simple rendering. 
313// It processes markdown input with common extensions enabled, including: 314// 315// * Smartypants processing with smart fractions and LaTeX dashes 316// 317// * Intra-word emphasis suppression 318// 319// * Tables 320// 321// * Fenced code blocks 322// 323// * Autolinking 324// 325// * Strikethrough support 326// 327// * Strict header parsing 328// 329// * Custom Header IDs 330func MarkdownCommon(input []byte) []byte { 331 // set up the HTML renderer 332 renderer := HtmlRenderer(commonHtmlFlags, "", "") 333 return MarkdownOptions(input, renderer, Options{ 334 Extensions: commonExtensions}) 335} 336 337// Markdown is the main rendering function. 338// It parses and renders a block of markdown-encoded text. 339// The supplied Renderer is used to format the output, and extensions dictates 340// which non-standard extensions are enabled. 341// 342// To use the supplied Html or LaTeX renderers, see HtmlRenderer and 343// LatexRenderer, respectively. 344func Markdown(input []byte, renderer Renderer, extensions int) []byte { 345 return MarkdownOptions(input, renderer, Options{ 346 Extensions: extensions}) 347} 348 349// MarkdownOptions is just like Markdown but takes additional options through 350// the Options struct. 
351func MarkdownOptions(input []byte, renderer Renderer, opts Options) []byte { 352 // no point in parsing if we can't render 353 if renderer == nil { 354 return nil 355 } 356 357 extensions := opts.Extensions 358 359 // fill in the render structure 360 p := new(parser) 361 p.r = renderer 362 p.flags = extensions 363 p.refOverride = opts.ReferenceOverride 364 p.refs = make(map[string]*reference) 365 p.maxNesting = 16 366 p.insideLink = false 367 368 // register inline parsers 369 p.inlineCallback['*'] = emphasis 370 p.inlineCallback['_'] = emphasis 371 if extensions&EXTENSION_STRIKETHROUGH != 0 { 372 p.inlineCallback['~'] = emphasis 373 } 374 p.inlineCallback['`'] = codeSpan 375 p.inlineCallback['\n'] = lineBreak 376 p.inlineCallback['['] = link 377 p.inlineCallback['<'] = leftAngle 378 p.inlineCallback['\\'] = escape 379 p.inlineCallback['&'] = entity 380 381 if extensions&EXTENSION_AUTOLINK != 0 { 382 p.inlineCallback[':'] = autoLink 383 } 384 385 if extensions&EXTENSION_FOOTNOTES != 0 { 386 p.notes = make([]*reference, 0) 387 p.notesRecord = make(map[string]struct{}) 388 } 389 390 first := firstPass(p, input) 391 second := secondPass(p, first) 392 return second 393} 394 395// first pass: 396// - normalize newlines 397// - extract references (outside of fenced code blocks) 398// - expand tabs (outside of fenced code blocks) 399// - copy everything else 400func firstPass(p *parser, input []byte) []byte { 401 var out bytes.Buffer 402 tabSize := TAB_SIZE_DEFAULT 403 if p.flags&EXTENSION_TAB_SIZE_EIGHT != 0 { 404 tabSize = TAB_SIZE_EIGHT 405 } 406 beg := 0 407 lastFencedCodeBlockEnd := 0 408 for beg < len(input) { 409 // Find end of this line, then process the line. 
410 end := beg 411 for end < len(input) && input[end] != '\n' && input[end] != '\r' { 412 end++ 413 } 414 415 if p.flags&EXTENSION_FENCED_CODE != 0 { 416 // track fenced code block boundaries to suppress tab expansion 417 // and reference extraction inside them: 418 if beg >= lastFencedCodeBlockEnd { 419 if i := p.fencedCodeBlock(&out, input[beg:], false); i > 0 { 420 lastFencedCodeBlockEnd = beg + i 421 } 422 } 423 } 424 425 // add the line body if present 426 if end > beg { 427 if end < lastFencedCodeBlockEnd { // Do not expand tabs while inside fenced code blocks. 428 out.Write(input[beg:end]) 429 } else if refEnd := isReference(p, input[beg:], tabSize); refEnd > 0 { 430 beg += refEnd 431 continue 432 } else { 433 expandTabs(&out, input[beg:end], tabSize) 434 } 435 } 436 437 if end < len(input) && input[end] == '\r' { 438 end++ 439 } 440 if end < len(input) && input[end] == '\n' { 441 end++ 442 } 443 out.WriteByte('\n') 444 445 beg = end 446 } 447 448 // empty input? 449 if out.Len() == 0 { 450 out.WriteByte('\n') 451 } 452 453 return out.Bytes() 454} 455 456// second pass: actual rendering 457func secondPass(p *parser, input []byte) []byte { 458 var output bytes.Buffer 459 460 p.r.DocumentHeader(&output) 461 p.block(&output, input) 462 463 if p.flags&EXTENSION_FOOTNOTES != 0 && len(p.notes) > 0 { 464 p.r.Footnotes(&output, func() bool { 465 flags := LIST_ITEM_BEGINNING_OF_LIST 466 for i := 0; i < len(p.notes); i += 1 { 467 ref := p.notes[i] 468 var buf bytes.Buffer 469 if ref.hasBlock { 470 flags |= LIST_ITEM_CONTAINS_BLOCK 471 p.block(&buf, ref.title) 472 } else { 473 p.inline(&buf, ref.title) 474 } 475 p.r.FootnoteItem(&output, ref.link, buf.Bytes(), flags) 476 flags &^= LIST_ITEM_BEGINNING_OF_LIST | LIST_ITEM_CONTAINS_BLOCK 477 } 478 479 return true 480 }) 481 } 482 483 p.r.DocumentFooter(&output) 484 485 if p.nesting != 0 { 486 panic("Nesting level did not end at zero") 487 } 488 489 return output.Bytes() 490} 491 492// 493// Link references 494// 495// 
// This section implements support for references that (usually) appear
// as footnotes in a document, and can be referenced anywhere in the document.
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Inline footnotes such as:
//
//    Inline footnotes^[Not supported.] also exist.
//
// are not yet supported.

// reference is the parsed form of a link reference or footnote definition.
type reference struct {
	link     []byte
	title    []byte
	noteId   int // 0 if not a footnote ref
	hasBlock bool
	text     []byte
}

// String renders every field of the reference for debugging output.
func (r *reference) String() string {
	const layout = "{link: %q, title: %q, text: %q, noteId: %d, hasBlock: %v}"
	return fmt.Sprintf(layout, r.link, r.title, r.text, r.noteId, r.hasBlock)
}

// Check whether or not data starts with a reference link.
// If so, it is parsed and stored in the list of references
// (in the render struct).
// Returns the number of bytes to skip to move past it,
// or zero if the first line is not a reference.
540func isReference(p *parser, data []byte, tabSize int) int { 541 // up to 3 optional leading spaces 542 if len(data) < 4 { 543 return 0 544 } 545 i := 0 546 for i < 3 && data[i] == ' ' { 547 i++ 548 } 549 550 noteId := 0 551 552 // id part: anything but a newline between brackets 553 if data[i] != '[' { 554 return 0 555 } 556 i++ 557 if p.flags&EXTENSION_FOOTNOTES != 0 { 558 if i < len(data) && data[i] == '^' { 559 // we can set it to anything here because the proper noteIds will 560 // be assigned later during the second pass. It just has to be != 0 561 noteId = 1 562 i++ 563 } 564 } 565 idOffset := i 566 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { 567 i++ 568 } 569 if i >= len(data) || data[i] != ']' { 570 return 0 571 } 572 idEnd := i 573 574 // spacer: colon (space | tab)* newline? (space | tab)* 575 i++ 576 if i >= len(data) || data[i] != ':' { 577 return 0 578 } 579 i++ 580 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 581 i++ 582 } 583 if i < len(data) && (data[i] == '\n' || data[i] == '\r') { 584 i++ 585 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { 586 i++ 587 } 588 } 589 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 590 i++ 591 } 592 if i >= len(data) { 593 return 0 594 } 595 596 var ( 597 linkOffset, linkEnd int 598 titleOffset, titleEnd int 599 lineEnd int 600 raw []byte 601 hasBlock bool 602 ) 603 604 if p.flags&EXTENSION_FOOTNOTES != 0 && noteId != 0 { 605 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) 606 lineEnd = linkEnd 607 } else { 608 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) 609 } 610 if lineEnd == 0 { 611 return 0 612 } 613 614 // a valid ref has been found 615 616 ref := &reference{ 617 noteId: noteId, 618 hasBlock: hasBlock, 619 } 620 621 if noteId > 0 { 622 // reusing the link field for the id since footnotes don't have links 623 ref.link = data[idOffset:idEnd] 624 // if footnote, it's not really a title, it's 
the contained text 625 ref.title = raw 626 } else { 627 ref.link = data[linkOffset:linkEnd] 628 ref.title = data[titleOffset:titleEnd] 629 } 630 631 // id matches are case-insensitive 632 id := string(bytes.ToLower(data[idOffset:idEnd])) 633 634 p.refs[id] = ref 635 636 return lineEnd 637} 638 639func scanLinkRef(p *parser, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { 640 // link: whitespace-free sequence, optionally between angle brackets 641 if data[i] == '<' { 642 i++ 643 } 644 linkOffset = i 645 if i == len(data) { 646 return 647 } 648 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { 649 i++ 650 } 651 linkEnd = i 652 if data[linkOffset] == '<' && data[linkEnd-1] == '>' { 653 linkOffset++ 654 linkEnd-- 655 } 656 657 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) 658 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 659 i++ 660 } 661 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { 662 return 663 } 664 665 // compute end-of-line 666 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { 667 lineEnd = i 668 } 669 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { 670 lineEnd++ 671 } 672 673 // optional (space|tab)* spacer after a newline 674 if lineEnd > 0 { 675 i = lineEnd + 1 676 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 677 i++ 678 } 679 } 680 681 // optional title: any non-newline sequence enclosed in '"() alone on its line 682 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { 683 i++ 684 titleOffset = i 685 686 // look for EOL 687 for i < len(data) && data[i] != '\n' && data[i] != '\r' { 688 i++ 689 } 690 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { 691 titleEnd = i + 1 692 } else { 693 titleEnd = i 694 } 695 696 // step back 697 i-- 698 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { 699 i-- 700 } 701 if 
i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { 702 lineEnd = titleEnd 703 titleEnd = i 704 } 705 } 706 707 return 708} 709 710// The first bit of this logic is the same as (*parser).listItem, but the rest 711// is much simpler. This function simply finds the entire block and shifts it 712// over by one tab if it is indeed a block (just returns the line if it's not). 713// blockEnd is the end of the section in the input buffer, and contents is the 714// extracted text that was shifted over one tab. It will need to be rendered at 715// the end of the document. 716func scanFootnote(p *parser, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { 717 if i == 0 || len(data) == 0 { 718 return 719 } 720 721 // skip leading whitespace on first line 722 for i < len(data) && data[i] == ' ' { 723 i++ 724 } 725 726 blockStart = i 727 728 // find the end of the line 729 blockEnd = i 730 for i < len(data) && data[i-1] != '\n' { 731 i++ 732 } 733 734 // get working buffer 735 var raw bytes.Buffer 736 737 // put the first line into the working buffer 738 raw.Write(data[blockEnd:i]) 739 blockEnd = i 740 741 // process the following lines 742 containsBlankLine := false 743 744gatherLines: 745 for blockEnd < len(data) { 746 i++ 747 748 // find the end of this line 749 for i < len(data) && data[i-1] != '\n' { 750 i++ 751 } 752 753 // if it is an empty line, guess that it is part of this item 754 // and move on to the next line 755 if p.isEmpty(data[blockEnd:i]) > 0 { 756 containsBlankLine = true 757 blockEnd = i 758 continue 759 } 760 761 n := 0 762 if n = isIndented(data[blockEnd:i], indentSize); n == 0 { 763 // this is the end of the block. 764 // we don't want to include this last line in the index. 
765 break gatherLines 766 } 767 768 // if there were blank lines before this one, insert a new one now 769 if containsBlankLine { 770 raw.WriteByte('\n') 771 containsBlankLine = false 772 } 773 774 // get rid of that first tab, write to buffer 775 raw.Write(data[blockEnd+n : i]) 776 hasBlock = true 777 778 blockEnd = i 779 } 780 781 if data[blockEnd-1] != '\n' { 782 raw.WriteByte('\n') 783 } 784 785 contents = raw.Bytes() 786 787 return 788} 789 790// 791// 792// Miscellaneous helper functions 793// 794// 795 796// Test if a character is a punctuation symbol. 797// Taken from a private function in regexp in the stdlib. 798func ispunct(c byte) bool { 799 for _, r := range []byte("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") { 800 if c == r { 801 return true 802 } 803 } 804 return false 805} 806 807// Test if a character is a whitespace character. 808func isspace(c byte) bool { 809 return ishorizontalspace(c) || isverticalspace(c) 810} 811 812// Test if a character is a horizontal whitespace character. 813func ishorizontalspace(c byte) bool { 814 return c == ' ' || c == '\t' 815} 816 817// Test if a character is a vertical whitespace character. 818func isverticalspace(c byte) bool { 819 return c == '\n' || c == '\r' || c == '\f' || c == '\v' 820} 821 822// Test if a character is letter. 823func isletter(c byte) bool { 824 return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') 825} 826 827// Test if a character is a letter or a digit. 828// TODO: check when this is looking for ASCII alnum and when it should use unicode 829func isalnum(c byte) bool { 830 return (c >= '0' && c <= '9') || isletter(c) 831} 832 833// Replace tab characters with spaces, aligning to the next TAB_SIZE column. 
834// always ends output with a newline 835func expandTabs(out *bytes.Buffer, line []byte, tabSize int) { 836 // first, check for common cases: no tabs, or only tabs at beginning of line 837 i, prefix := 0, 0 838 slowcase := false 839 for i = 0; i < len(line); i++ { 840 if line[i] == '\t' { 841 if prefix == i { 842 prefix++ 843 } else { 844 slowcase = true 845 break 846 } 847 } 848 } 849 850 // no need to decode runes if all tabs are at the beginning of the line 851 if !slowcase { 852 for i = 0; i < prefix*tabSize; i++ { 853 out.WriteByte(' ') 854 } 855 out.Write(line[prefix:]) 856 return 857 } 858 859 // the slow case: we need to count runes to figure out how 860 // many spaces to insert for each tab 861 column := 0 862 i = 0 863 for i < len(line) { 864 start := i 865 for i < len(line) && line[i] != '\t' { 866 _, size := utf8.DecodeRune(line[i:]) 867 i += size 868 column++ 869 } 870 871 if i > start { 872 out.Write(line[start:i]) 873 } 874 875 if i >= len(line) { 876 break 877 } 878 879 for { 880 out.WriteByte(' ') 881 column++ 882 if column%tabSize == 0 { 883 break 884 } 885 } 886 887 i++ 888 } 889} 890 891// Find if a line counts as indented or not. 892// Returns number of characters the indent is (0 = not indented). 
893func isIndented(data []byte, indentSize int) int { 894 if len(data) == 0 { 895 return 0 896 } 897 if data[0] == '\t' { 898 return 1 899 } 900 if len(data) < indentSize { 901 return 0 902 } 903 for i := 0; i < indentSize; i++ { 904 if data[i] != ' ' { 905 return 0 906 } 907 } 908 return indentSize 909} 910 911// Create a url-safe slug for fragments 912func slugify(in []byte) []byte { 913 if len(in) == 0 { 914 return in 915 } 916 out := make([]byte, 0, len(in)) 917 sym := false 918 919 for _, ch := range in { 920 if isalnum(ch) { 921 sym = false 922 out = append(out, ch) 923 } else if sym { 924 continue 925 } else { 926 out = append(out, '-') 927 sym = true 928 } 929 } 930 var a, b int 931 var ch byte 932 for a, ch = range out { 933 if ch != '-' { 934 break 935 } 936 } 937 for b = len(out) - 1; b > 0; b-- { 938 if out[b] != '-' { 939 break 940 } 941 } 942 return out[a : b+1] 943} 944