// Blackfriday Markdown Processor
// Available at http://github.com/russross/blackfriday
//
// Copyright © 2011 Russ Ross <russ@russross.com>.
// Distributed under the Simplified BSD License.
// See README.md for details.

package blackfriday

import (
	"bytes"
	"fmt"
	"io"
	"strings"
	"unicode/utf8"
)

//
// Markdown parsing and processing
//

// Version string of the package. Appears in the rendered document when
// CompletePage flag is on.
const Version = "2.0"

// Extensions is a bitwise or'ed collection of enabled Blackfriday's
// extensions.
type Extensions int

// These are the supported markdown parsing extensions.
// OR these values together to select multiple extensions.
//
// NoExtensions occupies the first position of the block (iota == 0), so the
// first real flag, NoIntraEmphasis, is 1 << 1; bit 0 is not assigned to any
// extension.
const (
	NoExtensions           Extensions = 0
	NoIntraEmphasis        Extensions = 1 << iota // Ignore emphasis markers inside words
	Tables                                        // Render tables
	FencedCode                                    // Render fenced code blocks
	Autolink                                      // Detect embedded URLs that are not explicitly marked
	Strikethrough                                 // Strikethrough text using ~~test~~
	LaxHTMLBlocks                                 // Loosen up HTML block parsing rules
	SpaceHeadings                                 // Be strict about prefix heading rules
	HardLineBreak                                 // Translate newlines into line breaks
	TabSizeEight                                  // Expand tabs to eight spaces instead of four
	Footnotes                                     // Pandoc-style footnotes
	NoEmptyLineBeforeBlock                        // No need to insert an empty line to start a (code, quote, ordered list, unordered list) block
	HeadingIDs                                    // specify heading IDs with {#id}
	Titleblock                                    // Titleblock ala pandoc
	AutoHeadingIDs                                // Create the heading ID from the text
	BackslashLineBreak                            // Translate trailing backslashes into line breaks
	DefinitionLists                               // Render definition lists

	// CommonHTMLFlags is the renderer flag set that Run passes to the HTML
	// renderer it creates by default.
	CommonHTMLFlags HTMLFlags = UseXHTML | Smartypants |
		SmartypantsFractions | SmartypantsDashes | SmartypantsLatexDashes

	// CommonExtensions is the extension set that Run enables by default.
	CommonExtensions Extensions = NoIntraEmphasis | Tables | FencedCode |
		Autolink | Strikethrough | SpaceHeadings | HeadingIDs |
		BackslashLineBreak | DefinitionLists
)

// ListType contains bitwise or'ed flags for list and list item objects.
type ListType int

// These are the possible flag values for the ListItem renderer.
// Multiple flag values may be ORed together.
// These are mostly of interest if you are writing a new output format.
const (
	ListTypeOrdered ListType = 1 << iota
	ListTypeDefinition
	ListTypeTerm

	ListItemContainsBlock
	ListItemBeginningOfList // TODO: figure out if this is of any use now
	ListItemEndOfList
)

// CellAlignFlags holds a type of alignment in a table cell.
type CellAlignFlags int

// These are the possible flag values for the table cell renderer.
// Only a single one of these values will be used; they are not ORed together.
// These are mostly of interest if you are writing a new output format.
//
// TableAlignmentCenter is defined as the union of the left and right bits.
const (
	TableAlignmentLeft CellAlignFlags = 1 << iota
	TableAlignmentRight
	TableAlignmentCenter = (TableAlignmentLeft | TableAlignmentRight)
)

// The size of a tab stop.
const (
	TabSizeDefault = 4
	TabSizeDouble  = 8
)

// blockTags is a set of tags that are recognized as HTML block tags.
// Any of these can be included in markdown text without special escaping.
var blockTags = map[string]struct{}{
	"blockquote": {},
	"del":        {},
	"div":        {},
	"dl":         {},
	"fieldset":   {},
	"form":       {},
	"h1":         {},
	"h2":         {},
	"h3":         {},
	"h4":         {},
	"h5":         {},
	"h6":         {},
	"iframe":     {},
	"ins":        {},
	"math":       {},
	"noscript":   {},
	"ol":         {},
	"pre":        {},
	"p":          {},
	"script":     {},
	"style":      {},
	"table":      {},
	"ul":         {},

	// HTML5
	"address":    {},
	"article":    {},
	"aside":      {},
	"canvas":     {},
	"figcaption": {},
	"figure":     {},
	"footer":     {},
	"header":     {},
	"hgroup":     {},
	"main":       {},
	"nav":        {},
	"output":     {},
	"progress":   {},
	"section":    {},
	"video":      {},
}

// Renderer is the rendering interface. This is mostly of interest if you are
// implementing a new rendering format.
//
// Only an HTML implementation is provided in this repository, see the README
// for external implementations.
type Renderer interface {
	// RenderNode is the main rendering method. It will be called once for
	// every leaf node and twice for every non-leaf node (first with
	// entering=true, then with entering=false). The method should write its
	// rendition of the node to the supplied writer w.
	RenderNode(w io.Writer, node *Node, entering bool) WalkStatus

	// RenderHeader is a method that allows the renderer to produce some
	// content preceding the main body of the output document. The header is
	// understood in the broad sense here. For example, the default HTML
	// renderer will write not only the HTML document preamble, but also the
	// table of contents if it was requested.
	//
	// The method will be passed an entire document tree, in case a particular
	// implementation needs to inspect it to produce output.
	//
	// The output should be written to the supplied writer w. If your
	// implementation has no header to write, supply an empty implementation.
	RenderHeader(w io.Writer, ast *Node)

	// RenderFooter is a symmetric counterpart of RenderHeader.
	RenderFooter(w io.Writer, ast *Node)
}

// Callback functions for inline parsing. One such function is defined
// for each character that triggers a response when parsing inline data.
// The Markdown processor keeps them in a table indexed by that trigger byte.
type inlineParser func(p *Markdown, data []byte, offset int) (int, *Node)

// Markdown is a type that holds extensions and the runtime state used by
// Parse, and the renderer. You can not use it directly, construct it with New.
173type Markdown struct { 174 renderer Renderer 175 referenceOverride ReferenceOverrideFunc 176 refs map[string]*reference 177 inlineCallback [256]inlineParser 178 extensions Extensions 179 nesting int 180 maxNesting int 181 insideLink bool 182 183 // Footnotes need to be ordered as well as available to quickly check for 184 // presence. If a ref is also a footnote, it's stored both in refs and here 185 // in notes. Slice is nil if footnotes not enabled. 186 notes []*reference 187 188 doc *Node 189 tip *Node // = doc 190 oldTip *Node 191 lastMatchedContainer *Node // = doc 192 allClosed bool 193} 194 195func (p *Markdown) getRef(refid string) (ref *reference, found bool) { 196 if p.referenceOverride != nil { 197 r, overridden := p.referenceOverride(refid) 198 if overridden { 199 if r == nil { 200 return nil, false 201 } 202 return &reference{ 203 link: []byte(r.Link), 204 title: []byte(r.Title), 205 noteID: 0, 206 hasBlock: false, 207 text: []byte(r.Text)}, true 208 } 209 } 210 // refs are case insensitive 211 ref, found = p.refs[strings.ToLower(refid)] 212 return ref, found 213} 214 215func (p *Markdown) finalize(block *Node) { 216 above := block.Parent 217 block.open = false 218 p.tip = above 219} 220 221func (p *Markdown) addChild(node NodeType, offset uint32) *Node { 222 return p.addExistingChild(NewNode(node), offset) 223} 224 225func (p *Markdown) addExistingChild(node *Node, offset uint32) *Node { 226 for !p.tip.canContain(node.Type) { 227 p.finalize(p.tip) 228 } 229 p.tip.AppendChild(node) 230 p.tip = node 231 return node 232} 233 234func (p *Markdown) closeUnmatchedBlocks() { 235 if !p.allClosed { 236 for p.oldTip != p.lastMatchedContainer { 237 parent := p.oldTip.Parent 238 p.finalize(p.oldTip) 239 p.oldTip = parent 240 } 241 p.allClosed = true 242 } 243} 244 245// 246// 247// Public interface 248// 249// 250 251// Reference represents the details of a link. 252// See the documentation in Options for more details on use-case. 
253type Reference struct { 254 // Link is usually the URL the reference points to. 255 Link string 256 // Title is the alternate text describing the link in more detail. 257 Title string 258 // Text is the optional text to override the ref with if the syntax used was 259 // [refid][] 260 Text string 261} 262 263// ReferenceOverrideFunc is expected to be called with a reference string and 264// return either a valid Reference type that the reference string maps to or 265// nil. If overridden is false, the default reference logic will be executed. 266// See the documentation in Options for more details on use-case. 267type ReferenceOverrideFunc func(reference string) (ref *Reference, overridden bool) 268 269// New constructs a Markdown processor. You can use the same With* functions as 270// for Run() to customize parser's behavior and the renderer. 271func New(opts ...Option) *Markdown { 272 var p Markdown 273 for _, opt := range opts { 274 opt(&p) 275 } 276 p.refs = make(map[string]*reference) 277 p.maxNesting = 16 278 p.insideLink = false 279 docNode := NewNode(Document) 280 p.doc = docNode 281 p.tip = docNode 282 p.oldTip = docNode 283 p.lastMatchedContainer = docNode 284 p.allClosed = true 285 // register inline parsers 286 p.inlineCallback[' '] = maybeLineBreak 287 p.inlineCallback['*'] = emphasis 288 p.inlineCallback['_'] = emphasis 289 if p.extensions&Strikethrough != 0 { 290 p.inlineCallback['~'] = emphasis 291 } 292 p.inlineCallback['`'] = codeSpan 293 p.inlineCallback['\n'] = lineBreak 294 p.inlineCallback['['] = link 295 p.inlineCallback['<'] = leftAngle 296 p.inlineCallback['\\'] = escape 297 p.inlineCallback['&'] = entity 298 p.inlineCallback['!'] = maybeImage 299 p.inlineCallback['^'] = maybeInlineFootnote 300 if p.extensions&Autolink != 0 { 301 p.inlineCallback['h'] = maybeAutoLink 302 p.inlineCallback['m'] = maybeAutoLink 303 p.inlineCallback['f'] = maybeAutoLink 304 p.inlineCallback['H'] = maybeAutoLink 305 p.inlineCallback['M'] = maybeAutoLink 306 
p.inlineCallback['F'] = maybeAutoLink 307 } 308 if p.extensions&Footnotes != 0 { 309 p.notes = make([]*reference, 0) 310 } 311 return &p 312} 313 314// Option customizes the Markdown processor's default behavior. 315type Option func(*Markdown) 316 317// WithRenderer allows you to override the default renderer. 318func WithRenderer(r Renderer) Option { 319 return func(p *Markdown) { 320 p.renderer = r 321 } 322} 323 324// WithExtensions allows you to pick some of the many extensions provided by 325// Blackfriday. You can bitwise OR them. 326func WithExtensions(e Extensions) Option { 327 return func(p *Markdown) { 328 p.extensions = e 329 } 330} 331 332// WithNoExtensions turns off all extensions and custom behavior. 333func WithNoExtensions() Option { 334 return func(p *Markdown) { 335 p.extensions = NoExtensions 336 p.renderer = NewHTMLRenderer(HTMLRendererParameters{ 337 Flags: HTMLFlagsNone, 338 }) 339 } 340} 341 342// WithRefOverride sets an optional function callback that is called every 343// time a reference is resolved. 344// 345// In Markdown, the link reference syntax can be made to resolve a link to 346// a reference instead of an inline URL, in one of the following ways: 347// 348// * [link text][refid] 349// * [refid][] 350// 351// Usually, the refid is defined at the bottom of the Markdown document. If 352// this override function is provided, the refid is passed to the override 353// function first, before consulting the defined refids at the bottom. If 354// the override function indicates an override did not occur, the refids at 355// the bottom will be used to fill in the link details. 356func WithRefOverride(o ReferenceOverrideFunc) Option { 357 return func(p *Markdown) { 358 p.referenceOverride = o 359 } 360} 361 362// Run is the main entry point to Blackfriday. It parses and renders a 363// block of markdown-encoded text. 
364// 365// The simplest invocation of Run takes one argument, input: 366// output := Run(input) 367// This will parse the input with CommonExtensions enabled and render it with 368// the default HTMLRenderer (with CommonHTMLFlags). 369// 370// Variadic arguments opts can customize the default behavior. Since Markdown 371// type does not contain exported fields, you can not use it directly. Instead, 372// use the With* functions. For example, this will call the most basic 373// functionality, with no extensions: 374// output := Run(input, WithNoExtensions()) 375// 376// You can use any number of With* arguments, even contradicting ones. They 377// will be applied in order of appearance and the latter will override the 378// former: 379// output := Run(input, WithNoExtensions(), WithExtensions(exts), 380// WithRenderer(yourRenderer)) 381func Run(input []byte, opts ...Option) []byte { 382 r := NewHTMLRenderer(HTMLRendererParameters{ 383 Flags: CommonHTMLFlags, 384 }) 385 optList := []Option{WithRenderer(r), WithExtensions(CommonExtensions)} 386 optList = append(optList, opts...) 387 parser := New(optList...) 388 ast := parser.Parse(input) 389 var buf bytes.Buffer 390 parser.renderer.RenderHeader(&buf, ast) 391 ast.Walk(func(node *Node, entering bool) WalkStatus { 392 return parser.renderer.RenderNode(&buf, node, entering) 393 }) 394 parser.renderer.RenderFooter(&buf, ast) 395 return buf.Bytes() 396} 397 398// Parse is an entry point to the parsing part of Blackfriday. It takes an 399// input markdown document and produces a syntax tree for its contents. This 400// tree can then be rendered with a default or custom renderer, or 401// analyzed/transformed by the caller to whatever non-standard needs they have. 402// The return value is the root node of the syntax tree. 
403func (p *Markdown) Parse(input []byte) *Node { 404 p.block(input) 405 // Walk the tree and finish up some of unfinished blocks 406 for p.tip != nil { 407 p.finalize(p.tip) 408 } 409 // Walk the tree again and process inline markdown in each block 410 p.doc.Walk(func(node *Node, entering bool) WalkStatus { 411 if node.Type == Paragraph || node.Type == Heading || node.Type == TableCell { 412 p.inline(node, node.content) 413 node.content = nil 414 } 415 return GoToNext 416 }) 417 p.parseRefsToAST() 418 return p.doc 419} 420 421func (p *Markdown) parseRefsToAST() { 422 if p.extensions&Footnotes == 0 || len(p.notes) == 0 { 423 return 424 } 425 p.tip = p.doc 426 block := p.addBlock(List, nil) 427 block.IsFootnotesList = true 428 block.ListFlags = ListTypeOrdered 429 flags := ListItemBeginningOfList 430 // Note: this loop is intentionally explicit, not range-form. This is 431 // because the body of the loop will append nested footnotes to p.notes and 432 // we need to process those late additions. Range form would only walk over 433 // the fixed initial set. 434 for i := 0; i < len(p.notes); i++ { 435 ref := p.notes[i] 436 p.addExistingChild(ref.footnote, 0) 437 block := ref.footnote 438 block.ListFlags = flags | ListTypeOrdered 439 block.RefLink = ref.link 440 if ref.hasBlock { 441 flags |= ListItemContainsBlock 442 p.block(ref.title) 443 } else { 444 p.inline(block, ref.title) 445 } 446 flags &^= ListItemBeginningOfList | ListItemContainsBlock 447 } 448 above := block.Parent 449 finalizeList(block) 450 p.tip = above 451 block.Walk(func(node *Node, entering bool) WalkStatus { 452 if node.Type == Paragraph || node.Type == Heading { 453 p.inline(node, node.content) 454 node.content = nil 455 } 456 return GoToNext 457 }) 458} 459 460// 461// Link references 462// 463// This section implements support for references that (usually) appear 464// as footnotes in a document, and can be referenced anywhere in the document. 
// The basic format is:
//
//    [1]: http://www.google.com/ "Google"
//    [2]: http://www.github.com/ "Github"
//
// Anywhere in the document, the reference can be linked by referring to its
// label, i.e., 1 and 2 in this example, as in:
//
//    This library is hosted on [Github][2], a git hosting site.
//
// Actual footnotes as specified in Pandoc and supported by some other Markdown
// libraries such as php-markdown are also taken care of. They look like this:
//
//    This sentence needs a bit of further explanation.[^note]
//
//    [^note]: This is the explanation.
//
// Footnotes should be placed at the end of the document in an ordered list.
// Finally, there are inline footnotes such as:
//
//    Inline footnotes^[Also supported.] provide a quick inline explanation,
//    but are rendered at the bottom of the document.
//

// reference holds all information necessary for reference-style links or
// footnotes.
//
// Consider this markdown with reference-style links:
//
//     [link][ref]
//
//     [ref]: /url/ "tooltip title"
//
// It will be ultimately converted to this HTML:
//
//     <p><a href="/url/" title="tooltip title">link</a></p>
//
// And a reference structure will be populated as follows:
//
//     p.refs["ref"] = &reference{
//         link: "/url/",
//         title: "tooltip title",
//     }
//
// Alternatively, reference can contain information about a footnote. Consider
// this markdown:
//
//     Text needing a footnote.[^a]
//
//     [^a]: This is the note
//
// A reference structure will be populated as follows:
//
//     p.refs["a"] = &reference{
//         link: "a",
//         title: "This is the note",
//         noteID: <some positive int>,
//     }
//
// TODO: As you can see, it begs for splitting into two dedicated structures
// for refs and for footnotes.
526type reference struct { 527 link []byte 528 title []byte 529 noteID int // 0 if not a footnote ref 530 hasBlock bool 531 footnote *Node // a link to the Item node within a list of footnotes 532 533 text []byte // only gets populated by refOverride feature with Reference.Text 534} 535 536func (r *reference) String() string { 537 return fmt.Sprintf("{link: %q, title: %q, text: %q, noteID: %d, hasBlock: %v}", 538 r.link, r.title, r.text, r.noteID, r.hasBlock) 539} 540 541// Check whether or not data starts with a reference link. 542// If so, it is parsed and stored in the list of references 543// (in the render struct). 544// Returns the number of bytes to skip to move past it, 545// or zero if the first line is not a reference. 546func isReference(p *Markdown, data []byte, tabSize int) int { 547 // up to 3 optional leading spaces 548 if len(data) < 4 { 549 return 0 550 } 551 i := 0 552 for i < 3 && data[i] == ' ' { 553 i++ 554 } 555 556 noteID := 0 557 558 // id part: anything but a newline between brackets 559 if data[i] != '[' { 560 return 0 561 } 562 i++ 563 if p.extensions&Footnotes != 0 { 564 if i < len(data) && data[i] == '^' { 565 // we can set it to anything here because the proper noteIds will 566 // be assigned later during the second pass. It just has to be != 0 567 noteID = 1 568 i++ 569 } 570 } 571 idOffset := i 572 for i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != ']' { 573 i++ 574 } 575 if i >= len(data) || data[i] != ']' { 576 return 0 577 } 578 idEnd := i 579 // footnotes can have empty ID, like this: [^], but a reference can not be 580 // empty like this: []. Break early if it's not a footnote and there's no ID 581 if noteID == 0 && idOffset == idEnd { 582 return 0 583 } 584 // spacer: colon (space | tab)* newline? 
(space | tab)* 585 i++ 586 if i >= len(data) || data[i] != ':' { 587 return 0 588 } 589 i++ 590 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 591 i++ 592 } 593 if i < len(data) && (data[i] == '\n' || data[i] == '\r') { 594 i++ 595 if i < len(data) && data[i] == '\n' && data[i-1] == '\r' { 596 i++ 597 } 598 } 599 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 600 i++ 601 } 602 if i >= len(data) { 603 return 0 604 } 605 606 var ( 607 linkOffset, linkEnd int 608 titleOffset, titleEnd int 609 lineEnd int 610 raw []byte 611 hasBlock bool 612 ) 613 614 if p.extensions&Footnotes != 0 && noteID != 0 { 615 linkOffset, linkEnd, raw, hasBlock = scanFootnote(p, data, i, tabSize) 616 lineEnd = linkEnd 617 } else { 618 linkOffset, linkEnd, titleOffset, titleEnd, lineEnd = scanLinkRef(p, data, i) 619 } 620 if lineEnd == 0 { 621 return 0 622 } 623 624 // a valid ref has been found 625 626 ref := &reference{ 627 noteID: noteID, 628 hasBlock: hasBlock, 629 } 630 631 if noteID > 0 { 632 // reusing the link field for the id since footnotes don't have links 633 ref.link = data[idOffset:idEnd] 634 // if footnote, it's not really a title, it's the contained text 635 ref.title = raw 636 } else { 637 ref.link = data[linkOffset:linkEnd] 638 ref.title = data[titleOffset:titleEnd] 639 } 640 641 // id matches are case-insensitive 642 id := string(bytes.ToLower(data[idOffset:idEnd])) 643 644 p.refs[id] = ref 645 646 return lineEnd 647} 648 649func scanLinkRef(p *Markdown, data []byte, i int) (linkOffset, linkEnd, titleOffset, titleEnd, lineEnd int) { 650 // link: whitespace-free sequence, optionally between angle brackets 651 if data[i] == '<' { 652 i++ 653 } 654 linkOffset = i 655 for i < len(data) && data[i] != ' ' && data[i] != '\t' && data[i] != '\n' && data[i] != '\r' { 656 i++ 657 } 658 linkEnd = i 659 if data[linkOffset] == '<' && data[linkEnd-1] == '>' { 660 linkOffset++ 661 linkEnd-- 662 } 663 664 // optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) 
665 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 666 i++ 667 } 668 if i < len(data) && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(' { 669 return 670 } 671 672 // compute end-of-line 673 if i >= len(data) || data[i] == '\r' || data[i] == '\n' { 674 lineEnd = i 675 } 676 if i+1 < len(data) && data[i] == '\r' && data[i+1] == '\n' { 677 lineEnd++ 678 } 679 680 // optional (space|tab)* spacer after a newline 681 if lineEnd > 0 { 682 i = lineEnd + 1 683 for i < len(data) && (data[i] == ' ' || data[i] == '\t') { 684 i++ 685 } 686 } 687 688 // optional title: any non-newline sequence enclosed in '"() alone on its line 689 if i+1 < len(data) && (data[i] == '\'' || data[i] == '"' || data[i] == '(') { 690 i++ 691 titleOffset = i 692 693 // look for EOL 694 for i < len(data) && data[i] != '\n' && data[i] != '\r' { 695 i++ 696 } 697 if i+1 < len(data) && data[i] == '\n' && data[i+1] == '\r' { 698 titleEnd = i + 1 699 } else { 700 titleEnd = i 701 } 702 703 // step back 704 i-- 705 for i > titleOffset && (data[i] == ' ' || data[i] == '\t') { 706 i-- 707 } 708 if i > titleOffset && (data[i] == '\'' || data[i] == '"' || data[i] == ')') { 709 lineEnd = titleEnd 710 titleEnd = i 711 } 712 } 713 714 return 715} 716 717// The first bit of this logic is the same as Parser.listItem, but the rest 718// is much simpler. This function simply finds the entire block and shifts it 719// over by one tab if it is indeed a block (just returns the line if it's not). 720// blockEnd is the end of the section in the input buffer, and contents is the 721// extracted text that was shifted over one tab. It will need to be rendered at 722// the end of the document. 
723func scanFootnote(p *Markdown, data []byte, i, indentSize int) (blockStart, blockEnd int, contents []byte, hasBlock bool) { 724 if i == 0 || len(data) == 0 { 725 return 726 } 727 728 // skip leading whitespace on first line 729 for i < len(data) && data[i] == ' ' { 730 i++ 731 } 732 733 blockStart = i 734 735 // find the end of the line 736 blockEnd = i 737 for i < len(data) && data[i-1] != '\n' { 738 i++ 739 } 740 741 // get working buffer 742 var raw bytes.Buffer 743 744 // put the first line into the working buffer 745 raw.Write(data[blockEnd:i]) 746 blockEnd = i 747 748 // process the following lines 749 containsBlankLine := false 750 751gatherLines: 752 for blockEnd < len(data) { 753 i++ 754 755 // find the end of this line 756 for i < len(data) && data[i-1] != '\n' { 757 i++ 758 } 759 760 // if it is an empty line, guess that it is part of this item 761 // and move on to the next line 762 if p.isEmpty(data[blockEnd:i]) > 0 { 763 containsBlankLine = true 764 blockEnd = i 765 continue 766 } 767 768 n := 0 769 if n = isIndented(data[blockEnd:i], indentSize); n == 0 { 770 // this is the end of the block. 771 // we don't want to include this last line in the index. 772 break gatherLines 773 } 774 775 // if there were blank lines before this one, insert a new one now 776 if containsBlankLine { 777 raw.WriteByte('\n') 778 containsBlankLine = false 779 } 780 781 // get rid of that first tab, write to buffer 782 raw.Write(data[blockEnd+n : i]) 783 hasBlock = true 784 785 blockEnd = i 786 } 787 788 if data[blockEnd-1] != '\n' { 789 raw.WriteByte('\n') 790 } 791 792 contents = raw.Bytes() 793 794 return 795} 796 797// 798// 799// Miscellaneous helper functions 800// 801// 802 803// Test if a character is a punctuation symbol. 804// Taken from a private function in regexp in the stdlib. 
func ispunct(c byte) bool {
	// The ASCII punctuation symbols form four contiguous ranges.
	switch {
	case '!' <= c && c <= '/',
		':' <= c && c <= '@',
		'[' <= c && c <= '`',
		'{' <= c && c <= '~':
		return true
	}
	return false
}

// Test if a character is a whitespace character, horizontal or vertical.
func isspace(c byte) bool {
	if ishorizontalspace(c) {
		return true
	}
	return isverticalspace(c)
}

// Test if a character is a horizontal whitespace character (space or tab).
func ishorizontalspace(c byte) bool {
	switch c {
	case ' ', '\t':
		return true
	}
	return false
}

// Test if a character is a vertical whitespace character
// (line feed, carriage return, form feed or vertical tab).
func isverticalspace(c byte) bool {
	switch c {
	case '\n', '\r', '\f', '\v':
		return true
	}
	return false
}

// Test if a character is an ASCII letter.
func isletter(c byte) bool {
	switch {
	case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z':
		return true
	}
	return false
}

// Test if a character is an ASCII letter or digit.
// TODO: check when this is looking for ASCII alnum and when it should use unicode
func isalnum(c byte) bool {
	if isletter(c) {
		return true
	}
	return '0' <= c && c <= '9'
}

// Replace tab characters with spaces, aligning to the next TAB_SIZE column.
// The result is appended to out. No newline is added; a newline already
// present in line is copied through like any other byte.
func expandTabs(out *bytes.Buffer, line []byte, tabSize int) {
	// Count the tabs that open the line.
	lead := 0
	for lead < len(line) && line[lead] == '\t' {
		lead++
	}

	// Common case: no tab occurs after the leading run, so each leading tab
	// is exactly tabSize spaces and the rest is copied without decoding runes.
	if bytes.IndexByte(line[lead:], '\t') < 0 {
		for n := lead * tabSize; n > 0; n-- {
			out.WriteByte(' ')
		}
		out.Write(line[lead:])
		return
	}

	// Slow case: a tab follows other text, so the column has to be tracked in
	// runes to know how far each tab reaches.
	col, pos := 0, 0
	for pos < len(line) {
		// Copy the run of non-tab text, counting one column per rune.
		runStart := pos
		for pos < len(line) && line[pos] != '\t' {
			_, width := utf8.DecodeRune(line[pos:])
			pos += width
			col++
		}
		if pos > runStart {
			out.Write(line[runStart:pos])
		}
		if pos >= len(line) {
			break
		}

		// Replace the tab: at least one space, then pad up to the next
		// multiple of tabSize.
		out.WriteByte(' ')
		col++
		for col%tabSize != 0 {
			out.WriteByte(' ')
			col++
		}
		pos++
	}
}

// Find if a line counts as indented or not.
// Returns number of characters the indent is (0 = not indented).
900func isIndented(data []byte, indentSize int) int { 901 if len(data) == 0 { 902 return 0 903 } 904 if data[0] == '\t' { 905 return 1 906 } 907 if len(data) < indentSize { 908 return 0 909 } 910 for i := 0; i < indentSize; i++ { 911 if data[i] != ' ' { 912 return 0 913 } 914 } 915 return indentSize 916} 917 918// Create a url-safe slug for fragments 919func slugify(in []byte) []byte { 920 if len(in) == 0 { 921 return in 922 } 923 out := make([]byte, 0, len(in)) 924 sym := false 925 926 for _, ch := range in { 927 if isalnum(ch) { 928 sym = false 929 out = append(out, ch) 930 } else if sym { 931 continue 932 } else { 933 out = append(out, '-') 934 sym = true 935 } 936 } 937 var a, b int 938 var ch byte 939 for a, ch = range out { 940 if ch != '-' { 941 break 942 } 943 } 944 for b = len(out) - 1; b > 0; b-- { 945 if out[b] != '-' { 946 break 947 } 948 } 949 return out[a : b+1] 950} 951