1// 2// Blackfriday Markdown Processor 3// Available at http://github.com/russross/blackfriday 4// 5// Copyright © 2011 Russ Ross <russ@russross.com>. 6// Distributed under the Simplified BSD License. 7// See README.md for details. 8// 9 10// 11// Functions to parse block-level elements. 12// 13 14package blackfriday 15 16import ( 17 "bytes" 18 "html" 19 "regexp" 20 "strings" 21 22 "github.com/shurcooL/sanitized_anchor_name" 23) 24 25const ( 26 charEntity = "&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});" 27 escapable = "[!\"#$%&'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]" 28) 29 30var ( 31 reBackslashOrAmp = regexp.MustCompile("[\\&]") 32 reEntityOrEscapedChar = regexp.MustCompile("(?i)\\\\" + escapable + "|" + charEntity) 33) 34 35// Parse block-level data. 36// Note: this function and many that it calls assume that 37// the input buffer ends with a newline. 38func (p *Markdown) block(data []byte) { 39 // this is called recursively: enforce a maximum depth 40 if p.nesting >= p.maxNesting { 41 return 42 } 43 p.nesting++ 44 45 // parse out one block-level construct at a time 46 for len(data) > 0 { 47 // prefixed heading: 48 // 49 // # Heading 1 50 // ## Heading 2 51 // ... 52 // ###### Heading 6 53 if p.isPrefixHeading(data) { 54 data = data[p.prefixHeading(data):] 55 continue 56 } 57 58 // block of preformatted HTML: 59 // 60 // <div> 61 // ... 62 // </div> 63 if data[0] == '<' { 64 if i := p.html(data, true); i > 0 { 65 data = data[i:] 66 continue 67 } 68 } 69 70 // title block 71 // 72 // % stuff 73 // % more stuff 74 // % even more stuff 75 if p.extensions&Titleblock != 0 { 76 if data[0] == '%' { 77 if i := p.titleBlock(data, true); i > 0 { 78 data = data[i:] 79 continue 80 } 81 } 82 } 83 84 // blank lines. note: returns the # of bytes to skip 85 if i := p.isEmpty(data); i > 0 { 86 data = data[i:] 87 continue 88 } 89 90 // indented code block: 91 // 92 // func max(a, b int) int { 93 // if a > b { 94 // return a 95 // } 96 // return b 97 // } 98 if p.codePrefix(data) > 0 { 99 data = data[p.code(data):] 100 continue 101 } 102 103 // fenced code block: 104 // 105 // ``` go 106 // func fact(n int) int { 107 // if n <= 1 { 108 // return n 109 // } 110 // return n * fact(n-1) 111 // } 112 // ``` 113 if p.extensions&FencedCode != 0 { 114 if i := p.fencedCodeBlock(data, true); i > 0 { 115 data = data[i:] 116 continue 117 } 118 } 119 120 // horizontal rule: 121 // 122 // ------ 123 // or 124 // ****** 125 // or 126 // ______ 127 if p.isHRule(data) { 128 p.addBlock(HorizontalRule, nil) 129 var i int 130 for i = 0; i < len(data) && data[i] != '\n'; i++ { 131 } 132 data = data[i:] 133 continue 134 } 135 136 // block quote: 137 // 138 // > A big quote I found somewhere 139 // > on the web 140 if p.quotePrefix(data) > 0 { 141 data = data[p.quote(data):] 142 continue 143 } 144 145 // table: 146 // 147 // Name | Age | Phone 148 // ------|-----|--------- 149 // Bob | 31 | 555-1234 150 // Alice | 27 | 555-4321 151 if p.extensions&Tables != 0 { 152 if i := p.table(data); i > 0 { 153 data = data[i:] 154 continue 155 } 156 } 157 158 // an itemized/unordered list: 159 // 160 // * Item 1 161 // * Item 2 162 // 163 // also works with + or - 164 if p.uliPrefix(data) > 0 { 165 data = data[p.list(data, 0):] 166 continue 167 } 168 169 // a numbered/ordered list: 170 // 171 // 1. Item 1 172 // 2. Item 2 173 if p.oliPrefix(data) > 0 { 174 data = data[p.list(data, ListTypeOrdered):] 175 continue 176 } 177 178 // definition lists: 179 // 180 // Term 1 181 // : Definition a 182 // : Definition b 183 // 184 // Term 2 185 // : Definition c 186 if p.extensions&DefinitionLists != 0 { 187 if p.dliPrefix(data) > 0 { 188 data = data[p.list(data, ListTypeDefinition):] 189 continue 190 } 191 } 192 193 // anything else must look like a normal paragraph 194 // note: this finds underlined headings, too 195 data = data[p.paragraph(data):] 196 } 197 198 p.nesting-- 199} 200 201func (p *Markdown) addBlock(typ NodeType, content []byte) *Node { 202 p.closeUnmatchedBlocks() 203 container := p.addChild(typ, 0) 204 container.content = content 205 return container 206} 207 208func (p *Markdown) isPrefixHeading(data []byte) bool { 209 if data[0] != '#' { 210 return false 211 } 212 213 if p.extensions&SpaceHeadings != 0 { 214 level := 0 215 for level < 6 && level < len(data) && data[level] == '#' { 216 level++ 217 } 218 if level == len(data) || data[level] != ' ' { 219 return false 220 } 221 } 222 return true 223} 224 225func (p *Markdown) prefixHeading(data []byte) int { 226 level := 0 227 for level < 6 && level < len(data) && data[level] == '#' { 228 level++ 229 } 230 i := skipChar(data, level, ' ') 231 end := skipUntilChar(data, i, '\n') 232 skip := end 233 id := "" 234 if p.extensions&HeadingIDs != 0 { 235 j, k := 0, 0 236 // find start/end of heading id 237 for j = i; j < end-1 && (data[j] != '{' || data[j+1] != '#'); j++ { 238 } 239 for k = j + 1; k < end && data[k] != '}'; k++ { 240 } 241 // extract heading id iff found 242 if j < end && k < end { 243 id = string(data[j+2 : k]) 244 end = j 245 skip = k + 1 246 for end > 0 && data[end-1] == ' ' { 247 end-- 248 } 249 } 250 } 251 for end > 0 && data[end-1] == '#' { 252 if isBackslashEscaped(data, end-1) { 253 break 254 } 255 end-- 256 } 257 for end > 0 && data[end-1] == ' ' { 258 end-- 259 } 260 if end > i { 261 if id == "" && p.extensions&AutoHeadingIDs != 0 { 262 id = sanitized_anchor_name.Create(string(data[i:end])) 263 } 264 block := p.addBlock(Heading, data[i:end]) 265 block.HeadingID = id 266 block.Level = level 267 } 268 return skip 269} 270 271func (p *Markdown) isUnderlinedHeading(data []byte) int { 272 // test of level 1 heading 273 if data[0] == '=' { 274 i := skipChar(data, 1, '=') 275 i = skipChar(data, i, ' ') 276 if i < len(data) && data[i] == '\n' { 277 return 1 278 } 279 return 0 280 } 281 282 // test of level 2 heading 283 if data[0] == '-' { 284 i := skipChar(data, 1, '-') 285 i = skipChar(data, i, ' ') 286 if i < len(data) && data[i] == '\n' { 287 return 2 288 } 289 return 0 290 } 291 292 return 0 293} 294 295func (p *Markdown) titleBlock(data []byte, doRender bool) int { 296 if data[0] != '%' { 297 return 0 298 } 299 splitData := bytes.Split(data, []byte("\n")) 300 var i int 301 for idx, b := range splitData { 302 if !bytes.HasPrefix(b, []byte("%")) { 303 i = idx // - 1 304 break 305 } 306 } 307 308 data = bytes.Join(splitData[0:i], []byte("\n")) 309 consumed := len(data) 310 data = bytes.TrimPrefix(data, []byte("% ")) 311 data = bytes.Replace(data, []byte("\n% "), []byte("\n"), -1) 312 block := p.addBlock(Heading, data) 313 block.Level = 1 314 block.IsTitleblock = true 315 316 return consumed 317} 318 319func (p *Markdown) html(data []byte, doRender bool) int { 320 var i, j int 321 322 // identify the opening tag 323 if data[0] != '<' { 324 return 0 325 } 326 curtag, tagfound := p.htmlFindTag(data[1:]) 327 328 // handle special cases 329 if !tagfound { 330 // check for an HTML comment 331 if size := p.htmlComment(data, doRender); size > 0 { 332 return size 333 } 334 335 // check for an <hr> tag 336 if size := p.htmlHr(data, doRender); size > 0 { 337 return size 338 } 339 340 // no special case recognized 341 return 0 342 } 343 344 // look for an unindented matching closing tag 345 // followed by a blank line 346 found := false 347 /* 348 closetag := []byte("\n</" + curtag + ">") 349 j = len(curtag) + 1 350 for !found { 351 // scan for a closing tag at the beginning of a line 352 if skip := bytes.Index(data[j:], closetag); skip >= 0 { 353 j += skip + len(closetag) 354 } else { 355 break 356 } 357 358 // see if it is the only thing on the line 359 if skip := p.isEmpty(data[j:]); skip > 0 { 360 // see if it is followed by a blank line/eof 361 j += skip 362 if j >= len(data) { 363 found = true 364 i = j 365 } else { 366 if skip := p.isEmpty(data[j:]); skip > 0 { 367 j += skip 368 found = true 369 i = j 370 } 371 } 372 } 373 } 374 */ 375 376 // if not found, try a second pass looking for indented match 377 // but not if tag is "ins" or "del" (following original Markdown.pl) 378 if !found && curtag != "ins" && curtag != "del" { 379 i = 1 380 for i < len(data) { 381 i++ 382 for i < len(data) && !(data[i-1] == '<' && data[i] == '/') { 383 i++ 384 } 385 386 if i+2+len(curtag) >= len(data) { 387 break 388 } 389 390 j = p.htmlFindEnd(curtag, data[i-1:]) 391 392 if j > 0 { 393 i += j - 1 394 found = true 395 break 396 } 397 } 398 } 399 400 if !found { 401 return 0 402 } 403 404 // the end of the block has been found 405 if doRender { 406 // trim newlines 407 end := i 408 for end > 0 && data[end-1] == '\n' { 409 end-- 410 } 411 finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end])) 412 } 413 414 return i 415} 416 417func finalizeHTMLBlock(block *Node) { 418 block.Literal = block.content 419 block.content = nil 420} 421 422// HTML comment, lax form 423func (p *Markdown) htmlComment(data []byte, doRender bool) int { 424 i := p.inlineHTMLComment(data) 425 // needs to end with a blank line 426 if j := p.isEmpty(data[i:]); j > 0 { 427 size := i + j 428 if doRender { 429 // trim trailing newlines 430 end := size 431 for end > 0 && data[end-1] == '\n' { 432 end-- 433 } 434 block := p.addBlock(HTMLBlock, data[:end]) 435 finalizeHTMLBlock(block) 436 } 437 return size 438 } 439 return 0 440} 441 442// HR, which is the only self-closing block tag considered 443func (p *Markdown) htmlHr(data []byte, doRender bool) int { 444 if len(data) < 4 { 445 return 0 446 } 447 if data[0] != '<' || (data[1] != 'h' && data[1] != 'H') || (data[2] != 'r' && data[2] != 'R') { 448 return 0 449 } 450 if data[3] != ' ' && data[3] != '/' && data[3] != '>' { 451 // not an <hr> tag after all; at least not a valid one 452 return 0 453 } 454 i := 3 455 for i < len(data) && data[i] != '>' && data[i] != '\n' { 456 i++ 457 } 458 if i < len(data) && data[i] == '>' { 459 i++ 460 if j := p.isEmpty(data[i:]); j > 0 { 461 size := i + j 462 if doRender { 463 // trim newlines 464 end := size 465 for end > 0 && data[end-1] == '\n' { 466 end-- 467 } 468 finalizeHTMLBlock(p.addBlock(HTMLBlock, data[:end])) 469 } 470 return size 471 } 472 } 473 return 0 474} 475 476func (p *Markdown) htmlFindTag(data []byte) (string, bool) { 477 i := 0 478 for i < len(data) && isalnum(data[i]) { 479 i++ 480 } 481 key := string(data[:i]) 482 if _, ok := blockTags[key]; ok { 483 return key, true 484 } 485 return "", false 486} 487 488func (p *Markdown) htmlFindEnd(tag string, data []byte) int { 489 // assume data[0] == '<' && data[1] == '/' already tested 490 if tag == "hr" { 491 return 2 492 } 493 // check if tag is a match 494 closetag := []byte("</" + tag + ">") 495 if !bytes.HasPrefix(data, closetag) { 496 return 0 497 } 498 i := len(closetag) 499 500 // check that the rest of the line is blank 501 skip := 0 502 if skip = p.isEmpty(data[i:]); skip == 0 { 503 return 0 504 } 505 i += skip 506 skip = 0 507 508 if i >= len(data) { 509 return i 510 } 511 512 if p.extensions&LaxHTMLBlocks != 0 { 513 return i 514 } 515 if skip = p.isEmpty(data[i:]); skip == 0 { 516 // following line must be blank 517 return 0 518 } 519 520 return i + skip 521} 522 523func (*Markdown) isEmpty(data []byte) int { 524 // it is okay to call isEmpty on an empty buffer 525 if len(data) == 0 { 526 return 0 527 } 528 529 var i int 530 for i = 0; i < len(data) && data[i] != '\n'; i++ { 531 if data[i] != ' ' && data[i] != '\t' { 532 return 0 533 } 534 } 535 if i < len(data) && data[i] == '\n' { 536 i++ 537 } 538 return i 539} 540 541func (*Markdown) isHRule(data []byte) bool { 542 i := 0 543 544 // skip up to three spaces 545 for i < 3 && data[i] == ' ' { 546 i++ 547 } 548 549 // look at the hrule char 550 if data[i] != '*' && data[i] != '-' && data[i] != '_' { 551 return false 552 } 553 c := data[i] 554 555 // the whole line must be the char or whitespace 556 n := 0 557 for i < len(data) && data[i] != '\n' { 558 switch { 559 case data[i] == c: 560 n++ 561 case data[i] != ' ': 562 return false 563 } 564 i++ 565 } 566 567 return n >= 3 568} 569 570// isFenceLine checks if there's a fence line (e.g., ``` or ``` go) at the beginning of data, 571// and returns the end index if so, or 0 otherwise. It also returns the marker found. 572// If info is not nil, it gets set to the syntax specified in the fence line. 573func isFenceLine(data []byte, info *string, oldmarker string) (end int, marker string) { 574 i, size := 0, 0 575 576 // skip up to three spaces 577 for i < len(data) && i < 3 && data[i] == ' ' { 578 i++ 579 } 580 581 // check for the marker characters: ~ or ` 582 if i >= len(data) { 583 return 0, "" 584 } 585 if data[i] != '~' && data[i] != '`' { 586 return 0, "" 587 } 588 589 c := data[i] 590 591 // the whole line must be the same char or whitespace 592 for i < len(data) && data[i] == c { 593 size++ 594 i++ 595 } 596 597 // the marker char must occur at least 3 times 598 if size < 3 { 599 return 0, "" 600 } 601 marker = string(data[i-size : i]) 602 603 // if this is the end marker, it must match the beginning marker 604 if oldmarker != "" && marker != oldmarker { 605 return 0, "" 606 } 607 608 // TODO(shurcooL): It's probably a good idea to simplify the 2 code paths here 609 // into one, always get the info string, and discard it if the caller doesn't care. 610 if info != nil { 611 infoLength := 0 612 i = skipChar(data, i, ' ') 613 614 if i >= len(data) { 615 if i == len(data) { 616 return i, marker 617 } 618 return 0, "" 619 } 620 621 infoStart := i 622 623 if data[i] == '{' { 624 i++ 625 infoStart++ 626 627 for i < len(data) && data[i] != '}' && data[i] != '\n' { 628 infoLength++ 629 i++ 630 } 631 632 if i >= len(data) || data[i] != '}' { 633 return 0, "" 634 } 635 636 // strip all whitespace at the beginning and the end 637 // of the {} block 638 for infoLength > 0 && isspace(data[infoStart]) { 639 infoStart++ 640 infoLength-- 641 } 642 643 for infoLength > 0 && isspace(data[infoStart+infoLength-1]) { 644 infoLength-- 645 } 646 i++ 647 i = skipChar(data, i, ' ') 648 } else { 649 for i < len(data) && !isverticalspace(data[i]) { 650 infoLength++ 651 i++ 652 } 653 } 654 655 *info = strings.TrimSpace(string(data[infoStart : infoStart+infoLength])) 656 } 657 658 if i == len(data) { 659 return i, marker 660 } 661 if i > len(data) || data[i] != '\n' { 662 return 0, "" 663 } 664 return i + 1, marker // Take newline into account. 665} 666 667// fencedCodeBlock returns the end index if data contains a fenced code block at the beginning, 668// or 0 otherwise. It writes to out if doRender is true, otherwise it has no side effects. 669// If doRender is true, a final newline is mandatory to recognize the fenced code block. 670func (p *Markdown) fencedCodeBlock(data []byte, doRender bool) int { 671 var info string 672 beg, marker := isFenceLine(data, &info, "") 673 if beg == 0 || beg >= len(data) { 674 return 0 675 } 676 677 var work bytes.Buffer 678 work.Write([]byte(info)) 679 work.WriteByte('\n') 680 681 for { 682 // safe to assume beg < len(data) 683 684 // check for the end of the code block 685 fenceEnd, _ := isFenceLine(data[beg:], nil, marker) 686 if fenceEnd != 0 { 687 beg += fenceEnd 688 break 689 } 690 691 // copy the current line 692 end := skipUntilChar(data, beg, '\n') + 1 693 694 // did we reach the end of the buffer without a closing marker? 695 if end >= len(data) { 696 return 0 697 } 698 699 // verbatim copy to the working buffer 700 if doRender { 701 work.Write(data[beg:end]) 702 } 703 beg = end 704 } 705 706 if doRender { 707 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer 708 block.IsFenced = true 709 finalizeCodeBlock(block) 710 } 711 712 return beg 713} 714 715func unescapeChar(str []byte) []byte { 716 if str[0] == '\\' { 717 return []byte{str[1]} 718 } 719 return []byte(html.UnescapeString(string(str))) 720} 721 722func unescapeString(str []byte) []byte { 723 if reBackslashOrAmp.Match(str) { 724 return reEntityOrEscapedChar.ReplaceAllFunc(str, unescapeChar) 725 } 726 return str 727} 728 729func finalizeCodeBlock(block *Node) { 730 if block.IsFenced { 731 newlinePos := bytes.IndexByte(block.content, '\n') 732 firstLine := block.content[:newlinePos] 733 rest := block.content[newlinePos+1:] 734 block.Info = unescapeString(bytes.Trim(firstLine, "\n")) 735 block.Literal = rest 736 } else { 737 block.Literal = block.content 738 } 739 block.content = nil 740} 741 742func (p *Markdown) table(data []byte) int { 743 table := p.addBlock(Table, nil) 744 i, columns := p.tableHeader(data) 745 if i == 0 { 746 p.tip = table.Parent 747 table.Unlink() 748 return 0 749 } 750 751 p.addBlock(TableBody, nil) 752 753 for i < len(data) { 754 pipes, rowStart := 0, i 755 for ; i < len(data) && data[i] != '\n'; i++ { 756 if data[i] == '|' { 757 pipes++ 758 } 759 } 760 761 if pipes == 0 { 762 i = rowStart 763 break 764 } 765 766 // include the newline in data sent to tableRow 767 if i < len(data) && data[i] == '\n' { 768 i++ 769 } 770 p.tableRow(data[rowStart:i], columns, false) 771 } 772 773 return i 774} 775 776// check if the specified position is preceded by an odd number of backslashes 777func isBackslashEscaped(data []byte, i int) bool { 778 backslashes := 0 779 for i-backslashes-1 >= 0 && data[i-backslashes-1] == '\\' { 780 backslashes++ 781 } 782 return backslashes&1 == 1 783} 784 785func (p *Markdown) tableHeader(data []byte) (size int, columns []CellAlignFlags) { 786 i := 0 787 colCount := 1 788 for i = 0; i < len(data) && data[i] != '\n'; i++ { 789 if data[i] == '|' && !isBackslashEscaped(data, i) { 790 colCount++ 791 } 792 } 793 794 // doesn't look like a table header 795 if colCount == 1 { 796 return 797 } 798 799 // include the newline in the data sent to tableRow 800 j := i 801 if j < len(data) && data[j] == '\n' { 802 j++ 803 } 804 header := data[:j] 805 806 // column count ignores pipes at beginning or end of line 807 if data[0] == '|' { 808 colCount-- 809 } 810 if i > 2 && data[i-1] == '|' && !isBackslashEscaped(data, i-1) { 811 colCount-- 812 } 813 814 columns = make([]CellAlignFlags, colCount) 815 816 // move on to the header underline 817 i++ 818 if i >= len(data) { 819 return 820 } 821 822 if data[i] == '|' && !isBackslashEscaped(data, i) { 823 i++ 824 } 825 i = skipChar(data, i, ' ') 826 827 // each column header is of form: / *:?-+:? *|/ with # dashes + # colons >= 3 828 // and trailing | optional on last column 829 col := 0 830 for i < len(data) && data[i] != '\n' { 831 dashes := 0 832 833 if data[i] == ':' { 834 i++ 835 columns[col] |= TableAlignmentLeft 836 dashes++ 837 } 838 for i < len(data) && data[i] == '-' { 839 i++ 840 dashes++ 841 } 842 if i < len(data) && data[i] == ':' { 843 i++ 844 columns[col] |= TableAlignmentRight 845 dashes++ 846 } 847 for i < len(data) && data[i] == ' ' { 848 i++ 849 } 850 if i == len(data) { 851 return 852 } 853 // end of column test is messy 854 switch { 855 case dashes < 3: 856 // not a valid column 857 return 858 859 case data[i] == '|' && !isBackslashEscaped(data, i): 860 // marker found, now skip past trailing whitespace 861 col++ 862 i++ 863 for i < len(data) && data[i] == ' ' { 864 i++ 865 } 866 867 // trailing junk found after last column 868 if col >= colCount && i < len(data) && data[i] != '\n' { 869 return 870 } 871 872 case (data[i] != '|' || isBackslashEscaped(data, i)) && col+1 < colCount: 873 // something else found where marker was required 874 return 875 876 case data[i] == '\n': 877 // marker is optional for the last column 878 col++ 879 880 default: 881 // trailing junk found after last column 882 return 883 } 884 } 885 if col != colCount { 886 return 887 } 888 889 p.addBlock(TableHead, nil) 890 p.tableRow(header, columns, true) 891 size = i 892 if size < len(data) && data[size] == '\n' { 893 size++ 894 } 895 return 896} 897 898func (p *Markdown) tableRow(data []byte, columns []CellAlignFlags, header bool) { 899 p.addBlock(TableRow, nil) 900 i, col := 0, 0 901 902 if data[i] == '|' && !isBackslashEscaped(data, i) { 903 i++ 904 } 905 906 for col = 0; col < len(columns) && i < len(data); col++ { 907 for i < len(data) && data[i] == ' ' { 908 i++ 909 } 910 911 cellStart := i 912 913 for i < len(data) && (data[i] != '|' || isBackslashEscaped(data, i)) && data[i] != '\n' { 914 i++ 915 } 916 917 cellEnd := i 918 919 // skip the end-of-cell marker, possibly taking us past end of buffer 920 i++ 921 922 for cellEnd > cellStart && cellEnd-1 < len(data) && data[cellEnd-1] == ' ' { 923 cellEnd-- 924 } 925 926 cell := p.addBlock(TableCell, data[cellStart:cellEnd]) 927 cell.IsHeader = header 928 cell.Align = columns[col] 929 } 930 931 // pad it out with empty columns to get the right number 932 for ; col < len(columns); col++ { 933 cell := p.addBlock(TableCell, nil) 934 cell.IsHeader = header 935 cell.Align = columns[col] 936 } 937 938 // silently ignore rows with too many cells 939} 940 941// returns blockquote prefix length 942func (p *Markdown) quotePrefix(data []byte) int { 943 i := 0 944 for i < 3 && i < len(data) && data[i] == ' ' { 945 i++ 946 } 947 if i < len(data) && data[i] == '>' { 948 if i+1 < len(data) && data[i+1] == ' ' { 949 return i + 2 950 } 951 return i + 1 952 } 953 return 0 954} 955 956// blockquote ends with at least one blank line 957// followed by something without a blockquote prefix 958func (p *Markdown) terminateBlockquote(data []byte, beg, end int) bool { 959 if p.isEmpty(data[beg:]) <= 0 { 960 return false 961 } 962 if end >= len(data) { 963 return true 964 } 965 return p.quotePrefix(data[end:]) == 0 && p.isEmpty(data[end:]) == 0 966} 967 968// parse a blockquote fragment 969func (p *Markdown) quote(data []byte) int { 970 block := p.addBlock(BlockQuote, nil) 971 var raw bytes.Buffer 972 beg, end := 0, 0 973 for beg < len(data) { 974 end = beg 975 // Step over whole lines, collecting them. While doing that, check for 976 // fenced code and if one's found, incorporate it altogether, 977 // irregardless of any contents inside it 978 for end < len(data) && data[end] != '\n' { 979 if p.extensions&FencedCode != 0 { 980 if i := p.fencedCodeBlock(data[end:], false); i > 0 { 981 // -1 to compensate for the extra end++ after the loop: 982 end += i - 1 983 break 984 } 985 } 986 end++ 987 } 988 if end < len(data) && data[end] == '\n' { 989 end++ 990 } 991 if pre := p.quotePrefix(data[beg:]); pre > 0 { 992 // skip the prefix 993 beg += pre 994 } else if p.terminateBlockquote(data, beg, end) { 995 break 996 } 997 // this line is part of the blockquote 998 raw.Write(data[beg:end]) 999 beg = end 1000 } 1001 p.block(raw.Bytes()) 1002 p.finalize(block) 1003 return end 1004} 1005 1006// returns prefix length for block code 1007func (p *Markdown) codePrefix(data []byte) int { 1008 if len(data) >= 1 && data[0] == '\t' { 1009 return 1 1010 } 1011 if len(data) >= 4 && data[0] == ' ' && data[1] == ' ' && data[2] == ' ' && data[3] == ' ' { 1012 return 4 1013 } 1014 return 0 1015} 1016 1017func (p *Markdown) code(data []byte) int { 1018 var work bytes.Buffer 1019 1020 i := 0 1021 for i < len(data) { 1022 beg := i 1023 for i < len(data) && data[i] != '\n' { 1024 i++ 1025 } 1026 if i < len(data) && data[i] == '\n' { 1027 i++ 1028 } 1029 1030 blankline := p.isEmpty(data[beg:i]) > 0 1031 if pre := p.codePrefix(data[beg:i]); pre > 0 { 1032 beg += pre 1033 } else if !blankline { 1034 // non-empty, non-prefixed line breaks the pre 1035 i = beg 1036 break 1037 } 1038 1039 // verbatim copy to the working buffer 1040 if blankline { 1041 work.WriteByte('\n') 1042 } else { 1043 work.Write(data[beg:i]) 1044 } 1045 } 1046 1047 // trim all the \n off the end of work 1048 workbytes := work.Bytes() 1049 eol := len(workbytes) 1050 for eol > 0 && workbytes[eol-1] == '\n' { 1051 eol-- 1052 } 1053 if eol != len(workbytes) { 1054 work.Truncate(eol) 1055 } 1056 1057 work.WriteByte('\n') 1058 1059 block := p.addBlock(CodeBlock, work.Bytes()) // TODO: get rid of temp buffer 1060 block.IsFenced = false 1061 finalizeCodeBlock(block) 1062 1063 return i 1064} 1065 1066// returns unordered list item prefix 1067func (p *Markdown) uliPrefix(data []byte) int { 1068 i := 0 1069 // start with up to 3 spaces 1070 for i < len(data) && i < 3 && data[i] == ' ' { 1071 i++ 1072 } 1073 if i >= len(data)-1 { 1074 return 0 1075 } 1076 // need one of {'*', '+', '-'} followed by a space or a tab 1077 if (data[i] != '*' && data[i] != '+' && data[i] != '-') || 1078 (data[i+1] != ' ' && data[i+1] != '\t') { 1079 return 0 1080 } 1081 return i + 2 1082} 1083 1084// returns ordered list item prefix 1085func (p *Markdown) oliPrefix(data []byte) int { 1086 i := 0 1087 1088 // start with up to 3 spaces 1089 for i < 3 && i < len(data) && data[i] == ' ' { 1090 i++ 1091 } 1092 1093 // count the digits 1094 start := i 1095 for i < len(data) && data[i] >= '0' && data[i] <= '9' { 1096 i++ 1097 } 1098 if start == i || i >= len(data)-1 { 1099 return 0 1100 } 1101 1102 // we need >= 1 digits followed by a dot and a space or a tab 1103 if data[i] != '.' || !(data[i+1] == ' ' || data[i+1] == '\t') { 1104 return 0 1105 } 1106 return i + 2 1107} 1108 1109// returns definition list item prefix 1110func (p *Markdown) dliPrefix(data []byte) int { 1111 if len(data) < 2 { 1112 return 0 1113 } 1114 i := 0 1115 // need a ':' followed by a space or a tab 1116 if data[i] != ':' || !(data[i+1] == ' ' || data[i+1] == '\t') { 1117 return 0 1118 } 1119 for i < len(data) && data[i] == ' ' { 1120 i++ 1121 } 1122 return i + 2 1123} 1124 1125// parse ordered or unordered list block 1126func (p *Markdown) list(data []byte, flags ListType) int { 1127 i := 0 1128 flags |= ListItemBeginningOfList 1129 block := p.addBlock(List, nil) 1130 block.ListFlags = flags 1131 block.Tight = true 1132 1133 for i < len(data) { 1134 skip := p.listItem(data[i:], &flags) 1135 if flags&ListItemContainsBlock != 0 { 1136 block.ListData.Tight = false 1137 } 1138 i += skip 1139 if skip == 0 || flags&ListItemEndOfList != 0 { 1140 break 1141 } 1142 flags &= ^ListItemBeginningOfList 1143 } 1144 1145 above := block.Parent 1146 finalizeList(block) 1147 p.tip = above 1148 return i 1149} 1150 1151// Returns true if the list item is not the same type as its parent list 1152func (p *Markdown) listTypeChanged(data []byte, flags *ListType) bool { 1153 if p.dliPrefix(data) > 0 && *flags&ListTypeDefinition == 0 { 1154 return true 1155 } else if p.oliPrefix(data) > 0 && *flags&ListTypeOrdered == 0 { 1156 return true 1157 } else if p.uliPrefix(data) > 0 && (*flags&ListTypeOrdered != 0 || *flags&ListTypeDefinition != 0) { 1158 return true 1159 } 1160 return false 1161} 1162 1163// Returns true if block ends with a blank line, descending if needed 1164// into lists and sublists. 1165func endsWithBlankLine(block *Node) bool { 1166 // TODO: figure this out. Always false now. 1167 for block != nil { 1168 //if block.lastLineBlank { 1169 //return true 1170 //} 1171 t := block.Type 1172 if t == List || t == Item { 1173 block = block.LastChild 1174 } else { 1175 break 1176 } 1177 } 1178 return false 1179} 1180 1181func finalizeList(block *Node) { 1182 block.open = false 1183 item := block.FirstChild 1184 for item != nil { 1185 // check for non-final list item ending with blank line: 1186 if endsWithBlankLine(item) && item.Next != nil { 1187 block.ListData.Tight = false 1188 break 1189 } 1190 // recurse into children of list item, to see if there are spaces 1191 // between any of them: 1192 subItem := item.FirstChild 1193 for subItem != nil { 1194 if endsWithBlankLine(subItem) && (item.Next != nil || subItem.Next != nil) { 1195 block.ListData.Tight = false 1196 break 1197 } 1198 subItem = subItem.Next 1199 } 1200 item = item.Next 1201 } 1202} 1203 1204// Parse a single list item. 1205// Assumes initial prefix is already removed if this is a sublist. 1206func (p *Markdown) listItem(data []byte, flags *ListType) int { 1207 // keep track of the indentation of the first line 1208 itemIndent := 0 1209 if data[0] == '\t' { 1210 itemIndent += 4 1211 } else { 1212 for itemIndent < 3 && data[itemIndent] == ' ' { 1213 itemIndent++ 1214 } 1215 } 1216 1217 var bulletChar byte = '*' 1218 i := p.uliPrefix(data) 1219 if i == 0 { 1220 i = p.oliPrefix(data) 1221 } else { 1222 bulletChar = data[i-2] 1223 } 1224 if i == 0 { 1225 i = p.dliPrefix(data) 1226 // reset definition term flag 1227 if i > 0 { 1228 *flags &= ^ListTypeTerm 1229 } 1230 } 1231 if i == 0 { 1232 // if in definition list, set term flag and continue 1233 if *flags&ListTypeDefinition != 0 { 1234 *flags |= ListTypeTerm 1235 } else { 1236 return 0 1237 } 1238 } 1239 1240 // skip leading whitespace on first line 1241 for i < len(data) && data[i] == ' ' { 1242 i++ 1243 } 1244 1245 // find the end of the line 1246 line := i 1247 for i > 0 && i < len(data) && data[i-1] != '\n' { 1248 i++ 1249 } 1250 1251 // get working buffer 1252 var raw bytes.Buffer 1253 1254 // put the first line into the working buffer 1255 raw.Write(data[line:i]) 1256 line = i 1257 1258 // process the following lines 1259 containsBlankLine := false 1260 sublist := 0 1261 codeBlockMarker := "" 1262 1263gatherlines: 1264 for line < len(data) { 1265 i++ 1266 1267 // find the end of this line 1268 for i < len(data) && data[i-1] != '\n' { 1269 i++ 1270 } 1271 1272 // if it is an empty line, guess that it is part of this item 1273 // and move on to the next line 1274 if p.isEmpty(data[line:i]) > 0 { 1275 containsBlankLine = true 1276 line = i 1277 continue 1278 } 1279 1280 // calculate the indentation 1281 indent := 0 1282 indentIndex := 0 1283 if data[line] == '\t' { 1284 indentIndex++ 1285 indent += 4 1286 } else { 1287 for indent < 4 && line+indent < i && data[line+indent] == ' ' { 1288 indent++ 1289 indentIndex++ 1290 } 1291 } 1292 1293 chunk := data[line+indentIndex : i] 1294 1295 if p.extensions&FencedCode != 0 { 1296 // determine if in or out of codeblock 1297 // if in codeblock, ignore normal list processing 1298 _, marker := isFenceLine(chunk, nil, codeBlockMarker) 1299 if marker != "" { 1300 if codeBlockMarker == "" { 1301 // start of codeblock 1302 codeBlockMarker = marker 1303 } else { 1304 // end of codeblock. 1305 codeBlockMarker = "" 1306 } 1307 } 1308 // we are in a codeblock, write line, and continue 1309 if codeBlockMarker != "" || marker != "" { 1310 raw.Write(data[line+indentIndex : i]) 1311 line = i 1312 continue gatherlines 1313 } 1314 } 1315 1316 // evaluate how this line fits in 1317 switch { 1318 // is this a nested list item? 1319 case (p.uliPrefix(chunk) > 0 && !p.isHRule(chunk)) || 1320 p.oliPrefix(chunk) > 0 || 1321 p.dliPrefix(chunk) > 0: 1322 1323 // to be a nested list, it must be indented more 1324 // if not, it is either a different kind of list 1325 // or the next item in the same list 1326 if indent <= itemIndent { 1327 if p.listTypeChanged(chunk, flags) { 1328 *flags |= ListItemEndOfList 1329 } else if containsBlankLine { 1330 *flags |= ListItemContainsBlock 1331 } 1332 1333 break gatherlines 1334 } 1335 1336 if containsBlankLine { 1337 *flags |= ListItemContainsBlock 1338 } 1339 1340 // is this the first item in the nested list? 1341 if sublist == 0 { 1342 sublist = raw.Len() 1343 } 1344 1345 // is this a nested prefix heading? 1346 case p.isPrefixHeading(chunk): 1347 // if the heading is not indented, it is not nested in the list 1348 // and thus ends the list 1349 if containsBlankLine && indent < 4 { 1350 *flags |= ListItemEndOfList 1351 break gatherlines 1352 } 1353 *flags |= ListItemContainsBlock 1354 1355 // anything following an empty line is only part 1356 // of this item if it is indented 4 spaces 1357 // (regardless of the indentation of the beginning of the item) 1358 case containsBlankLine && indent < 4: 1359 if *flags&ListTypeDefinition != 0 && i < len(data)-1 { 1360 // is the next item still a part of this list? 1361 next := i 1362 for next < len(data) && data[next] != '\n' { 1363 next++ 1364 } 1365 for next < len(data)-1 && data[next] == '\n' { 1366 next++ 1367 } 1368 if i < len(data)-1 && data[i] != ':' && data[next] != ':' { 1369 *flags |= ListItemEndOfList 1370 } 1371 } else { 1372 *flags |= ListItemEndOfList 1373 } 1374 break gatherlines 1375 1376 // a blank line means this should be parsed as a block 1377 case containsBlankLine: 1378 raw.WriteByte('\n') 1379 *flags |= ListItemContainsBlock 1380 } 1381 1382 // if this line was preceded by one or more blanks, 1383 // re-introduce the blank into the buffer 1384 if containsBlankLine { 1385 containsBlankLine = false 1386 raw.WriteByte('\n') 1387 } 1388 1389 // add the line into the working buffer without prefix 1390 raw.Write(data[line+indentIndex : i]) 1391 1392 line = i 1393 } 1394 1395 rawBytes := raw.Bytes() 1396 1397 block := p.addBlock(Item, nil) 1398 block.ListFlags = *flags 1399 block.Tight = false 1400 block.BulletChar = bulletChar 1401 block.Delimiter = '.' // Only '.' is possible in Markdown, but ')' will also be possible in CommonMark 1402 1403 // render the contents of the list item 1404 if *flags&ListItemContainsBlock != 0 && *flags&ListTypeTerm == 0 { 1405 // intermediate render of block item, except for definition term 1406 if sublist > 0 { 1407 p.block(rawBytes[:sublist]) 1408 p.block(rawBytes[sublist:]) 1409 } else { 1410 p.block(rawBytes) 1411 } 1412 } else { 1413 // intermediate render of inline item 1414 if sublist > 0 { 1415 child := p.addChild(Paragraph, 0) 1416 child.content = rawBytes[:sublist] 1417 p.block(rawBytes[sublist:]) 1418 } else { 1419 child := p.addChild(Paragraph, 0) 1420 child.content = rawBytes 1421 } 1422 } 1423 return line 1424} 1425 1426// render a single paragraph that has already been parsed out 1427func (p *Markdown) renderParagraph(data []byte) { 1428 if len(data) == 0 { 1429 return 1430 } 1431 1432 // trim leading spaces 1433 beg := 0 1434 for data[beg] == ' ' { 1435 beg++ 1436 } 1437 1438 end := len(data) 1439 // trim trailing newline 1440 if data[len(data)-1] == '\n' { 1441 end-- 1442 } 1443 1444 // trim trailing spaces 1445 for end > beg && data[end-1] == ' ' { 1446 end-- 1447 } 1448 1449 p.addBlock(Paragraph, data[beg:end]) 1450} 1451 1452func (p *Markdown) paragraph(data []byte) int { 1453 // prev: index of 1st char of previous line 1454 // line: index of 1st char of current line 1455 // i: index of cursor/end of current line 1456 var prev, line, i int 1457 tabSize := TabSizeDefault 1458 if p.extensions&TabSizeEight != 0 { 1459 tabSize = TabSizeDouble 1460 } 1461 // keep going until we find something to mark the end of the paragraph 1462 for i < len(data) { 1463 // mark the beginning of the current line 1464 prev = line 1465 current := data[i:] 1466 line = i 1467 1468 // did we find a reference or a footnote? If so, end a paragraph 1469 // preceding it and report that we have consumed up to the end of that 1470 // reference: 1471 if refEnd := isReference(p, current, tabSize); refEnd > 0 { 1472 p.renderParagraph(data[:i]) 1473 return i + refEnd 1474 } 1475 1476 // did we find a blank line marking the end of the paragraph? 1477 if n := p.isEmpty(current); n > 0 { 1478 // did this blank line followed by a definition list item? 1479 if p.extensions&DefinitionLists != 0 { 1480 if i < len(data)-1 && data[i+1] == ':' { 1481 return p.list(data[prev:], ListTypeDefinition) 1482 } 1483 } 1484 1485 p.renderParagraph(data[:i]) 1486 return i + n 1487 } 1488 1489 // an underline under some text marks a heading, so our paragraph ended on prev line 1490 if i > 0 { 1491 if level := p.isUnderlinedHeading(current); level > 0 { 1492 // render the paragraph 1493 p.renderParagraph(data[:prev]) 1494 1495 // ignore leading and trailing whitespace 1496 eol := i - 1 1497 for prev < eol && data[prev] == ' ' { 1498 prev++ 1499 } 1500 for eol > prev && data[eol-1] == ' ' { 1501 eol-- 1502 } 1503 1504 id := "" 1505 if p.extensions&AutoHeadingIDs != 0 { 1506 id = sanitized_anchor_name.Create(string(data[prev:eol])) 1507 } 1508 1509 block := p.addBlock(Heading, data[prev:eol]) 1510 block.Level = level 1511 block.HeadingID = id 1512 1513 // find the end of the underline 1514 for i < len(data) && data[i] != '\n' { 1515 i++ 1516 } 1517 return i 1518 } 1519 } 1520 1521 // if the next line starts a block of HTML, then the paragraph ends here 1522 if p.extensions&LaxHTMLBlocks != 0 { 1523 if data[i] == '<' && p.html(current, false) > 0 { 1524 // rewind to before the HTML block 1525 p.renderParagraph(data[:i]) 1526 return i 1527 } 1528 } 1529 1530 // if there's a prefixed heading or a horizontal rule after this, paragraph is over 1531 if p.isPrefixHeading(current) || p.isHRule(current) { 1532 p.renderParagraph(data[:i]) 1533 return i 1534 } 1535 1536 // if there's a fenced code block, paragraph is over 1537 if p.extensions&FencedCode != 0 { 1538 if p.fencedCodeBlock(current, false) > 0 { 1539 p.renderParagraph(data[:i]) 1540 return i 1541 } 1542 } 1543 1544 // if there's a definition list item, prev line is a definition term 1545 if p.extensions&DefinitionLists != 0 { 1546 if p.dliPrefix(current) != 0 { 1547 ret := p.list(data[prev:], ListTypeDefinition) 1548 return ret 1549 } 1550 } 1551 1552 // if there's a list after this, paragraph is over 1553 if p.extensions&NoEmptyLineBeforeBlock != 0 { 1554 if p.uliPrefix(current) != 0 || 1555 p.oliPrefix(current) != 0 || 1556 p.quotePrefix(current) != 0 || 1557 p.codePrefix(current) != 0 { 1558 p.renderParagraph(data[:i]) 1559 return i 1560 } 1561 } 1562 1563 // otherwise, scan to the beginning of the next line 1564 nl := bytes.IndexByte(data[i:], '\n') 1565 if nl >= 0 { 1566 i += nl + 1 1567 } else { 1568 i += len(data[i:]) 1569 } 1570 } 1571 1572 p.renderParagraph(data[:i]) 1573 return i 1574} 1575 1576func skipChar(data []byte, start int, char byte) int { 1577 i := start 1578 for i < len(data) && data[i] == char { 1579 i++ 1580 } 1581 return i 1582} 1583 1584func skipUntilChar(text []byte, start int, char byte) int { 1585 i := start 1586 for i < len(text) && text[i] != char { 1587 i++ 1588 } 1589 return i 1590} 1591