1// 2// Blackfriday Markdown Processor 3// Available at http://github.com/russross/blackfriday 4// 5// Copyright © 2011 Russ Ross <russ@russross.com>. 6// Distributed under the Simplified BSD License. 7// See README.md for details. 8// 9 10// 11// Functions to parse inline elements. 12// 13 14package blackfriday 15 16import ( 17 "bytes" 18 "regexp" 19 "strconv" 20) 21 22var ( 23 urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+` 24 anchorRe = regexp.MustCompile(`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`) 25) 26 27// Functions to parse text within a block 28// Each function returns the number of chars taken care of 29// data is the complete block being rendered 30// offset is the number of valid chars before the current cursor 31 32func (p *parser) inline(out *bytes.Buffer, data []byte) { 33 // this is called recursively: enforce a maximum depth 34 if p.nesting >= p.maxNesting { 35 return 36 } 37 p.nesting++ 38 39 i, end := 0, 0 40 for i < len(data) { 41 // copy inactive chars into the output 42 for end < len(data) && p.inlineCallback[data[end]] == nil { 43 end++ 44 } 45 46 p.r.NormalText(out, data[i:end]) 47 48 if end >= len(data) { 49 break 50 } 51 i = end 52 53 // call the trigger 54 handler := p.inlineCallback[data[end]] 55 if consumed := handler(p, out, data, i); consumed == 0 { 56 // no action from the callback; buffer the byte for later 57 end = i + 1 58 } else { 59 // skip past whatever the callback used 60 i += consumed 61 end = i 62 } 63 } 64 65 p.nesting-- 66} 67 68// single and double emphasis parsing 69func emphasis(p *parser, out *bytes.Buffer, data []byte, offset int) int { 70 data = data[offset:] 71 c := data[0] 72 ret := 0 73 74 if len(data) > 2 && data[1] != c { 75 // whitespace cannot follow an opening emphasis; 76 // strikethrough only takes two characters '~~' 77 if c == '~' || isspace(data[1]) { 78 return 0 79 } 80 if ret = helperEmphasis(p, out, data[1:], c); ret == 0 { 81 return 0 82 } 83 84 return ret + 1 85 } 86 87 if len(data) > 3 && data[1] == c && data[2] != c { 88 if isspace(data[2]) { 89 return 0 90 } 91 if ret = helperDoubleEmphasis(p, out, data[2:], c); ret == 0 { 92 return 0 93 } 94 95 return ret + 2 96 } 97 98 if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c { 99 if c == '~' || isspace(data[3]) { 100 return 0 101 } 102 if ret = helperTripleEmphasis(p, out, data, 3, c); ret == 0 { 103 return 0 104 } 105 106 return ret + 3 107 } 108 109 return 0 110} 111 112func codeSpan(p *parser, out *bytes.Buffer, data []byte, offset int) int { 113 data = data[offset:] 114 115 nb := 0 116 117 // count the number of backticks in the delimiter 118 for nb < len(data) && data[nb] == '`' { 119 nb++ 120 } 121 122 // find the next delimiter 123 i, end := 0, 0 124 for end = nb; end < len(data) && i < nb; end++ { 125 if data[end] == '`' { 126 i++ 127 } else { 128 i = 0 129 } 130 } 131 132 // no matching delimiter? 133 if i < nb && end >= len(data) { 134 return 0 135 } 136 137 // trim outside whitespace 138 fBegin := nb 139 for fBegin < end && data[fBegin] == ' ' { 140 fBegin++ 141 } 142 143 fEnd := end - nb 144 for fEnd > fBegin && data[fEnd-1] == ' ' { 145 fEnd-- 146 } 147 148 // render the code span 149 if fBegin != fEnd { 150 p.r.CodeSpan(out, data[fBegin:fEnd]) 151 } 152 153 return end 154 155} 156 157// newline preceded by two spaces becomes <br> 158// newline without two spaces works when EXTENSION_HARD_LINE_BREAK is enabled 159func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int { 160 // remove trailing spaces from out 161 outBytes := out.Bytes() 162 end := len(outBytes) 163 eol := end 164 for eol > 0 && outBytes[eol-1] == ' ' { 165 eol-- 166 } 167 out.Truncate(eol) 168 169 precededByTwoSpaces := offset >= 2 && data[offset-2] == ' ' && data[offset-1] == ' ' 170 precededByBackslash := offset >= 1 && data[offset-1] == '\\' // see http://spec.commonmark.org/0.18/#example-527 171 precededByBackslash = precededByBackslash && p.flags&EXTENSION_BACKSLASH_LINE_BREAK != 0 172 173 if p.flags&EXTENSION_JOIN_LINES != 0 { 174 return 1 175 } 176 177 // should there be a hard line break here? 178 if p.flags&EXTENSION_HARD_LINE_BREAK == 0 && !precededByTwoSpaces && !precededByBackslash { 179 return 0 180 } 181 182 if precededByBackslash && eol > 0 { 183 out.Truncate(eol - 1) 184 } 185 p.r.LineBreak(out) 186 return 1 187} 188 189type linkType int 190 191const ( 192 linkNormal linkType = iota 193 linkImg 194 linkDeferredFootnote 195 linkInlineFootnote 196) 197 198func isReferenceStyleLink(data []byte, pos int, t linkType) bool { 199 if t == linkDeferredFootnote { 200 return false 201 } 202 return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^' 203} 204 205// '[': parse a link or an image or a footnote 206func link(p *parser, out *bytes.Buffer, data []byte, offset int) int { 207 // no links allowed inside regular links, footnote, and deferred footnotes 208 if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') { 209 return 0 210 } 211 212 var t linkType 213 switch { 214 // special case: ![^text] == deferred footnote (that follows something with 215 // an exclamation point) 216 case p.flags&EXTENSION_FOOTNOTES != 0 && len(data)-1 > offset && data[offset+1] == '^': 217 t = linkDeferredFootnote 218 // ![alt] == image 219 case offset > 0 && data[offset-1] == '!': 220 t = linkImg 221 // ^[text] == inline footnote 222 // [^refId] == deferred footnote 223 case p.flags&EXTENSION_FOOTNOTES != 0: 224 if offset > 0 && data[offset-1] == '^' { 225 t = linkInlineFootnote 226 } else if len(data)-1 > offset && data[offset+1] == '^' { 227 t = linkDeferredFootnote 228 } 229 // [text] == regular link 230 default: 231 t = linkNormal 232 } 233 234 data = data[offset:] 235 236 var ( 237 i = 1 238 noteId int 239 title, link, altContent []byte 240 textHasNl = false 241 ) 242 243 if t == linkDeferredFootnote { 244 i++ 245 } 246 247 brace := 0 248 249 // look for the matching closing bracket 250 for level := 1; level > 0 && i < len(data); i++ { 251 switch { 252 case data[i] == '\n': 253 textHasNl = true 254 255 case data[i-1] == '\\': 256 continue 257 258 case data[i] == '[': 259 level++ 260 261 case data[i] == ']': 262 level-- 263 if level <= 0 { 264 i-- // compensate for extra i++ in for loop 265 } 266 } 267 } 268 269 if i >= len(data) { 270 return 0 271 } 272 273 txtE := i 274 i++ 275 276 // skip any amount of whitespace or newline 277 // (this is much more lax than original markdown syntax) 278 for i < len(data) && isspace(data[i]) { 279 i++ 280 } 281 282 switch { 283 // inline style link 284 case i < len(data) && data[i] == '(': 285 // skip initial whitespace 286 i++ 287 288 for i < len(data) && isspace(data[i]) { 289 i++ 290 } 291 292 linkB := i 293 294 // look for link end: ' " ), check for new opening braces and take this 295 // into account, this may lead for overshooting and probably will require 296 // some fine-tuning. 297 findlinkend: 298 for i < len(data) { 299 switch { 300 case data[i] == '\\': 301 i += 2 302 303 case data[i] == '(': 304 brace++ 305 i++ 306 307 case data[i] == ')': 308 if brace <= 0 { 309 break findlinkend 310 } 311 brace-- 312 i++ 313 314 case data[i] == '\'' || data[i] == '"': 315 break findlinkend 316 317 default: 318 i++ 319 } 320 } 321 322 if i >= len(data) { 323 return 0 324 } 325 linkE := i 326 327 // look for title end if present 328 titleB, titleE := 0, 0 329 if data[i] == '\'' || data[i] == '"' { 330 i++ 331 titleB = i 332 333 findtitleend: 334 for i < len(data) { 335 switch { 336 case data[i] == '\\': 337 i += 2 338 339 case data[i] == ')': 340 break findtitleend 341 342 default: 343 i++ 344 } 345 } 346 347 if i >= len(data) { 348 return 0 349 } 350 351 // skip whitespace after title 352 titleE = i - 1 353 for titleE > titleB && isspace(data[titleE]) { 354 titleE-- 355 } 356 357 // check for closing quote presence 358 if data[titleE] != '\'' && data[titleE] != '"' { 359 titleB, titleE = 0, 0 360 linkE = i 361 } 362 } 363 364 // remove whitespace at the end of the link 365 for linkE > linkB && isspace(data[linkE-1]) { 366 linkE-- 367 } 368 369 // remove optional angle brackets around the link 370 if data[linkB] == '<' { 371 linkB++ 372 } 373 if data[linkE-1] == '>' { 374 linkE-- 375 } 376 377 // build escaped link and title 378 if linkE > linkB { 379 link = data[linkB:linkE] 380 } 381 382 if titleE > titleB { 383 title = data[titleB:titleE] 384 } 385 386 i++ 387 388 // reference style link 389 case isReferenceStyleLink(data, i, t): 390 var id []byte 391 altContentConsidered := false 392 393 // look for the id 394 i++ 395 linkB := i 396 for i < len(data) && data[i] != ']' { 397 i++ 398 } 399 if i >= len(data) { 400 return 0 401 } 402 linkE := i 403 404 // find the reference 405 if linkB == linkE { 406 if textHasNl { 407 var b bytes.Buffer 408 409 for j := 1; j < txtE; j++ { 410 switch { 411 case data[j] != '\n': 412 b.WriteByte(data[j]) 413 case data[j-1] != ' ': 414 b.WriteByte(' ') 415 } 416 } 417 418 id = b.Bytes() 419 } else { 420 id = data[1:txtE] 421 altContentConsidered = true 422 } 423 } else { 424 id = data[linkB:linkE] 425 } 426 427 // find the reference with matching id 428 lr, ok := p.getRef(string(id)) 429 if !ok { 430 return 0 431 } 432 433 // keep link and title from reference 434 link = lr.link 435 title = lr.title 436 if altContentConsidered { 437 altContent = lr.text 438 } 439 i++ 440 441 // shortcut reference style link or reference or inline footnote 442 default: 443 var id []byte 444 445 // craft the id 446 if textHasNl { 447 var b bytes.Buffer 448 449 for j := 1; j < txtE; j++ { 450 switch { 451 case data[j] != '\n': 452 b.WriteByte(data[j]) 453 case data[j-1] != ' ': 454 b.WriteByte(' ') 455 } 456 } 457 458 id = b.Bytes() 459 } else { 460 if t == linkDeferredFootnote { 461 id = data[2:txtE] // get rid of the ^ 462 } else { 463 id = data[1:txtE] 464 } 465 } 466 467 if t == linkInlineFootnote { 468 // create a new reference 469 noteId = len(p.notes) + 1 470 471 var fragment []byte 472 if len(id) > 0 { 473 if len(id) < 16 { 474 fragment = make([]byte, len(id)) 475 } else { 476 fragment = make([]byte, 16) 477 } 478 copy(fragment, slugify(id)) 479 } else { 480 fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...) 481 } 482 483 ref := &reference{ 484 noteId: noteId, 485 hasBlock: false, 486 link: fragment, 487 title: id, 488 } 489 490 p.notes = append(p.notes, ref) 491 p.notesRecord[string(ref.link)] = struct{}{} 492 493 link = ref.link 494 title = ref.title 495 } else { 496 // find the reference with matching id 497 lr, ok := p.getRef(string(id)) 498 if !ok { 499 return 0 500 } 501 502 if t == linkDeferredFootnote && !p.isFootnote(lr) { 503 lr.noteId = len(p.notes) + 1 504 p.notes = append(p.notes, lr) 505 p.notesRecord[string(lr.link)] = struct{}{} 506 } 507 508 // keep link and title from reference 509 link = lr.link 510 // if inline footnote, title == footnote contents 511 title = lr.title 512 noteId = lr.noteId 513 } 514 515 // rewind the whitespace 516 i = txtE + 1 517 } 518 519 // build content: img alt is escaped, link content is parsed 520 var content bytes.Buffer 521 if txtE > 1 { 522 if t == linkImg { 523 content.Write(data[1:txtE]) 524 } else { 525 // links cannot contain other links, so turn off link parsing temporarily 526 insideLink := p.insideLink 527 p.insideLink = true 528 p.inline(&content, data[1:txtE]) 529 p.insideLink = insideLink 530 } 531 } 532 533 var uLink []byte 534 if t == linkNormal || t == linkImg { 535 if len(link) > 0 { 536 var uLinkBuf bytes.Buffer 537 unescapeText(&uLinkBuf, link) 538 uLink = uLinkBuf.Bytes() 539 } 540 541 // links need something to click on and somewhere to go 542 if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) { 543 return 0 544 } 545 } 546 547 // call the relevant rendering function 548 switch t { 549 case linkNormal: 550 if len(altContent) > 0 { 551 p.r.Link(out, uLink, title, altContent) 552 } else { 553 p.r.Link(out, uLink, title, content.Bytes()) 554 } 555 556 case linkImg: 557 outSize := out.Len() 558 outBytes := out.Bytes() 559 if outSize > 0 && outBytes[outSize-1] == '!' { 560 out.Truncate(outSize - 1) 561 } 562 563 p.r.Image(out, uLink, title, content.Bytes()) 564 565 case linkInlineFootnote: 566 outSize := out.Len() 567 outBytes := out.Bytes() 568 if outSize > 0 && outBytes[outSize-1] == '^' { 569 out.Truncate(outSize - 1) 570 } 571 572 p.r.FootnoteRef(out, link, noteId) 573 574 case linkDeferredFootnote: 575 p.r.FootnoteRef(out, link, noteId) 576 577 default: 578 return 0 579 } 580 581 return i 582} 583 584func (p *parser) inlineHTMLComment(out *bytes.Buffer, data []byte) int { 585 if len(data) < 5 { 586 return 0 587 } 588 if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' { 589 return 0 590 } 591 i := 5 592 // scan for an end-of-comment marker, across lines if necessary 593 for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') { 594 i++ 595 } 596 // no end-of-comment marker 597 if i >= len(data) { 598 return 0 599 } 600 return i + 1 601} 602 603// '<' when tags or autolinks are allowed 604func leftAngle(p *parser, out *bytes.Buffer, data []byte, offset int) int { 605 data = data[offset:] 606 altype := LINK_TYPE_NOT_AUTOLINK 607 end := tagLength(data, &altype) 608 if size := p.inlineHTMLComment(out, data); size > 0 { 609 end = size 610 } 611 if end > 2 { 612 if altype != LINK_TYPE_NOT_AUTOLINK { 613 var uLink bytes.Buffer 614 unescapeText(&uLink, data[1:end+1-2]) 615 if uLink.Len() > 0 { 616 p.r.AutoLink(out, uLink.Bytes(), altype) 617 } 618 } else { 619 p.r.RawHtmlTag(out, data[:end]) 620 } 621 } 622 623 return end 624} 625 626// '\\' backslash escape 627var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~") 628 629func escape(p *parser, out *bytes.Buffer, data []byte, offset int) int { 630 data = data[offset:] 631 632 if len(data) > 1 { 633 if bytes.IndexByte(escapeChars, data[1]) < 0 { 634 return 0 635 } 636 637 p.r.NormalText(out, data[1:2]) 638 } 639 640 return 2 641} 642 643func unescapeText(ob *bytes.Buffer, src []byte) { 644 i := 0 645 for i < len(src) { 646 org := i 647 for i < len(src) && src[i] != '\\' { 648 i++ 649 } 650 651 if i > org { 652 ob.Write(src[org:i]) 653 } 654 655 if i+1 >= len(src) { 656 break 657 } 658 659 ob.WriteByte(src[i+1]) 660 i += 2 661 } 662} 663 664// '&' escaped when it doesn't belong to an entity 665// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; 666func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int { 667 data = data[offset:] 668 669 end := 1 670 671 if end < len(data) && data[end] == '#' { 672 end++ 673 } 674 675 for end < len(data) && isalnum(data[end]) { 676 end++ 677 } 678 679 if end < len(data) && data[end] == ';' { 680 end++ // real entity 681 } else { 682 return 0 // lone '&' 683 } 684 685 p.r.Entity(out, data[:end]) 686 687 return end 688} 689 690func linkEndsWithEntity(data []byte, linkEnd int) bool { 691 entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1) 692 return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd 693} 694 695func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int { 696 // quick check to rule out most false hits on ':' 697 if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' { 698 return 0 699 } 700 701 // Now a more expensive check to see if we're not inside an anchor element 702 anchorStart := offset 703 offsetFromAnchor := 0 704 for anchorStart > 0 && data[anchorStart] != '<' { 705 anchorStart-- 706 offsetFromAnchor++ 707 } 708 709 anchorStr := anchorRe.Find(data[anchorStart:]) 710 if anchorStr != nil { 711 out.Write(anchorStr[offsetFromAnchor:]) 712 return len(anchorStr) - offsetFromAnchor 713 } 714 715 // scan backward for a word boundary 716 rewind := 0 717 for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) { 718 rewind++ 719 } 720 if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters 721 return 0 722 } 723 724 origData := data 725 data = data[offset-rewind:] 726 727 if !isSafeLink(data) { 728 return 0 729 } 730 731 linkEnd := 0 732 for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) { 733 linkEnd++ 734 } 735 736 // Skip punctuation at the end of the link 737 if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' { 738 linkEnd-- 739 } 740 741 // But don't skip semicolon if it's a part of escaped entity: 742 if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) { 743 linkEnd-- 744 } 745 746 // See if the link finishes with a punctuation sign that can be closed. 747 var copen byte 748 switch data[linkEnd-1] { 749 case '"': 750 copen = '"' 751 case '\'': 752 copen = '\'' 753 case ')': 754 copen = '(' 755 case ']': 756 copen = '[' 757 case '}': 758 copen = '{' 759 default: 760 copen = 0 761 } 762 763 if copen != 0 { 764 bufEnd := offset - rewind + linkEnd - 2 765 766 openDelim := 1 767 768 /* Try to close the final punctuation sign in this same line; 769 * if we managed to close it outside of the URL, that means that it's 770 * not part of the URL. If it closes inside the URL, that means it 771 * is part of the URL. 772 * 773 * Examples: 774 * 775 * foo http://www.pokemon.com/Pikachu_(Electric) bar 776 * => http://www.pokemon.com/Pikachu_(Electric) 777 * 778 * foo (http://www.pokemon.com/Pikachu_(Electric)) bar 779 * => http://www.pokemon.com/Pikachu_(Electric) 780 * 781 * foo http://www.pokemon.com/Pikachu_(Electric)) bar 782 * => http://www.pokemon.com/Pikachu_(Electric)) 783 * 784 * (foo http://www.pokemon.com/Pikachu_(Electric)) bar 785 * => foo http://www.pokemon.com/Pikachu_(Electric) 786 */ 787 788 for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 { 789 if origData[bufEnd] == data[linkEnd-1] { 790 openDelim++ 791 } 792 793 if origData[bufEnd] == copen { 794 openDelim-- 795 } 796 797 bufEnd-- 798 } 799 800 if openDelim == 0 { 801 linkEnd-- 802 } 803 } 804 805 // we were triggered on the ':', so we need to rewind the output a bit 806 if out.Len() >= rewind { 807 out.Truncate(len(out.Bytes()) - rewind) 808 } 809 810 var uLink bytes.Buffer 811 unescapeText(&uLink, data[:linkEnd]) 812 813 if uLink.Len() > 0 { 814 p.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL) 815 } 816 817 return linkEnd - rewind 818} 819 820func isEndOfLink(char byte) bool { 821 return isspace(char) || char == '<' 822} 823 824var validUris = [][]byte{[]byte("http://"), []byte("https://"), []byte("ftp://"), []byte("mailto://")} 825var validPaths = [][]byte{[]byte("/"), []byte("./"), []byte("../")} 826 827func isSafeLink(link []byte) bool { 828 for _, path := range validPaths { 829 if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) { 830 if len(link) == len(path) { 831 return true 832 } else if isalnum(link[len(path)]) { 833 return true 834 } 835 } 836 } 837 838 for _, prefix := range validUris { 839 // TODO: handle unicode here 840 // case-insensitive prefix test 841 if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) { 842 return true 843 } 844 } 845 846 return false 847} 848 849// return the length of the given tag, or 0 is it's not valid 850func tagLength(data []byte, autolink *int) int { 851 var i, j int 852 853 // a valid tag can't be shorter than 3 chars 854 if len(data) < 3 { 855 return 0 856 } 857 858 // begins with a '<' optionally followed by '/', followed by letter or number 859 if data[0] != '<' { 860 return 0 861 } 862 if data[1] == '/' { 863 i = 2 864 } else { 865 i = 1 866 } 867 868 if !isalnum(data[i]) { 869 return 0 870 } 871 872 // scheme test 873 *autolink = LINK_TYPE_NOT_AUTOLINK 874 875 // try to find the beginning of an URI 876 for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') { 877 i++ 878 } 879 880 if i > 1 && i < len(data) && data[i] == '@' { 881 if j = isMailtoAutoLink(data[i:]); j != 0 { 882 *autolink = LINK_TYPE_EMAIL 883 return i + j 884 } 885 } 886 887 if i > 2 && i < len(data) && data[i] == ':' { 888 *autolink = LINK_TYPE_NORMAL 889 i++ 890 } 891 892 // complete autolink test: no whitespace or ' or " 893 switch { 894 case i >= len(data): 895 *autolink = LINK_TYPE_NOT_AUTOLINK 896 case *autolink != 0: 897 j = i 898 899 for i < len(data) { 900 if data[i] == '\\' { 901 i += 2 902 } else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) { 903 break 904 } else { 905 i++ 906 } 907 908 } 909 910 if i >= len(data) { 911 return 0 912 } 913 if i > j && data[i] == '>' { 914 return i + 1 915 } 916 917 // one of the forbidden chars has been found 918 *autolink = LINK_TYPE_NOT_AUTOLINK 919 } 920 921 // look for something looking like a tag end 922 for i < len(data) && data[i] != '>' { 923 i++ 924 } 925 if i >= len(data) { 926 return 0 927 } 928 return i + 1 929} 930 931// look for the address part of a mail autolink and '>' 932// this is less strict than the original markdown e-mail address matching 933func isMailtoAutoLink(data []byte) int { 934 nb := 0 935 936 // address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' 937 for i := 0; i < len(data); i++ { 938 if isalnum(data[i]) { 939 continue 940 } 941 942 switch data[i] { 943 case '@': 944 nb++ 945 946 case '-', '.', '_': 947 // Do nothing. 948 949 case '>': 950 if nb == 1 { 951 return i + 1 952 } else { 953 return 0 954 } 955 default: 956 return 0 957 } 958 } 959 960 return 0 961} 962 963// look for the next emph char, skipping other constructs 964func helperFindEmphChar(data []byte, c byte) int { 965 i := 0 966 967 for i < len(data) { 968 for i < len(data) && data[i] != c && data[i] != '`' && data[i] != '[' { 969 i++ 970 } 971 if i >= len(data) { 972 return 0 973 } 974 // do not count escaped chars 975 if i != 0 && data[i-1] == '\\' { 976 i++ 977 continue 978 } 979 if data[i] == c { 980 return i 981 } 982 983 if data[i] == '`' { 984 // skip a code span 985 tmpI := 0 986 i++ 987 for i < len(data) && data[i] != '`' { 988 if tmpI == 0 && data[i] == c { 989 tmpI = i 990 } 991 i++ 992 } 993 if i >= len(data) { 994 return tmpI 995 } 996 i++ 997 } else if data[i] == '[' { 998 // skip a link 999 tmpI := 0 1000 i++ 1001 for i < len(data) && data[i] != ']' { 1002 if tmpI == 0 && data[i] == c { 1003 tmpI = i 1004 } 1005 i++ 1006 } 1007 i++ 1008 for i < len(data) && (data[i] == ' ' || data[i] == '\n') { 1009 i++ 1010 } 1011 if i >= len(data) { 1012 return tmpI 1013 } 1014 if data[i] != '[' && data[i] != '(' { // not a link 1015 if tmpI > 0 { 1016 return tmpI 1017 } else { 1018 continue 1019 } 1020 } 1021 cc := data[i] 1022 i++ 1023 for i < len(data) && data[i] != cc { 1024 if tmpI == 0 && data[i] == c { 1025 return i 1026 } 1027 i++ 1028 } 1029 if i >= len(data) { 1030 return tmpI 1031 } 1032 i++ 1033 } 1034 } 1035 return 0 1036} 1037 1038func helperEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int { 1039 i := 0 1040 1041 // skip one symbol if coming from emph3 1042 if len(data) > 1 && data[0] == c && data[1] == c { 1043 i = 1 1044 } 1045 1046 for i < len(data) { 1047 length := helperFindEmphChar(data[i:], c) 1048 if length == 0 { 1049 return 0 1050 } 1051 i += length 1052 if i >= len(data) { 1053 return 0 1054 } 1055 1056 if i+1 < len(data) && data[i+1] == c { 1057 i++ 1058 continue 1059 } 1060 1061 if data[i] == c && !isspace(data[i-1]) { 1062 1063 if p.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 { 1064 if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) { 1065 continue 1066 } 1067 } 1068 1069 var work bytes.Buffer 1070 p.inline(&work, data[:i]) 1071 p.r.Emphasis(out, work.Bytes()) 1072 return i + 1 1073 } 1074 } 1075 1076 return 0 1077} 1078 1079func helperDoubleEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int { 1080 i := 0 1081 1082 for i < len(data) { 1083 length := helperFindEmphChar(data[i:], c) 1084 if length == 0 { 1085 return 0 1086 } 1087 i += length 1088 1089 if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) { 1090 var work bytes.Buffer 1091 p.inline(&work, data[:i]) 1092 1093 if work.Len() > 0 { 1094 // pick the right renderer 1095 if c == '~' { 1096 p.r.StrikeThrough(out, work.Bytes()) 1097 } else { 1098 p.r.DoubleEmphasis(out, work.Bytes()) 1099 } 1100 } 1101 return i + 2 1102 } 1103 i++ 1104 } 1105 return 0 1106} 1107 1108func helperTripleEmphasis(p *parser, out *bytes.Buffer, data []byte, offset int, c byte) int { 1109 i := 0 1110 origData := data 1111 data = data[offset:] 1112 1113 for i < len(data) { 1114 length := helperFindEmphChar(data[i:], c) 1115 if length == 0 { 1116 return 0 1117 } 1118 i += length 1119 1120 // skip whitespace preceded symbols 1121 if data[i] != c || isspace(data[i-1]) { 1122 continue 1123 } 1124 1125 switch { 1126 case i+2 < len(data) && data[i+1] == c && data[i+2] == c: 1127 // triple symbol found 1128 var work bytes.Buffer 1129 1130 p.inline(&work, data[:i]) 1131 if work.Len() > 0 { 1132 p.r.TripleEmphasis(out, work.Bytes()) 1133 } 1134 return i + 3 1135 case (i+1 < len(data) && data[i+1] == c): 1136 // double symbol found, hand over to emph1 1137 length = helperEmphasis(p, out, origData[offset-2:], c) 1138 if length == 0 { 1139 return 0 1140 } else { 1141 return length - 2 1142 } 1143 default: 1144 // single symbol found, hand over to emph2 1145 length = helperDoubleEmphasis(p, out, origData[offset-1:], c) 1146 if length == 0 { 1147 return 0 1148 } else { 1149 return length - 1 1150 } 1151 } 1152 } 1153 return 0 1154} 1155