1//
2// Blackfriday Markdown Processor
3// Available at http://github.com/russross/blackfriday
4//
5// Copyright © 2011 Russ Ross <russ@russross.com>.
6// Distributed under the Simplified BSD License.
7// See README.md for details.
8//
9
10//
11// Functions to parse inline elements.
12//
13
14package blackfriday
15
16import (
17	"bytes"
18	"regexp"
19	"strconv"
20)
21
var (
	// urlRe is the regular-expression fragment used for the URLs inside an
	// anchor element: either an http/https/ftp URL or a path starting with '/'.
	urlRe = `((https?|ftp):\/\/|\/)[-A-Za-z0-9+&@#\/%?=~_|!:,.;\(\)]+`

	// anchorRe matches, at the very start of the input, a complete
	// <a href="URL" title="...">URL</a> element (the title is optional).
	anchorRe = regexp.MustCompile(
		`^(<a\shref="` + urlRe + `"(\stitle="[^"<>]+")?\s?>` + urlRe + `<\/a>)`,
	)
)
26
// Functions to parse text within a block.
// Each trigger function returns the number of bytes it consumed, starting at
// the trigger byte; 0 means the trigger was not handled.
// data is the complete block being rendered.
// offset is the index in data of the byte that triggered the function.
31
32func (p *parser) inline(out *bytes.Buffer, data []byte) {
33	// this is called recursively: enforce a maximum depth
34	if p.nesting >= p.maxNesting {
35		return
36	}
37	p.nesting++
38
39	i, end := 0, 0
40	for i < len(data) {
41		// copy inactive chars into the output
42		for end < len(data) && p.inlineCallback[data[end]] == nil {
43			end++
44		}
45
46		p.r.NormalText(out, data[i:end])
47
48		if end >= len(data) {
49			break
50		}
51		i = end
52
53		// call the trigger
54		handler := p.inlineCallback[data[end]]
55		if consumed := handler(p, out, data, i); consumed == 0 {
56			// no action from the callback; buffer the byte for later
57			end = i + 1
58		} else {
59			// skip past whatever the callback used
60			i += consumed
61			end = i
62		}
63	}
64
65	p.nesting--
66}
67
68// single and double emphasis parsing
69func emphasis(p *parser, out *bytes.Buffer, data []byte, offset int) int {
70	data = data[offset:]
71	c := data[0]
72	ret := 0
73
74	if len(data) > 2 && data[1] != c {
75		// whitespace cannot follow an opening emphasis;
76		// strikethrough only takes two characters '~~'
77		if c == '~' || isspace(data[1]) {
78			return 0
79		}
80		if ret = helperEmphasis(p, out, data[1:], c); ret == 0 {
81			return 0
82		}
83
84		return ret + 1
85	}
86
87	if len(data) > 3 && data[1] == c && data[2] != c {
88		if isspace(data[2]) {
89			return 0
90		}
91		if ret = helperDoubleEmphasis(p, out, data[2:], c); ret == 0 {
92			return 0
93		}
94
95		return ret + 2
96	}
97
98	if len(data) > 4 && data[1] == c && data[2] == c && data[3] != c {
99		if c == '~' || isspace(data[3]) {
100			return 0
101		}
102		if ret = helperTripleEmphasis(p, out, data, 3, c); ret == 0 {
103			return 0
104		}
105
106		return ret + 3
107	}
108
109	return 0
110}
111
112func codeSpan(p *parser, out *bytes.Buffer, data []byte, offset int) int {
113	data = data[offset:]
114
115	nb := 0
116
117	// count the number of backticks in the delimiter
118	for nb < len(data) && data[nb] == '`' {
119		nb++
120	}
121
122	// find the next delimiter
123	i, end := 0, 0
124	for end = nb; end < len(data) && i < nb; end++ {
125		if data[end] == '`' {
126			i++
127		} else {
128			i = 0
129		}
130	}
131
132	// no matching delimiter?
133	if i < nb && end >= len(data) {
134		return 0
135	}
136
137	// trim outside whitespace
138	fBegin := nb
139	for fBegin < end && data[fBegin] == ' ' {
140		fBegin++
141	}
142
143	fEnd := end - nb
144	for fEnd > fBegin && data[fEnd-1] == ' ' {
145		fEnd--
146	}
147
148	// render the code span
149	if fBegin != fEnd {
150		p.r.CodeSpan(out, data[fBegin:fEnd])
151	}
152
153	return end
154
155}
156
157// newline preceded by two spaces becomes <br>
158// newline without two spaces works when EXTENSION_HARD_LINE_BREAK is enabled
159func lineBreak(p *parser, out *bytes.Buffer, data []byte, offset int) int {
160	// remove trailing spaces from out
161	outBytes := out.Bytes()
162	end := len(outBytes)
163	eol := end
164	for eol > 0 && outBytes[eol-1] == ' ' {
165		eol--
166	}
167	out.Truncate(eol)
168
169	precededByTwoSpaces := offset >= 2 && data[offset-2] == ' ' && data[offset-1] == ' '
170	precededByBackslash := offset >= 1 && data[offset-1] == '\\' // see http://spec.commonmark.org/0.18/#example-527
171	precededByBackslash = precededByBackslash && p.flags&EXTENSION_BACKSLASH_LINE_BREAK != 0
172
173	if p.flags&EXTENSION_JOIN_LINES != 0 {
174		return 1
175	}
176
177	// should there be a hard line break here?
178	if p.flags&EXTENSION_HARD_LINE_BREAK == 0 && !precededByTwoSpaces && !precededByBackslash {
179		return 0
180	}
181
182	if precededByBackslash && eol > 0 {
183		out.Truncate(eol - 1)
184	}
185	p.r.LineBreak(out)
186	return 1
187}
188
// linkType identifies the kind of construct the '[' handler is parsing.
type linkType int

const (
	// linkNormal is a regular link: [text]
	linkNormal linkType = iota
	// linkImg is an image: ![alt]
	linkImg
	// linkDeferredFootnote is a footnote reference by id: [^refId]
	linkDeferredFootnote
	// linkInlineFootnote is a footnote whose text is given inline: ^[text]
	linkInlineFootnote
)
197
198func isReferenceStyleLink(data []byte, pos int, t linkType) bool {
199	if t == linkDeferredFootnote {
200		return false
201	}
202	return pos < len(data)-1 && data[pos] == '[' && data[pos+1] != '^'
203}
204
// link handles the '[' trigger: it parses a link, an image or a footnote
// reference whose opening bracket is at data[offset] and renders it into out.
//
// The kind of construct is chosen from the bytes around the bracket (a
// preceding '!' or '^', a following '^') and from EXTENSION_FOOTNOTES. After
// the matching ']' three target forms are recognized: an inline
// (url "title") target, a [id] reference, and a shortcut reference that uses
// the bracketed text itself as the id.
//
// It returns the number of bytes consumed starting at offset, or 0 when
// nothing was rendered (no matching bracket, unknown reference, empty
// target or content, ...).
func link(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// no links allowed inside regular links, footnote, and deferred footnotes
	if p.insideLink && (offset > 0 && data[offset-1] == '[' || len(data)-1 > offset && data[offset+1] == '^') {
		return 0
	}

	var t linkType
	switch {
	// special case: ![^text] == deferred footnote (that follows something with
	// an exclamation point)
	case p.flags&EXTENSION_FOOTNOTES != 0 && len(data)-1 > offset && data[offset+1] == '^':
		t = linkDeferredFootnote
	// ![alt] == image
	case offset > 0 && data[offset-1] == '!':
		t = linkImg
	// ^[text] == inline footnote
	// [^refId] == deferred footnote
	case p.flags&EXTENSION_FOOTNOTES != 0:
		if offset > 0 && data[offset-1] == '^' {
			t = linkInlineFootnote
		} else if len(data)-1 > offset && data[offset+1] == '^' {
			t = linkDeferredFootnote
		}
	// [text] == regular link
	default:
		t = linkNormal
	}

	// from here on data[0] is the opening '['
	data = data[offset:]

	var (
		i                       = 1
		noteId                  int
		title, link, altContent []byte
		textHasNl               = false
	)

	// skip the '^' that follows '[' in a deferred footnote
	if t == linkDeferredFootnote {
		i++
	}

	// nesting depth of unclosed '(' inside an inline link target
	brace := 0

	// look for the matching closing bracket
	for level := 1; level > 0 && i < len(data); i++ {
		switch {
		case data[i] == '\n':
			textHasNl = true

		// a byte preceded by a backslash neither opens nor closes a bracket
		case data[i-1] == '\\':
			continue

		case data[i] == '[':
			level++

		case data[i] == ']':
			level--
			if level <= 0 {
				i-- // compensate for extra i++ in for loop
			}
		}
	}

	// ran off the end of data without finding the matching ']'
	if i >= len(data) {
		return 0
	}

	// txtE is the index of the matching ']'
	txtE := i
	i++

	// skip any amount of whitespace or newline
	// (this is much more lax than original markdown syntax)
	for i < len(data) && isspace(data[i]) {
		i++
	}

	switch {
	// inline style link
	case i < len(data) && data[i] == '(':
		// skip initial whitespace
		i++

		for i < len(data) && isspace(data[i]) {
			i++
		}

		linkB := i

		// look for link end: ' " ), check for new opening braces and take this
		// into account, this may lead for overshooting and probably will require
		// some fine-tuning.
	findlinkend:
		for i < len(data) {
			switch {
			// skip the backslash and the byte it escapes
			case data[i] == '\\':
				i += 2

			case data[i] == '(':
				brace++
				i++

			case data[i] == ')':
				// only an unbalanced ')' ends the target
				if brace <= 0 {
					break findlinkend
				}
				brace--
				i++

			// a quote starts the optional title
			case data[i] == '\'' || data[i] == '"':
				break findlinkend

			default:
				i++
			}
		}

		if i >= len(data) {
			return 0
		}
		linkE := i

		// look for title end if present
		titleB, titleE := 0, 0
		if data[i] == '\'' || data[i] == '"' {
			i++
			titleB = i

		findtitleend:
			for i < len(data) {
				switch {
				case data[i] == '\\':
					i += 2

				case data[i] == ')':
					break findtitleend

				default:
					i++
				}
			}

			if i >= len(data) {
				return 0
			}

			// skip whitespace after title
			titleE = i - 1
			for titleE > titleB && isspace(data[titleE]) {
				titleE--
			}

			// check for closing quote presence
			if data[titleE] != '\'' && data[titleE] != '"' {
				// no closing quote: everything up to ')' belongs to the link
				titleB, titleE = 0, 0
				linkE = i
			}
		}

		// remove whitespace at the end of the link
		for linkE > linkB && isspace(data[linkE-1]) {
			linkE--
		}

		// remove optional angle brackets around the link
		if data[linkB] == '<' {
			linkB++
		}
		if data[linkE-1] == '>' {
			linkE--
		}

		// build escaped link and title
		if linkE > linkB {
			link = data[linkB:linkE]
		}

		if titleE > titleB {
			title = data[titleB:titleE]
		}

		// step past the closing ')'
		i++

	// reference style link
	case isReferenceStyleLink(data, i, t):
		var id []byte
		altContentConsidered := false

		// look for the id
		i++
		linkB := i
		for i < len(data) && data[i] != ']' {
			i++
		}
		if i >= len(data) {
			return 0
		}
		linkE := i

		// find the reference
		if linkB == linkE {
			// empty "[]": the bracketed text itself is the reference id
			if textHasNl {
				var b bytes.Buffer

				// copy the text, turning each newline into a single space
				// unless a space already precedes it
				for j := 1; j < txtE; j++ {
					switch {
					case data[j] != '\n':
						b.WriteByte(data[j])
					case data[j-1] != ' ':
						b.WriteByte(' ')
					}
				}

				id = b.Bytes()
			} else {
				id = data[1:txtE]
				altContentConsidered = true
			}
		} else {
			id = data[linkB:linkE]
		}

		// find the reference with matching id
		lr, ok := p.getRef(string(id))
		if !ok {
			return 0
		}

		// keep link and title from reference
		link = lr.link
		title = lr.title
		if altContentConsidered {
			altContent = lr.text
		}
		// step past the ']' that closes the id
		i++

	// shortcut reference style link or reference or inline footnote
	default:
		var id []byte

		// craft the id
		if textHasNl {
			var b bytes.Buffer

			// copy the text, turning each newline into a single space
			// unless a space already precedes it
			for j := 1; j < txtE; j++ {
				switch {
				case data[j] != '\n':
					b.WriteByte(data[j])
				case data[j-1] != ' ':
					b.WriteByte(' ')
				}
			}

			id = b.Bytes()
		} else {
			if t == linkDeferredFootnote {
				id = data[2:txtE] // get rid of the ^
			} else {
				id = data[1:txtE]
			}
		}

		if t == linkInlineFootnote {
			// create a new reference
			noteId = len(p.notes) + 1

			// the fragment is at most 16 bytes of slugify(id), or
			// "footnote-<noteId>" when the footnote text is empty
			var fragment []byte
			if len(id) > 0 {
				if len(id) < 16 {
					fragment = make([]byte, len(id))
				} else {
					fragment = make([]byte, 16)
				}
				copy(fragment, slugify(id))
			} else {
				fragment = append([]byte("footnote-"), []byte(strconv.Itoa(noteId))...)
			}

			ref := &reference{
				noteId:   noteId,
				hasBlock: false,
				link:     fragment,
				title:    id,
			}

			p.notes = append(p.notes, ref)
			p.notesRecord[string(ref.link)] = struct{}{}

			link = ref.link
			title = ref.title
		} else {
			// find the reference with matching id
			lr, ok := p.getRef(string(id))
			if !ok {
				return 0
			}

			// first use of this footnote: number it and record it
			if t == linkDeferredFootnote && !p.isFootnote(lr) {
				lr.noteId = len(p.notes) + 1
				p.notes = append(p.notes, lr)
				p.notesRecord[string(lr.link)] = struct{}{}
			}

			// keep link and title from reference
			link = lr.link
			// if inline footnote, title == footnote contents
			title = lr.title
			noteId = lr.noteId
		}

		// rewind the whitespace
		i = txtE + 1
	}

	// build content: img alt is escaped, link content is parsed
	var content bytes.Buffer
	if txtE > 1 {
		if t == linkImg {
			content.Write(data[1:txtE])
		} else {
			// links cannot contain other links, so turn off link parsing temporarily
			insideLink := p.insideLink
			p.insideLink = true
			p.inline(&content, data[1:txtE])
			p.insideLink = insideLink
		}
	}

	var uLink []byte
	if t == linkNormal || t == linkImg {
		if len(link) > 0 {
			var uLinkBuf bytes.Buffer
			unescapeText(&uLinkBuf, link)
			uLink = uLinkBuf.Bytes()
		}

		// links need something to click on and somewhere to go
		if len(uLink) == 0 || (t == linkNormal && content.Len() == 0) {
			return 0
		}
	}

	// call the relevant rendering function
	switch t {
	case linkNormal:
		if len(altContent) > 0 {
			p.r.Link(out, uLink, title, altContent)
		} else {
			p.r.Link(out, uLink, title, content.Bytes())
		}

	case linkImg:
		// drop a trailing '!' from out: it is the image marker that precedes '['
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '!' {
			out.Truncate(outSize - 1)
		}

		p.r.Image(out, uLink, title, content.Bytes())

	case linkInlineFootnote:
		// drop a trailing '^' from out: it is the footnote marker that precedes '['
		outSize := out.Len()
		outBytes := out.Bytes()
		if outSize > 0 && outBytes[outSize-1] == '^' {
			out.Truncate(outSize - 1)
		}

		p.r.FootnoteRef(out, link, noteId)

	case linkDeferredFootnote:
		p.r.FootnoteRef(out, link, noteId)

	default:
		return 0
	}

	return i
}
583
584func (p *parser) inlineHTMLComment(out *bytes.Buffer, data []byte) int {
585	if len(data) < 5 {
586		return 0
587	}
588	if data[0] != '<' || data[1] != '!' || data[2] != '-' || data[3] != '-' {
589		return 0
590	}
591	i := 5
592	// scan for an end-of-comment marker, across lines if necessary
593	for i < len(data) && !(data[i-2] == '-' && data[i-1] == '-' && data[i] == '>') {
594		i++
595	}
596	// no end-of-comment marker
597	if i >= len(data) {
598		return 0
599	}
600	return i + 1
601}
602
603// '<' when tags or autolinks are allowed
604func leftAngle(p *parser, out *bytes.Buffer, data []byte, offset int) int {
605	data = data[offset:]
606	altype := LINK_TYPE_NOT_AUTOLINK
607	end := tagLength(data, &altype)
608	if size := p.inlineHTMLComment(out, data); size > 0 {
609		end = size
610	}
611	if end > 2 {
612		if altype != LINK_TYPE_NOT_AUTOLINK {
613			var uLink bytes.Buffer
614			unescapeText(&uLink, data[1:end+1-2])
615			if uLink.Len() > 0 {
616				p.r.AutoLink(out, uLink.Bytes(), altype)
617			}
618		} else {
619			p.r.RawHtmlTag(out, data[:end])
620		}
621	}
622
623	return end
624}
625
626// '\\' backslash escape
627var escapeChars = []byte("\\`*_{}[]()#+-.!:|&<>~")
628
629func escape(p *parser, out *bytes.Buffer, data []byte, offset int) int {
630	data = data[offset:]
631
632	if len(data) > 1 {
633		if bytes.IndexByte(escapeChars, data[1]) < 0 {
634			return 0
635		}
636
637		p.r.NormalText(out, data[1:2])
638	}
639
640	return 2
641}
642
// unescapeText copies src into ob, removing every backslash that is followed
// by another byte and writing that following byte verbatim. A backslash that
// is the last byte of src is dropped.
func unescapeText(ob *bytes.Buffer, src []byte) {
	for len(src) > 0 {
		slash := bytes.IndexByte(src, '\\')
		if slash < 0 {
			// no more escapes: the rest is literal
			ob.Write(src)
			return
		}

		// literal text in front of the backslash
		if slash > 0 {
			ob.Write(src[:slash])
		}

		// trailing backslash with nothing to escape
		if slash+1 >= len(src) {
			return
		}

		ob.WriteByte(src[slash+1])
		src = src[slash+2:]
	}
}
663
664// '&' escaped when it doesn't belong to an entity
665// valid entities are assumed to be anything matching &#?[A-Za-z0-9]+;
666func entity(p *parser, out *bytes.Buffer, data []byte, offset int) int {
667	data = data[offset:]
668
669	end := 1
670
671	if end < len(data) && data[end] == '#' {
672		end++
673	}
674
675	for end < len(data) && isalnum(data[end]) {
676		end++
677	}
678
679	if end < len(data) && data[end] == ';' {
680		end++ // real entity
681	} else {
682		return 0 // lone '&'
683	}
684
685	p.r.Entity(out, data[:end])
686
687	return end
688}
689
690func linkEndsWithEntity(data []byte, linkEnd int) bool {
691	entityRanges := htmlEntity.FindAllIndex(data[:linkEnd], -1)
692	return entityRanges != nil && entityRanges[len(entityRanges)-1][1] == linkEnd
693}
694
// autoLink handles the ':' trigger: when the ':' at data[offset] is followed
// by "//" and preceded by a supported scheme, the surrounding URL is rendered
// as an autolink.
//
// The scheme letters in front of the ':' have already been written to out by
// the time this runs, so they are removed from out again before the link is
// rendered. The return value is the number of bytes consumed counting from
// offset, or 0 if no link was produced.
func autoLink(p *parser, out *bytes.Buffer, data []byte, offset int) int {
	// quick check to rule out most false hits on ':'
	if p.insideLink || len(data) < offset+3 || data[offset+1] != '/' || data[offset+2] != '/' {
		return 0
	}

	// Now a more expensive check to see if we're not inside an anchor element
	// (walk back to the nearest '<', or to the start of data)
	anchorStart := offset
	offsetFromAnchor := 0
	for anchorStart > 0 && data[anchorStart] != '<' {
		anchorStart--
		offsetFromAnchor++
	}

	// inside a complete <a href="...">...</a> element: copy the remainder of
	// the element, from the trigger onwards, to out unchanged
	anchorStr := anchorRe.Find(data[anchorStart:])
	if anchorStr != nil {
		out.Write(anchorStr[offsetFromAnchor:])
		return len(anchorStr) - offsetFromAnchor
	}

	// scan backward for a word boundary
	// (rewind counts the letters directly in front of the ':')
	rewind := 0
	for offset-rewind > 0 && rewind <= 7 && isletter(data[offset-rewind-1]) {
		rewind++
	}
	if rewind > 6 { // longest supported protocol is "mailto" which has 6 letters
		return 0
	}

	// from here on data starts at the first letter of the scheme;
	// origData keeps the whole block for the delimiter scan below
	origData := data
	data = data[offset-rewind:]

	if !isSafeLink(data) {
		return 0
	}

	// the link runs up to the first byte accepted by isEndOfLink
	linkEnd := 0
	for linkEnd < len(data) && !isEndOfLink(data[linkEnd]) {
		linkEnd++
	}

	// Skip punctuation at the end of the link
	if (data[linkEnd-1] == '.' || data[linkEnd-1] == ',') && data[linkEnd-2] != '\\' {
		linkEnd--
	}

	// But don't skip semicolon if it's a part of escaped entity:
	if data[linkEnd-1] == ';' && data[linkEnd-2] != '\\' && !linkEndsWithEntity(data, linkEnd) {
		linkEnd--
	}

	// See if the link finishes with a punctuation sign that can be closed.
	// copen is the opening counterpart of that sign, or 0 if there is none.
	var copen byte
	switch data[linkEnd-1] {
	case '"':
		copen = '"'
	case '\'':
		copen = '\''
	case ')':
		copen = '('
	case ']':
		copen = '['
	case '}':
		copen = '{'
	default:
		copen = 0
	}

	if copen != 0 {
		// index in origData of the byte just before the closing sign
		bufEnd := offset - rewind + linkEnd - 2

		openDelim := 1

		/* Try to close the final punctuation sign in this same line;
		 * if we managed to close it outside of the URL, that means that it's
		 * not part of the URL. If it closes inside the URL, that means it
		 * is part of the URL.
		 *
		 * Examples:
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo (http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric)
		 *
		 *      foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => http://www.pokemon.com/Pikachu_(Electric))
		 *
		 *      (foo http://www.pokemon.com/Pikachu_(Electric)) bar
		 *              => foo http://www.pokemon.com/Pikachu_(Electric)
		 */

		// walk backwards through the current line, balancing closing signs
		// against opening ones
		for bufEnd >= 0 && origData[bufEnd] != '\n' && openDelim != 0 {
			if origData[bufEnd] == data[linkEnd-1] {
				openDelim++
			}

			if origData[bufEnd] == copen {
				openDelim--
			}

			bufEnd--
		}

		// balanced: the closing sign is dropped from the link
		if openDelim == 0 {
			linkEnd--
		}
	}

	// we were triggered on the ':', so we need to rewind the output a bit
	if out.Len() >= rewind {
		out.Truncate(len(out.Bytes()) - rewind)
	}

	var uLink bytes.Buffer
	unescapeText(&uLink, data[:linkEnd])

	if uLink.Len() > 0 {
		p.r.AutoLink(out, uLink.Bytes(), LINK_TYPE_NORMAL)
	}

	// linkEnd is measured from the start of the scheme; report it from offset
	return linkEnd - rewind
}
819
820func isEndOfLink(char byte) bool {
821	return isspace(char) || char == '<'
822}
823
var (
	// validUris lists the lower-case URI prefixes accepted by isSafeLink.
	validUris = [][]byte{
		[]byte("http://"),
		[]byte("https://"),
		[]byte("ftp://"),
		[]byte("mailto://"),
	}

	// validPaths lists the path prefixes accepted by isSafeLink.
	validPaths = [][]byte{
		[]byte("/"),
		[]byte("./"),
		[]byte("../"),
	}
)
826
827func isSafeLink(link []byte) bool {
828	for _, path := range validPaths {
829		if len(link) >= len(path) && bytes.Equal(link[:len(path)], path) {
830			if len(link) == len(path) {
831				return true
832			} else if isalnum(link[len(path)]) {
833				return true
834			}
835		}
836	}
837
838	for _, prefix := range validUris {
839		// TODO: handle unicode here
840		// case-insensitive prefix test
841		if len(link) > len(prefix) && bytes.Equal(bytes.ToLower(link[:len(prefix)]), prefix) && isalnum(link[len(prefix)]) {
842			return true
843		}
844	}
845
846	return false
847}
848
849// return the length of the given tag, or 0 is it's not valid
850func tagLength(data []byte, autolink *int) int {
851	var i, j int
852
853	// a valid tag can't be shorter than 3 chars
854	if len(data) < 3 {
855		return 0
856	}
857
858	// begins with a '<' optionally followed by '/', followed by letter or number
859	if data[0] != '<' {
860		return 0
861	}
862	if data[1] == '/' {
863		i = 2
864	} else {
865		i = 1
866	}
867
868	if !isalnum(data[i]) {
869		return 0
870	}
871
872	// scheme test
873	*autolink = LINK_TYPE_NOT_AUTOLINK
874
875	// try to find the beginning of an URI
876	for i < len(data) && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-') {
877		i++
878	}
879
880	if i > 1 && i < len(data) && data[i] == '@' {
881		if j = isMailtoAutoLink(data[i:]); j != 0 {
882			*autolink = LINK_TYPE_EMAIL
883			return i + j
884		}
885	}
886
887	if i > 2 && i < len(data) && data[i] == ':' {
888		*autolink = LINK_TYPE_NORMAL
889		i++
890	}
891
892	// complete autolink test: no whitespace or ' or "
893	switch {
894	case i >= len(data):
895		*autolink = LINK_TYPE_NOT_AUTOLINK
896	case *autolink != 0:
897		j = i
898
899		for i < len(data) {
900			if data[i] == '\\' {
901				i += 2
902			} else if data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i]) {
903				break
904			} else {
905				i++
906			}
907
908		}
909
910		if i >= len(data) {
911			return 0
912		}
913		if i > j && data[i] == '>' {
914			return i + 1
915		}
916
917		// one of the forbidden chars has been found
918		*autolink = LINK_TYPE_NOT_AUTOLINK
919	}
920
921	// look for something looking like a tag end
922	for i < len(data) && data[i] != '>' {
923		i++
924	}
925	if i >= len(data) {
926		return 0
927	}
928	return i + 1
929}
930
931// look for the address part of a mail autolink and '>'
932// this is less strict than the original markdown e-mail address matching
933func isMailtoAutoLink(data []byte) int {
934	nb := 0
935
936	// address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@'
937	for i := 0; i < len(data); i++ {
938		if isalnum(data[i]) {
939			continue
940		}
941
942		switch data[i] {
943		case '@':
944			nb++
945
946		case '-', '.', '_':
947			// Do nothing.
948
949		case '>':
950			if nb == 1 {
951				return i + 1
952			} else {
953				return 0
954			}
955		default:
956			return 0
957		}
958	}
959
960	return 0
961}
962
// helperFindEmphChar looks for the next emph char c in data, skipping
// escaped bytes and trying to step over code spans and links. It returns the
// index of the char it settled on, or 0 when none was found (a match at
// index 0 is therefore indistinguishable from no match).
func helperFindEmphChar(data []byte, c byte) int {
	pos := 0

	for pos < len(data) {
		// advance to the next byte that needs a decision
		for pos < len(data) && data[pos] != c && data[pos] != '`' && data[pos] != '[' {
			pos++
		}
		if pos >= len(data) {
			return 0
		}

		// do not count escaped chars
		if pos != 0 && data[pos-1] == '\\' {
			pos++
			continue
		}

		switch data[pos] {
		case c:
			return pos

		case '`':
			// skip a code span, remembering the first c inside it in case
			// the span is never closed
			fallback := 0
			pos++
			for pos < len(data) && data[pos] != '`' {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
				pos++
			}
			if pos >= len(data) {
				return fallback
			}
			pos++

		case '[':
			// skip a link: first the bracketed text
			fallback := 0
			pos++
			for pos < len(data) && data[pos] != ']' {
				if fallback == 0 && data[pos] == c {
					fallback = pos
				}
				pos++
			}
			pos++

			// then any spaces or newlines in front of the target
			for pos < len(data) && (data[pos] == ' ' || data[pos] == '\n') {
				pos++
			}
			if pos >= len(data) {
				return fallback
			}

			// not a link
			if data[pos] != '[' && data[pos] != '(' {
				if fallback > 0 {
					return fallback
				}
				continue
			}

			// scan the target up to the next occurrence of its opening byte
			stop := data[pos]
			pos++
			for pos < len(data) && data[pos] != stop {
				if fallback == 0 && data[pos] == c {
					return pos
				}
				pos++
			}
			if pos >= len(data) {
				return fallback
			}
			pos++
		}
	}

	return 0
}
1037
1038func helperEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
1039	i := 0
1040
1041	// skip one symbol if coming from emph3
1042	if len(data) > 1 && data[0] == c && data[1] == c {
1043		i = 1
1044	}
1045
1046	for i < len(data) {
1047		length := helperFindEmphChar(data[i:], c)
1048		if length == 0 {
1049			return 0
1050		}
1051		i += length
1052		if i >= len(data) {
1053			return 0
1054		}
1055
1056		if i+1 < len(data) && data[i+1] == c {
1057			i++
1058			continue
1059		}
1060
1061		if data[i] == c && !isspace(data[i-1]) {
1062
1063			if p.flags&EXTENSION_NO_INTRA_EMPHASIS != 0 {
1064				if !(i+1 == len(data) || isspace(data[i+1]) || ispunct(data[i+1])) {
1065					continue
1066				}
1067			}
1068
1069			var work bytes.Buffer
1070			p.inline(&work, data[:i])
1071			p.r.Emphasis(out, work.Bytes())
1072			return i + 1
1073		}
1074	}
1075
1076	return 0
1077}
1078
1079func helperDoubleEmphasis(p *parser, out *bytes.Buffer, data []byte, c byte) int {
1080	i := 0
1081
1082	for i < len(data) {
1083		length := helperFindEmphChar(data[i:], c)
1084		if length == 0 {
1085			return 0
1086		}
1087		i += length
1088
1089		if i+1 < len(data) && data[i] == c && data[i+1] == c && i > 0 && !isspace(data[i-1]) {
1090			var work bytes.Buffer
1091			p.inline(&work, data[:i])
1092
1093			if work.Len() > 0 {
1094				// pick the right renderer
1095				if c == '~' {
1096					p.r.StrikeThrough(out, work.Bytes())
1097				} else {
1098					p.r.DoubleEmphasis(out, work.Bytes())
1099				}
1100			}
1101			return i + 2
1102		}
1103		i++
1104	}
1105	return 0
1106}
1107
1108func helperTripleEmphasis(p *parser, out *bytes.Buffer, data []byte, offset int, c byte) int {
1109	i := 0
1110	origData := data
1111	data = data[offset:]
1112
1113	for i < len(data) {
1114		length := helperFindEmphChar(data[i:], c)
1115		if length == 0 {
1116			return 0
1117		}
1118		i += length
1119
1120		// skip whitespace preceded symbols
1121		if data[i] != c || isspace(data[i-1]) {
1122			continue
1123		}
1124
1125		switch {
1126		case i+2 < len(data) && data[i+1] == c && data[i+2] == c:
1127			// triple symbol found
1128			var work bytes.Buffer
1129
1130			p.inline(&work, data[:i])
1131			if work.Len() > 0 {
1132				p.r.TripleEmphasis(out, work.Bytes())
1133			}
1134			return i + 3
1135		case (i+1 < len(data) && data[i+1] == c):
1136			// double symbol found, hand over to emph1
1137			length = helperEmphasis(p, out, origData[offset-2:], c)
1138			if length == 0 {
1139				return 0
1140			} else {
1141				return length - 2
1142			}
1143		default:
1144			// single symbol found, hand over to emph2
1145			length = helperDoubleEmphasis(p, out, origData[offset-1:], c)
1146			if length == 0 {
1147				return 0
1148			} else {
1149				return length - 1
1150			}
1151		}
1152	}
1153	return 0
1154}
1155