1package source
2
3import (
4	"bytes"
5	"io"
6	"regexp"
7	"strings"
8	"unicode"
9	"unicode/utf8"
10)
11
12// CommentToMarkdown converts comment text to formatted markdown.
13// The comment was prepared by DocReader,
14// so it is known not to have leading, trailing blank lines
15// nor to have trailing spaces at the end of lines.
16// The comment markers have already been removed.
17//
18// Each line is converted into a markdown line and empty lines are just converted to
19// newlines. Heading are prefixed with `### ` to make it a markdown heading.
20//
21// A span of indented lines retains a 4 space prefix block, with the common indent
22// prefix removed unless empty, in which case it will be converted to a newline.
23//
24// URLs in the comment text are converted into links.
25func CommentToMarkdown(text string) string {
26	buf := &bytes.Buffer{}
27	commentToMarkdown(buf, text)
28	return buf.String()
29}
30
// Markdown fragments, pre-converted to byte slices so they can be handed
// straight to io.Writer.Write.
var (
	// mdNewline terminates a line and separates blocks.
	mdNewline   = []byte("\n")
	// mdHeader is written in front of every heading line.
	mdHeader    = []byte("### ")
	// mdIndent is written in front of every non-blank preformatted line.
	mdIndent    = []byte("    ")
	// mdLinkStart opens the text part of a link.
	mdLinkStart = []byte("[")
	// mdLinkDiv closes the link text and opens the link destination.
	mdLinkDiv   = []byte("](")
	// mdLinkEnd closes the link destination.
	mdLinkEnd   = []byte(")")
)
39
40func commentToMarkdown(w io.Writer, text string) {
41	isFirstLine := true
42	for _, b := range blocks(text) {
43		switch b.op {
44		case opPara:
45			if !isFirstLine {
46				w.Write(mdNewline)
47			}
48
49			for _, line := range b.lines {
50				emphasize(w, line, true)
51			}
52		case opHead:
53			if !isFirstLine {
54				w.Write(mdNewline)
55			}
56			w.Write(mdNewline)
57
58			for _, line := range b.lines {
59				w.Write(mdHeader)
60				commentEscape(w, line, true)
61				w.Write(mdNewline)
62			}
63		case opPre:
64			if !isFirstLine {
65				w.Write(mdNewline)
66			}
67			w.Write(mdNewline)
68
69			for _, line := range b.lines {
70				if isBlank(line) {
71					w.Write(mdNewline)
72				} else {
73					w.Write(mdIndent)
74					w.Write([]byte(line))
75					w.Write(mdNewline)
76				}
77			}
78		}
79		isFirstLine = false
80	}
81}
82
// Unicode quotation marks substituted for the `` and '' ASCII conventions.
const (
	ulquo = "“"
	urquo = "”"
)

var (
	// markdownEscape matches, one at a time, the punctuation characters
	// that escapeRegex prefixes with a backslash.
	markdownEscape = regexp.MustCompile(`([\\\x60*{}[\]()#+\-.!_>~|"$%&'\/:;<=?@^])`)

	// unicodeQuoteReplacer maps `` to ulquo and '' to urquo.
	unicodeQuoteReplacer = strings.NewReplacer("``", ulquo, "''", urquo)
)

// commentEscape writes text to w with markdown punctuation
// backslash-escaped. If nice is set, `` is first turned into “ and ''
// into ”. The error returned by w.Write is ignored.
func commentEscape(w io.Writer, text string, nice bool) {
	escaped := text
	if nice {
		escaped = convertQuotes(escaped)
	}
	escaped = escapeRegex(escaped)
	w.Write([]byte(escaped))
}

// convertQuotes returns text with every `` replaced by “ and every ''
// replaced by ”.
func convertQuotes(text string) string {
	quoted := unicodeQuoteReplacer.Replace(text)
	return quoted
}

// escapeRegex returns text with a backslash inserted before every
// character matched by markdownEscape.
func escapeRegex(text string) string {
	const backslashThenMatch = `\$1`
	return markdownEscape.ReplaceAllString(text, backslashThenMatch)
}
111
112func emphasize(w io.Writer, line string, nice bool) {
113	for {
114		m := matchRx.FindStringSubmatchIndex(line)
115		if m == nil {
116			break
117		}
118		// m >= 6 (two parenthesized sub-regexps in matchRx, 1st one is urlRx)
119
120		// write text before match
121		commentEscape(w, line[0:m[0]], nice)
122
123		// adjust match for URLs
124		match := line[m[0]:m[1]]
125		if strings.Contains(match, "://") {
126			m0, m1 := m[0], m[1]
127			for _, s := range []string{"()", "{}", "[]"} {
128				open, close := s[:1], s[1:] // E.g., "(" and ")"
129				// require opening parentheses before closing parentheses (#22285)
130				if i := strings.Index(match, close); i >= 0 && i < strings.Index(match, open) {
131					m1 = m0 + i
132					match = line[m0:m1]
133				}
134				// require balanced pairs of parentheses (#5043)
135				for i := 0; strings.Count(match, open) != strings.Count(match, close) && i < 10; i++ {
136					m1 = strings.LastIndexAny(line[:m1], s)
137					match = line[m0:m1]
138				}
139			}
140			if m1 != m[1] {
141				// redo matching with shortened line for correct indices
142				m = matchRx.FindStringSubmatchIndex(line[:m[0]+len(match)])
143			}
144		}
145
146		// Following code has been modified from go/doc since words is always
147		// nil. All html formatting has also been transformed into markdown formatting
148
149		// analyze match
150		url := ""
151		if m[2] >= 0 {
152			url = match
153		}
154
155		// write match
156		if len(url) > 0 {
157			w.Write(mdLinkStart)
158		}
159
160		commentEscape(w, match, nice)
161
162		if len(url) > 0 {
163			w.Write(mdLinkDiv)
164			w.Write([]byte(urlReplacer.Replace(url)))
165			w.Write(mdLinkEnd)
166		}
167
168		// advance
169		line = line[m[1]:]
170	}
171	commentEscape(w, line, nice)
172}
173
174// Everything from here on is a copy of go/doc/comment.go
175
// Building blocks of the pattern used by emphasize to find URLs and
// identifiers in a line of comment text.
const (
	// Regexp for Go identifiers
	identRx = `[\pL_][\pL_0-9]*`

	// Regexp for URLs
	// Match parens, and check later for balance - see #5043, #22285
	// Match .,:;?! within path, but not at end - see #18139, #16565
	// This excludes some rare yet valid urls ending in common punctuation
	// in order to allow sentences ending in URLs.

	// protocol (required) e.g. http
	protoPart = `(https?|ftp|file|gopher|mailto|nntp)`
	// host (required) e.g. www.example.com or [::1]:8080
	hostPart = `([a-zA-Z0-9_@\-.\[\]:]+)`
	// path+query+fragment (optional) e.g. /path/index.html?q=foo#bar
	pathPart = `([.,:;?!]*[a-zA-Z0-9$'()*+&#=@~_/\-\[\]%])*`

	// urlRx is the complete URL pattern: protocol, "://", host and
	// optional path.
	urlRx = protoPart + `://` + hostPart + pathPart
)

var (
	// matchRx matches either a URL or a Go identifier. The first capture
	// group wraps urlRx, so it participates in a match only when that
	// match is a URL; emphasize relies on this to tell the two apart.
	matchRx     = regexp.MustCompile(`(` + urlRx + `)|(` + identRx + `)`)
	// urlReplacer backslash-escapes parentheses; emphasize applies it to
	// the destination of a markdown link.
	urlReplacer = strings.NewReplacer(`(`, `\(`, `)`, `\)`)
)
200
// indentLen returns the number of leading space and tab bytes of s.
func indentLen(s string) int {
	for n := 0; n < len(s); n++ {
		if c := s[n]; c != ' ' && c != '\t' {
			return n
		}
	}
	return len(s)
}
208
// isBlank reports whether s is empty or consists of a single newline.
func isBlank(s string) bool {
	return s == "" || s == "\n"
}
212
// commonPrefix returns the longest byte-wise prefix shared by a and b.
func commonPrefix(a, b string) string {
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}
	for n := 0; n < limit; n++ {
		if a[n] != b[n] {
			return a[:n]
		}
	}
	return a[:limit]
}
220
221func unindent(block []string) {
222	if len(block) == 0 {
223		return
224	}
225
226	// compute maximum common white prefix
227	prefix := block[0][0:indentLen(block[0])]
228	for _, line := range block {
229		if !isBlank(line) {
230			prefix = commonPrefix(prefix, line[0:indentLen(line)])
231		}
232	}
233	n := len(prefix)
234
235	// remove
236	for i, line := range block {
237		if !isBlank(line) {
238			block[i] = line[n:]
239		}
240	}
241}
242
// heading decides whether line qualifies as a section heading. It returns
// the line with surrounding white space removed if it does, and the empty
// string if it does not.
func heading(line string) string {
	trimmed := strings.TrimSpace(line)
	if trimmed == "" {
		return ""
	}

	// The first character has to be an uppercase letter.
	if first, _ := utf8.DecodeRuneInString(trimmed); !(unicode.IsLetter(first) && unicode.IsUpper(first)) {
		return ""
	}

	// The last character has to be a letter or a digit.
	if last, _ := utf8.DecodeLastRuneInString(trimmed); !(unicode.IsLetter(last) || unicode.IsDigit(last)) {
		return ""
	}

	// None of these characters may appear anywhere; "(),"  are allowed.
	const forbidden = ";:!?+*/=[]{}_^°&§~%#@<\">\\"
	if strings.ContainsAny(trimmed, forbidden) {
		return ""
	}

	// An apostrophe is accepted only as part of a possessive "'s": the "s"
	// must be followed by a space or end the line.
	rest := trimmed
	for {
		at := strings.IndexByte(rest, '\'')
		if at < 0 {
			break
		}
		tail := rest[at+1:]
		if tail != "s" && !strings.HasPrefix(tail, "s ") {
			return ""
		}
		rest = rest[at+2:]
	}

	// A period is accepted only when a non-space character follows it.
	rest = trimmed
	for {
		dot := strings.IndexByte(rest, '.')
		if dot < 0 {
			break
		}
		if dot == len(rest)-1 || rest[dot+1] == ' ' {
			return ""
		}
		rest = rest[dot+1:]
	}

	return trimmed
}
294
// op identifies the kind of a block.
type op int

const (
	// opPara marks a paragraph of ordinary text.
	opPara op = iota
	// opHead marks a section heading.
	opHead
	// opPre marks a preformatted block, i.e. a span of indented lines.
	opPre
)

// block is a run of comment lines that all share one kind.
type block struct {
	// op is the kind of the block.
	op    op
	// lines holds the text of the block, one entry per line.
	lines []string
}
307
// nonAlphaNumRx matches any single character that is not an ASCII letter
// or digit.
var nonAlphaNumRx = regexp.MustCompile(`[^a-zA-Z0-9]`)

// anchorID derives an anchor identifier from a heading line: every
// character matched by nonAlphaNumRx becomes "_" and the result is
// prefixed with "hdr-" so that it cannot conflict with the IDs used for
// package symbols.
func anchorID(line string) string {
	const prefix = "hdr-"
	sanitized := nonAlphaNumRx.ReplaceAllString(line, "_")
	return prefix + sanitized
}
314
315func blocks(text string) []block {
316	var (
317		out  []block
318		para []string
319
320		lastWasBlank   = false
321		lastWasHeading = false
322	)
323
324	close := func() {
325		if para != nil {
326			out = append(out, block{opPara, para})
327			para = nil
328		}
329	}
330
331	lines := strings.SplitAfter(text, "\n")
332	unindent(lines)
333	for i := 0; i < len(lines); {
334		line := lines[i]
335		if isBlank(line) {
336			// close paragraph
337			close()
338			i++
339			lastWasBlank = true
340			continue
341		}
342		if indentLen(line) > 0 {
343			// close paragraph
344			close()
345
346			// count indented or blank lines
347			j := i + 1
348			for j < len(lines) && (isBlank(lines[j]) || indentLen(lines[j]) > 0) {
349				j++
350			}
351			// but not trailing blank lines
352			for j > i && isBlank(lines[j-1]) {
353				j--
354			}
355			pre := lines[i:j]
356			i = j
357
358			unindent(pre)
359
360			// put those lines in a pre block
361			out = append(out, block{opPre, pre})
362			lastWasHeading = false
363			continue
364		}
365
366		if lastWasBlank && !lastWasHeading && i+2 < len(lines) &&
367			isBlank(lines[i+1]) && !isBlank(lines[i+2]) && indentLen(lines[i+2]) == 0 {
368			// current line is non-blank, surrounded by blank lines
369			// and the next non-blank line is not indented: this
370			// might be a heading.
371			if head := heading(line); head != "" {
372				close()
373				out = append(out, block{opHead, []string{head}})
374				i += 2
375				lastWasHeading = true
376				continue
377			}
378		}
379
380		// open paragraph
381		lastWasBlank = false
382		lastWasHeading = false
383		para = append(para, lines[i])
384		i++
385	}
386	close()
387
388	return out
389}
390