1// Copyright 2020 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Present2md converts legacy-syntax present files to Markdown-syntax present files.
6//
7// Usage:
8//
9//	present2md [-w] [file ...]
10//
11// By default, present2md prints the Markdown-syntax form of each input file to standard output.
12// If no input file is listed, standard input is used.
13//
14// The -w flag causes present2md to update the files in place, overwriting each with its
15// Markdown-syntax equivalent.
16//
17// Examples
18//
19//	present2md your.article
20//	present2md -w *.article
21//
22package main
23
24import (
25	"bytes"
26	"flag"
27	"fmt"
28	"io"
29	"io/ioutil"
30	"log"
31	"net/url"
32	"os"
33	"strings"
34	"unicode"
35	"unicode/utf8"
36
37	"golang.org/x/tools/present"
38)
39
// usage writes a one-line usage summary to standard error and then
// terminates the program with exit status 2.
func usage() {
	fmt.Fprint(os.Stderr, "usage: present2md [-w] [file ...]\n")
	os.Exit(2)
}
44
var (
	// writeBack is the -w flag: when set, each converted file
	// overwrites its input instead of being printed.
	writeBack = flag.Bool("w", false, "write conversions back to original files")

	// exitStatus is the status main passes to os.Exit after processing
	// all named files; it is set to 1 when any file fails.
	exitStatus int
)
49
50func main() {
51	log.SetPrefix("present2md: ")
52	log.SetFlags(0)
53	flag.Usage = usage
54	flag.Parse()
55
56	args := flag.Args()
57	if len(args) == 0 {
58		if *writeBack {
59			log.Fatalf("cannot use -w with standard input")
60		}
61		convert(os.Stdin, "stdin", false)
62		return
63	}
64
65	for _, arg := range args {
66		f, err := os.Open(arg)
67		if err != nil {
68			log.Print(err)
69			exitStatus = 1
70			continue
71		}
72		err = convert(f, arg, *writeBack)
73		f.Close()
74		if err != nil {
75			log.Print(err)
76			exitStatus = 1
77		}
78	}
79	os.Exit(exitStatus)
80}
81
82// convert reads the data from r, parses it as legacy present,
83// and converts it to Markdown-enabled present.
84// If any errors occur, the data is reported as coming from file.
85// If writeBack is true, the converted version is written back to file.
86// If writeBack is false, the converted version is printed to standard output.
87func convert(r io.Reader, file string, writeBack bool) error {
88	data, err := ioutil.ReadAll(r)
89	if err != nil {
90		return err
91	}
92	if bytes.HasPrefix(data, []byte("# ")) {
93		return fmt.Errorf("%v: already markdown", file)
94	}
95
96	// Convert all comments before parsing the document.
97	// The '//' comment is treated as normal text and so
98	// is passed through the translation unaltered.
99	data = bytes.Replace(data, []byte("\n#"), []byte("\n//"), -1)
100
101	doc, err := present.Parse(bytes.NewReader(data), file, 0)
102	if err != nil {
103		return err
104	}
105
106	// Title and Subtitle, Time, Tags.
107	var md bytes.Buffer
108	fmt.Fprintf(&md, "# %s\n", doc.Title)
109	if doc.Subtitle != "" {
110		fmt.Fprintf(&md, "%s\n", doc.Subtitle)
111	}
112	if !doc.Time.IsZero() {
113		fmt.Fprintf(&md, "%s\n", doc.Time.Format("2 Jan 2006"))
114	}
115	if len(doc.Tags) > 0 {
116		fmt.Fprintf(&md, "Tags: %s\n", strings.Join(doc.Tags, ", "))
117	}
118
119	// Summary, defaulting to first paragraph of section.
120	// (Summaries must be explicit for Markdown-enabled present,
121	// and the expectation is that they will be shorter than the
122	// whole first paragraph. But this is what the blog does today.)
123	if strings.HasSuffix(file, ".article") && len(doc.Sections) > 0 {
124		for _, elem := range doc.Sections[0].Elem {
125			text, ok := elem.(present.Text)
126			if !ok || text.Pre {
127				// skip everything but non-text elements
128				continue
129			}
130			fmt.Fprintf(&md, "Summary:")
131			for i, line := range text.Lines {
132				fmt.Fprintf(&md, " ")
133				printStyled(&md, line, i == 0)
134			}
135			fmt.Fprintf(&md, "\n")
136			break
137		}
138	}
139
140	// Authors
141	for _, a := range doc.Authors {
142		fmt.Fprintf(&md, "\n")
143		for _, elem := range a.Elem {
144			switch elem := elem.(type) {
145			default:
146				// Can only happen if this type switch is incomplete, which is a bug.
147				log.Fatalf("%s: unexpected author type %T", file, elem)
148			case present.Text:
149				for _, line := range elem.Lines {
150					fmt.Fprintf(&md, "%s\n", markdownEscape(line, true))
151				}
152			case present.Link:
153				fmt.Fprintf(&md, "%s\n", markdownEscape(elem.Label, true))
154			}
155		}
156	}
157
158	// Invariant: the output ends in non-blank line now,
159	// and after printing any piece of the file below,
160	// the output should still end in a non-blank line.
161	// If a blank line separator is needed, it should be printed
162	// before the block that needs separating, not after.
163
164	if len(doc.TitleNotes) > 0 {
165		fmt.Fprintf(&md, "\n")
166		for _, line := range doc.TitleNotes {
167			fmt.Fprintf(&md, ": %s\n", line)
168		}
169	}
170
171	if len(doc.Sections) == 1 && strings.HasSuffix(file, ".article") {
172		// Blog drops section headers when there is only one section.
173		// Don't print a title in this case, to make clear that it's being dropped.
174		fmt.Fprintf(&md, "\n##\n")
175		printSectionBody(file, 1, &md, doc.Sections[0].Elem)
176	} else {
177		for _, s := range doc.Sections {
178			fmt.Fprintf(&md, "\n")
179			fmt.Fprintf(&md, "## %s\n", markdownEscape(s.Title, false))
180			printSectionBody(file, 1, &md, s.Elem)
181		}
182	}
183
184	if !writeBack {
185		os.Stdout.Write(md.Bytes())
186		return nil
187	}
188	return ioutil.WriteFile(file, md.Bytes(), 0666)
189}
190
191func printSectionBody(file string, depth int, w *bytes.Buffer, elems []present.Elem) {
192	for _, elem := range elems {
193		switch elem := elem.(type) {
194		default:
195			// Can only happen if this type switch is incomplete, which is a bug.
196			log.Fatalf("%s: unexpected present element type %T", file, elem)
197
198		case present.Text:
199			fmt.Fprintf(w, "\n")
200			lines := elem.Lines
201			for len(lines) > 0 && lines[0] == "" {
202				lines = lines[1:]
203			}
204			if elem.Pre {
205				for _, line := range strings.Split(strings.TrimRight(elem.Raw, "\n"), "\n") {
206					if line == "" {
207						fmt.Fprintf(w, "\n")
208					} else {
209						fmt.Fprintf(w, "\t%s\n", line)
210					}
211				}
212			} else {
213				for _, line := range elem.Lines {
214					printStyled(w, line, true)
215					fmt.Fprintf(w, "\n")
216				}
217			}
218
219		case present.List:
220			fmt.Fprintf(w, "\n")
221			for _, item := range elem.Bullet {
222				fmt.Fprintf(w, "  - ")
223				for i, line := range strings.Split(item, "\n") {
224					if i > 0 {
225						fmt.Fprintf(w, "    ")
226					}
227					printStyled(w, line, false)
228					fmt.Fprintf(w, "\n")
229				}
230			}
231
232		case present.Section:
233			fmt.Fprintf(w, "\n")
234			sep := " "
235			if elem.Title == "" {
236				sep = ""
237			}
238			fmt.Fprintf(w, "%s%s%s\n", strings.Repeat("#", depth+2), sep, markdownEscape(elem.Title, false))
239			printSectionBody(file, depth+1, w, elem.Elem)
240
241		case interface{ PresentCmd() string }:
242			// If there are multiple present commands in a row, don't print a blank line before the second etc.
243			b := w.Bytes()
244			sep := "\n"
245			if len(b) > 0 {
246				i := bytes.LastIndexByte(b[:len(b)-1], '\n')
247				if b[i+1] == '.' {
248					sep = ""
249				}
250			}
251			fmt.Fprintf(w, "%s%s\n", sep, elem.PresentCmd())
252		}
253	}
254}
255
// markdownEscape returns s with a backslash inserted before each
// character that could otherwise be read as Markdown markup:
//
//   - a '#' that is the first byte of s,
//   - every '*' and '_',
//   - a '<' that is not preceded by a space and is followed by a
//     character other than a space,
//   - a '[' when "](" occurs at or after it in s.
//
// The startLine argument is part of the signature used by callers but
// is not consulted by this implementation.
func markdownEscape(s string, startLine bool) string {
	var out strings.Builder
	for pos, ch := range s {
		escape := false
		switch ch {
		case '*', '_':
			escape = true
		case '#':
			escape = pos == 0
		case '<':
			afterSpace := pos > 0 && s[pos-1] == ' '
			beforeText := pos+1 < len(s) && s[pos+1] != ' '
			escape = !afterSpace && beforeText
		case '[':
			escape = strings.Contains(s[pos:], "](")
		}
		if escape {
			out.WriteRune('\\')
		}
		out.WriteRune(ch)
	}
	return out.String()
}
271
272// Copy of ../../present/style.go adjusted to produce Markdown instead of HTML.
273
/*
	Fonts are demarcated by an initial and final char bracketing a
	space-delimited word, plus possibly some terminal punctuation.
	The chars are
		_ for italic
		* for bold
		` (back quote) for fixed width.
	Inner appearances of the char become spaces. For instance,
		_this_is_italic_!
	becomes the Markdown
		_this is italic_!
*/
286
287func printStyled(w *bytes.Buffer, text string, startLine bool) {
288	w.WriteString(font(text, startLine))
289}
290
291// font returns s with font indicators turned into HTML font tags.
292func font(s string, startLine bool) string {
293	if !strings.ContainsAny(s, "[`_*") {
294		return markdownEscape(s, startLine)
295	}
296	words := split(s)
297	var b bytes.Buffer
298Word:
299	for w, word := range words {
300		words[w] = markdownEscape(word, startLine && w == 0) // for all the continue Word
301		if len(word) < 2 {
302			continue Word
303		}
304		if link, _ := parseInlineLink(word); link != "" {
305			words[w] = link
306			continue Word
307		}
308		const marker = "_*`"
309		// Initial punctuation is OK but must be peeled off.
310		first := strings.IndexAny(word, marker)
311		if first == -1 {
312			continue Word
313		}
314		// Opening marker must be at the beginning of the token or else preceded by punctuation.
315		if first != 0 {
316			r, _ := utf8.DecodeLastRuneInString(word[:first])
317			if !unicode.IsPunct(r) {
318				continue Word
319			}
320		}
321		open, word := markdownEscape(word[:first], startLine && w == 0), word[first:]
322		char := word[0] // ASCII is OK.
323		close := ""
324		switch char {
325		default:
326			continue Word
327		case '_':
328			open += "_"
329			close = "_"
330		case '*':
331			open += "**"
332			close = "**"
333		case '`':
334			open += "`"
335			close = "`"
336		}
337		// Closing marker must be at the end of the token or else followed by punctuation.
338		last := strings.LastIndex(word, word[:1])
339		if last == 0 {
340			continue Word
341		}
342		if last+1 != len(word) {
343			r, _ := utf8.DecodeRuneInString(word[last+1:])
344			if !unicode.IsPunct(r) {
345				continue Word
346			}
347		}
348		head, tail := word[:last+1], word[last+1:]
349		b.Reset()
350		var wid int
351		for i := 1; i < len(head)-1; i += wid {
352			var r rune
353			r, wid = utf8.DecodeRuneInString(head[i:])
354			if r != rune(char) {
355				// Ordinary character.
356				b.WriteRune(r)
357				continue
358			}
359			if head[i+1] != char {
360				// Inner char becomes space.
361				b.WriteRune(' ')
362				continue
363			}
364			// Doubled char becomes real char.
365			// Not worth worrying about "_x__".
366			b.WriteByte(char)
367			wid++ // Consumed two chars, both ASCII.
368		}
369		text := b.String()
370		if close == "`" {
371			for strings.Contains(text, close) {
372				open += "`"
373				close += "`"
374			}
375		} else {
376			text = markdownEscape(text, false)
377		}
378		words[w] = open + text + close + tail
379	}
380	return strings.Join(words, "")
381}
382
383// split is like strings.Fields but also returns the runs of spaces
384// and treats inline links as distinct words.
385func split(s string) []string {
386	var (
387		words = make([]string, 0, 10)
388		start = 0
389	)
390
391	// appendWord appends the string s[start:end] to the words slice.
392	// If the word contains the beginning of a link, the non-link portion
393	// of the word and the entire link are appended as separate words,
394	// and the start index is advanced to the end of the link.
395	appendWord := func(end int) {
396		if j := strings.Index(s[start:end], "[["); j > -1 {
397			if _, l := parseInlineLink(s[start+j:]); l > 0 {
398				// Append portion before link, if any.
399				if j > 0 {
400					words = append(words, s[start:start+j])
401				}
402				// Append link itself.
403				words = append(words, s[start+j:start+j+l])
404				// Advance start index to end of link.
405				start = start + j + l
406				return
407			}
408		}
409		// No link; just add the word.
410		words = append(words, s[start:end])
411		start = end
412	}
413
414	wasSpace := false
415	for i, r := range s {
416		isSpace := unicode.IsSpace(r)
417		if i > start && isSpace != wasSpace {
418			appendWord(i)
419		}
420		wasSpace = isSpace
421	}
422	for start < len(s) {
423		appendWord(len(s))
424	}
425	return words
426}
427
428// parseInlineLink parses an inline link at the start of s, and returns
429// a rendered Markdown link and the total length of the raw inline link.
430// If no inline link is present, it returns all zeroes.
431func parseInlineLink(s string) (link string, length int) {
432	if !strings.HasPrefix(s, "[[") {
433		return
434	}
435	end := strings.Index(s, "]]")
436	if end == -1 {
437		return
438	}
439	urlEnd := strings.Index(s, "]")
440	rawURL := s[2:urlEnd]
441	const badURLChars = `<>"{}|\^[] ` + "`" // per RFC2396 section 2.4.3
442	if strings.ContainsAny(rawURL, badURLChars) {
443		return
444	}
445	if urlEnd == end {
446		simpleURL := ""
447		url, err := url.Parse(rawURL)
448		if err == nil {
449			// If the URL is http://foo.com, drop the http://
450			// In other words, render [[http://golang.org]] as:
451			//   <a href="http://golang.org">golang.org</a>
452			if strings.HasPrefix(rawURL, url.Scheme+"://") {
453				simpleURL = strings.TrimPrefix(rawURL, url.Scheme+"://")
454			} else if strings.HasPrefix(rawURL, url.Scheme+":") {
455				simpleURL = strings.TrimPrefix(rawURL, url.Scheme+":")
456			}
457		}
458		return renderLink(rawURL, simpleURL), end + 2
459	}
460	if s[urlEnd:urlEnd+2] != "][" {
461		return
462	}
463	text := s[urlEnd+2 : end]
464	return renderLink(rawURL, text), end + 2
465}
466
467func renderLink(href, text string) string {
468	text = font(text, false)
469	if text == "" {
470		text = markdownEscape(href, false)
471	}
472	return "[" + text + "](" + href + ")"
473}
474