1// Copyright 2020 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Present2md converts legacy-syntax present files to Markdown-syntax present files. 6// 7// Usage: 8// 9// present2md [-w] [file ...] 10// 11// By default, present2md prints the Markdown-syntax form of each input file to standard output. 12// If no input file is listed, standard input is used. 13// 14// The -w flag causes present2md to update the files in place, overwriting each with its 15// Markdown-syntax equivalent. 16// 17// Examples 18// 19// present2md your.article 20// present2md -w *.article 21// 22package main 23 24import ( 25 "bytes" 26 "flag" 27 "fmt" 28 "io" 29 "io/ioutil" 30 "log" 31 "net/url" 32 "os" 33 "strings" 34 "unicode" 35 "unicode/utf8" 36 37 "golang.org/x/tools/present" 38) 39 40func usage() { 41 fmt.Fprintf(os.Stderr, "usage: present2md [-w] [file ...]\n") 42 os.Exit(2) 43} 44 45var ( 46 writeBack = flag.Bool("w", false, "write conversions back to original files") 47 exitStatus = 0 48) 49 50func main() { 51 log.SetPrefix("present2md: ") 52 log.SetFlags(0) 53 flag.Usage = usage 54 flag.Parse() 55 56 args := flag.Args() 57 if len(args) == 0 { 58 if *writeBack { 59 log.Fatalf("cannot use -w with standard input") 60 } 61 convert(os.Stdin, "stdin", false) 62 return 63 } 64 65 for _, arg := range args { 66 f, err := os.Open(arg) 67 if err != nil { 68 log.Print(err) 69 exitStatus = 1 70 continue 71 } 72 err = convert(f, arg, *writeBack) 73 f.Close() 74 if err != nil { 75 log.Print(err) 76 exitStatus = 1 77 } 78 } 79 os.Exit(exitStatus) 80} 81 82// convert reads the data from r, parses it as legacy present, 83// and converts it to Markdown-enabled present. 84// If any errors occur, the data is reported as coming from file. 85// If writeBack is true, the converted version is written back to file. 86// If writeBack is false, the converted version is printed to standard output. 87func convert(r io.Reader, file string, writeBack bool) error { 88 data, err := ioutil.ReadAll(r) 89 if err != nil { 90 return err 91 } 92 if bytes.HasPrefix(data, []byte("# ")) { 93 return fmt.Errorf("%v: already markdown", file) 94 } 95 96 // Convert all comments before parsing the document. 97 // The '//' comment is treated as normal text and so 98 // is passed through the translation unaltered. 99 data = bytes.Replace(data, []byte("\n#"), []byte("\n//"), -1) 100 101 doc, err := present.Parse(bytes.NewReader(data), file, 0) 102 if err != nil { 103 return err 104 } 105 106 // Title and Subtitle, Time, Tags. 107 var md bytes.Buffer 108 fmt.Fprintf(&md, "# %s\n", doc.Title) 109 if doc.Subtitle != "" { 110 fmt.Fprintf(&md, "%s\n", doc.Subtitle) 111 } 112 if !doc.Time.IsZero() { 113 fmt.Fprintf(&md, "%s\n", doc.Time.Format("2 Jan 2006")) 114 } 115 if len(doc.Tags) > 0 { 116 fmt.Fprintf(&md, "Tags: %s\n", strings.Join(doc.Tags, ", ")) 117 } 118 119 // Summary, defaulting to first paragraph of section. 120 // (Summaries must be explicit for Markdown-enabled present, 121 // and the expectation is that they will be shorter than the 122 // whole first paragraph. But this is what the blog does today.) 123 if strings.HasSuffix(file, ".article") && len(doc.Sections) > 0 { 124 for _, elem := range doc.Sections[0].Elem { 125 text, ok := elem.(present.Text) 126 if !ok || text.Pre { 127 // skip everything but non-text elements 128 continue 129 } 130 fmt.Fprintf(&md, "Summary:") 131 for i, line := range text.Lines { 132 fmt.Fprintf(&md, " ") 133 printStyled(&md, line, i == 0) 134 } 135 fmt.Fprintf(&md, "\n") 136 break 137 } 138 } 139 140 // Authors 141 for _, a := range doc.Authors { 142 fmt.Fprintf(&md, "\n") 143 for _, elem := range a.Elem { 144 switch elem := elem.(type) { 145 default: 146 // Can only happen if this type switch is incomplete, which is a bug. 147 log.Fatalf("%s: unexpected author type %T", file, elem) 148 case present.Text: 149 for _, line := range elem.Lines { 150 fmt.Fprintf(&md, "%s\n", markdownEscape(line, true)) 151 } 152 case present.Link: 153 fmt.Fprintf(&md, "%s\n", markdownEscape(elem.Label, true)) 154 } 155 } 156 } 157 158 // Invariant: the output ends in non-blank line now, 159 // and after printing any piece of the file below, 160 // the output should still end in a non-blank line. 161 // If a blank line separator is needed, it should be printed 162 // before the block that needs separating, not after. 163 164 if len(doc.TitleNotes) > 0 { 165 fmt.Fprintf(&md, "\n") 166 for _, line := range doc.TitleNotes { 167 fmt.Fprintf(&md, ": %s\n", line) 168 } 169 } 170 171 if len(doc.Sections) == 1 && strings.HasSuffix(file, ".article") { 172 // Blog drops section headers when there is only one section. 173 // Don't print a title in this case, to make clear that it's being dropped. 174 fmt.Fprintf(&md, "\n##\n") 175 printSectionBody(file, 1, &md, doc.Sections[0].Elem) 176 } else { 177 for _, s := range doc.Sections { 178 fmt.Fprintf(&md, "\n") 179 fmt.Fprintf(&md, "## %s\n", markdownEscape(s.Title, false)) 180 printSectionBody(file, 1, &md, s.Elem) 181 } 182 } 183 184 if !writeBack { 185 os.Stdout.Write(md.Bytes()) 186 return nil 187 } 188 return ioutil.WriteFile(file, md.Bytes(), 0666) 189} 190 191func printSectionBody(file string, depth int, w *bytes.Buffer, elems []present.Elem) { 192 for _, elem := range elems { 193 switch elem := elem.(type) { 194 default: 195 // Can only happen if this type switch is incomplete, which is a bug. 196 log.Fatalf("%s: unexpected present element type %T", file, elem) 197 198 case present.Text: 199 fmt.Fprintf(w, "\n") 200 lines := elem.Lines 201 for len(lines) > 0 && lines[0] == "" { 202 lines = lines[1:] 203 } 204 if elem.Pre { 205 for _, line := range strings.Split(strings.TrimRight(elem.Raw, "\n"), "\n") { 206 if line == "" { 207 fmt.Fprintf(w, "\n") 208 } else { 209 fmt.Fprintf(w, "\t%s\n", line) 210 } 211 } 212 } else { 213 for _, line := range elem.Lines { 214 printStyled(w, line, true) 215 fmt.Fprintf(w, "\n") 216 } 217 } 218 219 case present.List: 220 fmt.Fprintf(w, "\n") 221 for _, item := range elem.Bullet { 222 fmt.Fprintf(w, " - ") 223 for i, line := range strings.Split(item, "\n") { 224 if i > 0 { 225 fmt.Fprintf(w, " ") 226 } 227 printStyled(w, line, false) 228 fmt.Fprintf(w, "\n") 229 } 230 } 231 232 case present.Section: 233 fmt.Fprintf(w, "\n") 234 sep := " " 235 if elem.Title == "" { 236 sep = "" 237 } 238 fmt.Fprintf(w, "%s%s%s\n", strings.Repeat("#", depth+2), sep, markdownEscape(elem.Title, false)) 239 printSectionBody(file, depth+1, w, elem.Elem) 240 241 case interface{ PresentCmd() string }: 242 // If there are multiple present commands in a row, don't print a blank line before the second etc. 243 b := w.Bytes() 244 sep := "\n" 245 if len(b) > 0 { 246 i := bytes.LastIndexByte(b[:len(b)-1], '\n') 247 if b[i+1] == '.' { 248 sep = "" 249 } 250 } 251 fmt.Fprintf(w, "%s%s\n", sep, elem.PresentCmd()) 252 } 253 } 254} 255 256func markdownEscape(s string, startLine bool) string { 257 var b strings.Builder 258 for i, r := range s { 259 switch { 260 case r == '#' && i == 0, 261 r == '*', 262 r == '_', 263 r == '<' && (i == 0 || s[i-1] != ' ') && i+1 < len(s) && s[i+1] != ' ', 264 r == '[' && strings.Contains(s[i:], "]("): 265 b.WriteRune('\\') 266 } 267 b.WriteRune(r) 268 } 269 return b.String() 270} 271 272// Copy of ../../present/style.go adjusted to produce Markdown instead of HTML. 273 274/* 275 Fonts are demarcated by an initial and final char bracketing a 276 space-delimited word, plus possibly some terminal punctuation. 277 The chars are 278 _ for italic 279 * for bold 280 ` (back quote) for fixed width. 281 Inner appearances of the char become spaces. For instance, 282 _this_is_italic_! 283 becomes 284 <i>this is italic</i>! 285*/ 286 287func printStyled(w *bytes.Buffer, text string, startLine bool) { 288 w.WriteString(font(text, startLine)) 289} 290 291// font returns s with font indicators turned into HTML font tags. 292func font(s string, startLine bool) string { 293 if !strings.ContainsAny(s, "[`_*") { 294 return markdownEscape(s, startLine) 295 } 296 words := split(s) 297 var b bytes.Buffer 298Word: 299 for w, word := range words { 300 words[w] = markdownEscape(word, startLine && w == 0) // for all the continue Word 301 if len(word) < 2 { 302 continue Word 303 } 304 if link, _ := parseInlineLink(word); link != "" { 305 words[w] = link 306 continue Word 307 } 308 const marker = "_*`" 309 // Initial punctuation is OK but must be peeled off. 310 first := strings.IndexAny(word, marker) 311 if first == -1 { 312 continue Word 313 } 314 // Opening marker must be at the beginning of the token or else preceded by punctuation. 315 if first != 0 { 316 r, _ := utf8.DecodeLastRuneInString(word[:first]) 317 if !unicode.IsPunct(r) { 318 continue Word 319 } 320 } 321 open, word := markdownEscape(word[:first], startLine && w == 0), word[first:] 322 char := word[0] // ASCII is OK. 323 close := "" 324 switch char { 325 default: 326 continue Word 327 case '_': 328 open += "_" 329 close = "_" 330 case '*': 331 open += "**" 332 close = "**" 333 case '`': 334 open += "`" 335 close = "`" 336 } 337 // Closing marker must be at the end of the token or else followed by punctuation. 338 last := strings.LastIndex(word, word[:1]) 339 if last == 0 { 340 continue Word 341 } 342 if last+1 != len(word) { 343 r, _ := utf8.DecodeRuneInString(word[last+1:]) 344 if !unicode.IsPunct(r) { 345 continue Word 346 } 347 } 348 head, tail := word[:last+1], word[last+1:] 349 b.Reset() 350 var wid int 351 for i := 1; i < len(head)-1; i += wid { 352 var r rune 353 r, wid = utf8.DecodeRuneInString(head[i:]) 354 if r != rune(char) { 355 // Ordinary character. 356 b.WriteRune(r) 357 continue 358 } 359 if head[i+1] != char { 360 // Inner char becomes space. 361 b.WriteRune(' ') 362 continue 363 } 364 // Doubled char becomes real char. 365 // Not worth worrying about "_x__". 366 b.WriteByte(char) 367 wid++ // Consumed two chars, both ASCII. 368 } 369 text := b.String() 370 if close == "`" { 371 for strings.Contains(text, close) { 372 open += "`" 373 close += "`" 374 } 375 } else { 376 text = markdownEscape(text, false) 377 } 378 words[w] = open + text + close + tail 379 } 380 return strings.Join(words, "") 381} 382 383// split is like strings.Fields but also returns the runs of spaces 384// and treats inline links as distinct words. 385func split(s string) []string { 386 var ( 387 words = make([]string, 0, 10) 388 start = 0 389 ) 390 391 // appendWord appends the string s[start:end] to the words slice. 392 // If the word contains the beginning of a link, the non-link portion 393 // of the word and the entire link are appended as separate words, 394 // and the start index is advanced to the end of the link. 395 appendWord := func(end int) { 396 if j := strings.Index(s[start:end], "[["); j > -1 { 397 if _, l := parseInlineLink(s[start+j:]); l > 0 { 398 // Append portion before link, if any. 399 if j > 0 { 400 words = append(words, s[start:start+j]) 401 } 402 // Append link itself. 403 words = append(words, s[start+j:start+j+l]) 404 // Advance start index to end of link. 405 start = start + j + l 406 return 407 } 408 } 409 // No link; just add the word. 410 words = append(words, s[start:end]) 411 start = end 412 } 413 414 wasSpace := false 415 for i, r := range s { 416 isSpace := unicode.IsSpace(r) 417 if i > start && isSpace != wasSpace { 418 appendWord(i) 419 } 420 wasSpace = isSpace 421 } 422 for start < len(s) { 423 appendWord(len(s)) 424 } 425 return words 426} 427 428// parseInlineLink parses an inline link at the start of s, and returns 429// a rendered Markdown link and the total length of the raw inline link. 430// If no inline link is present, it returns all zeroes. 431func parseInlineLink(s string) (link string, length int) { 432 if !strings.HasPrefix(s, "[[") { 433 return 434 } 435 end := strings.Index(s, "]]") 436 if end == -1 { 437 return 438 } 439 urlEnd := strings.Index(s, "]") 440 rawURL := s[2:urlEnd] 441 const badURLChars = `<>"{}|\^[] ` + "`" // per RFC2396 section 2.4.3 442 if strings.ContainsAny(rawURL, badURLChars) { 443 return 444 } 445 if urlEnd == end { 446 simpleURL := "" 447 url, err := url.Parse(rawURL) 448 if err == nil { 449 // If the URL is http://foo.com, drop the http:// 450 // In other words, render [[http://golang.org]] as: 451 // <a href="http://golang.org">golang.org</a> 452 if strings.HasPrefix(rawURL, url.Scheme+"://") { 453 simpleURL = strings.TrimPrefix(rawURL, url.Scheme+"://") 454 } else if strings.HasPrefix(rawURL, url.Scheme+":") { 455 simpleURL = strings.TrimPrefix(rawURL, url.Scheme+":") 456 } 457 } 458 return renderLink(rawURL, simpleURL), end + 2 459 } 460 if s[urlEnd:urlEnd+2] != "][" { 461 return 462 } 463 text := s[urlEnd+2 : end] 464 return renderLink(rawURL, text), end + 2 465} 466 467func renderLink(href, text string) string { 468 text = font(text, false) 469 if text == "" { 470 text = markdownEscape(href, false) 471 } 472 return "[" + text + "](" + href + ")" 473} 474