1// Copyright 2020 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Present2md converts legacy-syntax present files to Markdown-syntax present files. 6// 7// Usage: 8// 9// present2md [-w] [file ...] 10// 11// By default, present2md prints the Markdown-syntax form of each input file to standard output. 12// If no input file is listed, standard input is used. 13// 14// The -w flag causes present2md to update the files in place, overwriting each with its 15// Markdown-syntax equivalent. 16// 17// Examples 18// 19// present2md your.article 20// present2md -w *.article 21// 22package main 23 24import ( 25 "bytes" 26 "flag" 27 "fmt" 28 "io" 29 "io/ioutil" 30 "log" 31 "net/url" 32 "os" 33 "strings" 34 "unicode" 35 "unicode/utf8" 36 37 "golang.org/x/tools/present" 38) 39 40func usage() { 41 fmt.Fprintf(os.Stderr, "usage: present2md [-w] [file ...]\n") 42 os.Exit(2) 43} 44 45var ( 46 writeBack = flag.Bool("w", false, "write conversions back to original files") 47 exitStatus = 0 48) 49 50func main() { 51 log.SetPrefix("present2md: ") 52 log.SetFlags(0) 53 flag.Usage = usage 54 flag.Parse() 55 56 args := flag.Args() 57 if len(args) == 0 { 58 if *writeBack { 59 log.Fatalf("cannot use -w with standard input") 60 } 61 convert(os.Stdin, "stdin", false) 62 return 63 } 64 65 for _, arg := range args { 66 f, err := os.Open(arg) 67 if err != nil { 68 log.Print(err) 69 exitStatus = 1 70 continue 71 } 72 err = convert(f, arg, *writeBack) 73 f.Close() 74 if err != nil { 75 log.Print(err) 76 exitStatus = 1 77 } 78 } 79 os.Exit(exitStatus) 80} 81 82// convert reads the data from r, parses it as legacy present, 83// and converts it to Markdown-enabled present. 84// If any errors occur, the data is reported as coming from file. 85// If writeBack is true, the converted version is written back to file. 86// If writeBack is false, the converted version is printed to standard output. 
87func convert(r io.Reader, file string, writeBack bool) error { 88 data, err := ioutil.ReadAll(r) 89 if err != nil { 90 return err 91 } 92 if bytes.HasPrefix(data, []byte("# ")) { 93 return fmt.Errorf("%v: already markdown", file) 94 } 95 96 doc, err := present.Parse(bytes.NewReader(data), file, 0) 97 if err != nil { 98 return err 99 } 100 101 // Title and Subtitle, Time, Tags. 102 var md bytes.Buffer 103 fmt.Fprintf(&md, "# %s\n", doc.Title) 104 if doc.Subtitle != "" { 105 fmt.Fprintf(&md, "%s\n", doc.Subtitle) 106 } 107 if !doc.Time.IsZero() { 108 fmt.Fprintf(&md, "%s\n", doc.Time.Format("2 Jan 2006")) 109 } 110 if len(doc.Tags) > 0 { 111 fmt.Fprintf(&md, "Tags: %s\n", strings.Join(doc.Tags, ", ")) 112 } 113 114 // Summary, defaulting to first paragraph of section. 115 // (Summaries must be explicit for Markdown-enabled present, 116 // and the expectation is that they will be shorter than the 117 // whole first paragraph. But this is what the blog does today.) 118 if strings.HasSuffix(file, ".article") && len(doc.Sections) > 0 { 119 for _, elem := range doc.Sections[0].Elem { 120 text, ok := elem.(present.Text) 121 if !ok || text.Pre { 122 // skip everything but non-text elements 123 continue 124 } 125 fmt.Fprintf(&md, "Summary:") 126 for i, line := range text.Lines { 127 fmt.Fprintf(&md, " ") 128 printStyled(&md, line, i == 0) 129 } 130 fmt.Fprintf(&md, "\n") 131 break 132 } 133 } 134 135 // Authors 136 for _, a := range doc.Authors { 137 fmt.Fprintf(&md, "\n") 138 for _, elem := range a.Elem { 139 switch elem := elem.(type) { 140 default: 141 // Can only happen if this type switch is incomplete, which is a bug. 
142 log.Fatalf("%s: unexpected author type %T", file, elem) 143 case present.Text: 144 for _, line := range elem.Lines { 145 fmt.Fprintf(&md, "%s\n", markdownEscape(line, true)) 146 } 147 case present.Link: 148 fmt.Fprintf(&md, "%s\n", markdownEscape(elem.Label, true)) 149 } 150 } 151 } 152 153 // Invariant: the output ends in non-blank line now, 154 // and after printing any piece of the file below, 155 // the output should still end in a non-blank line. 156 // If a blank line separator is needed, it should be printed 157 // before the block that needs separating, not after. 158 159 if len(doc.TitleNotes) > 0 { 160 fmt.Fprintf(&md, "\n") 161 for _, line := range doc.TitleNotes { 162 fmt.Fprintf(&md, ": %s\n", line) 163 } 164 } 165 166 if len(doc.Sections) == 1 && strings.HasSuffix(file, ".article") { 167 // Blog drops section headers when there is only one section. 168 // Don't print a title in this case, to make clear that it's being dropped. 169 fmt.Fprintf(&md, "\n##\n") 170 printSectionBody(file, 1, &md, doc.Sections[0].Elem) 171 } else { 172 for _, s := range doc.Sections { 173 fmt.Fprintf(&md, "\n") 174 fmt.Fprintf(&md, "## %s\n", markdownEscape(s.Title, false)) 175 printSectionBody(file, 1, &md, s.Elem) 176 } 177 } 178 179 if !writeBack { 180 os.Stdout.Write(md.Bytes()) 181 return nil 182 } 183 return ioutil.WriteFile(file, md.Bytes(), 0666) 184} 185 186func printSectionBody(file string, depth int, w *bytes.Buffer, elems []present.Elem) { 187 for _, elem := range elems { 188 switch elem := elem.(type) { 189 default: 190 // Can only happen if this type switch is incomplete, which is a bug. 
191 log.Fatalf("%s: unexpected present element type %T", file, elem) 192 193 case present.Text: 194 fmt.Fprintf(w, "\n") 195 lines := elem.Lines 196 for len(lines) > 0 && lines[0] == "" { 197 lines = lines[1:] 198 } 199 if elem.Pre { 200 for _, line := range strings.Split(strings.TrimRight(elem.Raw, "\n"), "\n") { 201 if line == "" { 202 fmt.Fprintf(w, "\n") 203 } else { 204 fmt.Fprintf(w, "\t%s\n", line) 205 } 206 } 207 } else { 208 for _, line := range elem.Lines { 209 printStyled(w, line, true) 210 fmt.Fprintf(w, "\n") 211 } 212 } 213 214 case present.List: 215 fmt.Fprintf(w, "\n") 216 for _, item := range elem.Bullet { 217 fmt.Fprintf(w, " - ") 218 for i, line := range strings.Split(item, "\n") { 219 if i > 0 { 220 fmt.Fprintf(w, " ") 221 } 222 printStyled(w, line, false) 223 fmt.Fprintf(w, "\n") 224 } 225 } 226 227 case present.Section: 228 fmt.Fprintf(w, "\n") 229 sep := " " 230 if elem.Title == "" { 231 sep = "" 232 } 233 fmt.Fprintf(w, "%s%s%s\n", strings.Repeat("#", depth+2), sep, markdownEscape(elem.Title, false)) 234 printSectionBody(file, depth+1, w, elem.Elem) 235 236 case interface{ PresentCmd() string }: 237 // If there are multiple present commands in a row, don't print a blank line before the second etc. 238 b := w.Bytes() 239 sep := "\n" 240 if len(b) > 0 { 241 i := bytes.LastIndexByte(b[:len(b)-1], '\n') 242 if b[i+1] == '.' { 243 sep = "" 244 } 245 } 246 fmt.Fprintf(w, "%s%s\n", sep, elem.PresentCmd()) 247 } 248 } 249} 250 251func markdownEscape(s string, startLine bool) string { 252 var b strings.Builder 253 for i, r := range s { 254 switch { 255 case r == '#' && i == 0, 256 r == '*', 257 r == '_', 258 r == '<' && (i == 0 || s[i-1] != ' ') && i+1 < len(s) && s[i+1] != ' ', 259 r == '[' && strings.Contains(s[i:], "]("): 260 b.WriteRune('\\') 261 } 262 b.WriteRune(r) 263 } 264 return b.String() 265} 266 267// Copy of ../../present/style.go adjusted to produce Markdown instead of HTML. 
/*
	Fonts are demarcated by an initial and final char bracketing a
	space-delimited word, plus possibly some terminal punctuation.
	The chars are
		_ for italic
		* for bold
		` (back quote) for fixed width.
	Inner appearances of the char become spaces. For instance,
		_this_is_italic_!
	becomes
		_this is italic_!
	in the Markdown output. Bold is written with ** and fixed width
	with back quotes.
*/

// printStyled writes text to w after converting its legacy font markers
// and inline links with font. startLine is passed through to font.
func printStyled(w *bytes.Buffer, text string, startLine bool) {
	w.WriteString(font(text, startLine))
}

// font returns s with legacy font indicators and inline links turned into
// their Markdown equivalents, and with all other text Markdown-escaped.
// startLine is forwarded to markdownEscape for the first word only.
func font(s string, startLine bool) string {
	// Fast path: nothing that could be a font marker or a link.
	if !strings.ContainsAny(s, "[`_*") {
		return markdownEscape(s, startLine)
	}
	words := split(s)
	var b bytes.Buffer
Word:
	for w, word := range words {
		words[w] = markdownEscape(word, startLine && w == 0) // for all the continue Word
		if len(word) < 2 {
			continue Word
		}
		if link, _ := parseInlineLink(word); link != "" {
			words[w] = link
			continue Word
		}
		const marker = "_*`"
		// Initial punctuation is OK but must be peeled off.
		first := strings.IndexAny(word, marker)
		if first == -1 {
			continue Word
		}
		// Opening marker must be at the beginning of the token or else preceded by punctuation.
		if first != 0 {
			r, _ := utf8.DecodeLastRuneInString(word[:first])
			if !unicode.IsPunct(r) {
				continue Word
			}
		}
		// From here on, word is shadowed: it starts at the opening marker,
		// and open holds the escaped leading punctuation.
		open, word := markdownEscape(word[:first], startLine && w == 0), word[first:]
		char := word[0] // ASCII is OK.
		close := ""
		switch char {
		default:
			continue Word
		case '_':
			open += "_"
			close = "_"
		case '*':
			open += "**"
			close = "**"
		case '`':
			open += "`"
			close = "`"
		}
		// Closing marker must be at the end of the token or else followed by punctuation.
		last := strings.LastIndex(word, word[:1])
		if last == 0 {
			continue Word
		}
		if last+1 != len(word) {
			r, _ := utf8.DecodeRuneInString(word[last+1:])
			if !unicode.IsPunct(r) {
				continue Word
			}
		}
		// head is the marked-up span including both markers;
		// tail is any trailing punctuation after the closing marker.
		head, tail := word[:last+1], word[last+1:]
		b.Reset()
		var wid int
		// Walk the text strictly between the opening and closing markers.
		for i := 1; i < len(head)-1; i += wid {
			var r rune
			r, wid = utf8.DecodeRuneInString(head[i:])
			if r != rune(char) {
				// Ordinary character.
				b.WriteRune(r)
				continue
			}
			if head[i+1] != char {
				// Inner char becomes space.
				b.WriteRune(' ')
				continue
			}
			// Doubled char becomes real char.
			// Not worth worrying about "_x__".
			b.WriteByte(char)
			wid++ // Consumed two chars, both ASCII.
		}
		text := b.String()
		if close == "`" {
			// Code spans are not escaped. Instead, lengthen the back-quote
			// fence until it no longer occurs inside the text.
			for strings.Contains(text, close) {
				open += "`"
				close += "`"
			}
		} else {
			text = markdownEscape(text, false)
		}
		words[w] = open + text + close + tail
	}
	return strings.Join(words, "")
}

// split is like strings.Fields but also returns the runs of spaces
// and treats inline links as distinct words.
func split(s string) []string {
	var (
		words = make([]string, 0, 10)
		start = 0
	)

	// appendWord appends the string s[start:end] to the words slice.
	// If the word contains the beginning of a link, the non-link portion
	// of the word and the entire link are appended as separate words,
	// and the start index is advanced to the end of the link.
	appendWord := func(end int) {
		if j := strings.Index(s[start:end], "[["); j > -1 {
			// The link is parsed from the rest of s, not just s[start:end],
			// so it may extend past end (for example when it contains spaces).
			if _, l := parseInlineLink(s[start+j:]); l > 0 {
				// Append portion before link, if any.
				if j > 0 {
					words = append(words, s[start:start+j])
				}
				// Append link itself.
				words = append(words, s[start+j:start+j+l])
				// Advance start index to end of link.
				start = start + j + l
				return
			}
		}
		// No link; just add the word.
		words = append(words, s[start:end])
		start = end
	}

	wasSpace := false
	for i, r := range s {
		isSpace := unicode.IsSpace(r)
		// A word boundary is a space/non-space transition. The i > start
		// test skips positions already consumed by a link.
		if i > start && isSpace != wasSpace {
			appendWord(i)
		}
		wasSpace = isSpace
	}
	// Flush whatever remains; loop because appendWord may stop at the end
	// of a link that is followed by more text.
	for start < len(s) {
		appendWord(len(s))
	}
	return words
}

// parseInlineLink parses an inline link at the start of s, and returns
// a rendered Markdown link and the total length of the raw inline link.
// If no inline link is present, it returns all zeroes.
//
// Two forms are recognized: [[url]] and [[url][text]].
func parseInlineLink(s string) (link string, length int) {
	if !strings.HasPrefix(s, "[[") {
		return
	}
	end := strings.Index(s, "]]")
	if end == -1 {
		return
	}
	// The first ']' ends the URL; it is at or before end because "]]" was found.
	urlEnd := strings.Index(s, "]")
	rawURL := s[2:urlEnd]
	const badURLChars = `<>"{}|\^[] ` + "`" // per RFC2396 section 2.4.3
	if strings.ContainsAny(rawURL, badURLChars) {
		return
	}
	if urlEnd == end {
		// [[url]] form: no explicit link text.
		simpleURL := ""
		url, err := url.Parse(rawURL)
		if err == nil {
			// If the URL is http://foo.com, drop the http://
			// In other words, render [[http://golang.org]] as:
			//   [golang.org](http://golang.org)
			if strings.HasPrefix(rawURL, url.Scheme+"://") {
				simpleURL = strings.TrimPrefix(rawURL, url.Scheme+"://")
			} else if strings.HasPrefix(rawURL, url.Scheme+":") {
				simpleURL = strings.TrimPrefix(rawURL, url.Scheme+":")
			}
		}
		return renderLink(rawURL, simpleURL), end + 2
	}
	// [[url][text]] form: the URL must be followed immediately by "][".
	if s[urlEnd:urlEnd+2] != "][" {
		return
	}
	text := s[urlEnd+2 : end]
	return renderLink(rawURL, text), end + 2
}

// renderLink returns the Markdown link [text](href).
// text is first converted with font; if the result is empty,
// the Markdown-escaped href is used as the link text instead.
func renderLink(href, text string) string {
	text = font(text, false)
	if text == "" {
		text = markdownEscape(href, false)
	}
	return "[" + text + "](" + href + ")"
}