1// 2// Blackfriday Markdown Processor 3// Available at http://github.com/russross/blackfriday 4// 5// Copyright © 2011 Russ Ross <russ@russross.com>. 6// Distributed under the Simplified BSD License. 7// See README.md for details. 8// 9 10// 11// 12// SmartyPants rendering 13// 14// 15 16package blackfriday 17 18import ( 19 "bytes" 20 "io" 21) 22 23// SPRenderer is a struct containing state of a Smartypants renderer. 24type SPRenderer struct { 25 inSingleQuote bool 26 inDoubleQuote bool 27 callbacks [256]smartCallback 28} 29 30func wordBoundary(c byte) bool { 31 return c == 0 || isspace(c) || ispunct(c) 32} 33 34func tolower(c byte) byte { 35 if c >= 'A' && c <= 'Z' { 36 return c - 'A' + 'a' 37 } 38 return c 39} 40 41func isdigit(c byte) bool { 42 return c >= '0' && c <= '9' 43} 44 45func smartQuoteHelper(out *bytes.Buffer, previousChar byte, nextChar byte, quote byte, isOpen *bool, addNBSP bool) bool { 46 // edge of the buffer is likely to be a tag that we don't get to see, 47 // so we treat it like text sometimes 48 49 // enumerate all sixteen possibilities for (previousChar, nextChar) 50 // each can be one of {0, space, punct, other} 51 switch { 52 case previousChar == 0 && nextChar == 0: 53 // context is not any help here, so toggle 54 *isOpen = !*isOpen 55 case isspace(previousChar) && nextChar == 0: 56 // [ "] might be [ "<code>foo...] 57 *isOpen = true 58 case ispunct(previousChar) && nextChar == 0: 59 // [!"] hmm... could be [Run!"] or [("<code>...] 60 *isOpen = false 61 case /* isnormal(previousChar) && */ nextChar == 0: 62 // [a"] is probably a close 63 *isOpen = false 64 case previousChar == 0 && isspace(nextChar): 65 // [" ] might be [...foo</code>" ] 66 *isOpen = false 67 case isspace(previousChar) && isspace(nextChar): 68 // [ " ] context is not any help here, so toggle 69 *isOpen = !*isOpen 70 case ispunct(previousChar) && isspace(nextChar): 71 // [!" ] is probably a close 72 *isOpen = false 73 case /* isnormal(previousChar) && */ isspace(nextChar): 74 // [a" ] this is one of the easy cases 75 *isOpen = false 76 case previousChar == 0 && ispunct(nextChar): 77 // ["!] hmm... could be ["$1.95] or [</code>"!...] 78 *isOpen = false 79 case isspace(previousChar) && ispunct(nextChar): 80 // [ "!] looks more like [ "$1.95] 81 *isOpen = true 82 case ispunct(previousChar) && ispunct(nextChar): 83 // [!"!] context is not any help here, so toggle 84 *isOpen = !*isOpen 85 case /* isnormal(previousChar) && */ ispunct(nextChar): 86 // [a"!] is probably a close 87 *isOpen = false 88 case previousChar == 0 /* && isnormal(nextChar) */ : 89 // ["a] is probably an open 90 *isOpen = true 91 case isspace(previousChar) /* && isnormal(nextChar) */ : 92 // [ "a] this is one of the easy cases 93 *isOpen = true 94 case ispunct(previousChar) /* && isnormal(nextChar) */ : 95 // [!"a] is probably an open 96 *isOpen = true 97 default: 98 // [a'b] maybe a contraction? 99 *isOpen = false 100 } 101 102 // Note that with the limited lookahead, this non-breaking 103 // space will also be appended to single double quotes. 104 if addNBSP && !*isOpen { 105 out.WriteString(" ") 106 } 107 108 out.WriteByte('&') 109 if *isOpen { 110 out.WriteByte('l') 111 } else { 112 out.WriteByte('r') 113 } 114 out.WriteByte(quote) 115 out.WriteString("quo;") 116 117 if addNBSP && *isOpen { 118 out.WriteString(" ") 119 } 120 121 return true 122} 123 124func (r *SPRenderer) smartSingleQuote(out *bytes.Buffer, previousChar byte, text []byte) int { 125 if len(text) >= 2 { 126 t1 := tolower(text[1]) 127 128 if t1 == '\'' { 129 nextChar := byte(0) 130 if len(text) >= 3 { 131 nextChar = text[2] 132 } 133 if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) { 134 return 1 135 } 136 } 137 138 if (t1 == 's' || t1 == 't' || t1 == 'm' || t1 == 'd') && (len(text) < 3 || wordBoundary(text[2])) { 139 out.WriteString("’") 140 return 0 141 } 142 143 if len(text) >= 3 { 144 t2 := tolower(text[2]) 145 146 if ((t1 == 'r' && t2 == 'e') || (t1 == 'l' && t2 == 'l') || (t1 == 'v' && t2 == 'e')) && 147 (len(text) < 4 || wordBoundary(text[3])) { 148 out.WriteString("’") 149 return 0 150 } 151 } 152 } 153 154 nextChar := byte(0) 155 if len(text) > 1 { 156 nextChar = text[1] 157 } 158 if smartQuoteHelper(out, previousChar, nextChar, 's', &r.inSingleQuote, false) { 159 return 0 160 } 161 162 out.WriteByte(text[0]) 163 return 0 164} 165 166func (r *SPRenderer) smartParens(out *bytes.Buffer, previousChar byte, text []byte) int { 167 if len(text) >= 3 { 168 t1 := tolower(text[1]) 169 t2 := tolower(text[2]) 170 171 if t1 == 'c' && t2 == ')' { 172 out.WriteString("©") 173 return 2 174 } 175 176 if t1 == 'r' && t2 == ')' { 177 out.WriteString("®") 178 return 2 179 } 180 181 if len(text) >= 4 && t1 == 't' && t2 == 'm' && text[3] == ')' { 182 out.WriteString("™") 183 return 3 184 } 185 } 186 187 out.WriteByte(text[0]) 188 return 0 189} 190 191func (r *SPRenderer) smartDash(out *bytes.Buffer, previousChar byte, text []byte) int { 192 if len(text) >= 2 { 193 if text[1] == '-' { 194 out.WriteString("—") 195 return 1 196 } 197 198 if wordBoundary(previousChar) && wordBoundary(text[1]) { 199 out.WriteString("–") 200 return 0 201 } 202 } 203 204 out.WriteByte(text[0]) 205 return 0 206} 207 208func (r *SPRenderer) smartDashLatex(out *bytes.Buffer, previousChar byte, text []byte) int { 209 if len(text) >= 3 && text[1] == '-' && text[2] == '-' { 210 out.WriteString("—") 211 return 2 212 } 213 if len(text) >= 2 && text[1] == '-' { 214 out.WriteString("–") 215 return 1 216 } 217 218 out.WriteByte(text[0]) 219 return 0 220} 221 222func (r *SPRenderer) smartAmpVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte, addNBSP bool) int { 223 if bytes.HasPrefix(text, []byte(""")) { 224 nextChar := byte(0) 225 if len(text) >= 7 { 226 nextChar = text[6] 227 } 228 if smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, addNBSP) { 229 return 5 230 } 231 } 232 233 if bytes.HasPrefix(text, []byte("�")) { 234 return 3 235 } 236 237 out.WriteByte('&') 238 return 0 239} 240 241func (r *SPRenderer) smartAmp(angledQuotes, addNBSP bool) func(*bytes.Buffer, byte, []byte) int { 242 var quote byte = 'd' 243 if angledQuotes { 244 quote = 'a' 245 } 246 247 return func(out *bytes.Buffer, previousChar byte, text []byte) int { 248 return r.smartAmpVariant(out, previousChar, text, quote, addNBSP) 249 } 250} 251 252func (r *SPRenderer) smartPeriod(out *bytes.Buffer, previousChar byte, text []byte) int { 253 if len(text) >= 3 && text[1] == '.' && text[2] == '.' { 254 out.WriteString("…") 255 return 2 256 } 257 258 if len(text) >= 5 && text[1] == ' ' && text[2] == '.' && text[3] == ' ' && text[4] == '.' { 259 out.WriteString("…") 260 return 4 261 } 262 263 out.WriteByte(text[0]) 264 return 0 265} 266 267func (r *SPRenderer) smartBacktick(out *bytes.Buffer, previousChar byte, text []byte) int { 268 if len(text) >= 2 && text[1] == '`' { 269 nextChar := byte(0) 270 if len(text) >= 3 { 271 nextChar = text[2] 272 } 273 if smartQuoteHelper(out, previousChar, nextChar, 'd', &r.inDoubleQuote, false) { 274 return 1 275 } 276 } 277 278 out.WriteByte(text[0]) 279 return 0 280} 281 282func (r *SPRenderer) smartNumberGeneric(out *bytes.Buffer, previousChar byte, text []byte) int { 283 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 { 284 // is it of the form digits/digits(word boundary)?, i.e., \d+/\d+\b 285 // note: check for regular slash (/) or fraction slash (⁄, 0x2044, or 0xe2 81 84 in utf-8) 286 // and avoid changing dates like 1/23/2005 into fractions. 287 numEnd := 0 288 for len(text) > numEnd && isdigit(text[numEnd]) { 289 numEnd++ 290 } 291 if numEnd == 0 { 292 out.WriteByte(text[0]) 293 return 0 294 } 295 denStart := numEnd + 1 296 if len(text) > numEnd+3 && text[numEnd] == 0xe2 && text[numEnd+1] == 0x81 && text[numEnd+2] == 0x84 { 297 denStart = numEnd + 3 298 } else if len(text) < numEnd+2 || text[numEnd] != '/' { 299 out.WriteByte(text[0]) 300 return 0 301 } 302 denEnd := denStart 303 for len(text) > denEnd && isdigit(text[denEnd]) { 304 denEnd++ 305 } 306 if denEnd == denStart { 307 out.WriteByte(text[0]) 308 return 0 309 } 310 if len(text) == denEnd || wordBoundary(text[denEnd]) && text[denEnd] != '/' { 311 out.WriteString("<sup>") 312 out.Write(text[:numEnd]) 313 out.WriteString("</sup>⁄<sub>") 314 out.Write(text[denStart:denEnd]) 315 out.WriteString("</sub>") 316 return denEnd - 1 317 } 318 } 319 320 out.WriteByte(text[0]) 321 return 0 322} 323 324func (r *SPRenderer) smartNumber(out *bytes.Buffer, previousChar byte, text []byte) int { 325 if wordBoundary(previousChar) && previousChar != '/' && len(text) >= 3 { 326 if text[0] == '1' && text[1] == '/' && text[2] == '2' { 327 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' { 328 out.WriteString("½") 329 return 2 330 } 331 } 332 333 if text[0] == '1' && text[1] == '/' && text[2] == '4' { 334 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 5 && tolower(text[3]) == 't' && tolower(text[4]) == 'h') { 335 out.WriteString("¼") 336 return 2 337 } 338 } 339 340 if text[0] == '3' && text[1] == '/' && text[2] == '4' { 341 if len(text) < 4 || wordBoundary(text[3]) && text[3] != '/' || (len(text) >= 6 && tolower(text[3]) == 't' && tolower(text[4]) == 'h' && tolower(text[5]) == 's') { 342 out.WriteString("¾") 343 return 2 344 } 345 } 346 } 347 348 out.WriteByte(text[0]) 349 return 0 350} 351 352func (r *SPRenderer) smartDoubleQuoteVariant(out *bytes.Buffer, previousChar byte, text []byte, quote byte) int { 353 nextChar := byte(0) 354 if len(text) > 1 { 355 nextChar = text[1] 356 } 357 if !smartQuoteHelper(out, previousChar, nextChar, quote, &r.inDoubleQuote, false) { 358 out.WriteString(""") 359 } 360 361 return 0 362} 363 364func (r *SPRenderer) smartDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int { 365 return r.smartDoubleQuoteVariant(out, previousChar, text, 'd') 366} 367 368func (r *SPRenderer) smartAngledDoubleQuote(out *bytes.Buffer, previousChar byte, text []byte) int { 369 return r.smartDoubleQuoteVariant(out, previousChar, text, 'a') 370} 371 372func (r *SPRenderer) smartLeftAngle(out *bytes.Buffer, previousChar byte, text []byte) int { 373 i := 0 374 375 for i < len(text) && text[i] != '>' { 376 i++ 377 } 378 379 out.Write(text[:i+1]) 380 return i 381} 382 383type smartCallback func(out *bytes.Buffer, previousChar byte, text []byte) int 384 385// NewSmartypantsRenderer constructs a Smartypants renderer object. 386func NewSmartypantsRenderer(flags HTMLFlags) *SPRenderer { 387 var ( 388 r SPRenderer 389 390 smartAmpAngled = r.smartAmp(true, false) 391 smartAmpAngledNBSP = r.smartAmp(true, true) 392 smartAmpRegular = r.smartAmp(false, false) 393 smartAmpRegularNBSP = r.smartAmp(false, true) 394 395 addNBSP = flags&SmartypantsQuotesNBSP != 0 396 ) 397 398 if flags&SmartypantsAngledQuotes == 0 { 399 r.callbacks['"'] = r.smartDoubleQuote 400 if !addNBSP { 401 r.callbacks['&'] = smartAmpRegular 402 } else { 403 r.callbacks['&'] = smartAmpRegularNBSP 404 } 405 } else { 406 r.callbacks['"'] = r.smartAngledDoubleQuote 407 if !addNBSP { 408 r.callbacks['&'] = smartAmpAngled 409 } else { 410 r.callbacks['&'] = smartAmpAngledNBSP 411 } 412 } 413 r.callbacks['\''] = r.smartSingleQuote 414 r.callbacks['('] = r.smartParens 415 if flags&SmartypantsDashes != 0 { 416 if flags&SmartypantsLatexDashes == 0 { 417 r.callbacks['-'] = r.smartDash 418 } else { 419 r.callbacks['-'] = r.smartDashLatex 420 } 421 } 422 r.callbacks['.'] = r.smartPeriod 423 if flags&SmartypantsFractions == 0 { 424 r.callbacks['1'] = r.smartNumber 425 r.callbacks['3'] = r.smartNumber 426 } else { 427 for ch := '1'; ch <= '9'; ch++ { 428 r.callbacks[ch] = r.smartNumberGeneric 429 } 430 } 431 r.callbacks['<'] = r.smartLeftAngle 432 r.callbacks['`'] = r.smartBacktick 433 return &r 434} 435 436// Process is the entry point of the Smartypants renderer. 437func (r *SPRenderer) Process(w io.Writer, text []byte) { 438 mark := 0 439 for i := 0; i < len(text); i++ { 440 if action := r.callbacks[text[i]]; action != nil { 441 if i > mark { 442 w.Write(text[mark:i]) 443 } 444 previousChar := byte(0) 445 if i > 0 { 446 previousChar = text[i-1] 447 } 448 var tmp bytes.Buffer 449 i += action(&tmp, previousChar, text[i:]) 450 w.Write(tmp.Bytes()) 451 mark = i + 1 452 } 453 } 454 if mark < len(text) { 455 w.Write(text[mark:]) 456 } 457} 458