/*
 * pandoc_format.ts
 *
 * Copyright (C) 2021 by RStudio, PBC
 *
 * Unless you have received this program directly from RStudio pursuant
 * to the terms of a commercial license agreement with RStudio, then
 * this program is licensed to you under the terms of version 3 of the
 * GNU Affero General Public License. This program is distributed WITHOUT
 * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
 * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
 *
 */

import { Node as ProsemirrorNode } from 'prosemirror-model';

import { PandocServer, PandocExtensions } from './pandoc';
import { EditorFormat, kHugoDocType } from './format';
import { firstYamlBlock, yamlMetadataNodes } from './yaml';
import { findValue } from './object';

// names of the markdown base formats referenced in this module
export const kMarkdownFormat = 'markdown';
export const kMarkdownPhpextraFormat = 'markdown_phpextra';
export const kMarkdownGithubFormat = 'markdown_github';
export const kMarkdownMmdFormat = 'markdown_mmd';
export const kMarkdownStrictFormat = 'markdown_strict';
export const kGfmFormat = 'gfm';
export const kCommonmarkFormat = 'commonmark';
export const kCommonmarkXFormat = 'commonmark_x';

/**
 * A pandoc format after resolution: the mode that was requested, the base
 * format name, the full name (base name followed by `+ext` / `-ext` suffixes),
 * the extension flags, and any warnings collected along the way.
 */
export interface PandocFormat {
  mode: string;
  baseName: string;
  fullName: string;
  extensions: PandocExtensions;
  warnings: PandocFormatWarnings;
}

/**
 * Problems found while resolving a format: the name of an unrecognized base
 * format (empty string when there is none) and the names of unrecognized
 * extension options.
 */
export interface PandocFormatWarnings {
  invalidFormat: string;
  invalidOptions: string[];
}

/**
 * Format configuration as read from a document (YAML front matter or a
 * magic comment). Every field is optional.
 */
export interface PandocFormatConfig {
  mode?: string;
  extensions?: string;
  rmdExtensions?: string;
  wrap?: string;
  doctypes?: string[];
  references_location?: string;
  references_prefix?: string;
  canonical?: boolean;
}

/**
 * Search `code` for a magic format comment of the form
 * `<!-- -*- ... -*- -->` that occupies a line of its own.
 *
 * @param code Text to search.
 * @returns The match (capture group 1 holds the text between the two `-*-`
 *   markers, untrimmed), or `null` when no such comment exists.
 */
export function matchPandocFormatComment(code: string): RegExpMatchArray | null {
  return /^<!--\s+-\*-([\s\S]*?)-\*-\s+-->\s*$/m.exec(code);
}

/**
 * Read the pandoc format configuration embedded in a ProseMirror document.
 *
 * The first YAML metadata node is consulted first; if it yields nothing the
 * magic format comment is tried; if neither is present an empty config is
 * returned.
 *
 * @param doc Document to inspect.
 * @param isRmd Whether the document is R Markdown (enables the
 *   `md_extensions` lookup under the YAML `output` key).
 */
export function pandocFormatConfigFromDoc(doc: ProsemirrorNode, isRmd: boolean): PandocFormatConfig {
  return pandocFormatConfigFromYamlInDoc(doc, isRmd) || pandocFormatConfigFromCommentInDoc(doc) || {};
}

/**
 * Read the pandoc format configuration embedded in source text.
 *
 * Same precedence as {@link pandocFormatConfigFromDoc}: YAML first, then the
 * magic comment, then an empty config.
 *
 * @param code Source text to inspect.
 * @param isRmd Whether the source is R Markdown.
 */
export function pandocFormatConfigFromCode(code: string, isRmd: boolean): PandocFormatConfig {
  return pandocFormatConfigFromYamlInCode(code, isRmd) || pandocFormatConfigFromCommentInCode(code) || {};
}

/**
 * Build a format config from the first YAML block of `code`.
 *
 * Extensions come from `md_extensions` (R Markdown only) or, failing that,
 * from the extension suffix of a `from` value. They are prepended to any
 * extensions declared under `editor_options.markdown`.
 *
 * @returns The config, or `null` when there is no YAML block or it declares
 *   neither extensions nor markdown editor options.
 */
function pandocFormatConfigFromYamlInCode(code: string, isRmd: boolean): PandocFormatConfig | null {
  // get the first yaml block in the file
  const yaml = firstYamlBlock(code);
  if (!yaml) {
    return null;
  }

  // see if we have any md_extensions defined
  let mdExtensions: string | undefined = isRmd ? findValue('md_extensions', yaml.output) : undefined;
  if (!mdExtensions) {
    // look for quarto 'from' (e.g. "markdown+emoji-smart" yields "+emoji-smart")
    const from = findValue('from', yaml);
    if (from) {
      const extensions = String(from).match(/^\w+([+-][\w+-]+)$/);
      if (extensions) {
        mdExtensions = extensions[1];
      }
    }
  }

  // see if we have any markdown options defined
  let yamlFormatConfig: PandocFormatConfig | undefined;
  const yamlMarkdownOptions = yaml.editor_options?.markdown;
  if (yamlMarkdownOptions instanceof Object) {
    yamlFormatConfig = readPandocFormatConfig(yamlMarkdownOptions);
  }

  // nothing relevant was declared
  if (!mdExtensions && !yamlFormatConfig) {
    return null;
  }

  // combine and return
  const formatConfig: PandocFormatConfig = yamlFormatConfig || {};
  if (mdExtensions) {
    formatConfig.extensions = mdExtensions + (formatConfig.extensions || '');
  }
  return formatConfig;
}

/**
 * Build a format config from the text content of the first YAML metadata
 * node in `doc`.
 *
 * @returns The config, or `null` when the document has no YAML metadata node
 *   or the YAML declares no format configuration.
 */
function pandocFormatConfigFromYamlInDoc(doc: ProsemirrorNode, isRmd: boolean): PandocFormatConfig | null {
  const yamlNodes = yamlMetadataNodes(doc);
  if (yamlNodes.length === 0) {
    return null;
  }
  return pandocFormatConfigFromYamlInCode(yamlNodes[0].node.textContent, isRmd);
}

/**
 * Build a format config from a magic format comment in `code`.
 *
 * The text between the `-*-` markers is split into `key: value` fields on
 * semicolons followed by whitespace; fields without a colon are ignored.
 *
 * @returns The config, or `null` when `code` contains no magic comment.
 */
function pandocFormatConfigFromCommentInCode(code: string): PandocFormatConfig | null {
  const match = matchPandocFormatComment(code);
  if (!match) {
    return null;
  }

  const keyValueRegEx = /^([^:]+):\s*(.*)$/;
  const variables: { [key: string]: string } = {};

  // split on semicolons
  const fields = match[1].split(/\s*;\s/).map(field => field.trim());
  for (const field of fields) {
    const keyValueMatch = field.match(keyValueRegEx);
    if (keyValueMatch) {
      variables[keyValueMatch[1].trim()] = keyValueMatch[2].trim();
    }
  }
  return readPandocFormatConfig(variables);
}

/**
 * Build a format config from the first qualifying raw html comment text node
 * in `doc`.
 *
 * @returns The config parsed from that node, or `null` when no node
 *   qualifies or the node holds no magic comment.
 */
function pandocFormatConfigFromCommentInDoc(doc: ProsemirrorNode): PandocFormatConfig | null {
  const schema = doc.type.schema;
  let config: PandocFormatConfig | null = null;
  let foundFirstRawInline = false;
  doc.descendants(node => {
    // don't search once we've found our target
    if (foundFirstRawInline) {
      return false;
    }

    // if it's a text node with a raw-html comment mark then scan it for the format comment
    // NOTE(review): the check also requires a truthy `format` attribute on the
    // text node itself; confirm that text nodes actually carry that attribute.
    if (
      node.isText &&
      schema.marks.raw_html_comment &&
      schema.marks.raw_html_comment.isInSet(node.marks) &&
      node.attrs.format
    ) {
      foundFirstRawInline = true;
      config = pandocFormatConfigFromCommentInCode(node.textContent);
      return false;
    }
    return true;
  });
  return config;
}

/**
 * Convert a loosely typed key/value bag (parsed YAML or magic comment
 * variables) into a `PandocFormatConfig`.
 *
 * Keys read: `mode`, `extensions`, `rmd_extensions`, `wrap` / `wrap_column` /
 * `fill-column`, `doctype` (comma separated), `references` (either a string
 * location or an object with `location` and `prefix`), and `canonical`.
 * Falsy values are treated as absent.
 */
function readPandocFormatConfig(source: { [key: string]: any }): PandocFormatConfig {
  // coerce a value to a string ('' for falsy values)
  const asString = (obj: any): string => {
    if (typeof obj === 'string') {
      return obj;
    } else if (obj) {
      return obj.toString();
    } else {
      return '';
    }
  };

  // booleans pass through; otherwise only 'true' (any case) and '1' count as true
  const asBoolean = (obj: any) => {
    if (typeof obj === 'boolean') {
      return obj;
    } else {
      const str = asString(obj).toLowerCase();
      return str === 'true' || str === '1';
    }
  };

  // the wrap column can be spelled three ways; the first truthy one wins
  const readWrap = () => {
    const wrap = source.wrap || source.wrap_column || source['fill-column'];
    return wrap ? asString(wrap) : undefined;
  };

  const formatConfig: PandocFormatConfig = {};
  if (source.mode) {
    formatConfig.mode = asString(source.mode);
  }
  if (source.extensions) {
    formatConfig.extensions = asString(source.extensions);
  }
  if (source.rmd_extensions) {
    formatConfig.rmdExtensions = asString(source.rmd_extensions);
  }
  formatConfig.wrap = readWrap();
  if (source.doctype) {
    formatConfig.doctypes = asString(source.doctype)
      .split(',')
      .map(str => str.trim());
  }
  if (source.references) {
    if (typeof source.references === 'string') {
      formatConfig.references_location = source.references;
    } else {
      formatConfig.references_location = source.references.location;
      formatConfig.references_prefix = source.references.prefix;
    }
  }
  if (source.canonical) {
    formatConfig.canonical = asBoolean(source.canonical);
  }
  return formatConfig;
}

/**
 * Resolve an editor format into a concrete pandoc format.
 *
 * Unknown base formats fall back to `markdown` (recorded in
 * `warnings.invalidFormat`). The variants handled here (commonmark,
 * commonmark_x, gfm, goldmark, blackfriday) are expressed as
 * `markdown_strict` plus a fixed list of extensions, followed by the user's
 * own options. User options that pandoc does not list for the base format
 * are dropped and recorded in `warnings.invalidOptions`.
 *
 * @param pandoc Server used to query the extensions of the base format.
 * @param format Editor format (mode, extension string, doc types, ...).
 * @returns The requested mode, the base name actually used, the full format
 *   name, the resulting extension flags, and the warnings.
 */
export async function resolvePandocFormat(pandoc: PandocServer, format: EditorFormat): Promise<PandocFormat> {
  // additional markdown variants we support
  const kMarkdownVariants: { [key: string]: string[] } = {
    [kCommonmarkFormat]: commonmarkExtensions(),
    [kCommonmarkXFormat]: commonmarkXExtensions(),
    [kGfmFormat]: gfmExtensions(),
    goldmark: goldmarkExtensions(format),
    blackfriday: blackfridayExtensions(format),
  };

  // setup warnings
  const warnings: PandocFormatWarnings = { invalidFormat: '', invalidOptions: [] };

  // alias options and basename
  let options = format.pandocExtensions;
  let baseName = format.pandocMode;

  // validate the base format (fall back to markdown if it's not known)
  const knownFormats = [
    kMarkdownFormat,
    kMarkdownPhpextraFormat,
    kMarkdownGithubFormat,
    kMarkdownMmdFormat,
    kMarkdownStrictFormat,
    kGfmFormat,
    kCommonmarkFormat,
    kCommonmarkXFormat,
    ...Object.keys(kMarkdownVariants),
  ];
  if (!knownFormats.includes(baseName)) {
    warnings.invalidFormat = baseName;
    baseName = kMarkdownFormat;
  }

  // if we are using a variant then get its base options and merge with user options
  const variant = kMarkdownVariants[baseName];
  if (variant) {
    options = variant.join('') + options;
    baseName = kMarkdownStrictFormat;
  }

  // query for format options (parsed once, used for both defaults and validation)
  const formatOptions = await pandoc.listExtensions(baseName);
  const formatExtensions = parseExtensions(formatOptions);

  // active pandoc extensions, seeded with the defaults for the base format
  const pandocExtensions: { [key: string]: boolean } = {};
  const validOptionNames = new Set<string>();
  for (const option of formatExtensions) {
    pandocExtensions[option.name] = option.enabled;
    validOptionNames.add(option.name);
  }

  // now parse extensions for user options (validate and build format name)
  let fullName = baseName;
  for (const option of parseExtensions(options)) {
    if (validOptionNames.has(option.name)) {
      // add option
      fullName += (option.enabled ? '+' : '-') + option.name;
      pandocExtensions[option.name] = option.enabled;
    } else {
      warnings.invalidOptions.push(option.name);
    }
  }

  // return format name, enabled extensions, and warnings
  return {
    mode: format.pandocMode,
    baseName,
    fullName,
    extensions: (pandocExtensions as unknown) as PandocExtensions,
    warnings,
  };
}

/**
 * Parse a string of `+name` / `-name` extension toggles into entries.
 *
 * Names consist of lowercase letters and underscores. Any other characters
 * (line breaks included) act as separators, so the input does not need to be
 * normalized first.
 *
 * @param options Extension string, e.g. `"+smart-raw_html"` or one toggle per line.
 * @returns One entry per toggle, in order of appearance.
 */
function parseExtensions(options: string) {
  const extensions: Array<{ name: string; enabled: boolean }> = [];
  const re = /([+-])([a-z_]+)/g;
  let match = re.exec(options);
  while (match) {
    extensions.push({ name: match[2], enabled: match[1] === '+' });
    match = re.exec(options);
  }
  return extensions;
}

/**
 * Insert extra option strings around the options of a format string.
 *
 * @param format Format string such as `"markdown+smart"`.
 * @param prepend Text placed between the base format and its existing options.
 * @param append Text placed after the existing options.
 */
export function pandocFormatWith(format: string, prepend: string, append: string) {
  const split = splitPandocFormatString(format);
  return `${split.format}${prepend}${split.options}${append}`;
}

/**
 * Split a pandoc format string into its base format and its options.
 *
 * The options start at the first `+` or `-`, whichever comes first, so
 * `"markdown+smart-raw_html"` yields `markdown` and `+smart-raw_html`.
 *
 * @param format Format string to split.
 * @returns `format` (base name) and `options` (empty string when none).
 */
export function splitPandocFormatString(format: string) {
  // position of the first extension toggle, whichever sign it carries
  const optionsPos = format.search(/[+-]/);
  if (optionsPos === -1) {
    return { format, options: '' };
  }
  return {
    format: format.slice(0, optionsPos),
    options: format.slice(optionsPos),
  };
}

/** True when either `backtick_code_blocks` or `fenced_code_blocks` is enabled. */
export function hasFencedCodeBlocks(pandocExtensions: PandocExtensions) {
  return pandocExtensions.backtick_code_blocks || pandocExtensions.fenced_code_blocks;
}

/**
 * True when both `implicit_header_references` and `shortcut_reference_links`
 * are enabled, e.g. [My Heading] to link to ## My Heading.
 */
export function hasShortcutHeadingLinks(pandocExtensions: PandocExtensions) {
  return pandocExtensions.implicit_header_references && pandocExtensions.shortcut_reference_links;
}

/**
 * Extension toggles used to express commonmark on top of another base format.
 *
 * @param rawHTML Whether `raw_html` is enabled (`+raw_html`) or disabled (`-raw_html`).
 */
function commonmarkExtensions(rawHTML = true): string[] {
  return [
    rawHTML ? '+raw_html' : '-raw_html',
    '+all_symbols_escapable',
    '+backtick_code_blocks',
    '+fenced_code_blocks',
    '+space_in_atx_header',
    '+intraword_underscores',
    '+lists_without_preceding_blankline',
    '+shortcut_reference_links',
  ];
}

/**
 * Extension toggles for the commonmark_x variant.
 * https://github.com/jgm/pandoc/commit/0aed9dd589189a9bbe5cae99e0e024e2d4a92c36
 */
function commonmarkXExtensions(): string[] {
  return [
    '+pipe_tables',
    '+raw_html',
    '+auto_identifiers',
    '+strikeout',
    '+task_lists',
    '+emoji',
    '+raw_tex',
    '+smart',
    '+tex_math_dollars',
    '+superscript',
    '+subscript',
    '+definition_lists',
    '+footnotes',
    '+fancy_lists',
    '+fenced_divs',
    '+bracketed_spans',
    '+raw_attribute',
    '+implicit_header_references',
    // '+attributes' (not yet)
  ];
}

/** Extension toggles for the gfm variant: the commonmark list plus GitHub additions. */
function gfmExtensions(): string[] {
  return [
    ...commonmarkExtensions(),
    '+auto_identifiers',
    '+autolink_bare_uris',
    '+emoji',
    '+gfm_auto_identifiers',
    '+pipe_tables',
    '+strikeout',
    '+task_lists',
  ];
}

/**
 * Extension toggles for the goldmark variant.
 * https://gohugo.io/getting-started/configuration-markup/#goldmark
 * https://github.com/yuin/goldmark/#html-renderer-options
 *
 * @param format Editor format, consulted to decide whether `tex_math_dollars` is added.
 */
function goldmarkExtensions(format: EditorFormat): string[] {
  const extensions = [
    // start with commonmark (raw html disabled)
    ...commonmarkExtensions(false),

    // adds most of gfm
    '+pipe_tables',
    '+strikeout',
    '+autolink_bare_uris',
    '+task_lists',
    // NOTE(review): also contributed by commonmarkExtensions(); the duplicate is
    // kept so the generated format name stays exactly as before.
    '+backtick_code_blocks',

    // plus some extras
    '+definition_lists',
    '+footnotes',
    '+smart',

    // hugo preprocessor supports yaml metadata
    '+yaml_metadata_block',
  ];

  if (includeTexMathDollars(format)) {
    extensions.push('+tex_math_dollars');
  }

  return extensions;
}

/**
 * Extension toggles for the blackfriday variant.
 * https://github.com/russross/blackfriday/tree/v2#extensions
 *
 * @param format Editor format, consulted to decide whether `tex_math_dollars` is added.
 */
function blackfridayExtensions(format: EditorFormat): string[] {
  const extensions = [
    '+intraword_underscores',
    '+pipe_tables',
    '+backtick_code_blocks',
    '+definition_lists',
    '+footnotes',
    '+autolink_bare_uris',
    '+strikeout',
    '+smart',
    '+yaml_metadata_block',
  ];

  if (includeTexMathDollars(format)) {
    extensions.push('+tex_math_dollars');
  }

  return extensions;
}

/**
 * Whether `tex_math_dollars` should be enabled for the goldmark and
 * blackfriday variants: true for hugo documents or when the
 * `blogdownMathInCode` R Markdown extension is set.
 */
function includeTexMathDollars(format: EditorFormat) {
  // hugo users often sort out some way to include math so we enable it for hugo
  return format.docTypes.includes(kHugoDocType) || format.rmdExtensions.blogdownMathInCode;
}