#
#
#            Nim's Runtime Library
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module implements an AST for the `reStructuredText`:idx: parser.

import strutils, json

type
  RstNodeKind* = enum        ## the possible node kinds of a PRstNode
    rnInner,                  # an inner node or a root
    rnHeadline,               # a headline
    rnOverline,               # an over- and underlined headline
    rnMarkdownHeadline,       # a Markdown headline
    rnTransition,             # a transition (the ------------- <hr> thingie)
    rnParagraph,              # a paragraph
    rnBulletList,             # a bullet list
    rnBulletItem,             # a bullet item
    rnEnumList,               # an enumerated list
    rnEnumItem,               # an enumerated item
    rnDefList,                # a definition list
    rnDefItem,                # an item of a definition list consisting of ...
    rnDefName,                # ... a name part ...
    rnDefBody,                # ... and a body part ...
    rnFieldList,              # a field list
    rnField,                  # a field item
    rnFieldName,              # consisting of a field name ...
    rnFieldBody,              # ... and a field body
    rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString,
    rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock,
    rnLineBlock,              # the | thingie
    rnLineBlockItem,          # a son of rnLineBlock - one line inside it.
                              # When `RstNode` lineIndent="\n" the line's empty
    rnBlockQuote,             # text just indented
    rnTable, rnGridTable, rnMarkdownTable, rnTableRow, rnTableHeaderCell, rnTableDataCell,
    rnFootnote,               # a footnote
    rnCitation,               # similar to footnote, so use rnFootnote instead
    rnFootnoteGroup,          # footnote group - exists for a purely stylistic
                              # reason: to display a few footnotes as 1 block
    rnStandaloneHyperlink, rnHyperlink, rnRef, rnInternalRef, rnFootnoteRef,
    rnDirective,              # a general directive
    rnDirArg,                 # a directive argument (for some directives).
                              # here are directives that are not rnDirective:
    rnRaw, rnTitle, rnContents, rnImage, rnFigure, rnCodeBlock, rnAdmonition,
    rnRawHtml, rnRawLatex,
    rnContainer,              # ``container`` directive
    rnIndex,                  # index directive:
                              # .. index::
                              #   key
                              #     * `file#id <file#id>`_
                              #     * `file#id <file#id>`_
    rnSubstitutionDef,        # a definition of a substitution
    # Inline markup:
    rnInlineCode,             # interpreted text with code in a known language
    rnCodeFragment,           # inline code for highlighting with the specified
                              # class (which cannot be inferred from context)
    rnUnknownRole,            # interpreted text with an unknown role
    rnSub, rnSup, rnIdx,
    rnEmphasis,               # "*"
    rnStrongEmphasis,         # "**"
    rnTripleEmphasis,         # "***"
    rnInterpretedText,        # "`" an auxiliary role for parsing that will
                              # be converted into other kinds like rnInlineCode
    rnInlineLiteral,          # "``"
    rnInlineTarget,           # "_`target`"
    rnSubstitutionReferences, # "|"
    rnSmiley,                 # some smiley
    rnDefaultRole,            # .. default-role:: code
    rnLeaf                    # a leaf; the node's text field contains the
                              # leaf val

  FileIndex* = distinct int32
  TLineInfo* = object
    line*: uint16
    col*: int16
    fileIndex*: FileIndex

  PRstNode* = ref RstNode    ## an RST node
  RstNodeSeq* = seq[PRstNode]
  RstNode* {.acyclic, final.} = object ## AST node (result of RST parsing)
    case kind*: RstNodeKind  ## the node's kind
    of rnLeaf, rnSmiley:
      text*: string          ## string that is expected to be displayed
    of rnEnumList:
      labelFmt*: string      ## label format like "(1)"
    of rnLineBlockItem:
      lineIndent*: string    ## a few spaces or newline at the line beginning
    of rnAdmonition:
      adType*: string        ## admonition type: "note", "caution", etc. This
                             ## text will set the style and also be displayed
    of rnOverline, rnHeadline, rnMarkdownHeadline:
      level*: int            ## level of headings starting from 1 (main
                             ## chapter) to larger ones (minor sub-sections)
                             ## level=0 means it's document title or subtitle
    of rnFootnote, rnCitation, rnOptionListItem:
      order*: int            ## footnote order (for auto-symbol footnotes and
                             ## auto-numbered ones without a label)
    of rnRef, rnSubstitutionReferences,
        rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef:
      info*: TLineInfo       ## To have line/column info for warnings at
                             ## nodes that are post-processed after parsing
    else:
      discard
    anchor*: string          ## anchor, internal link target
                             ## (aka HTML id tag, aka Latex label/hypertarget)
    sons*: RstNodeSeq        ## the node's sons

# Equality of file indices, borrowed from the underlying `int32`.
proc `==`*(a, b: FileIndex): bool {.borrow.}

proc len*(n: PRstNode): int =
  ## Returns the number of sons of `n`.
  result = len(n.sons)

proc newRstNode*(kind: RstNodeKind, sons: seq[PRstNode] = @[],
                 anchor = ""): PRstNode =
  ## Creates a node of the given `kind` with the given `sons` and `anchor`.
  result = PRstNode(kind: kind, sons: sons, anchor: anchor)

proc newRstNode*(kind: RstNodeKind, info: TLineInfo,
                 sons: seq[PRstNode] = @[]): PRstNode =
  ## Creates a node of the given `kind` that carries line information.
  ## `kind` must be one of the kinds that have the `info` field; assigning
  ## `info` for any other kind fails the variant field check.
  result = PRstNode(kind: kind, sons: sons)
  result.info = info

proc newRstNode*(kind: RstNodeKind, s: string): PRstNode {.deprecated.} =
  ## Creates a text-carrying node (`rnLeaf` or `rnSmiley`) with text `s`.
  assert kind in {rnLeaf, rnSmiley}
  result = newRstNode(kind)
  result.text = s

proc newRstLeaf*(s: string): PRstNode =
  ## Creates an `rnLeaf` node whose text is `s`.
  result = newRstNode(rnLeaf)
  result.text = s

proc lastSon*(n: PRstNode): PRstNode =
  ## Returns the last son of `n`; `n` must have at least one son.
  result = n.sons[len(n.sons)-1]

proc add*(father, son: PRstNode) =
  ## Appends `son` to the sons of `father`.
  add(father.sons, son)

proc add*(father: PRstNode; s: string) =
  ## Appends a new leaf node with text `s` to the sons of `father`.
  add(father.sons, newRstLeaf(s))

proc addIfNotNil*(father, son: PRstNode) =
  ## Appends `son` to the sons of `father` unless `son` is nil.
  if son != nil: add(father, son)


type
  RenderContext {.pure.} = object
    indent: int    # current indentation (in spaces) used for new lines
    verbatim: int  # > 0 while rendering inside an inline literal

# Forward declaration: `renderRstSons` and `renderRstToRst` are mutually
# recursive.
proc renderRstToRst(d: var RenderContext, n: PRstNode,
                    result: var string) {.gcsafe.}

proc renderRstSons(d: var RenderContext, n: PRstNode, result: var string) =
  ## Renders every son of `n`, in order, appending to `result`.
  for son in n.sons:
    renderRstToRst(d, son, result)

proc renderRstToRst(d: var RenderContext, n: PRstNode, result: var string) =
  ## Renders `n` back to RST markup, appending to `result`. A nil `n` renders
  ## nothing. Kinds without a dedicated branch are rendered as an
  ## "Error: cannot render" marker.
  # this is needed for the index generation; it may also be useful for
  # debugging, but most code is already debugged...
  const
    # Adornment character per heading level; only levels 0..8 are supported.
    lvlToChar: array[0..8, char] = ['!', '=', '-', '~', '`', '<', '*', '|', '+']
  if n == nil: return
  # Captured before any branch changes `d.indent`, so prefixes written by this
  # node use the indentation of the enclosing context.
  let ind = spaces(d.indent)
  case n.kind
  of rnInner:
    renderRstSons(d, n, result)
  of rnHeadline:
    result.add("\n")
    result.add(ind)

    let oldLen = result.len
    renderRstSons(d, n, result)
    let headlineLen = result.len - oldLen

    result.add("\n")
    result.add(ind)
    result.add repeat(lvlToChar[n.level], headlineLen)
  of rnOverline:
    result.add("\n")
    result.add(ind)

    var headline = ""
    renderRstSons(d, n, headline)

    # `repeat` takes a Natural count; clamp so that a deep indentation cannot
    # produce a negative length.
    let lvl = repeat(lvlToChar[n.level], max(0, headline.len - d.indent))
    result.add(lvl)
    result.add("\n")
    result.add(headline)

    result.add("\n")
    result.add(ind)
    result.add(lvl)
  of rnTransition:
    result.add("\n\n")
    result.add(ind)
    # Clamped for the same reason as above (indentation beyond 78 columns).
    result.add repeat('-', max(0, 78 - d.indent))
    result.add("\n\n")
  of rnParagraph:
    result.add("\n\n")
    result.add(ind)
    renderRstSons(d, n, result)
  of rnBulletItem:
    inc(d.indent, 2)
    var tmp = ""
    renderRstSons(d, n, tmp)
    # Items that render to nothing are dropped entirely.
    if tmp.len > 0:
      result.add("\n")
      result.add(ind)
      result.add("* ")
      result.add(tmp)
    dec(d.indent, 2)
  of rnEnumItem:
    inc(d.indent, 4)
    var tmp = ""
    renderRstSons(d, n, tmp)
    if tmp.len > 0:
      result.add("\n")
      result.add(ind)
      result.add("(#) ")
      result.add(tmp)
    dec(d.indent, 4)
  of rnOptionList, rnFieldList, rnDefList, rnDefItem, rnLineBlock, rnFieldName,
     rnFieldBody, rnStandaloneHyperlink, rnBulletList, rnEnumList:
    renderRstSons(d, n, result)
  of rnDefName:
    result.add("\n\n")
    result.add(ind)
    renderRstSons(d, n, result)
  of rnDefBody:
    inc(d.indent, 2)
    # A bullet list starts its own lines, so no line break is needed for it.
    if n.sons[0].kind != rnBulletList:
      result.add("\n")
      result.add(ind)
      result.add("  ")
    renderRstSons(d, n, result)
    dec(d.indent, 2)
  of rnField:
    var tmp = ""
    renderRstToRst(d, n.sons[0], tmp)

    # Column at which the field body starts: at least 30, or just past the
    # ":name:" marker for long names.
    let L = max(tmp.len + 3, 30)
    inc(d.indent, L)

    result.add "\n"
    result.add ind
    result.add ':'
    result.add tmp
    result.add ':'
    result.add spaces(L - tmp.len - 2)
    renderRstToRst(d, n.sons[1], result)

    dec(d.indent, L)
  of rnLineBlockItem:
    result.add("\n")
    result.add(ind)
    result.add("| ")
    renderRstSons(d, n, result)
  of rnBlockQuote:
    inc(d.indent, 2)
    renderRstSons(d, n, result)
    dec(d.indent, 2)
  of rnRef:
    result.add("`")
    renderRstSons(d, n, result)
    result.add("`_")
  of rnHyperlink:
    result.add('`')
    renderRstToRst(d, n.sons[0], result)
    result.add(" <")
    renderRstToRst(d, n.sons[1], result)
    result.add(">`_")
  of rnUnknownRole:
    result.add('`')
    renderRstToRst(d, n.sons[0], result)
    result.add("`:")
    renderRstToRst(d, n.sons[1], result)
    result.add(':')
  of rnSub:
    result.add('`')
    renderRstSons(d, n, result)
    result.add("`:sub:")
  of rnSup:
    result.add('`')
    renderRstSons(d, n, result)
    result.add("`:sup:")
  of rnIdx:
    result.add('`')
    renderRstSons(d, n, result)
    result.add("`:idx:")
  of rnEmphasis:
    result.add("*")
    renderRstSons(d, n, result)
    result.add("*")
  of rnStrongEmphasis:
    result.add("**")
    renderRstSons(d, n, result)
    result.add("**")
  of rnTripleEmphasis:
    result.add("***")
    renderRstSons(d, n, result)
    result.add("***")
  of rnInterpretedText:
    result.add('`')
    renderRstSons(d, n, result)
    result.add('`')
  of rnInlineLiteral:
    inc(d.verbatim)
    result.add("``")
    renderRstSons(d, n, result)
    result.add("``")
    dec(d.verbatim)
  of rnSmiley:
    result.add(n.text)
  of rnLeaf:
    # Outside of inline literals a lone backslash must be escaped.
    if d.verbatim == 0 and n.text == "\\":
      result.add("\\\\") # XXX: escape more special characters!
    else:
      result.add(n.text)
  of rnIndex:
    result.add("\n\n")
    result.add(ind)
    result.add(".. index::\n")

    inc(d.indent, 3)
    # The directive body is the third son; tolerate nodes that lack it.
    if n.len > 2 and n.sons[2] != nil: renderRstSons(d, n.sons[2], result)
    dec(d.indent, 3)
  of rnContents:
    result.add("\n\n")
    result.add(ind)
    result.add(".. contents::")
  else:
    result.add("Error: cannot render: " & $n.kind)

proc renderRstToRst*(n: PRstNode, result: var string) =
  ## renders `n` into its string representation and appends to `result`.
  var d: RenderContext
  renderRstToRst(d, n, result)

proc renderRstToJsonNode(node: PRstNode): JsonNode =
  ## Converts `node` and, recursively, its sons into a JSON object with the
  ## keys "kind", "level" (headings only), "text" (non-empty leaf/smiley text
  ## only) and "sons" (only if there are any). A nil node becomes JSON `null`.
  if node == nil:
    return newJNull()
  result = newJObject()
  result.add("kind", %($node.kind))
  # `level` exists only in the heading branch of the `RstNode` variant;
  # reading it for any other kind fails the field check, so it is emitted
  # for headings only.
  if node.kind in {rnOverline, rnHeadline, rnMarkdownHeadline}:
    result.add("level", %BiggestInt(node.level))
  if node.kind in {rnLeaf, rnSmiley} and node.text.len > 0:
    result.add("text", %node.text)
  if len(node.sons) > 0:
    var accm = newSeqOfCap[JsonNode](len(node.sons))
    for son in node.sons:
      accm.add renderRstToJsonNode(son)
    result.add("sons", %accm)

proc renderRstToJson*(node: PRstNode): string =
  ## Writes the given RST node as JSON that is in the form
  ## ::
  ##   {
  ##     "kind":string node.kind,
  ##     "text":optional string node.text,
  ##     "level":optional int node.level,
  ##     "sons":optional node array
  ##   }
  ##
  ## "level" is present for headline kinds only.
  renderRstToJsonNode(node).pretty

proc renderRstToText*(node: PRstNode): string =
  ## minimal text representation of markup node
  const code = {rnCodeFragment, rnInterpretedText, rnInlineLiteral, rnInlineCode}
  if node == nil:
    return ""
  case node.kind
  of rnLeaf, rnSmiley:
    result.add node.text
  else:
    # Code-like inline markup is wrapped in single backticks.
    if node.kind in code: result.add "`"
    for i in 0 ..< node.sons.len:
      if node.kind in {rnInlineCode, rnCodeBlock} and i == 0:
        continue  # omit language specifier
      result.add renderRstToText(node.sons[i])
    if node.kind in code: result.add "`"

proc treeRepr*(node: PRstNode, indent=0): string =
  ## Writes the parsed RST `node` into an AST tree with compact string
  ## representation in the format (one line per every sub-node):
  ## ``indent - kind - [text|level|order|adType] - anchor (if non-zero)``
  ## (suitable for debugging of RST parsing).
  let pad = " ".repeat(indent)
  if node == nil:
    result.add(pad & "[nil]\n")
    return
  result.add(pad & $node.kind)
  case node.kind
  of rnLeaf, rnSmiley:
    result.add (if node.text == "": "" else: " '" & node.text & "'")
  of rnEnumList:
    result.add(" labelFmt=" & node.labelFmt)
  of rnLineBlockItem:
    var txt: string
    if node.lineIndent == "\n": txt = " (blank line)"
    else: txt = " lineIndent=" & $node.lineIndent.len
    result.add txt
  of rnAdmonition:
    result.add(" adType=" & node.adType)
  of rnHeadline, rnOverline, rnMarkdownHeadline:
    result.add(" level=" & $node.level)
  of rnFootnote, rnCitation, rnOptionListItem:
    result.add (if node.order == 0: "" else: " order=" & $node.order)
  else:
    discard
  result.add (if node.anchor == "": "" else: " anchor='" & node.anchor & "'")
  result.add "\n"
  # Sons are printed two columns deeper than their parent.
  for son in node.sons:
    result.add treeRepr(son, indent=indent+2)