1#
2#
3#            Nim's Runtime Library
4#        (c) Copyright 2012 Andreas Rumpf
5#
6#    See the file "copying.txt", included in this
7#    distribution, for details about the copyright.
8#
9
10## This module implements an AST for the `reStructuredText`:idx: parser.
11
12import strutils, json
13
type
  RstNodeKind* = enum        ## the possible node kinds of a PRstNode
    rnInner,                  # an inner node or a root
    rnHeadline,               # a headline
    rnOverline,               # an over- and underlined headline
    rnMarkdownHeadline,       # a Markdown headline
    rnTransition,             # a transition (the ------------- <hr> thingie)
    rnParagraph,              # a paragraph
    rnBulletList,             # a bullet list
    rnBulletItem,             # a bullet item
    rnEnumList,               # an enumerated list
    rnEnumItem,               # an enumerated item
    rnDefList,                # a definition list
    rnDefItem,                # an item of a definition list consisting of ...
    rnDefName,                # ... a name part ...
    rnDefBody,                # ... and a body part ...
    rnFieldList,              # a field list
    rnField,                  # a field item
    rnFieldName,              # consisting of a field name ...
    rnFieldBody,              # ... and a field body
    rnOptionList, rnOptionListItem, rnOptionGroup, rnOption, rnOptionString,
    rnOptionArgument, rnDescription, rnLiteralBlock, rnQuotedLiteralBlock,
    rnLineBlock,              # the | thingie
    rnLineBlockItem,          # a son of rnLineBlock - one line inside it.
                              # When the node's `lineIndent` is "\n" the line
                              # is empty
    rnBlockQuote,             # text just indented
    rnTable, rnGridTable, rnMarkdownTable, rnTableRow, rnTableHeaderCell,
    rnTableDataCell,
    rnFootnote,               # a footnote
    rnCitation,               # similar to footnote, so use rnFootnote instead
    rnFootnoteGroup,          # footnote group - exists for a purely stylistic
                              # reason: to display a few footnotes as 1 block
    rnStandaloneHyperlink, rnHyperlink, rnRef, rnInternalRef, rnFootnoteRef,
    rnDirective,              # a general directive
    rnDirArg,                 # a directive argument (for some directives).
                              # here are directives that are not rnDirective:
    rnRaw, rnTitle, rnContents, rnImage, rnFigure, rnCodeBlock, rnAdmonition,
    rnRawHtml, rnRawLatex,
    rnContainer,              # ``container`` directive
    rnIndex,                  # index directive:
                              # .. index::
                              #   key
                              #     * `file#id <file#id>`_
                              #     * `file#id <file#id>`_
    rnSubstitutionDef,        # a definition of a substitution
    # Inline markup:
    rnInlineCode,             # interpreted text with code in a known language
    rnCodeFragment,           # inline code for highlighting with the specified
                              # class (which cannot be inferred from context)
    rnUnknownRole,            # interpreted text with an unknown role
    rnSub, rnSup, rnIdx,
    rnEmphasis,               # "*"
    rnStrongEmphasis,         # "**"
    rnTripleEmphasis,         # "***"
    rnInterpretedText,        # "`" an auxiliary role for parsing that will
                              # be converted into other kinds like rnInlineCode
    rnInlineLiteral,          # "``"
    rnInlineTarget,           # "_`target`"
    rnSubstitutionReferences, # "|"
    rnSmiley,                 # some smiley
    rnDefaultRole,            # .. default-role:: code
    rnLeaf                    # a leaf; the node's text field contains the
                              # leaf val

  FileIndex* = distinct int32 ## index identifying a file; a distinct type so
                              ## it cannot be mixed up with ordinary integers
  TLineInfo* = object         ## line/column/file triple stored in the `info`
                              ## field of some node kinds
    line*: uint16
    col*: int16
    fileIndex*: FileIndex

  PRstNode* = ref RstNode    ## an RST node
  RstNodeSeq* = seq[PRstNode]
  RstNode* {.acyclic, final.} = object ## AST node (result of RST parsing)
    case kind*: RstNodeKind ## the node's kind
    of rnLeaf, rnSmiley:
      text*: string           ## string that is expected to be displayed
    of rnEnumList:
      labelFmt*: string       ## label format like "(1)"
    of rnLineBlockItem:
      lineIndent*: string     ## a few spaces or newline at the line beginning
    of rnAdmonition:
      adType*: string         ## admonition type: "note", "caution", etc. This
                              ## text will set the style and also be displayed
    of rnOverline, rnHeadline, rnMarkdownHeadline:
      level*: int             ## level of headings starting from 1 (main
                              ## chapter) to larger ones (minor sub-sections)
                              ## level=0 means it's document title or subtitle
    of rnFootnote, rnCitation, rnOptionListItem:
      order*: int             ## footnote order (for auto-symbol footnotes and
                              ## auto-numbered ones without a label)
    of rnRef, rnSubstitutionReferences,
        rnInterpretedText, rnField, rnInlineCode, rnCodeBlock, rnFootnoteRef:
      info*: TLineInfo        ## To have line/column info for warnings at
                              ## nodes that are post-processed after parsing
    else:
      discard
    anchor*: string           ## anchor, internal link target
                              ## (aka HTML id tag, aka Latex label/hypertarget)
    sons*: RstNodeSeq        ## the node's sons
112
proc `==`*(a, b: FileIndex): bool {.borrow.}
  ## Equality for `FileIndex`, borrowed from its base type `int32`.
114
proc len*(n: PRstNode): int =
  ## Returns the number of sons of `n`.
  n.sons.len
117
proc newRstNode*(kind: RstNodeKind, sons: seq[PRstNode] = @[],
                 anchor = ""): PRstNode =
  ## Creates a new node of the given `kind` holding `sons` and `anchor`.
  PRstNode(kind: kind, sons: sons, anchor: anchor)
121
proc newRstNode*(kind: RstNodeKind, info: TLineInfo,
                 sons: seq[PRstNode] = @[]): PRstNode =
  ## Creates a new node of the given `kind` with `sons` and stores `info`
  ## in it. `kind` has to be one of the kinds that carry an `info` field.
  let node = PRstNode(kind: kind, sons: sons)
  # `info` lives in a variant branch, so it is assigned after construction
  # (the discriminator is only known at run time).
  node.info = info
  node
126
proc newRstNode*(kind: RstNodeKind, s: string): PRstNode {.deprecated.} =
  ## Creates a text-carrying node (`rnLeaf` or `rnSmiley`) whose `text`
  ## is `s`. Any other `kind` fails the assertion.
  assert kind in {rnLeaf, rnSmiley}
  let node = newRstNode(kind)
  node.text = s
  node
131
proc newRstLeaf*(s: string): PRstNode =
  ## Creates an `rnLeaf` node with text `s`, no sons and an empty anchor.
  PRstNode(kind: rnLeaf, text: s)
135
proc lastSon*(n: PRstNode): PRstNode =
  ## Returns the last son of `n`; `n` must have at least one son.
  n.sons[^1]
138
proc add*(father, son: PRstNode) =
  ## Appends `son` to the sons of `father`.
  father.sons.add(son)
141
proc add*(father: PRstNode; s: string) =
  ## Wraps `s` into a new leaf node and appends it to the sons of `father`.
  let leaf = newRstLeaf(s)
  father.sons.add(leaf)
144
proc addIfNotNil*(father, son: PRstNode) =
  ## Appends `son` to `father` unless `son` is `nil`.
  if son.isNil:
    return
  father.add(son)
147
148
type
  RenderContext {.pure.} = object
    ## Mutable state threaded through the RST -> RST renderer.
    indent, verbatim: int # `indent`: number of spaces put in front of new
                          # lines; `verbatim`: non-zero while rendering
                          # inside an inline literal
153
# Forward declaration: `renderRstSons` and `renderRstToRst` call each other.
proc renderRstToRst(d: var RenderContext, n: PRstNode,
                    result: var string) {.gcsafe.}

proc renderRstSons(d: var RenderContext, n: PRstNode, result: var string) =
  ## Renders every son of `n`, in order, appending the output to `result`.
  for son in n.sons:
    renderRstToRst(d, son, result)
160
proc renderRstToRst(d: var RenderContext, n: PRstNode, result: var string) =
  ## Appends an RST rendering of `n` (and, recursively, of its sons) to
  ## `result`.
  ##
  ## `d.indent` is the number of spaces emitted when a new line is started;
  ## branches that nest content increase it for their sons and restore it
  ## afterwards. `d.verbatim` is non-zero while inside an inline literal, in
  ## which case a lone backslash leaf is emitted unescaped. A `nil` node
  ## renders as nothing. Node kinds without a dedicated branch append
  ## ``Error: cannot render: <kind>``.
  # this is needed for the index generation; it may also be useful for
  # debugging, but most code is already debugged...
  const
    lvlToChar: array[0..8, char] =
      ['!', '=', '-', '~', '`', '<', '*', '|', '+']

  template adornment(level: int): char =
    # Clamp the level into the table so that an unexpected heading level
    # cannot raise an IndexDefect while rendering.
    lvlToChar[max(0, min(level, lvlToChar.len - 1))]

  if n == nil: return
  let ind = spaces(d.indent)
  case n.kind
  of rnInner:
    renderRstSons(d, n, result)
  of rnHeadline:
    result.add("\n")
    result.add(ind)

    # The underline has to be as long as the rendered headline text.
    let oldLen = result.len
    renderRstSons(d, n, result)
    let headlineLen = result.len - oldLen

    result.add("\n")
    result.add(ind)
    result.add repeat(adornment(n.level), headlineLen)
  of rnOverline:
    result.add("\n")
    result.add(ind)

    var headline = ""
    renderRstSons(d, n, headline)

    # `repeat` takes a Natural count: never pass a negative length when the
    # indentation exceeds the headline length.
    let lvl = repeat(adornment(n.level), max(0, headline.len - d.indent))
    result.add(lvl)
    result.add("\n")
    result.add(headline)

    result.add("\n")
    result.add(ind)
    result.add(lvl)
  of rnTransition:
    result.add("\n\n")
    result.add(ind)
    # Fill up to column 78; clamp so deep indentation cannot go negative.
    result.add repeat('-', max(0, 78 - d.indent))
    result.add("\n\n")
  of rnParagraph:
    result.add("\n\n")
    result.add(ind)
    renderRstSons(d, n, result)
  of rnBulletItem:
    inc(d.indent, 2)
    # Render into a temporary first: empty items are dropped entirely.
    var tmp = ""
    renderRstSons(d, n, tmp)
    if tmp.len > 0:
      result.add("\n")
      result.add(ind)
      result.add("* ")
      result.add(tmp)
    dec(d.indent, 2)
  of rnEnumItem:
    inc(d.indent, 4)
    # Render into a temporary first: empty items are dropped entirely.
    var tmp = ""
    renderRstSons(d, n, tmp)
    if tmp.len > 0:
      result.add("\n")
      result.add(ind)
      result.add("(#) ")
      result.add(tmp)
    dec(d.indent, 4)
  of rnOptionList, rnFieldList, rnDefList, rnDefItem, rnLineBlock, rnFieldName,
     rnFieldBody, rnStandaloneHyperlink, rnBulletList, rnEnumList:
    renderRstSons(d, n, result)
  of rnDefName:
    result.add("\n\n")
    result.add(ind)
    renderRstSons(d, n, result)
  of rnDefBody:
    inc(d.indent, 2)
    if n.sons[0].kind != rnBulletList:
      result.add("\n")
      result.add(ind)
      result.add("  ")
    renderRstSons(d, n, result)
    dec(d.indent, 2)
  of rnField:
    # sons[0] is rendered between the colons, sons[1] after the padding.
    var tmp = ""
    renderRstToRst(d, n.sons[0], tmp)

    # Width of the ":name:" column (at least 30); the second son is
    # rendered with the indentation increased by this width.
    let L = max(tmp.len + 3, 30)
    inc(d.indent, L)

    result.add "\n"
    result.add ind
    result.add ':'
    result.add tmp
    result.add ':'
    result.add spaces(L - tmp.len - 2)
    renderRstToRst(d, n.sons[1], result)

    dec(d.indent, L)
  of rnLineBlockItem:
    result.add("\n")
    result.add(ind)
    result.add("| ")
    renderRstSons(d, n, result)
  of rnBlockQuote:
    inc(d.indent, 2)
    renderRstSons(d, n, result)
    dec(d.indent, 2)
  of rnRef:
    result.add("`")
    renderRstSons(d, n, result)
    result.add("`_")
  of rnHyperlink:
    result.add('`')
    renderRstToRst(d, n.sons[0], result)
    result.add(" <")
    renderRstToRst(d, n.sons[1], result)
    result.add(">`_")
  of rnUnknownRole:
    result.add('`')
    renderRstToRst(d, n.sons[0], result)
    result.add("`:")
    renderRstToRst(d, n.sons[1], result)
    result.add(':')
  of rnSub:
    result.add('`')
    renderRstSons(d, n, result)
    result.add("`:sub:")
  of rnSup:
    result.add('`')
    renderRstSons(d, n, result)
    result.add("`:sup:")
  of rnIdx:
    result.add('`')
    renderRstSons(d, n, result)
    result.add("`:idx:")
  of rnEmphasis:
    result.add("*")
    renderRstSons(d, n, result)
    result.add("*")
  of rnStrongEmphasis:
    result.add("**")
    renderRstSons(d, n, result)
    result.add("**")
  of rnTripleEmphasis:
    result.add("***")
    renderRstSons(d, n, result)
    result.add("***")
  of rnInterpretedText:
    result.add('`')
    renderRstSons(d, n, result)
    result.add('`')
  of rnInlineLiteral:
    inc(d.verbatim)
    result.add("``")
    renderRstSons(d, n, result)
    result.add("``")
    dec(d.verbatim)
  of rnSmiley:
    result.add(n.text)
  of rnLeaf:
    if d.verbatim == 0 and n.text == "\\":
      result.add("\\\\") # XXX: escape more special characters!
    else:
      result.add(n.text)
  of rnIndex:
    result.add("\n\n")
    result.add(ind)
    result.add(".. index::\n")

    # Only the third son is rendered, with the indentation increased by 3.
    inc(d.indent, 3)
    if n.sons[2] != nil: renderRstSons(d, n.sons[2], result)
    dec(d.indent, 3)
  of rnContents:
    result.add("\n\n")
    result.add(ind)
    result.add(".. contents::")
  else:
    result.add("Error: cannot render: " & $n.kind)
337
proc renderRstToRst*(n: PRstNode, result: var string) =
  ## Renders `n` back into RST text and appends it to `result`, starting
  ## from a fresh context (no indentation, not inside a literal).
  var ctx = RenderContext(indent: 0, verbatim: 0)
  renderRstToRst(ctx, n, result)
342
proc renderRstToJsonNode(node: PRstNode): JsonNode =
  ## Converts `node` and, recursively, its sons into a JSON object with the
  ## keys (in this order):
  ##
  ## * ``"kind"``: the node kind as a string (always present);
  ## * ``"level"``: heading level, only for `rnOverline`, `rnHeadline` and
  ##   `rnMarkdownHeadline` nodes;
  ## * ``"text"``: only for `rnLeaf`/`rnSmiley` nodes with non-empty text;
  ## * ``"sons"``: array of converted sons, only when there are any.
  ##
  ## A `nil` node is converted to JSON ``null``.
  if node == nil:
    # Sons may be nil (the other renderers in this module tolerate that).
    return newJNull()
  result = newJObject()
  result.add("kind", %($node.kind))
  # `level` lives in a variant branch of `RstNode`; reading it for any other
  # kind would raise a FieldDefect, so it is emitted only where it exists.
  if node.kind in {rnOverline, rnHeadline, rnMarkdownHeadline}:
    result.add("level", %BiggestInt(node.level))
  if node.kind in {rnLeaf, rnSmiley} and node.text.len > 0:
    result.add("text", %node.text)
  if node.sons.len > 0:
    let sonsJson = newJArray()
    for son in node.sons:
      sonsJson.add renderRstToJsonNode(son)
    result.add("sons", sonsJson)
356
proc renderRstToJson*(node: PRstNode): string =
  ## Renders the given RST node as pretty-printed JSON of the form
  ## ::
  ##   {
  ##     "kind":string node.kind,
  ##     "text":optional string node.text,
  ##     "level":optional int node.level,
  ##     "sons":optional node array
  ##   }
  let tree = renderRstToJsonNode(node)
  result = pretty(tree)
367
proc renderRstToText*(node: PRstNode): string =
  ## Produces a minimal plain-text rendering of `node`: the concatenated
  ## text of its leaves, with code-like inline nodes wrapped in backticks.
  ## A `nil` node yields the empty string.
  if node.isNil:
    return ""
  if node.kind in {rnLeaf, rnSmiley}:
    return node.text

  const quoted = {rnCodeFragment, rnInterpretedText, rnInlineLiteral,
                  rnInlineCode}
  let fence = if node.kind in quoted: "`" else: ""
  # The first son of these kinds is the language specifier: leave it out.
  let skipFirst = node.kind in {rnInlineCode, rnCodeBlock}

  result = fence
  for idx, son in node.sons:
    if skipFirst and idx == 0:
      continue
    result.add renderRstToText(son)
  result.add fence
383
proc treeRepr*(node: PRstNode, indent=0): string =
  ## Dumps the parsed RST `node` as a compact tree, one line per sub-node:
  ## ``indent - kind - [text|labelFmt|lineIndent|adType|level|order] -
  ## anchor (if non-empty)``. Sons are printed 2 columns deeper than their
  ## parent; a `nil` node is printed as ``[nil]``.
  ## (suitable for debugging of RST parsing).
  let pad = repeat(" ", indent)
  if node.isNil:
    return pad & "[nil]\n"

  result = pad & $node.kind
  case node.kind
  of rnLeaf, rnSmiley:
    if node.text != "":
      result.add("  '" & node.text & "'")
  of rnEnumList:
    result.add("  labelFmt=" & node.labelFmt)
  of rnLineBlockItem:
    if node.lineIndent == "\n":
      result.add("  (blank line)")
    else:
      result.add("  lineIndent=" & $node.lineIndent.len)
  of rnAdmonition:
    result.add("  adType=" & node.adType)
  of rnHeadline, rnOverline, rnMarkdownHeadline:
    result.add("  level=" & $node.level)
  of rnFootnote, rnCitation, rnOptionListItem:
    if node.order != 0:
      result.add("  order=" & $node.order)
  else:
    discard

  if node.anchor != "":
    result.add("  anchor='" & node.anchor & "'")
  result.add("\n")
  for son in node.sons:
    result.add(treeRepr(son, indent + 2))
415