1#
2#
3#            Nim's Runtime Library
4#        (c) Copyright 2015 Andreas Rumpf, Dominik Picheta
5#
6#    See the file "copying.txt", included in this
7#    distribution, for details about the copyright.
8#
9
10## **Note:** Import ``nimsuggest/sexp`` to use this module
11
12import
13  hashes, strutils, lexbase, streams, unicode, macros
14
15import std/private/decode_helpers
16
17type
18  SexpEventKind* = enum  ## enumeration of all events that may occur when parsing
19    sexpError,           ## an error occurred during parsing
20    sexpEof,             ## end of file reached
21    sexpString,          ## a string literal
22    sexpSymbol,          ## a symbol
23    sexpInt,             ## an integer literal
24    sexpFloat,           ## a float literal
25    sexpNil,             ## the value ``nil``
26    sexpDot,             ## the dot to separate car/cdr
27    sexpListStart,       ## start of a list: the ``(`` token
28    sexpListEnd,         ## end of a list: the ``)`` token
29
30  TTokKind = enum        # must be synchronized with SexpEventKind!
31    tkError,
32    tkEof,
33    tkString,
34    tkSymbol,
35    tkInt,
36    tkFloat,
37    tkNil,
38    tkDot,
39    tkParensLe,
40    tkParensRi
41    tkSpace
42
43  SexpError* = enum        ## enumeration that lists all errors that can occur
44    errNone,               ## no error
45    errInvalidToken,       ## invalid token
46    errParensRiExpected,    ## ``)`` expected
47    errQuoteExpected,      ## ``"`` expected
48    errEofExpected,        ## EOF expected
49
50  SexpParser* = object of BaseLexer ## the parser object.
51    a: string
52    tok: TTokKind
53    kind: SexpEventKind
54    err: SexpError
55
56const
57  errorMessages: array[SexpError, string] = [
58    "no error",
59    "invalid token",
60    "')' expected",
61    "'\"' or \"'\" expected",
62    "EOF expected",
63  ]
64  tokToStr: array[TTokKind, string] = [
65    "invalid token",
66    "EOF",
67    "string literal",
68    "symbol",
69    "int literal",
70    "float literal",
71    "nil",
72    ".",
73    "(", ")", "space"
74  ]
75
76proc close*(my: var SexpParser) {.inline.} =
77  ## closes the parser `my` and its associated input stream.
78  lexbase.close(my)
79
80proc str*(my: SexpParser): string {.inline.} =
81  ## returns the character data for the events: ``sexpInt``, ``sexpFloat``,
82  ## ``sexpString``
83  assert(my.kind in {sexpInt, sexpFloat, sexpString})
84  result = my.a
85
86proc getInt*(my: SexpParser): BiggestInt {.inline.} =
87  ## returns the number for the event: ``sexpInt``
88  assert(my.kind == sexpInt)
89  result = parseBiggestInt(my.a)
90
91proc getFloat*(my: SexpParser): float {.inline.} =
92  ## returns the number for the event: ``sexpFloat``
93  assert(my.kind == sexpFloat)
94  result = parseFloat(my.a)
95
96proc kind*(my: SexpParser): SexpEventKind {.inline.} =
97  ## returns the current event type for the SEXP parser
98  result = my.kind
99
100proc getColumn*(my: SexpParser): int {.inline.} =
101  ## get the current column the parser has arrived at.
102  result = getColNumber(my, my.bufpos)
103
104proc getLine*(my: SexpParser): int {.inline.} =
105  ## get the current line the parser has arrived at.
106  result = my.lineNumber
107
108proc errorMsg*(my: SexpParser): string =
109  ## returns a helpful error message for the event ``sexpError``
110  assert(my.kind == sexpError)
111  result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), errorMessages[my.err]]
112
113proc errorMsgExpected*(my: SexpParser, e: string): string =
114  ## returns an error message "`e` expected" in the same format as the
115  ## other error messages
116  result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), e & " expected"]
117
118proc parseString(my: var SexpParser): TTokKind =
119  result = tkString
120  var pos = my.bufpos + 1
121  while true:
122    case my.buf[pos]
123    of '\0':
124      my.err = errQuoteExpected
125      result = tkError
126      break
127    of '"':
128      inc(pos)
129      break
130    of '\\':
131      case my.buf[pos+1]
132      of '\\', '"', '\'', '/':
133        add(my.a, my.buf[pos+1])
134        inc(pos, 2)
135      of 'b':
136        add(my.a, '\b')
137        inc(pos, 2)
138      of 'f':
139        add(my.a, '\f')
140        inc(pos, 2)
141      of 'n':
142        add(my.a, '\L')
143        inc(pos, 2)
144      of 'r':
145        add(my.a, '\C')
146        inc(pos, 2)
147      of 't':
148        add(my.a, '\t')
149        inc(pos, 2)
150      of 'u':
151        inc(pos, 2)
152        var r: int
153        if handleHexChar(my.buf[pos], r): inc(pos)
154        if handleHexChar(my.buf[pos], r): inc(pos)
155        if handleHexChar(my.buf[pos], r): inc(pos)
156        if handleHexChar(my.buf[pos], r): inc(pos)
157        add(my.a, toUTF8(Rune(r)))
158      else:
159        # don't bother with the error
160        add(my.a, my.buf[pos])
161        inc(pos)
162    of '\c':
163      pos = lexbase.handleCR(my, pos)
164      add(my.a, '\c')
165    of '\L':
166      pos = lexbase.handleLF(my, pos)
167      add(my.a, '\L')
168    else:
169      add(my.a, my.buf[pos])
170      inc(pos)
171  my.bufpos = pos # store back
172
173proc parseNumber(my: var SexpParser) =
174  var pos = my.bufpos
175  if my.buf[pos] == '-':
176    add(my.a, '-')
177    inc(pos)
178  if my.buf[pos] == '.':
179    add(my.a, "0.")
180    inc(pos)
181  else:
182    while my.buf[pos] in Digits:
183      add(my.a, my.buf[pos])
184      inc(pos)
185    if my.buf[pos] == '.':
186      add(my.a, '.')
187      inc(pos)
188  # digits after the dot:
189  while my.buf[pos] in Digits:
190    add(my.a, my.buf[pos])
191    inc(pos)
192  if my.buf[pos] in {'E', 'e'}:
193    add(my.a, my.buf[pos])
194    inc(pos)
195    if my.buf[pos] in {'+', '-'}:
196      add(my.a, my.buf[pos])
197      inc(pos)
198    while my.buf[pos] in Digits:
199      add(my.a, my.buf[pos])
200      inc(pos)
201  my.bufpos = pos
202
203proc parseSymbol(my: var SexpParser) =
204  var pos = my.bufpos
205  if my.buf[pos] in IdentStartChars:
206    while my.buf[pos] in IdentChars:
207      add(my.a, my.buf[pos])
208      inc(pos)
209  my.bufpos = pos
210
211proc getTok(my: var SexpParser): TTokKind =
212  setLen(my.a, 0)
213  case my.buf[my.bufpos]
214  of '-', '0'..'9': # numbers that start with a . are not parsed
215                    # correctly.
216    parseNumber(my)
217    if {'.', 'e', 'E'} in my.a:
218      result = tkFloat
219    else:
220      result = tkInt
221  of '"': #" # gotta fix nim-mode
222    result = parseString(my)
223  of '(':
224    inc(my.bufpos)
225    result = tkParensLe
226  of ')':
227    inc(my.bufpos)
228    result = tkParensRi
229  of '\0':
230    result = tkEof
231  of 'a'..'z', 'A'..'Z', '_':
232    parseSymbol(my)
233    if my.a == "nil":
234      result = tkNil
235    else:
236      result = tkSymbol
237  of ' ':
238    result = tkSpace
239    inc(my.bufpos)
240  of '.':
241    result = tkDot
242    inc(my.bufpos)
243  else:
244    inc(my.bufpos)
245    result = tkError
246  my.tok = result
247
248# ------------- higher level interface ---------------------------------------
249
250type
251  SexpNodeKind* = enum ## possible SEXP node types
252    SNil,
253    SInt,
254    SFloat,
255    SString,
256    SSymbol,
257    SList,
258    SCons
259
260  SexpNode* = ref SexpNodeObj ## SEXP node
261  SexpNodeObj* {.acyclic.} = object
262    case kind*: SexpNodeKind
263    of SString:
264      str*: string
265    of SSymbol:
266      symbol*: string
267    of SInt:
268      num*: BiggestInt
269    of SFloat:
270      fnum*: float
271    of SList:
272      elems*: seq[SexpNode]
273    of SCons:
274      car: SexpNode
275      cdr: SexpNode
276    of SNil:
277      discard
278
279  Cons = tuple[car: SexpNode, cdr: SexpNode]
280
281  SexpParsingError* = object of ValueError ## is raised for a SEXP error
282
283proc raiseParseErr*(p: SexpParser, msg: string) {.noinline, noreturn.} =
284  ## raises an `ESexpParsingError` exception.
285  raise newException(SexpParsingError, errorMsgExpected(p, msg))
286
287proc newSString*(s: string): SexpNode =
288  ## Creates a new `SString SexpNode`.
289  result = SexpNode(kind: SString, str: s)
290
291proc newSStringMove(s: string): SexpNode =
292  result = SexpNode(kind: SString)
293  shallowCopy(result.str, s)
294
295proc newSInt*(n: BiggestInt): SexpNode =
296  ## Creates a new `SInt SexpNode`.
297  result = SexpNode(kind: SInt, num: n)
298
299proc newSFloat*(n: float): SexpNode =
300  ## Creates a new `SFloat SexpNode`.
301  result = SexpNode(kind: SFloat, fnum: n)
302
303proc newSNil*(): SexpNode =
304  ## Creates a new `SNil SexpNode`.
305  result = SexpNode(kind: SNil)
306
307proc newSCons*(car, cdr: SexpNode): SexpNode =
308  ## Creates a new `SCons SexpNode`
309  result = SexpNode(kind: SCons, car: car, cdr: cdr)
310
311proc newSList*(): SexpNode =
312  ## Creates a new `SList SexpNode`
313  result = SexpNode(kind: SList, elems: @[])
314
315proc newSSymbol*(s: string): SexpNode =
316  result = SexpNode(kind: SSymbol, symbol: s)
317
318proc newSSymbolMove(s: string): SexpNode =
319  result = SexpNode(kind: SSymbol)
320  shallowCopy(result.symbol, s)
321
322proc getStr*(n: SexpNode, default: string = ""): string =
323  ## Retrieves the string value of a `SString SexpNode`.
324  ##
325  ## Returns ``default`` if ``n`` is not a ``SString``.
326  if n.kind != SString: return default
327  else: return n.str
328
329proc getNum*(n: SexpNode, default: BiggestInt = 0): BiggestInt =
330  ## Retrieves the int value of a `SInt SexpNode`.
331  ##
332  ## Returns ``default`` if ``n`` is not a ``SInt``.
333  if n.kind != SInt: return default
334  else: return n.num
335
336proc getFNum*(n: SexpNode, default: float = 0.0): float =
337  ## Retrieves the float value of a `SFloat SexpNode`.
338  ##
339  ## Returns ``default`` if ``n`` is not a ``SFloat``.
340  if n.kind != SFloat: return default
341  else: return n.fnum
342
343proc getSymbol*(n: SexpNode, default: string = ""): string =
344  ## Retrieves the int value of a `SList SexpNode`.
345  ##
346  ## Returns ``default`` if ``n`` is not a ``SList``.
347  if n.kind != SSymbol: return default
348  else: return n.symbol
349
350proc getElems*(n: SexpNode, default: seq[SexpNode] = @[]): seq[SexpNode] =
351  ## Retrieves the int value of a `SList SexpNode`.
352  ##
353  ## Returns ``default`` if ``n`` is not a ``SList``.
354  if n.kind == SNil: return @[]
355  elif n.kind != SList: return default
356  else: return n.elems
357
358proc getCons*(n: SexpNode, defaults: Cons = (newSNil(), newSNil())): Cons =
359  ## Retrieves the cons value of a `SList SexpNode`.
360  ##
361  ## Returns ``default`` if ``n`` is not a ``SList``.
362  if n.kind == SCons: return (n.car, n.cdr)
363  elif n.kind == SList: return (n.elems[0], n.elems[1])
364  else: return defaults
365
366proc sexp*(s: string): SexpNode =
367  ## Generic constructor for SEXP data. Creates a new `SString SexpNode`.
368  result = SexpNode(kind: SString, str: s)
369
370proc sexp*(n: BiggestInt): SexpNode =
371  ## Generic constructor for SEXP data. Creates a new `SInt SexpNode`.
372  result = SexpNode(kind: SInt, num: n)
373
374proc sexp*(n: float): SexpNode =
375  ## Generic constructor for SEXP data. Creates a new `SFloat SexpNode`.
376  result = SexpNode(kind: SFloat, fnum: n)
377
378proc sexp*(b: bool): SexpNode =
379  ## Generic constructor for SEXP data. Creates a new `SSymbol
380  ## SexpNode` with value t or `SNil SexpNode`.
381  if b:
382    result = SexpNode(kind: SSymbol, symbol: "t")
383  else:
384    result = SexpNode(kind: SNil)
385
386proc sexp*(elements: openArray[SexpNode]): SexpNode =
387  ## Generic constructor for SEXP data. Creates a new `SList SexpNode`
388  result = SexpNode(kind: SList)
389  newSeq(result.elems, elements.len)
390  for i, p in pairs(elements): result.elems[i] = p
391
392proc sexp*(s: SexpNode): SexpNode =
393  result = s
394
395proc toSexp(x: NimNode): NimNode {.compileTime.} =
396  case x.kind
397  of nnkBracket:
398    result = newNimNode(nnkBracket)
399    for i in 0 ..< x.len:
400      result.add(toSexp(x[i]))
401
402  else:
403    result = x
404
405  result = prefix(result, "sexp")
406
407macro convertSexp*(x: untyped): untyped =
408  ## Convert an expression to a SexpNode directly, without having to specify
409  ## `%` for every element.
410  result = toSexp(x)
411
412proc `==`* (a,b: SexpNode): bool =
413  ## Check two nodes for equality
414  if a.isNil:
415    if b.isNil: return true
416    return false
417  elif b.isNil or a.kind != b.kind:
418    return false
419  else:
420    return case a.kind
421    of SString:
422      a.str == b.str
423    of SInt:
424      a.num == b.num
425    of SFloat:
426      a.fnum == b.fnum
427    of SNil:
428      true
429    of SList:
430      a.elems == b.elems
431    of SSymbol:
432      a.symbol == b.symbol
433    of SCons:
434      a.car == b.car and a.cdr == b.cdr
435
436proc hash* (n:SexpNode): Hash =
437  ## Compute the hash for a SEXP node
438  case n.kind
439  of SList:
440    result = hash(n.elems)
441  of SInt:
442    result = hash(n.num)
443  of SFloat:
444    result = hash(n.fnum)
445  of SString:
446    result = hash(n.str)
447  of SNil:
448    result = hash(0)
449  of SSymbol:
450    result = hash(n.symbol)
451  of SCons:
452    result = hash(n.car) !& hash(n.cdr)
453
454proc len*(n: SexpNode): int =
455  ## If `n` is a `SList`, it returns the number of elements.
456  ## If `n` is a `JObject`, it returns the number of pairs.
457  ## Else it returns 0.
458  case n.kind
459  of SList: result = n.elems.len
460  else: discard
461
462proc `[]`*(node: SexpNode, index: int): SexpNode =
463  ## Gets the node at `index` in a List. Result is undefined if `index`
464  ## is out of bounds
465  assert(not isNil(node))
466  assert(node.kind == SList)
467  return node.elems[index]
468
469proc add*(father, child: SexpNode) =
470  ## Adds `child` to a SList node `father`.
471  assert father.kind == SList
472  father.elems.add(child)
473
474# ------------- pretty printing ----------------------------------------------
475
476proc indent(s: var string, i: int) =
477  s.add(spaces(i))
478
479proc newIndent(curr, indent: int, ml: bool): int =
480  if ml: return curr + indent
481  else: return indent
482
483proc nl(s: var string, ml: bool) =
484  if ml: s.add("\n")
485
486proc escapeJson*(s: string): string =
487  ## Converts a string `s` to its JSON representation.
488  result = newStringOfCap(s.len + s.len shr 3)
489  result.add("\"")
490  for x in runes(s):
491    var r = int(x)
492    if r >= 32 and r <= 127:
493      var c = chr(r)
494      case c
495      of '"': result.add("\\\"") #" # gotta fix nim-mode
496      of '\\': result.add("\\\\")
497      else: result.add(c)
498    else:
499      result.add("\\u")
500      result.add(toHex(r, 4))
501  result.add("\"")
502
503proc copy*(p: SexpNode): SexpNode =
504  ## Performs a deep copy of `a`.
505  case p.kind
506  of SString:
507    result = newSString(p.str)
508  of SInt:
509    result = newSInt(p.num)
510  of SFloat:
511    result = newSFloat(p.fnum)
512  of SNil:
513    result = newSNil()
514  of SSymbol:
515    result = newSSymbol(p.symbol)
516  of SList:
517    result = newSList()
518    for i in items(p.elems):
519      result.elems.add(copy(i))
520  of SCons:
521    result = newSCons(copy(p.car), copy(p.cdr))
522
523proc toPretty(result: var string, node: SexpNode, indent = 2, ml = true,
524              lstArr = false, currIndent = 0) =
525  case node.kind
526  of SString:
527    if lstArr: result.indent(currIndent)
528    result.add(escapeJson(node.str))
529  of SInt:
530    if lstArr: result.indent(currIndent)
531    result.addInt(node.num)
532  of SFloat:
533    if lstArr: result.indent(currIndent)
534    result.addFloat(node.fnum)
535  of SNil:
536    if lstArr: result.indent(currIndent)
537    result.add("nil")
538  of SSymbol:
539    if lstArr: result.indent(currIndent)
540    result.add(node.symbol)
541  of SList:
542    if lstArr: result.indent(currIndent)
543    if len(node.elems) != 0:
544      result.add("(")
545      result.nl(ml)
546      for i in 0..len(node.elems)-1:
547        if i > 0:
548          result.add(" ")
549          result.nl(ml) # New Line
550        toPretty(result, node.elems[i], indent, ml,
551            true, newIndent(currIndent, indent, ml))
552      result.nl(ml)
553      result.indent(currIndent)
554      result.add(")")
555    else: result.add("nil")
556  of SCons:
557    if lstArr: result.indent(currIndent)
558    result.add("(")
559    toPretty(result, node.car, indent, ml,
560        true, newIndent(currIndent, indent, ml))
561    result.add(" . ")
562    toPretty(result, node.cdr, indent, ml,
563        true, newIndent(currIndent, indent, ml))
564    result.add(")")
565
566proc pretty*(node: SexpNode, indent = 2): string =
567  ## Converts `node` to its Sexp Representation, with indentation and
568  ## on multiple lines.
569  result = ""
570  toPretty(result, node, indent)
571
572proc `$`*(node: SexpNode): string =
573  ## Converts `node` to its SEXP Representation on one line.
574  result = ""
575  toPretty(result, node, 0, false)
576
577iterator items*(node: SexpNode): SexpNode =
578  ## Iterator for the items of `node`. `node` has to be a SList.
579  assert node.kind == SList
580  for i in items(node.elems):
581    yield i
582
583iterator mitems*(node: var SexpNode): var SexpNode =
584  ## Iterator for the items of `node`. `node` has to be a SList. Items can be
585  ## modified.
586  assert node.kind == SList
587  for i in mitems(node.elems):
588    yield i
589
590proc eat(p: var SexpParser, tok: TTokKind) =
591  if p.tok == tok: discard getTok(p)
592  else: raiseParseErr(p, tokToStr[tok])
593
594proc parseSexp(p: var SexpParser): SexpNode =
595  ## Parses SEXP from a SEXP Parser `p`.
596  case p.tok
597  of tkString:
598    # we capture 'p.a' here, so we need to give it a fresh buffer afterwards:
599    result = newSStringMove(p.a)
600    p.a = ""
601    discard getTok(p)
602  of tkInt:
603    result = newSInt(parseBiggestInt(p.a))
604    discard getTok(p)
605  of tkFloat:
606    result = newSFloat(parseFloat(p.a))
607    discard getTok(p)
608  of tkNil:
609    result = newSNil()
610    discard getTok(p)
611  of tkSymbol:
612    result = newSSymbolMove(p.a)
613    p.a = ""
614    discard getTok(p)
615  of tkParensLe:
616    result = newSList()
617    discard getTok(p)
618    while p.tok notin {tkParensRi, tkDot}:
619      result.add(parseSexp(p))
620      if p.tok != tkSpace: break
621      discard getTok(p)
622    if p.tok == tkDot:
623      eat(p, tkDot)
624      eat(p, tkSpace)
625      result.add(parseSexp(p))
626      result = newSCons(result[0], result[1])
627    eat(p, tkParensRi)
628  of tkSpace, tkDot, tkError, tkParensRi, tkEof:
629    raiseParseErr(p, "(")
630
631proc open*(my: var SexpParser, input: Stream) =
632  ## initializes the parser with an input stream.
633  lexbase.open(my, input)
634  my.kind = sexpError
635  my.a = ""
636
637proc parseSexp*(s: Stream): SexpNode =
638  ## Parses from a buffer `s` into a `SexpNode`.
639  var p: SexpParser
640  p.open(s)
641  discard getTok(p) # read first token
642  result = p.parseSexp()
643  p.close()
644
645proc parseSexp*(buffer: string): SexpNode =
646  ## Parses Sexp from `buffer`.
647  result = parseSexp(newStringStream(buffer))
648
649when isMainModule:
650  let testSexp = parseSexp("""(1 (98 2) nil (2) foobar "foo" 9.234)""")
651  assert(testSexp[0].getNum == 1)
652  assert(testSexp[1][0].getNum == 98)
653  assert(testSexp[2].getElems == @[])
654  assert(testSexp[4].getSymbol == "foobar")
655  assert(testSexp[5].getStr == "foo")
656
657  let alist = parseSexp("""((1 . 2) (2 . "foo"))""")
658  assert(alist[0].getCons.car.getNum == 1)
659  assert(alist[0].getCons.cdr.getNum == 2)
660  assert(alist[1].getCons.cdr.getStr == "foo")
661
662  # Generator:
663  var j = convertSexp([true, false, "foobar", [1, 2, "baz"]])
664  assert($j == """(t nil "foobar" (1 2 "baz"))""")
665