1# 2# 3# Nim's Runtime Library 4# (c) Copyright 2015 Andreas Rumpf, Dominik Picheta 5# 6# See the file "copying.txt", included in this 7# distribution, for details about the copyright. 8# 9 10## **Note:** Import ``nimsuggest/sexp`` to use this module 11 12import 13 hashes, strutils, lexbase, streams, unicode, macros 14 15import std/private/decode_helpers 16 17type 18 SexpEventKind* = enum ## enumeration of all events that may occur when parsing 19 sexpError, ## an error occurred during parsing 20 sexpEof, ## end of file reached 21 sexpString, ## a string literal 22 sexpSymbol, ## a symbol 23 sexpInt, ## an integer literal 24 sexpFloat, ## a float literal 25 sexpNil, ## the value ``nil`` 26 sexpDot, ## the dot to separate car/cdr 27 sexpListStart, ## start of a list: the ``(`` token 28 sexpListEnd, ## end of a list: the ``)`` token 29 30 TTokKind = enum # must be synchronized with SexpEventKind! 31 tkError, 32 tkEof, 33 tkString, 34 tkSymbol, 35 tkInt, 36 tkFloat, 37 tkNil, 38 tkDot, 39 tkParensLe, 40 tkParensRi 41 tkSpace 42 43 SexpError* = enum ## enumeration that lists all errors that can occur 44 errNone, ## no error 45 errInvalidToken, ## invalid token 46 errParensRiExpected, ## ``)`` expected 47 errQuoteExpected, ## ``"`` expected 48 errEofExpected, ## EOF expected 49 50 SexpParser* = object of BaseLexer ## the parser object. 51 a: string 52 tok: TTokKind 53 kind: SexpEventKind 54 err: SexpError 55 56const 57 errorMessages: array[SexpError, string] = [ 58 "no error", 59 "invalid token", 60 "')' expected", 61 "'\"' or \"'\" expected", 62 "EOF expected", 63 ] 64 tokToStr: array[TTokKind, string] = [ 65 "invalid token", 66 "EOF", 67 "string literal", 68 "symbol", 69 "int literal", 70 "float literal", 71 "nil", 72 ".", 73 "(", ")", "space" 74 ] 75 76proc close*(my: var SexpParser) {.inline.} = 77 ## closes the parser `my` and its associated input stream. 78 lexbase.close(my) 79 80proc str*(my: SexpParser): string {.inline.} = 81 ## returns the character data for the events: ``sexpInt``, ``sexpFloat``, 82 ## ``sexpString`` 83 assert(my.kind in {sexpInt, sexpFloat, sexpString}) 84 result = my.a 85 86proc getInt*(my: SexpParser): BiggestInt {.inline.} = 87 ## returns the number for the event: ``sexpInt`` 88 assert(my.kind == sexpInt) 89 result = parseBiggestInt(my.a) 90 91proc getFloat*(my: SexpParser): float {.inline.} = 92 ## returns the number for the event: ``sexpFloat`` 93 assert(my.kind == sexpFloat) 94 result = parseFloat(my.a) 95 96proc kind*(my: SexpParser): SexpEventKind {.inline.} = 97 ## returns the current event type for the SEXP parser 98 result = my.kind 99 100proc getColumn*(my: SexpParser): int {.inline.} = 101 ## get the current column the parser has arrived at. 102 result = getColNumber(my, my.bufpos) 103 104proc getLine*(my: SexpParser): int {.inline.} = 105 ## get the current line the parser has arrived at. 106 result = my.lineNumber 107 108proc errorMsg*(my: SexpParser): string = 109 ## returns a helpful error message for the event ``sexpError`` 110 assert(my.kind == sexpError) 111 result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), errorMessages[my.err]] 112 113proc errorMsgExpected*(my: SexpParser, e: string): string = 114 ## returns an error message "`e` expected" in the same format as the 115 ## other error messages 116 result = "($1, $2) Error: $3" % [$getLine(my), $getColumn(my), e & " expected"] 117 118proc parseString(my: var SexpParser): TTokKind = 119 result = tkString 120 var pos = my.bufpos + 1 121 while true: 122 case my.buf[pos] 123 of '\0': 124 my.err = errQuoteExpected 125 result = tkError 126 break 127 of '"': 128 inc(pos) 129 break 130 of '\\': 131 case my.buf[pos+1] 132 of '\\', '"', '\'', '/': 133 add(my.a, my.buf[pos+1]) 134 inc(pos, 2) 135 of 'b': 136 add(my.a, '\b') 137 inc(pos, 2) 138 of 'f': 139 add(my.a, '\f') 140 inc(pos, 2) 141 of 'n': 142 add(my.a, '\L') 143 inc(pos, 2) 144 of 'r': 145 add(my.a, '\C') 146 inc(pos, 2) 147 of 't': 148 add(my.a, '\t') 149 inc(pos, 2) 150 of 'u': 151 inc(pos, 2) 152 var r: int 153 if handleHexChar(my.buf[pos], r): inc(pos) 154 if handleHexChar(my.buf[pos], r): inc(pos) 155 if handleHexChar(my.buf[pos], r): inc(pos) 156 if handleHexChar(my.buf[pos], r): inc(pos) 157 add(my.a, toUTF8(Rune(r))) 158 else: 159 # don't bother with the error 160 add(my.a, my.buf[pos]) 161 inc(pos) 162 of '\c': 163 pos = lexbase.handleCR(my, pos) 164 add(my.a, '\c') 165 of '\L': 166 pos = lexbase.handleLF(my, pos) 167 add(my.a, '\L') 168 else: 169 add(my.a, my.buf[pos]) 170 inc(pos) 171 my.bufpos = pos # store back 172 173proc parseNumber(my: var SexpParser) = 174 var pos = my.bufpos 175 if my.buf[pos] == '-': 176 add(my.a, '-') 177 inc(pos) 178 if my.buf[pos] == '.': 179 add(my.a, "0.") 180 inc(pos) 181 else: 182 while my.buf[pos] in Digits: 183 add(my.a, my.buf[pos]) 184 inc(pos) 185 if my.buf[pos] == '.': 186 add(my.a, '.') 187 inc(pos) 188 # digits after the dot: 189 while my.buf[pos] in Digits: 190 add(my.a, my.buf[pos]) 191 inc(pos) 192 if my.buf[pos] in {'E', 'e'}: 193 add(my.a, my.buf[pos]) 194 inc(pos) 195 if my.buf[pos] in {'+', '-'}: 196 add(my.a, my.buf[pos]) 197 inc(pos) 198 while my.buf[pos] in Digits: 199 add(my.a, my.buf[pos]) 200 inc(pos) 201 my.bufpos = pos 202 203proc parseSymbol(my: var SexpParser) = 204 var pos = my.bufpos 205 if my.buf[pos] in IdentStartChars: 206 while my.buf[pos] in IdentChars: 207 add(my.a, my.buf[pos]) 208 inc(pos) 209 my.bufpos = pos 210 211proc getTok(my: var SexpParser): TTokKind = 212 setLen(my.a, 0) 213 case my.buf[my.bufpos] 214 of '-', '0'..'9': # numbers that start with a . are not parsed 215 # correctly. 216 parseNumber(my) 217 if {'.', 'e', 'E'} in my.a: 218 result = tkFloat 219 else: 220 result = tkInt 221 of '"': #" # gotta fix nim-mode 222 result = parseString(my) 223 of '(': 224 inc(my.bufpos) 225 result = tkParensLe 226 of ')': 227 inc(my.bufpos) 228 result = tkParensRi 229 of '\0': 230 result = tkEof 231 of 'a'..'z', 'A'..'Z', '_': 232 parseSymbol(my) 233 if my.a == "nil": 234 result = tkNil 235 else: 236 result = tkSymbol 237 of ' ': 238 result = tkSpace 239 inc(my.bufpos) 240 of '.': 241 result = tkDot 242 inc(my.bufpos) 243 else: 244 inc(my.bufpos) 245 result = tkError 246 my.tok = result 247 248# ------------- higher level interface --------------------------------------- 249 250type 251 SexpNodeKind* = enum ## possible SEXP node types 252 SNil, 253 SInt, 254 SFloat, 255 SString, 256 SSymbol, 257 SList, 258 SCons 259 260 SexpNode* = ref SexpNodeObj ## SEXP node 261 SexpNodeObj* {.acyclic.} = object 262 case kind*: SexpNodeKind 263 of SString: 264 str*: string 265 of SSymbol: 266 symbol*: string 267 of SInt: 268 num*: BiggestInt 269 of SFloat: 270 fnum*: float 271 of SList: 272 elems*: seq[SexpNode] 273 of SCons: 274 car: SexpNode 275 cdr: SexpNode 276 of SNil: 277 discard 278 279 Cons = tuple[car: SexpNode, cdr: SexpNode] 280 281 SexpParsingError* = object of ValueError ## is raised for a SEXP error 282 283proc raiseParseErr*(p: SexpParser, msg: string) {.noinline, noreturn.} = 284 ## raises an `ESexpParsingError` exception. 285 raise newException(SexpParsingError, errorMsgExpected(p, msg)) 286 287proc newSString*(s: string): SexpNode = 288 ## Creates a new `SString SexpNode`. 289 result = SexpNode(kind: SString, str: s) 290 291proc newSStringMove(s: string): SexpNode = 292 result = SexpNode(kind: SString) 293 shallowCopy(result.str, s) 294 295proc newSInt*(n: BiggestInt): SexpNode = 296 ## Creates a new `SInt SexpNode`. 297 result = SexpNode(kind: SInt, num: n) 298 299proc newSFloat*(n: float): SexpNode = 300 ## Creates a new `SFloat SexpNode`. 301 result = SexpNode(kind: SFloat, fnum: n) 302 303proc newSNil*(): SexpNode = 304 ## Creates a new `SNil SexpNode`. 305 result = SexpNode(kind: SNil) 306 307proc newSCons*(car, cdr: SexpNode): SexpNode = 308 ## Creates a new `SCons SexpNode` 309 result = SexpNode(kind: SCons, car: car, cdr: cdr) 310 311proc newSList*(): SexpNode = 312 ## Creates a new `SList SexpNode` 313 result = SexpNode(kind: SList, elems: @[]) 314 315proc newSSymbol*(s: string): SexpNode = 316 result = SexpNode(kind: SSymbol, symbol: s) 317 318proc newSSymbolMove(s: string): SexpNode = 319 result = SexpNode(kind: SSymbol) 320 shallowCopy(result.symbol, s) 321 322proc getStr*(n: SexpNode, default: string = ""): string = 323 ## Retrieves the string value of a `SString SexpNode`. 324 ## 325 ## Returns ``default`` if ``n`` is not a ``SString``. 326 if n.kind != SString: return default 327 else: return n.str 328 329proc getNum*(n: SexpNode, default: BiggestInt = 0): BiggestInt = 330 ## Retrieves the int value of a `SInt SexpNode`. 331 ## 332 ## Returns ``default`` if ``n`` is not a ``SInt``. 333 if n.kind != SInt: return default 334 else: return n.num 335 336proc getFNum*(n: SexpNode, default: float = 0.0): float = 337 ## Retrieves the float value of a `SFloat SexpNode`. 338 ## 339 ## Returns ``default`` if ``n`` is not a ``SFloat``. 340 if n.kind != SFloat: return default 341 else: return n.fnum 342 343proc getSymbol*(n: SexpNode, default: string = ""): string = 344 ## Retrieves the int value of a `SList SexpNode`. 345 ## 346 ## Returns ``default`` if ``n`` is not a ``SList``. 347 if n.kind != SSymbol: return default 348 else: return n.symbol 349 350proc getElems*(n: SexpNode, default: seq[SexpNode] = @[]): seq[SexpNode] = 351 ## Retrieves the int value of a `SList SexpNode`. 352 ## 353 ## Returns ``default`` if ``n`` is not a ``SList``. 354 if n.kind == SNil: return @[] 355 elif n.kind != SList: return default 356 else: return n.elems 357 358proc getCons*(n: SexpNode, defaults: Cons = (newSNil(), newSNil())): Cons = 359 ## Retrieves the cons value of a `SList SexpNode`. 360 ## 361 ## Returns ``default`` if ``n`` is not a ``SList``. 362 if n.kind == SCons: return (n.car, n.cdr) 363 elif n.kind == SList: return (n.elems[0], n.elems[1]) 364 else: return defaults 365 366proc sexp*(s: string): SexpNode = 367 ## Generic constructor for SEXP data. Creates a new `SString SexpNode`. 368 result = SexpNode(kind: SString, str: s) 369 370proc sexp*(n: BiggestInt): SexpNode = 371 ## Generic constructor for SEXP data. Creates a new `SInt SexpNode`. 372 result = SexpNode(kind: SInt, num: n) 373 374proc sexp*(n: float): SexpNode = 375 ## Generic constructor for SEXP data. Creates a new `SFloat SexpNode`. 376 result = SexpNode(kind: SFloat, fnum: n) 377 378proc sexp*(b: bool): SexpNode = 379 ## Generic constructor for SEXP data. Creates a new `SSymbol 380 ## SexpNode` with value t or `SNil SexpNode`. 381 if b: 382 result = SexpNode(kind: SSymbol, symbol: "t") 383 else: 384 result = SexpNode(kind: SNil) 385 386proc sexp*(elements: openArray[SexpNode]): SexpNode = 387 ## Generic constructor for SEXP data. Creates a new `SList SexpNode` 388 result = SexpNode(kind: SList) 389 newSeq(result.elems, elements.len) 390 for i, p in pairs(elements): result.elems[i] = p 391 392proc sexp*(s: SexpNode): SexpNode = 393 result = s 394 395proc toSexp(x: NimNode): NimNode {.compileTime.} = 396 case x.kind 397 of nnkBracket: 398 result = newNimNode(nnkBracket) 399 for i in 0 ..< x.len: 400 result.add(toSexp(x[i])) 401 402 else: 403 result = x 404 405 result = prefix(result, "sexp") 406 407macro convertSexp*(x: untyped): untyped = 408 ## Convert an expression to a SexpNode directly, without having to specify 409 ## `%` for every element. 410 result = toSexp(x) 411 412proc `==`* (a,b: SexpNode): bool = 413 ## Check two nodes for equality 414 if a.isNil: 415 if b.isNil: return true 416 return false 417 elif b.isNil or a.kind != b.kind: 418 return false 419 else: 420 return case a.kind 421 of SString: 422 a.str == b.str 423 of SInt: 424 a.num == b.num 425 of SFloat: 426 a.fnum == b.fnum 427 of SNil: 428 true 429 of SList: 430 a.elems == b.elems 431 of SSymbol: 432 a.symbol == b.symbol 433 of SCons: 434 a.car == b.car and a.cdr == b.cdr 435 436proc hash* (n:SexpNode): Hash = 437 ## Compute the hash for a SEXP node 438 case n.kind 439 of SList: 440 result = hash(n.elems) 441 of SInt: 442 result = hash(n.num) 443 of SFloat: 444 result = hash(n.fnum) 445 of SString: 446 result = hash(n.str) 447 of SNil: 448 result = hash(0) 449 of SSymbol: 450 result = hash(n.symbol) 451 of SCons: 452 result = hash(n.car) !& hash(n.cdr) 453 454proc len*(n: SexpNode): int = 455 ## If `n` is a `SList`, it returns the number of elements. 456 ## If `n` is a `JObject`, it returns the number of pairs. 457 ## Else it returns 0. 458 case n.kind 459 of SList: result = n.elems.len 460 else: discard 461 462proc `[]`*(node: SexpNode, index: int): SexpNode = 463 ## Gets the node at `index` in a List. Result is undefined if `index` 464 ## is out of bounds 465 assert(not isNil(node)) 466 assert(node.kind == SList) 467 return node.elems[index] 468 469proc add*(father, child: SexpNode) = 470 ## Adds `child` to a SList node `father`. 471 assert father.kind == SList 472 father.elems.add(child) 473 474# ------------- pretty printing ---------------------------------------------- 475 476proc indent(s: var string, i: int) = 477 s.add(spaces(i)) 478 479proc newIndent(curr, indent: int, ml: bool): int = 480 if ml: return curr + indent 481 else: return indent 482 483proc nl(s: var string, ml: bool) = 484 if ml: s.add("\n") 485 486proc escapeJson*(s: string): string = 487 ## Converts a string `s` to its JSON representation. 488 result = newStringOfCap(s.len + s.len shr 3) 489 result.add("\"") 490 for x in runes(s): 491 var r = int(x) 492 if r >= 32 and r <= 127: 493 var c = chr(r) 494 case c 495 of '"': result.add("\\\"") #" # gotta fix nim-mode 496 of '\\': result.add("\\\\") 497 else: result.add(c) 498 else: 499 result.add("\\u") 500 result.add(toHex(r, 4)) 501 result.add("\"") 502 503proc copy*(p: SexpNode): SexpNode = 504 ## Performs a deep copy of `a`. 505 case p.kind 506 of SString: 507 result = newSString(p.str) 508 of SInt: 509 result = newSInt(p.num) 510 of SFloat: 511 result = newSFloat(p.fnum) 512 of SNil: 513 result = newSNil() 514 of SSymbol: 515 result = newSSymbol(p.symbol) 516 of SList: 517 result = newSList() 518 for i in items(p.elems): 519 result.elems.add(copy(i)) 520 of SCons: 521 result = newSCons(copy(p.car), copy(p.cdr)) 522 523proc toPretty(result: var string, node: SexpNode, indent = 2, ml = true, 524 lstArr = false, currIndent = 0) = 525 case node.kind 526 of SString: 527 if lstArr: result.indent(currIndent) 528 result.add(escapeJson(node.str)) 529 of SInt: 530 if lstArr: result.indent(currIndent) 531 result.addInt(node.num) 532 of SFloat: 533 if lstArr: result.indent(currIndent) 534 result.addFloat(node.fnum) 535 of SNil: 536 if lstArr: result.indent(currIndent) 537 result.add("nil") 538 of SSymbol: 539 if lstArr: result.indent(currIndent) 540 result.add(node.symbol) 541 of SList: 542 if lstArr: result.indent(currIndent) 543 if len(node.elems) != 0: 544 result.add("(") 545 result.nl(ml) 546 for i in 0..len(node.elems)-1: 547 if i > 0: 548 result.add(" ") 549 result.nl(ml) # New Line 550 toPretty(result, node.elems[i], indent, ml, 551 true, newIndent(currIndent, indent, ml)) 552 result.nl(ml) 553 result.indent(currIndent) 554 result.add(")") 555 else: result.add("nil") 556 of SCons: 557 if lstArr: result.indent(currIndent) 558 result.add("(") 559 toPretty(result, node.car, indent, ml, 560 true, newIndent(currIndent, indent, ml)) 561 result.add(" . ") 562 toPretty(result, node.cdr, indent, ml, 563 true, newIndent(currIndent, indent, ml)) 564 result.add(")") 565 566proc pretty*(node: SexpNode, indent = 2): string = 567 ## Converts `node` to its Sexp Representation, with indentation and 568 ## on multiple lines. 569 result = "" 570 toPretty(result, node, indent) 571 572proc `$`*(node: SexpNode): string = 573 ## Converts `node` to its SEXP Representation on one line. 574 result = "" 575 toPretty(result, node, 0, false) 576 577iterator items*(node: SexpNode): SexpNode = 578 ## Iterator for the items of `node`. `node` has to be a SList. 579 assert node.kind == SList 580 for i in items(node.elems): 581 yield i 582 583iterator mitems*(node: var SexpNode): var SexpNode = 584 ## Iterator for the items of `node`. `node` has to be a SList. Items can be 585 ## modified. 586 assert node.kind == SList 587 for i in mitems(node.elems): 588 yield i 589 590proc eat(p: var SexpParser, tok: TTokKind) = 591 if p.tok == tok: discard getTok(p) 592 else: raiseParseErr(p, tokToStr[tok]) 593 594proc parseSexp(p: var SexpParser): SexpNode = 595 ## Parses SEXP from a SEXP Parser `p`. 596 case p.tok 597 of tkString: 598 # we capture 'p.a' here, so we need to give it a fresh buffer afterwards: 599 result = newSStringMove(p.a) 600 p.a = "" 601 discard getTok(p) 602 of tkInt: 603 result = newSInt(parseBiggestInt(p.a)) 604 discard getTok(p) 605 of tkFloat: 606 result = newSFloat(parseFloat(p.a)) 607 discard getTok(p) 608 of tkNil: 609 result = newSNil() 610 discard getTok(p) 611 of tkSymbol: 612 result = newSSymbolMove(p.a) 613 p.a = "" 614 discard getTok(p) 615 of tkParensLe: 616 result = newSList() 617 discard getTok(p) 618 while p.tok notin {tkParensRi, tkDot}: 619 result.add(parseSexp(p)) 620 if p.tok != tkSpace: break 621 discard getTok(p) 622 if p.tok == tkDot: 623 eat(p, tkDot) 624 eat(p, tkSpace) 625 result.add(parseSexp(p)) 626 result = newSCons(result[0], result[1]) 627 eat(p, tkParensRi) 628 of tkSpace, tkDot, tkError, tkParensRi, tkEof: 629 raiseParseErr(p, "(") 630 631proc open*(my: var SexpParser, input: Stream) = 632 ## initializes the parser with an input stream. 633 lexbase.open(my, input) 634 my.kind = sexpError 635 my.a = "" 636 637proc parseSexp*(s: Stream): SexpNode = 638 ## Parses from a buffer `s` into a `SexpNode`. 639 var p: SexpParser 640 p.open(s) 641 discard getTok(p) # read first token 642 result = p.parseSexp() 643 p.close() 644 645proc parseSexp*(buffer: string): SexpNode = 646 ## Parses Sexp from `buffer`. 647 result = parseSexp(newStringStream(buffer)) 648 649when isMainModule: 650 let testSexp = parseSexp("""(1 (98 2) nil (2) foobar "foo" 9.234)""") 651 assert(testSexp[0].getNum == 1) 652 assert(testSexp[1][0].getNum == 98) 653 assert(testSexp[2].getElems == @[]) 654 assert(testSexp[4].getSymbol == "foobar") 655 assert(testSexp[5].getStr == "foo") 656 657 let alist = parseSexp("""((1 . 2) (2 . "foo"))""") 658 assert(alist[0].getCons.car.getNum == 1) 659 assert(alist[0].getCons.cdr.getNum == 2) 660 assert(alist[1].getCons.cdr.getStr == "foo") 661 662 # Generator: 663 var j = convertSexp([true, false, "foobar", [1, 2, "baz"]]) 664 assert($j == """(t nil "foobar" (1 2 "baz"))""") 665