1// Copyright (c) 2020, Peter Ohler, All rights reserved. 2 3package gen 4 5import ( 6 "bytes" 7 "fmt" 8 "io" 9 "math" 10 "unicode/utf8" 11) 12 13const ( 14 stackInitSize = 32 // for container stack { or [ 15 tmpInitSize = 32 // for tokens and numbers 16 mapInitSize = 8 17 readBufSize = 4096 18) 19 20// Parser is a reusable JSON parser. It can be reused for multiple parsings 21// which allows buffer reuse for a performance advantage. 22type Parser struct { 23 tmp []byte // used for numbers and strings 24 runeBytes []byte 25 stack []Node 26 starts []int 27 maps []Object 28 cb func(Node) bool 29 resultChan chan Node 30 line int 31 noff int // Offset of last newline from start of buf. Can be negative when using a reader. 32 ri int // read index for null, false, and true 33 mi int 34 num Number 35 rn rune 36 result Node 37 mode string 38 nextMode string 39 40 // OnlyOne returns an error if more than one JSON is in the string or stream. 41 OnlyOne bool 42 43 // Reuse maps. Previously returned maps will no longer be valid or rather 44 // could be modified during parsing. 45 Reuse bool 46} 47 48// Parse a JSON string in to simple types. An error is returned if not valid JSON. 49func (p *Parser) Parse(buf []byte, args ...interface{}) (Node, error) { 50 for _, a := range args { 51 switch ta := a.(type) { 52 case func(Node) bool: 53 p.cb = ta 54 p.OnlyOne = false 55 case chan Node: 56 p.resultChan = ta 57 p.OnlyOne = false 58 p.Reuse = false 59 default: 60 return nil, fmt.Errorf("a %T is not a valid option type", a) 61 } 62 } 63 if p.cb == nil && p.resultChan == nil { 64 p.OnlyOne = true 65 } 66 if p.stack == nil { 67 p.stack = make([]Node, 0, stackInitSize) 68 p.tmp = make([]byte, 0, tmpInitSize) 69 p.starts = make([]int, 0, 16) 70 p.maps = make([]Object, 0, 16) 71 } else { 72 p.stack = p.stack[:0] 73 p.tmp = p.tmp[:0] 74 p.starts = p.starts[:0] 75 } 76 p.result = nil 77 p.noff = -1 78 p.line = 1 79 p.mode = valueMap 80 p.mi = 0 81 var err error 82 // Skip BOM if present. 83 if 3 < len(buf) && buf[0] == 0xEF { 84 if buf[1] == 0xBB && buf[2] == 0xBF { 85 err = p.parseBuffer(buf[3:], true) 86 } else { 87 return nil, fmt.Errorf("expected BOM at 1:3") 88 } 89 } else { 90 err = p.parseBuffer(buf, true) 91 } 92 p.stack = p.stack[:cap(p.stack)] 93 for i := len(p.stack) - 1; 0 <= i; i-- { 94 p.stack[i] = nil 95 } 96 p.stack = p.stack[:0] 97 98 return p.result, err 99} 100 101// ParseReader a JSON io.Reader. An error is returned if not valid JSON. 102func (p *Parser) ParseReader(r io.Reader, args ...interface{}) (data Node, err error) { 103 for _, a := range args { 104 switch ta := a.(type) { 105 case func(Node) bool: 106 p.cb = ta 107 p.OnlyOne = false 108 case chan Node: 109 p.resultChan = ta 110 p.OnlyOne = false 111 p.Reuse = false 112 default: 113 return nil, fmt.Errorf("a %T is not a valid option type", a) 114 } 115 } 116 if p.cb == nil && p.resultChan == nil { 117 p.OnlyOne = true 118 } 119 if p.stack == nil { 120 p.stack = make([]Node, 0, stackInitSize) 121 p.tmp = make([]byte, 0, tmpInitSize) 122 p.starts = make([]int, 0, 16) 123 p.maps = make([]Object, 0, 16) 124 } else { 125 p.stack = p.stack[:0] 126 p.tmp = p.tmp[:0] 127 p.starts = p.starts[:0] 128 } 129 p.result = nil 130 p.noff = -1 131 p.line = 1 132 p.mi = 0 133 buf := make([]byte, readBufSize) 134 eof := false 135 var cnt int 136 cnt, err = r.Read(buf) 137 buf = buf[:cnt] 138 p.mode = valueMap 139 if err != nil { 140 if err != io.EOF { 141 return 142 } 143 eof = true 144 } 145 var skip int 146 // Skip BOM if present. 147 if 3 < len(buf) && buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF { 148 skip = 3 149 } 150 for { 151 if 0 < skip { 152 err = p.parseBuffer(buf[skip:], eof) 153 } else { 154 err = p.parseBuffer(buf, eof) 155 } 156 if err != nil { 157 p.stack = p.stack[:cap(p.stack)] 158 for i := len(p.stack) - 1; 0 <= i; i-- { 159 p.stack[i] = nil 160 } 161 p.stack = p.stack[:0] 162 163 return 164 } 165 skip = 0 166 if eof { 167 break 168 } 169 buf = buf[:cap(buf)] 170 cnt, err = r.Read(buf) 171 buf = buf[:cnt] 172 if err != nil { 173 if err != io.EOF { 174 return 175 } 176 eof = true 177 } 178 } 179 data = p.result 180 181 return 182} 183 184func (p *Parser) parseBuffer(buf []byte, last bool) error { 185 var b byte 186 var i int 187 var off int 188 depth := len(p.starts) 189 for off = 0; off < len(buf); off++ { 190 b = buf[off] 191 switch p.mode[b] { 192 case skipNewline: 193 p.line++ 194 p.noff = off 195 for i, b = range buf[off+1:] { 196 if spaceMap[b] != skipChar { 197 break 198 } 199 } 200 off += i 201 continue 202 case colonColon: 203 p.mode = valueMap 204 continue 205 case skipChar: // skip and continue 206 continue 207 case strOk: 208 p.tmp = append(p.tmp, b) 209 case keyQuote: 210 start := off + 1 211 if len(buf) <= start { 212 p.tmp = p.tmp[:0] 213 p.mode = stringMap 214 p.nextMode = colonMap 215 continue 216 } 217 for i, b = range buf[off+1:] { 218 if stringMap[b] != strOk { 219 break 220 } 221 } 222 off += i 223 if b == '"' { 224 off++ 225 p.stack = append(p.stack, Key(buf[start:off])) 226 p.mode = colonMap 227 } else { 228 p.tmp = p.tmp[:0] 229 p.tmp = append(p.tmp, buf[start:off+1]...) 230 p.mode = stringMap 231 p.nextMode = colonMap 232 } 233 continue 234 case afterComma: 235 if 0 < len(p.starts) && p.starts[len(p.starts)-1] == -1 { 236 p.mode = keyMap 237 } else { 238 p.mode = commaMap 239 } 240 continue 241 case valQuote: 242 start := off + 1 243 if len(buf) <= start { 244 p.tmp = p.tmp[:0] 245 p.mode = stringMap 246 p.nextMode = afterMap 247 continue 248 } 249 for i, b = range buf[off+1:] { 250 if stringMap[b] != strOk { 251 break 252 } 253 } 254 off += i 255 if b == '"' { 256 off++ 257 p.add(String(buf[start:off])) 258 p.mode = afterMap 259 } else { 260 p.tmp = p.tmp[:0] 261 p.tmp = append(p.tmp, buf[start:off+1]...) 262 p.mode = stringMap 263 p.nextMode = afterMap 264 continue 265 } 266 case numComma: 267 p.add(p.num.AsNode()) 268 if 0 < len(p.starts) && p.starts[len(p.starts)-1] == -1 { 269 p.mode = keyMap 270 } else { 271 p.mode = commaMap 272 } 273 case strSlash: 274 p.mode = escMap 275 continue 276 case escOk: 277 p.tmp = append(p.tmp, escByteMap[b]) 278 p.mode = stringMap 279 continue 280 case openObject: 281 p.starts = append(p.starts, -1) 282 p.mode = key1Map 283 var m Object 284 if p.Reuse { 285 if p.mi < len(p.maps) { 286 m = p.maps[p.mi] 287 for k := range m { 288 delete(m, k) 289 } 290 } else { 291 m = make(Object, mapInitSize) 292 p.maps = append(p.maps, m) 293 } 294 p.mi++ 295 } else { 296 m = make(Object, mapInitSize) 297 } 298 p.stack = append(p.stack, m) 299 depth++ 300 continue 301 case closeObject: 302 depth-- 303 if depth < 0 || 0 <= p.starts[depth] { 304 return p.newError(off, "unexpected object close") 305 } 306 if 256 < len(p.mode) && p.mode[256] == 'n' { 307 p.add(p.num.AsNode()) 308 } 309 p.starts = p.starts[0:depth] 310 n := p.stack[len(p.stack)-1] 311 p.stack = p.stack[:len(p.stack)-1] 312 p.add(n) 313 p.mode = afterMap 314 case val0: 315 p.mode = zeroMap 316 p.num.Reset() 317 case valDigit: 318 p.num.Reset() 319 p.mode = digitMap 320 p.num.I = uint64(b - '0') 321 for i, b = range buf[off+1:] { 322 if digitMap[b] != numDigit { 323 break 324 } 325 p.num.I = p.num.I*10 + uint64(b-'0') 326 if math.MaxInt64 < p.num.I { 327 p.num.FillBig() 328 break 329 } 330 } 331 if digitMap[b] == numDigit { 332 off++ 333 } 334 off += i 335 case valNeg: 336 p.mode = negMap 337 p.num.Reset() 338 p.num.Neg = true 339 continue 340 case escU: 341 p.mode = uMap 342 p.rn = 0 343 p.ri = 0 344 continue 345 case openArray: 346 p.starts = append(p.starts, len(p.stack)) 347 p.stack = append(p.stack, EmptyArray) 348 p.mode = valueMap 349 depth++ 350 continue 351 case closeArray: 352 depth-- 353 if depth < 0 || p.starts[depth] < 0 { 354 return p.newError(off, "unexpected array close") 355 } 356 // Only modes with a close array are value, after, and numbers 357 // which are all over 256 long. 358 if p.mode[256] == 'n' { 359 p.add(p.num.AsNode()) 360 } 361 start := p.starts[len(p.starts)-1] + 1 362 p.starts = p.starts[:len(p.starts)-1] 363 size := len(p.stack) - start 364 n := make(Array, size) 365 copy(n, p.stack[start:len(p.stack)]) 366 p.stack = p.stack[0 : start-1] 367 p.add(n) 368 p.mode = afterMap 369 case valNull: 370 if off+4 <= len(buf) && string(buf[off:off+4]) == "null" { 371 off += 3 372 p.mode = afterMap 373 p.add(nil) 374 } else { 375 p.mode = nullMap 376 p.ri = 0 377 } 378 case valTrue: 379 if off+4 <= len(buf) && string(buf[off:off+4]) == "true" { 380 off += 3 381 p.mode = afterMap 382 p.add(True) 383 } else { 384 p.mode = trueMap 385 p.ri = 0 386 } 387 case valFalse: 388 if off+5 <= len(buf) && string(buf[off:off+5]) == "false" { 389 off += 4 390 p.mode = afterMap 391 p.add(False) 392 } else { 393 p.mode = falseMap 394 p.ri = 0 395 } 396 case numDot: 397 if 0 < len(p.num.BigBuf) { 398 p.num.BigBuf = append(p.num.BigBuf, b) 399 p.mode = dotMap 400 continue 401 } 402 for i, b = range buf[off+1:] { 403 if digitMap[b] != numDigit { 404 break 405 } 406 p.num.Frac = p.num.Frac*10 + uint64(b-'0') 407 p.num.Div *= 10.0 408 if math.MaxInt64 < p.num.Frac { 409 p.num.FillBig() 410 break 411 } 412 } 413 off += i 414 if digitMap[b] == numDigit { 415 off++ 416 } 417 p.mode = fracMap 418 case numFrac: 419 p.num.AddFrac(b) 420 p.mode = fracMap 421 case fracE: 422 if 0 < len(p.num.BigBuf) { 423 p.num.BigBuf = append(p.num.BigBuf, b) 424 } 425 p.mode = expSignMap 426 continue 427 case strQuote: 428 p.mode = p.nextMode 429 if p.mode[':'] == colonColon { 430 p.stack = append(p.stack, Key(p.tmp)) 431 } else { 432 p.add(String(p.tmp)) 433 } 434 case numZero: 435 p.mode = zeroMap 436 case numDigit: 437 p.num.AddDigit(b) 438 case negDigit: 439 p.num.AddDigit(b) 440 p.mode = digitMap 441 case numSpc: 442 p.add(p.num.AsNode()) 443 p.mode = afterMap 444 case numNewline: 445 p.add(p.num.AsNode()) 446 p.line++ 447 p.noff = off 448 p.mode = afterMap 449 for i, b = range buf[off+1:] { 450 if spaceMap[b] != skipChar { 451 break 452 } 453 } 454 off += i 455 case expSign: 456 p.mode = expZeroMap 457 if b == '-' { 458 p.num.NegExp = true 459 } 460 continue 461 case expDigit: 462 p.num.AddExp(b) 463 p.mode = expMap 464 case uOk: 465 p.ri++ 466 switch b { 467 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': 468 p.rn = p.rn<<4 | rune(b-'0') 469 case 'a', 'b', 'c', 'd', 'e', 'f': 470 p.rn = p.rn<<4 | rune(b-'a'+10) 471 case 'A', 'B', 'C', 'D', 'E', 'F': 472 p.rn = p.rn<<4 | rune(b-'A'+10) 473 } 474 if p.ri == 4 { 475 if len(p.runeBytes) < 6 { 476 p.runeBytes = make([]byte, 6) 477 } 478 n := utf8.EncodeRune(p.runeBytes, p.rn) 479 p.tmp = append(p.tmp, p.runeBytes[:n]...) 480 p.mode = stringMap 481 } 482 continue 483 case tokenOk: 484 switch { 485 case p.mode['r'] == tokenOk: 486 p.ri++ 487 if "true"[p.ri] != b { 488 return p.newError(off, "expected true") 489 } 490 if 3 <= p.ri { 491 p.add(True) 492 p.mode = afterMap 493 } 494 case p.mode['a'] == tokenOk: 495 p.ri++ 496 if "false"[p.ri] != b { 497 return p.newError(off, "expected false") 498 } 499 if 4 <= p.ri { 500 p.add(False) 501 p.mode = afterMap 502 } 503 case p.mode['u'] == tokenOk && p.mode['l'] == tokenOk: 504 p.ri++ 505 if "null"[p.ri] != b { 506 return p.newError(off, "expected null") 507 } 508 if 3 <= p.ri { 509 p.add(nil) 510 p.mode = afterMap 511 } 512 } 513 case charErr: 514 return p.byteError(off, p.mode, b, bytes.Runes(buf[off:])[0]) 515 } 516 if depth == 0 && 256 < len(p.mode) && p.mode[256] == 'a' { 517 if p.cb == nil && p.resultChan == nil { 518 p.result = p.stack[0] 519 } else { 520 if p.cb != nil { 521 p.cb(p.stack[0]) 522 } 523 if p.resultChan != nil { 524 p.resultChan <- p.stack[0] 525 } 526 } 527 p.stack = p.stack[:0] 528 p.mi = 0 529 if p.OnlyOne { 530 p.mode = spaceMap 531 } else { 532 p.mode = valueMap 533 } 534 } 535 } 536 if last { 537 if len(p.mode) == 256 { // valid finishing maps are one byte longer 538 return p.newError(off, "incomplete JSON") 539 } 540 if p.mode[256] == 'n' { 541 p.add(p.num.AsNode()) 542 if p.cb == nil && p.resultChan == nil { 543 p.result = p.stack[0] 544 } else { 545 if p.cb != nil { 546 p.cb(p.stack[0]) 547 } 548 if p.resultChan != nil { 549 p.resultChan <- p.stack[0] 550 } 551 } 552 } 553 } 554 return nil 555} 556 557func (p *Parser) add(n Node) { 558 if 2 <= len(p.stack) { 559 if k, ok := p.stack[len(p.stack)-1].(Key); ok { 560 obj, _ := p.stack[len(p.stack)-2].(Object) 561 obj[string(k)] = n 562 p.stack = p.stack[0 : len(p.stack)-1] 563 564 return 565 } 566 } 567 p.stack = append(p.stack, n) 568} 569 570func (p *Parser) newError(off int, format string, args ...interface{}) error { 571 return &ParseError{ 572 Message: fmt.Sprintf(format, args...), 573 Line: p.line, 574 Column: off - p.noff, 575 } 576} 577 578func (p *Parser) byteError(off int, mode string, b byte, r rune) error { 579 err := &ParseError{ 580 Line: p.line, 581 Column: off - p.noff, 582 } 583 switch mode { 584 case nullMap: 585 err.Message = "expected null" 586 case trueMap: 587 err.Message = "expected true" 588 case falseMap: 589 err.Message = "expected false" 590 case afterMap: 591 err.Message = fmt.Sprintf("expected a comma or close, not '%c'", r) 592 case key1Map: 593 err.Message = fmt.Sprintf("expected a string start or object close, not '%c'", r) 594 case keyMap: 595 err.Message = fmt.Sprintf("expected a string start, not '%c'", r) 596 case colonMap: 597 err.Message = fmt.Sprintf("expected a colon, not '%c'", r) 598 case negMap, zeroMap, digitMap, dotMap, fracMap, expSignMap, expZeroMap, expMap: 599 err.Message = "invalid number" 600 case stringMap: 601 err.Message = fmt.Sprintf("invalid JSON character 0x%02x", b) 602 case escMap: 603 err.Message = fmt.Sprintf("invalid JSON escape character '\\%c'", r) 604 case uMap: 605 err.Message = fmt.Sprintf("invalid JSON unicode character '%c'", r) 606 case spaceMap: 607 err.Message = fmt.Sprintf("extra characters after close, '%c'", r) 608 default: 609 err.Message = fmt.Sprintf("unexpected character '%c'", r) 610 } 611 return err 612} 613