1// Copyright (c) 2009 The Go Authors. All rights reserved. 2// 3// Redistribution and use in source and binary forms, with or without 4// modification, are permitted provided that the following conditions are 5// met: 6// 7// * Redistributions of source code must retain the above copyright 8// notice, this list of conditions and the following disclaimer. 9// * Redistributions in binary form must reproduce the above 10// copyright notice, this list of conditions and the following disclaimer 11// in the documentation and/or other materials provided with the 12// distribution. 13// * Neither the name of Google Inc. nor the names of its 14// contributors may be used to endorse or promote products derived from 15// this software without specific prior written permission. 16// 17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29// Package json provides a JSON value parser state machine. 30// This package is almost entirely copied from the Go stdlib. 31// Changes made to it permit users of the package to tell 32// if some slice of bytes is a valid beginning of a json string. 33package json 34 35import "fmt" 36 37type ( 38 context int 39 scanStatus int 40) 41 42const ( 43 contextKey context = iota 44 contextObj 45 contextArr 46 47 scanContinue scanStatus = iota // uninteresting byte 48 scanBeginLiteral // end implied by next result != scanContinue 49 scanBeginObject // begin object 50 scanObjectKey // just finished object key (string) 51 scanObjectValue // just finished non-last object value 52 scanEndObject // end object (implies scanObjectValue if possible) 53 scanBeginArray // begin array 54 scanArrayValue // just finished array value 55 scanEndArray // end array (implies scanArrayValue if possible) 56 scanSkipSpace // space byte; can skip; known to be last "continue" result 57 scanEnd // top-level value ended *before* this byte; known to be first "stop" result 58 scanError // hit an error, scanner.err. 59) 60 61type ( 62 scanner struct { 63 step func(*scanner, byte) scanStatus 64 contexts []context 65 endTop bool 66 err error 67 index int 68 } 69) 70 71// Scan returns the number of bytes scanned and if there was any error 72// in trying to reach the end of data 73func Scan(data []byte) (int, error) { 74 s := &scanner{} 75 _ = checkValid(data, s) 76 return s.index, s.err 77} 78 79// checkValid verifies that data is valid JSON-encoded data. 80// scan is passed in for use by checkValid to avoid an allocation. 81func checkValid(data []byte, scan *scanner) error { 82 scan.reset() 83 for _, c := range data { 84 scan.index++ 85 if scan.step(scan, c) == scanError { 86 return scan.err 87 } 88 } 89 if scan.eof() == scanError { 90 return scan.err 91 } 92 return nil 93} 94 95func isSpace(c byte) bool { 96 return c == ' ' || c == '\t' || c == '\r' || c == '\n' 97} 98 99func (s *scanner) reset() { 100 s.step = stateBeginValue 101 s.contexts = s.contexts[0:0] 102 s.err = nil 103} 104 105// eof tells the scanner that the end of input has been reached. 106// It returns a scan status just as s.step does. 107func (s *scanner) eof() scanStatus { 108 if s.err != nil { 109 return scanError 110 } 111 if s.endTop { 112 return scanEnd 113 } 114 s.step(s, ' ') 115 if s.endTop { 116 return scanEnd 117 } 118 if s.err == nil { 119 s.err = fmt.Errorf("unexpected end of JSON input") 120 } 121 return scanError 122} 123 124// pushContext pushes a new parse state p onto the parse stack. 125func (s *scanner) pushParseState(p context) { 126 s.contexts = append(s.contexts, p) 127} 128 129// popParseState pops a parse state (already obtained) off the stack 130// and updates s.step accordingly. 131func (s *scanner) popParseState() { 132 n := len(s.contexts) - 1 133 s.contexts = s.contexts[0:n] 134 if n == 0 { 135 s.step = stateEndTop 136 s.endTop = true 137 } else { 138 s.step = stateEndValue 139 } 140} 141 142// stateBeginValueOrEmpty is the state after reading `[`. 143func stateBeginValueOrEmpty(s *scanner, c byte) scanStatus { 144 if c <= ' ' && isSpace(c) { 145 return scanSkipSpace 146 } 147 if c == ']' { 148 return stateEndValue(s, c) 149 } 150 return stateBeginValue(s, c) 151} 152 153// stateBeginValue is the state at the beginning of the input. 154func stateBeginValue(s *scanner, c byte) scanStatus { 155 if c <= ' ' && isSpace(c) { 156 return scanSkipSpace 157 } 158 switch c { 159 case '{': 160 s.step = stateBeginStringOrEmpty 161 s.pushParseState(contextKey) 162 return scanBeginObject 163 case '[': 164 s.step = stateBeginValueOrEmpty 165 s.pushParseState(contextArr) 166 return scanBeginArray 167 case '"': 168 s.step = stateInString 169 return scanBeginLiteral 170 case '-': 171 s.step = stateNeg 172 return scanBeginLiteral 173 case '0': // beginning of 0.123 174 s.step = state0 175 return scanBeginLiteral 176 case 't': // beginning of true 177 s.step = stateT 178 return scanBeginLiteral 179 case 'f': // beginning of false 180 s.step = stateF 181 return scanBeginLiteral 182 case 'n': // beginning of null 183 s.step = stateN 184 return scanBeginLiteral 185 } 186 if '1' <= c && c <= '9' { // beginning of 1234.5 187 s.step = state1 188 return scanBeginLiteral 189 } 190 return s.error(c, "looking for beginning of value") 191} 192 193// stateBeginStringOrEmpty is the state after reading `{`. 194func stateBeginStringOrEmpty(s *scanner, c byte) scanStatus { 195 if c <= ' ' && isSpace(c) { 196 return scanSkipSpace 197 } 198 if c == '}' { 199 n := len(s.contexts) 200 s.contexts[n-1] = contextObj 201 return stateEndValue(s, c) 202 } 203 return stateBeginString(s, c) 204} 205 206// stateBeginString is the state after reading `{"key": value,`. 207func stateBeginString(s *scanner, c byte) scanStatus { 208 if c <= ' ' && isSpace(c) { 209 return scanSkipSpace 210 } 211 if c == '"' { 212 s.step = stateInString 213 return scanBeginLiteral 214 } 215 return s.error(c, "looking for beginning of object key string") 216} 217 218// stateEndValue is the state after completing a value, 219// such as after reading `{}` or `true` or `["x"`. 220func stateEndValue(s *scanner, c byte) scanStatus { 221 n := len(s.contexts) 222 if n == 0 { 223 // Completed top-level before the current byte. 224 s.step = stateEndTop 225 s.endTop = true 226 return stateEndTop(s, c) 227 } 228 if c <= ' ' && isSpace(c) { 229 s.step = stateEndValue 230 return scanSkipSpace 231 } 232 ps := s.contexts[n-1] 233 switch ps { 234 case contextKey: 235 if c == ':' { 236 s.contexts[n-1] = contextObj 237 s.step = stateBeginValue 238 return scanObjectKey 239 } 240 return s.error(c, "after object key") 241 case contextObj: 242 if c == ',' { 243 s.contexts[n-1] = contextKey 244 s.step = stateBeginString 245 return scanObjectValue 246 } 247 if c == '}' { 248 s.popParseState() 249 return scanEndObject 250 } 251 return s.error(c, "after object key:value pair") 252 case contextArr: 253 if c == ',' { 254 s.step = stateBeginValue 255 return scanArrayValue 256 } 257 if c == ']' { 258 s.popParseState() 259 return scanEndArray 260 } 261 return s.error(c, "after array element") 262 } 263 return s.error(c, "") 264} 265 266// stateEndTop is the state after finishing the top-level value, 267// such as after reading `{}` or `[1,2,3]`. 268// Only space characters should be seen now. 269func stateEndTop(s *scanner, c byte) scanStatus { 270 if c != ' ' && c != '\t' && c != '\r' && c != '\n' { 271 // Complain about non-space byte on next call. 272 s.error(c, "after top-level value") 273 } 274 return scanEnd 275} 276 277// stateInString is the state after reading `"`. 278func stateInString(s *scanner, c byte) scanStatus { 279 if c == '"' { 280 s.step = stateEndValue 281 return scanContinue 282 } 283 if c == '\\' { 284 s.step = stateInStringEsc 285 return scanContinue 286 } 287 if c < 0x20 { 288 return s.error(c, "in string literal") 289 } 290 return scanContinue 291} 292 293// stateInStringEsc is the state after reading `"\` during a quoted string. 294func stateInStringEsc(s *scanner, c byte) scanStatus { 295 switch c { 296 case 'b', 'f', 'n', 'r', 't', '\\', '/', '"': 297 s.step = stateInString 298 return scanContinue 299 case 'u': 300 s.step = stateInStringEscU 301 return scanContinue 302 } 303 return s.error(c, "in string escape code") 304} 305 306// stateInStringEscU is the state after reading `"\u` during a quoted string. 307func stateInStringEscU(s *scanner, c byte) scanStatus { 308 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 309 s.step = stateInStringEscU1 310 return scanContinue 311 } 312 // numbers 313 return s.error(c, "in \\u hexadecimal character escape") 314} 315 316// stateInStringEscU1 is the state after reading `"\u1` during a quoted string. 317func stateInStringEscU1(s *scanner, c byte) scanStatus { 318 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 319 s.step = stateInStringEscU12 320 return scanContinue 321 } 322 // numbers 323 return s.error(c, "in \\u hexadecimal character escape") 324} 325 326// stateInStringEscU12 is the state after reading `"\u12` during a quoted string. 327func stateInStringEscU12(s *scanner, c byte) scanStatus { 328 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 329 s.step = stateInStringEscU123 330 return scanContinue 331 } 332 // numbers 333 return s.error(c, "in \\u hexadecimal character escape") 334} 335 336// stateInStringEscU123 is the state after reading `"\u123` during a quoted string. 337func stateInStringEscU123(s *scanner, c byte) scanStatus { 338 if '0' <= c && c <= '9' || 'a' <= c && c <= 'f' || 'A' <= c && c <= 'F' { 339 s.step = stateInString 340 return scanContinue 341 } 342 // numbers 343 return s.error(c, "in \\u hexadecimal character escape") 344} 345 346// stateNeg is the state after reading `-` during a number. 347func stateNeg(s *scanner, c byte) scanStatus { 348 if c == '0' { 349 s.step = state0 350 return scanContinue 351 } 352 if '1' <= c && c <= '9' { 353 s.step = state1 354 return scanContinue 355 } 356 return s.error(c, "in numeric literal") 357} 358 359// state1 is the state after reading a non-zero integer during a number, 360// such as after reading `1` or `100` but not `0`. 361func state1(s *scanner, c byte) scanStatus { 362 if '0' <= c && c <= '9' { 363 s.step = state1 364 return scanContinue 365 } 366 return state0(s, c) 367} 368 369// state0 is the state after reading `0` during a number. 370func state0(s *scanner, c byte) scanStatus { 371 if c == '.' { 372 s.step = stateDot 373 return scanContinue 374 } 375 if c == 'e' || c == 'E' { 376 s.step = stateE 377 return scanContinue 378 } 379 return stateEndValue(s, c) 380} 381 382// stateDot is the state after reading the integer and decimal point in a number, 383// such as after reading `1.`. 384func stateDot(s *scanner, c byte) scanStatus { 385 if '0' <= c && c <= '9' { 386 s.step = stateDot0 387 return scanContinue 388 } 389 return s.error(c, "after decimal point in numeric literal") 390} 391 392// stateDot0 is the state after reading the integer, decimal point, and subsequent 393// digits of a number, such as after reading `3.14`. 394func stateDot0(s *scanner, c byte) scanStatus { 395 if '0' <= c && c <= '9' { 396 return scanContinue 397 } 398 if c == 'e' || c == 'E' { 399 s.step = stateE 400 return scanContinue 401 } 402 return stateEndValue(s, c) 403} 404 405// stateE is the state after reading the mantissa and e in a number, 406// such as after reading `314e` or `0.314e`. 407func stateE(s *scanner, c byte) scanStatus { 408 if c == '+' || c == '-' { 409 s.step = stateESign 410 return scanContinue 411 } 412 return stateESign(s, c) 413} 414 415// stateESign is the state after reading the mantissa, e, and sign in a number, 416// such as after reading `314e-` or `0.314e+`. 417func stateESign(s *scanner, c byte) scanStatus { 418 if '0' <= c && c <= '9' { 419 s.step = stateE0 420 return scanContinue 421 } 422 return s.error(c, "in exponent of numeric literal") 423} 424 425// stateE0 is the state after reading the mantissa, e, optional sign, 426// and at least one digit of the exponent in a number, 427// such as after reading `314e-2` or `0.314e+1` or `3.14e0`. 428func stateE0(s *scanner, c byte) scanStatus { 429 if '0' <= c && c <= '9' { 430 return scanContinue 431 } 432 return stateEndValue(s, c) 433} 434 435// stateT is the state after reading `t`. 436func stateT(s *scanner, c byte) scanStatus { 437 if c == 'r' { 438 s.step = stateTr 439 return scanContinue 440 } 441 return s.error(c, "in literal true (expecting 'r')") 442} 443 444// stateTr is the state after reading `tr`. 445func stateTr(s *scanner, c byte) scanStatus { 446 if c == 'u' { 447 s.step = stateTru 448 return scanContinue 449 } 450 return s.error(c, "in literal true (expecting 'u')") 451} 452 453// stateTru is the state after reading `tru`. 454func stateTru(s *scanner, c byte) scanStatus { 455 if c == 'e' { 456 s.step = stateEndValue 457 return scanContinue 458 } 459 return s.error(c, "in literal true (expecting 'e')") 460} 461 462// stateF is the state after reading `f`. 463func stateF(s *scanner, c byte) scanStatus { 464 if c == 'a' { 465 s.step = stateFa 466 return scanContinue 467 } 468 return s.error(c, "in literal false (expecting 'a')") 469} 470 471// stateFa is the state after reading `fa`. 472func stateFa(s *scanner, c byte) scanStatus { 473 if c == 'l' { 474 s.step = stateFal 475 return scanContinue 476 } 477 return s.error(c, "in literal false (expecting 'l')") 478} 479 480// stateFal is the state after reading `fal`. 481func stateFal(s *scanner, c byte) scanStatus { 482 if c == 's' { 483 s.step = stateFals 484 return scanContinue 485 } 486 return s.error(c, "in literal false (expecting 's')") 487} 488 489// stateFals is the state after reading `fals`. 490func stateFals(s *scanner, c byte) scanStatus { 491 if c == 'e' { 492 s.step = stateEndValue 493 return scanContinue 494 } 495 return s.error(c, "in literal false (expecting 'e')") 496} 497 498// stateN is the state after reading `n`. 499func stateN(s *scanner, c byte) scanStatus { 500 if c == 'u' { 501 s.step = stateNu 502 return scanContinue 503 } 504 return s.error(c, "in literal null (expecting 'u')") 505} 506 507// stateNu is the state after reading `nu`. 508func stateNu(s *scanner, c byte) scanStatus { 509 if c == 'l' { 510 s.step = stateNul 511 return scanContinue 512 } 513 return s.error(c, "in literal null (expecting 'l')") 514} 515 516// stateNul is the state after reading `nul`. 517func stateNul(s *scanner, c byte) scanStatus { 518 if c == 'l' { 519 s.step = stateEndValue 520 return scanContinue 521 } 522 return s.error(c, "in literal null (expecting 'l')") 523} 524 525// stateError is the state after reaching a syntax error, 526// such as after reading `[1}` or `5.1.2`. 527func stateError(s *scanner, c byte) scanStatus { 528 return scanError 529} 530 531// error records an error and switches to the error state. 532func (s *scanner) error(c byte, context string) scanStatus { 533 s.step = stateError 534 s.err = fmt.Errorf("invalid character <<%c>> %s", c, context) 535 return scanError 536} 537