1/* 2Copyright 2015 The Kubernetes Authors. 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17package jsonpath 18 19import ( 20 "errors" 21 "fmt" 22 "regexp" 23 "strconv" 24 "strings" 25 "unicode" 26 "unicode/utf8" 27) 28 29const eof = -1 30 31const ( 32 leftDelim = "{" 33 rightDelim = "}" 34) 35 36type Parser struct { 37 Name string 38 Root *ListNode 39 input string 40 cur *ListNode 41 pos int 42 start int 43 width int 44} 45 46var ( 47 ErrSyntax = errors.New("invalid syntax") 48 dictKeyRex = regexp.MustCompile(`^'([^']*)'$`) 49 sliceOperatorRex = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:[\d]*)?$`) 50) 51 52// Parse parsed the given text and return a node Parser. 53// If an error is encountered, parsing stops and an empty 54// Parser is returned with the error 55func Parse(name, text string) (*Parser, error) { 56 p := NewParser(name) 57 err := p.Parse(text) 58 if err != nil { 59 p = nil 60 } 61 return p, err 62} 63 64func NewParser(name string) *Parser { 65 return &Parser{ 66 Name: name, 67 } 68} 69 70// parseAction parsed the expression inside delimiter 71func parseAction(name, text string) (*Parser, error) { 72 p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim)) 73 // when error happens, p will be nil, so we need to return here 74 if err != nil { 75 return p, err 76 } 77 p.Root = p.Root.Nodes[0].(*ListNode) 78 return p, nil 79} 80 81func (p *Parser) Parse(text string) error { 82 p.input = text 83 p.Root = newList() 84 p.pos = 0 85 return p.parseText(p.Root) 86} 87 88// consumeText return the parsed text since last cosumeText 89func (p *Parser) consumeText() string { 90 value := p.input[p.start:p.pos] 91 p.start = p.pos 92 return value 93} 94 95// next returns the next rune in the input. 96func (p *Parser) next() rune { 97 if int(p.pos) >= len(p.input) { 98 p.width = 0 99 return eof 100 } 101 r, w := utf8.DecodeRuneInString(p.input[p.pos:]) 102 p.width = w 103 p.pos += p.width 104 return r 105} 106 107// peek returns but does not consume the next rune in the input. 108func (p *Parser) peek() rune { 109 r := p.next() 110 p.backup() 111 return r 112} 113 114// backup steps back one rune. Can only be called once per call of next. 115func (p *Parser) backup() { 116 p.pos -= p.width 117} 118 119func (p *Parser) parseText(cur *ListNode) error { 120 for { 121 if strings.HasPrefix(p.input[p.pos:], leftDelim) { 122 if p.pos > p.start { 123 cur.append(newText(p.consumeText())) 124 } 125 return p.parseLeftDelim(cur) 126 } 127 if p.next() == eof { 128 break 129 } 130 } 131 // Correctly reached EOF. 132 if p.pos > p.start { 133 cur.append(newText(p.consumeText())) 134 } 135 return nil 136} 137 138// parseLeftDelim scans the left delimiter, which is known to be present. 139func (p *Parser) parseLeftDelim(cur *ListNode) error { 140 p.pos += len(leftDelim) 141 p.consumeText() 142 newNode := newList() 143 cur.append(newNode) 144 cur = newNode 145 return p.parseInsideAction(cur) 146} 147 148func (p *Parser) parseInsideAction(cur *ListNode) error { 149 prefixMap := map[string]func(*ListNode) error{ 150 rightDelim: p.parseRightDelim, 151 "[?(": p.parseFilter, 152 "..": p.parseRecursive, 153 } 154 for prefix, parseFunc := range prefixMap { 155 if strings.HasPrefix(p.input[p.pos:], prefix) { 156 return parseFunc(cur) 157 } 158 } 159 160 switch r := p.next(); { 161 case r == eof || isEndOfLine(r): 162 return fmt.Errorf("unclosed action") 163 case r == ' ': 164 p.consumeText() 165 case r == '@' || r == '$': //the current object, just pass it 166 p.consumeText() 167 case r == '[': 168 return p.parseArray(cur) 169 case r == '"' || r == '\'': 170 return p.parseQuote(cur, r) 171 case r == '.': 172 return p.parseField(cur) 173 case r == '+' || r == '-' || unicode.IsDigit(r): 174 p.backup() 175 return p.parseNumber(cur) 176 case isAlphaNumeric(r): 177 p.backup() 178 return p.parseIdentifier(cur) 179 default: 180 return fmt.Errorf("unrecognized character in action: %#U", r) 181 } 182 return p.parseInsideAction(cur) 183} 184 185// parseRightDelim scans the right delimiter, which is known to be present. 186func (p *Parser) parseRightDelim(cur *ListNode) error { 187 p.pos += len(rightDelim) 188 p.consumeText() 189 cur = p.Root 190 return p.parseText(cur) 191} 192 193// parseIdentifier scans build-in keywords, like "range" "end" 194func (p *Parser) parseIdentifier(cur *ListNode) error { 195 var r rune 196 for { 197 r = p.next() 198 if isTerminator(r) { 199 p.backup() 200 break 201 } 202 } 203 value := p.consumeText() 204 205 if isBool(value) { 206 v, err := strconv.ParseBool(value) 207 if err != nil { 208 return fmt.Errorf("can not parse bool '%s': %s", value, err.Error()) 209 } 210 211 cur.append(newBool(v)) 212 } else { 213 cur.append(newIdentifier(value)) 214 } 215 216 return p.parseInsideAction(cur) 217} 218 219// parseRecursive scans the recursive desent operator .. 220func (p *Parser) parseRecursive(cur *ListNode) error { 221 p.pos += len("..") 222 p.consumeText() 223 cur.append(newRecursive()) 224 if r := p.peek(); isAlphaNumeric(r) { 225 return p.parseField(cur) 226 } 227 return p.parseInsideAction(cur) 228} 229 230// parseNumber scans number 231func (p *Parser) parseNumber(cur *ListNode) error { 232 r := p.peek() 233 if r == '+' || r == '-' { 234 r = p.next() 235 } 236 for { 237 r = p.next() 238 if r != '.' && !unicode.IsDigit(r) { 239 p.backup() 240 break 241 } 242 } 243 value := p.consumeText() 244 i, err := strconv.Atoi(value) 245 if err == nil { 246 cur.append(newInt(i)) 247 return p.parseInsideAction(cur) 248 } 249 d, err := strconv.ParseFloat(value, 64) 250 if err == nil { 251 cur.append(newFloat(d)) 252 return p.parseInsideAction(cur) 253 } 254 return fmt.Errorf("cannot parse number %s", value) 255} 256 257// parseArray scans array index selection 258func (p *Parser) parseArray(cur *ListNode) error { 259Loop: 260 for { 261 switch p.next() { 262 case eof, '\n': 263 return fmt.Errorf("unterminated array") 264 case ']': 265 break Loop 266 } 267 } 268 text := p.consumeText() 269 text = string(text[1 : len(text)-1]) 270 if text == "*" { 271 text = ":" 272 } 273 274 //union operator 275 strs := strings.Split(text, ",") 276 if len(strs) > 1 { 277 union := []*ListNode{} 278 for _, str := range strs { 279 parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " "))) 280 if err != nil { 281 return err 282 } 283 union = append(union, parser.Root) 284 } 285 cur.append(newUnion(union)) 286 return p.parseInsideAction(cur) 287 } 288 289 // dict key 290 value := dictKeyRex.FindStringSubmatch(text) 291 if value != nil { 292 parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1])) 293 if err != nil { 294 return err 295 } 296 for _, node := range parser.Root.Nodes { 297 cur.append(node) 298 } 299 return p.parseInsideAction(cur) 300 } 301 302 //slice operator 303 value = sliceOperatorRex.FindStringSubmatch(text) 304 if value == nil { 305 return fmt.Errorf("invalid array index %s", text) 306 } 307 value = value[1:] 308 params := [3]ParamsEntry{} 309 for i := 0; i < 3; i++ { 310 if value[i] != "" { 311 if i > 0 { 312 value[i] = value[i][1:] 313 } 314 if i > 0 && value[i] == "" { 315 params[i].Known = false 316 } else { 317 var err error 318 params[i].Known = true 319 params[i].Value, err = strconv.Atoi(value[i]) 320 if err != nil { 321 return fmt.Errorf("array index %s is not a number", value[i]) 322 } 323 } 324 } else { 325 if i == 1 { 326 params[i].Known = true 327 params[i].Value = params[0].Value + 1 328 } else { 329 params[i].Known = false 330 params[i].Value = 0 331 } 332 } 333 } 334 cur.append(newArray(params)) 335 return p.parseInsideAction(cur) 336} 337 338// parseFilter scans filter inside array selection 339func (p *Parser) parseFilter(cur *ListNode) error { 340 p.pos += len("[?(") 341 p.consumeText() 342 begin := false 343 end := false 344 var pair rune 345 346Loop: 347 for { 348 r := p.next() 349 switch r { 350 case eof, '\n': 351 return fmt.Errorf("unterminated filter") 352 case '"', '\'': 353 if begin == false { 354 //save the paired rune 355 begin = true 356 pair = r 357 continue 358 } 359 //only add when met paired rune 360 if p.input[p.pos-2] != '\\' && r == pair { 361 end = true 362 } 363 case ')': 364 //in rightParser below quotes only appear zero or once 365 //and must be paired at the beginning and end 366 if begin == end { 367 break Loop 368 } 369 } 370 } 371 if p.next() != ']' { 372 return fmt.Errorf("unclosed array expect ]") 373 } 374 reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`) 375 text := p.consumeText() 376 text = string(text[:len(text)-2]) 377 value := reg.FindStringSubmatch(text) 378 if value == nil { 379 parser, err := parseAction("text", text) 380 if err != nil { 381 return err 382 } 383 cur.append(newFilter(parser.Root, newList(), "exists")) 384 } else { 385 leftParser, err := parseAction("left", value[1]) 386 if err != nil { 387 return err 388 } 389 rightParser, err := parseAction("right", value[3]) 390 if err != nil { 391 return err 392 } 393 cur.append(newFilter(leftParser.Root, rightParser.Root, value[2])) 394 } 395 return p.parseInsideAction(cur) 396} 397 398// parseQuote unquotes string inside double or single quote 399func (p *Parser) parseQuote(cur *ListNode, end rune) error { 400Loop: 401 for { 402 switch p.next() { 403 case eof, '\n': 404 return fmt.Errorf("unterminated quoted string") 405 case end: 406 //if it's not escape break the Loop 407 if p.input[p.pos-2] != '\\' { 408 break Loop 409 } 410 } 411 } 412 value := p.consumeText() 413 s, err := UnquoteExtend(value) 414 if err != nil { 415 return fmt.Errorf("unquote string %s error %v", value, err) 416 } 417 cur.append(newText(s)) 418 return p.parseInsideAction(cur) 419} 420 421// parseField scans a field until a terminator 422func (p *Parser) parseField(cur *ListNode) error { 423 p.consumeText() 424 for p.advance() { 425 } 426 value := p.consumeText() 427 if value == "*" { 428 cur.append(newWildcard()) 429 } else { 430 cur.append(newField(strings.Replace(value, "\\", "", -1))) 431 } 432 return p.parseInsideAction(cur) 433} 434 435// advance scans until next non-escaped terminator 436func (p *Parser) advance() bool { 437 r := p.next() 438 if r == '\\' { 439 p.next() 440 } else if isTerminator(r) { 441 p.backup() 442 return false 443 } 444 return true 445} 446 447// isTerminator reports whether the input is at valid termination character to appear after an identifier. 448func isTerminator(r rune) bool { 449 if isSpace(r) || isEndOfLine(r) { 450 return true 451 } 452 switch r { 453 case eof, '.', ',', '[', ']', '$', '@', '{', '}': 454 return true 455 } 456 return false 457} 458 459// isSpace reports whether r is a space character. 460func isSpace(r rune) bool { 461 return r == ' ' || r == '\t' 462} 463 464// isEndOfLine reports whether r is an end-of-line character. 465func isEndOfLine(r rune) bool { 466 return r == '\r' || r == '\n' 467} 468 469// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 470func isAlphaNumeric(r rune) bool { 471 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 472} 473 474// isBool reports whether s is a boolean value. 475func isBool(s string) bool { 476 return s == "true" || s == "false" 477} 478 479//UnquoteExtend is almost same as strconv.Unquote(), but it support parse single quotes as a string 480func UnquoteExtend(s string) (string, error) { 481 n := len(s) 482 if n < 2 { 483 return "", ErrSyntax 484 } 485 quote := s[0] 486 if quote != s[n-1] { 487 return "", ErrSyntax 488 } 489 s = s[1 : n-1] 490 491 if quote != '"' && quote != '\'' { 492 return "", ErrSyntax 493 } 494 495 // Is it trivial? Avoid allocation. 496 if !contains(s, '\\') && !contains(s, quote) { 497 return s, nil 498 } 499 500 var runeTmp [utf8.UTFMax]byte 501 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 502 for len(s) > 0 { 503 c, multibyte, ss, err := strconv.UnquoteChar(s, quote) 504 if err != nil { 505 return "", err 506 } 507 s = ss 508 if c < utf8.RuneSelf || !multibyte { 509 buf = append(buf, byte(c)) 510 } else { 511 n := utf8.EncodeRune(runeTmp[:], c) 512 buf = append(buf, runeTmp[:n]...) 513 } 514 } 515 return string(buf), nil 516} 517 518func contains(s string, c byte) bool { 519 for i := 0; i < len(s); i++ { 520 if s[i] == c { 521 return true 522 } 523 } 524 return false 525} 526