1/* 2Copyright 2015 The Kubernetes Authors. 3 4Licensed under the Apache License, Version 2.0 (the "License"); 5you may not use this file except in compliance with the License. 6You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10Unless required by applicable law or agreed to in writing, software 11distributed under the License is distributed on an "AS IS" BASIS, 12WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13See the License for the specific language governing permissions and 14limitations under the License. 15*/ 16 17package jsonpath 18 19import ( 20 "errors" 21 "fmt" 22 "regexp" 23 "strconv" 24 "strings" 25 "unicode" 26 "unicode/utf8" 27) 28 29const eof = -1 30 31const ( 32 leftDelim = "{" 33 rightDelim = "}" 34) 35 36type Parser struct { 37 Name string 38 Root *ListNode 39 input string 40 pos int 41 start int 42 width int 43} 44 45var ( 46 ErrSyntax = errors.New("invalid syntax") 47 dictKeyRex = regexp.MustCompile(`^'([^']*)'$`) 48 sliceOperatorRex = regexp.MustCompile(`^(-?[\d]*)(:-?[\d]*)?(:-?[\d]*)?$`) 49) 50 51// Parse parsed the given text and return a node Parser. 52// If an error is encountered, parsing stops and an empty 53// Parser is returned with the error 54func Parse(name, text string) (*Parser, error) { 55 p := NewParser(name) 56 err := p.Parse(text) 57 if err != nil { 58 p = nil 59 } 60 return p, err 61} 62 63func NewParser(name string) *Parser { 64 return &Parser{ 65 Name: name, 66 } 67} 68 69// parseAction parsed the expression inside delimiter 70func parseAction(name, text string) (*Parser, error) { 71 p, err := Parse(name, fmt.Sprintf("%s%s%s", leftDelim, text, rightDelim)) 72 // when error happens, p will be nil, so we need to return here 73 if err != nil { 74 return p, err 75 } 76 p.Root = p.Root.Nodes[0].(*ListNode) 77 return p, nil 78} 79 80func (p *Parser) Parse(text string) error { 81 p.input = text 82 p.Root = newList() 83 p.pos = 0 84 return p.parseText(p.Root) 85} 86 87// consumeText return the parsed text since last cosumeText 88func (p *Parser) consumeText() string { 89 value := p.input[p.start:p.pos] 90 p.start = p.pos 91 return value 92} 93 94// next returns the next rune in the input. 95func (p *Parser) next() rune { 96 if p.pos >= len(p.input) { 97 p.width = 0 98 return eof 99 } 100 r, w := utf8.DecodeRuneInString(p.input[p.pos:]) 101 p.width = w 102 p.pos += p.width 103 return r 104} 105 106// peek returns but does not consume the next rune in the input. 107func (p *Parser) peek() rune { 108 r := p.next() 109 p.backup() 110 return r 111} 112 113// backup steps back one rune. Can only be called once per call of next. 114func (p *Parser) backup() { 115 p.pos -= p.width 116} 117 118func (p *Parser) parseText(cur *ListNode) error { 119 for { 120 if strings.HasPrefix(p.input[p.pos:], leftDelim) { 121 if p.pos > p.start { 122 cur.append(newText(p.consumeText())) 123 } 124 return p.parseLeftDelim(cur) 125 } 126 if p.next() == eof { 127 break 128 } 129 } 130 // Correctly reached EOF. 131 if p.pos > p.start { 132 cur.append(newText(p.consumeText())) 133 } 134 return nil 135} 136 137// parseLeftDelim scans the left delimiter, which is known to be present. 138func (p *Parser) parseLeftDelim(cur *ListNode) error { 139 p.pos += len(leftDelim) 140 p.consumeText() 141 newNode := newList() 142 cur.append(newNode) 143 cur = newNode 144 return p.parseInsideAction(cur) 145} 146 147func (p *Parser) parseInsideAction(cur *ListNode) error { 148 prefixMap := map[string]func(*ListNode) error{ 149 rightDelim: p.parseRightDelim, 150 "[?(": p.parseFilter, 151 "..": p.parseRecursive, 152 } 153 for prefix, parseFunc := range prefixMap { 154 if strings.HasPrefix(p.input[p.pos:], prefix) { 155 return parseFunc(cur) 156 } 157 } 158 159 switch r := p.next(); { 160 case r == eof || isEndOfLine(r): 161 return fmt.Errorf("unclosed action") 162 case r == ' ': 163 p.consumeText() 164 case r == '@' || r == '$': //the current object, just pass it 165 p.consumeText() 166 case r == '[': 167 return p.parseArray(cur) 168 case r == '"' || r == '\'': 169 return p.parseQuote(cur, r) 170 case r == '.': 171 return p.parseField(cur) 172 case r == '+' || r == '-' || unicode.IsDigit(r): 173 p.backup() 174 return p.parseNumber(cur) 175 case isAlphaNumeric(r): 176 p.backup() 177 return p.parseIdentifier(cur) 178 default: 179 return fmt.Errorf("unrecognized character in action: %#U", r) 180 } 181 return p.parseInsideAction(cur) 182} 183 184// parseRightDelim scans the right delimiter, which is known to be present. 185func (p *Parser) parseRightDelim(cur *ListNode) error { 186 p.pos += len(rightDelim) 187 p.consumeText() 188 return p.parseText(p.Root) 189} 190 191// parseIdentifier scans build-in keywords, like "range" "end" 192func (p *Parser) parseIdentifier(cur *ListNode) error { 193 var r rune 194 for { 195 r = p.next() 196 if isTerminator(r) { 197 p.backup() 198 break 199 } 200 } 201 value := p.consumeText() 202 203 if isBool(value) { 204 v, err := strconv.ParseBool(value) 205 if err != nil { 206 return fmt.Errorf("can not parse bool '%s': %s", value, err.Error()) 207 } 208 209 cur.append(newBool(v)) 210 } else { 211 cur.append(newIdentifier(value)) 212 } 213 214 return p.parseInsideAction(cur) 215} 216 217// parseRecursive scans the recursive desent operator .. 218func (p *Parser) parseRecursive(cur *ListNode) error { 219 p.pos += len("..") 220 p.consumeText() 221 cur.append(newRecursive()) 222 if r := p.peek(); isAlphaNumeric(r) { 223 return p.parseField(cur) 224 } 225 return p.parseInsideAction(cur) 226} 227 228// parseNumber scans number 229func (p *Parser) parseNumber(cur *ListNode) error { 230 r := p.peek() 231 if r == '+' || r == '-' { 232 p.next() 233 } 234 for { 235 r = p.next() 236 if r != '.' && !unicode.IsDigit(r) { 237 p.backup() 238 break 239 } 240 } 241 value := p.consumeText() 242 i, err := strconv.Atoi(value) 243 if err == nil { 244 cur.append(newInt(i)) 245 return p.parseInsideAction(cur) 246 } 247 d, err := strconv.ParseFloat(value, 64) 248 if err == nil { 249 cur.append(newFloat(d)) 250 return p.parseInsideAction(cur) 251 } 252 return fmt.Errorf("cannot parse number %s", value) 253} 254 255// parseArray scans array index selection 256func (p *Parser) parseArray(cur *ListNode) error { 257Loop: 258 for { 259 switch p.next() { 260 case eof, '\n': 261 return fmt.Errorf("unterminated array") 262 case ']': 263 break Loop 264 } 265 } 266 text := p.consumeText() 267 text = text[1 : len(text)-1] 268 if text == "*" { 269 text = ":" 270 } 271 272 //union operator 273 strs := strings.Split(text, ",") 274 if len(strs) > 1 { 275 union := []*ListNode{} 276 for _, str := range strs { 277 parser, err := parseAction("union", fmt.Sprintf("[%s]", strings.Trim(str, " "))) 278 if err != nil { 279 return err 280 } 281 union = append(union, parser.Root) 282 } 283 cur.append(newUnion(union)) 284 return p.parseInsideAction(cur) 285 } 286 287 // dict key 288 value := dictKeyRex.FindStringSubmatch(text) 289 if value != nil { 290 parser, err := parseAction("arraydict", fmt.Sprintf(".%s", value[1])) 291 if err != nil { 292 return err 293 } 294 for _, node := range parser.Root.Nodes { 295 cur.append(node) 296 } 297 return p.parseInsideAction(cur) 298 } 299 300 //slice operator 301 value = sliceOperatorRex.FindStringSubmatch(text) 302 if value == nil { 303 return fmt.Errorf("invalid array index %s", text) 304 } 305 value = value[1:] 306 params := [3]ParamsEntry{} 307 for i := 0; i < 3; i++ { 308 if value[i] != "" { 309 if i > 0 { 310 value[i] = value[i][1:] 311 } 312 if i > 0 && value[i] == "" { 313 params[i].Known = false 314 } else { 315 var err error 316 params[i].Known = true 317 params[i].Value, err = strconv.Atoi(value[i]) 318 if err != nil { 319 return fmt.Errorf("array index %s is not a number", value[i]) 320 } 321 } 322 } else { 323 if i == 1 { 324 params[i].Known = true 325 params[i].Value = params[0].Value + 1 326 params[i].Derived = true 327 } else { 328 params[i].Known = false 329 params[i].Value = 0 330 } 331 } 332 } 333 cur.append(newArray(params)) 334 return p.parseInsideAction(cur) 335} 336 337// parseFilter scans filter inside array selection 338func (p *Parser) parseFilter(cur *ListNode) error { 339 p.pos += len("[?(") 340 p.consumeText() 341 begin := false 342 end := false 343 var pair rune 344 345Loop: 346 for { 347 r := p.next() 348 switch r { 349 case eof, '\n': 350 return fmt.Errorf("unterminated filter") 351 case '"', '\'': 352 if begin == false { 353 //save the paired rune 354 begin = true 355 pair = r 356 continue 357 } 358 //only add when met paired rune 359 if p.input[p.pos-2] != '\\' && r == pair { 360 end = true 361 } 362 case ')': 363 //in rightParser below quotes only appear zero or once 364 //and must be paired at the beginning and end 365 if begin == end { 366 break Loop 367 } 368 } 369 } 370 if p.next() != ']' { 371 return fmt.Errorf("unclosed array expect ]") 372 } 373 reg := regexp.MustCompile(`^([^!<>=]+)([!<>=]+)(.+?)$`) 374 text := p.consumeText() 375 text = text[:len(text)-2] 376 value := reg.FindStringSubmatch(text) 377 if value == nil { 378 parser, err := parseAction("text", text) 379 if err != nil { 380 return err 381 } 382 cur.append(newFilter(parser.Root, newList(), "exists")) 383 } else { 384 leftParser, err := parseAction("left", value[1]) 385 if err != nil { 386 return err 387 } 388 rightParser, err := parseAction("right", value[3]) 389 if err != nil { 390 return err 391 } 392 cur.append(newFilter(leftParser.Root, rightParser.Root, value[2])) 393 } 394 return p.parseInsideAction(cur) 395} 396 397// parseQuote unquotes string inside double or single quote 398func (p *Parser) parseQuote(cur *ListNode, end rune) error { 399Loop: 400 for { 401 switch p.next() { 402 case eof, '\n': 403 return fmt.Errorf("unterminated quoted string") 404 case end: 405 //if it's not escape break the Loop 406 if p.input[p.pos-2] != '\\' { 407 break Loop 408 } 409 } 410 } 411 value := p.consumeText() 412 s, err := UnquoteExtend(value) 413 if err != nil { 414 return fmt.Errorf("unquote string %s error %v", value, err) 415 } 416 cur.append(newText(s)) 417 return p.parseInsideAction(cur) 418} 419 420// parseField scans a field until a terminator 421func (p *Parser) parseField(cur *ListNode) error { 422 p.consumeText() 423 for p.advance() { 424 } 425 value := p.consumeText() 426 if value == "*" { 427 cur.append(newWildcard()) 428 } else { 429 cur.append(newField(strings.Replace(value, "\\", "", -1))) 430 } 431 return p.parseInsideAction(cur) 432} 433 434// advance scans until next non-escaped terminator 435func (p *Parser) advance() bool { 436 r := p.next() 437 if r == '\\' { 438 p.next() 439 } else if isTerminator(r) { 440 p.backup() 441 return false 442 } 443 return true 444} 445 446// isTerminator reports whether the input is at valid termination character to appear after an identifier. 447func isTerminator(r rune) bool { 448 if isSpace(r) || isEndOfLine(r) { 449 return true 450 } 451 switch r { 452 case eof, '.', ',', '[', ']', '$', '@', '{', '}': 453 return true 454 } 455 return false 456} 457 458// isSpace reports whether r is a space character. 459func isSpace(r rune) bool { 460 return r == ' ' || r == '\t' 461} 462 463// isEndOfLine reports whether r is an end-of-line character. 464func isEndOfLine(r rune) bool { 465 return r == '\r' || r == '\n' 466} 467 468// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore. 469func isAlphaNumeric(r rune) bool { 470 return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r) 471} 472 473// isBool reports whether s is a boolean value. 474func isBool(s string) bool { 475 return s == "true" || s == "false" 476} 477 478//UnquoteExtend is almost same as strconv.Unquote(), but it support parse single quotes as a string 479func UnquoteExtend(s string) (string, error) { 480 n := len(s) 481 if n < 2 { 482 return "", ErrSyntax 483 } 484 quote := s[0] 485 if quote != s[n-1] { 486 return "", ErrSyntax 487 } 488 s = s[1 : n-1] 489 490 if quote != '"' && quote != '\'' { 491 return "", ErrSyntax 492 } 493 494 // Is it trivial? Avoid allocation. 495 if !contains(s, '\\') && !contains(s, quote) { 496 return s, nil 497 } 498 499 var runeTmp [utf8.UTFMax]byte 500 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 501 for len(s) > 0 { 502 c, multibyte, ss, err := strconv.UnquoteChar(s, quote) 503 if err != nil { 504 return "", err 505 } 506 s = ss 507 if c < utf8.RuneSelf || !multibyte { 508 buf = append(buf, byte(c)) 509 } else { 510 n := utf8.EncodeRune(runeTmp[:], c) 511 buf = append(buf, runeTmp[:n]...) 512 } 513 } 514 return string(buf), nil 515} 516 517func contains(s string, c byte) bool { 518 for i := 0; i < len(s); i++ { 519 if s[i] == c { 520 return true 521 } 522 } 523 return false 524} 525