1package xpath 2 3import ( 4 "bytes" 5 "fmt" 6 "hash/fnv" 7 "reflect" 8) 9 10type iterator interface { 11 Current() NodeNavigator 12} 13 14// An XPath query interface. 15type query interface { 16 // Select traversing iterator returns a query matched node NodeNavigator. 17 Select(iterator) NodeNavigator 18 19 // Evaluate evaluates query and returns values of the current query. 20 Evaluate(iterator) interface{} 21 22 Clone() query 23} 24 25// nopQuery is an empty query that always return nil for any query. 26type nopQuery struct { 27 query 28} 29 30func (nopQuery) Select(iterator) NodeNavigator { return nil } 31 32func (nopQuery) Evaluate(iterator) interface{} { return nil } 33 34func (nopQuery) Clone() query { return nopQuery{} } 35 36// contextQuery is returns current node on the iterator object query. 37type contextQuery struct { 38 count int 39 Root bool // Moving to root-level node in the current context iterator. 40} 41 42func (c *contextQuery) Select(t iterator) (n NodeNavigator) { 43 if c.count == 0 { 44 c.count++ 45 n = t.Current().Copy() 46 if c.Root { 47 n.MoveToRoot() 48 } 49 } 50 return n 51} 52 53func (c *contextQuery) Evaluate(iterator) interface{} { 54 c.count = 0 55 return c 56} 57 58func (c *contextQuery) Clone() query { 59 return &contextQuery{count: 0, Root: c.Root} 60} 61 62// ancestorQuery is an XPath ancestor node query.(ancestor::*|ancestor-self::*) 63type ancestorQuery struct { 64 iterator func() NodeNavigator 65 66 Self bool 67 Input query 68 Predicate func(NodeNavigator) bool 69} 70 71func (a *ancestorQuery) Select(t iterator) NodeNavigator { 72 for { 73 if a.iterator == nil { 74 node := a.Input.Select(t) 75 if node == nil { 76 return nil 77 } 78 first := true 79 node = node.Copy() 80 a.iterator = func() NodeNavigator { 81 if first && a.Self { 82 first = false 83 if a.Predicate(node) { 84 return node 85 } 86 } 87 for node.MoveToParent() { 88 if !a.Predicate(node) { 89 continue 90 } 91 return node 92 } 93 return nil 94 } 95 } 96 97 if node := a.iterator(); node != nil { 98 return node 99 } 100 a.iterator = nil 101 } 102} 103 104func (a *ancestorQuery) Evaluate(t iterator) interface{} { 105 a.Input.Evaluate(t) 106 a.iterator = nil 107 return a 108} 109 110func (a *ancestorQuery) Test(n NodeNavigator) bool { 111 return a.Predicate(n) 112} 113 114func (a *ancestorQuery) Clone() query { 115 return &ancestorQuery{Self: a.Self, Input: a.Input.Clone(), Predicate: a.Predicate} 116} 117 118// attributeQuery is an XPath attribute node query.(@*) 119type attributeQuery struct { 120 iterator func() NodeNavigator 121 122 Input query 123 Predicate func(NodeNavigator) bool 124} 125 126func (a *attributeQuery) Select(t iterator) NodeNavigator { 127 for { 128 if a.iterator == nil { 129 node := a.Input.Select(t) 130 if node == nil { 131 return nil 132 } 133 node = node.Copy() 134 a.iterator = func() NodeNavigator { 135 for { 136 onAttr := node.MoveToNextAttribute() 137 if !onAttr { 138 return nil 139 } 140 if a.Predicate(node) { 141 return node 142 } 143 } 144 } 145 } 146 147 if node := a.iterator(); node != nil { 148 return node 149 } 150 a.iterator = nil 151 } 152} 153 154func (a *attributeQuery) Evaluate(t iterator) interface{} { 155 a.Input.Evaluate(t) 156 a.iterator = nil 157 return a 158} 159 160func (a *attributeQuery) Test(n NodeNavigator) bool { 161 return a.Predicate(n) 162} 163 164func (a *attributeQuery) Clone() query { 165 return &attributeQuery{Input: a.Input.Clone(), Predicate: a.Predicate} 166} 167 168// childQuery is an XPath child node query.(child::*) 169type childQuery struct { 170 posit int 171 iterator func() NodeNavigator 172 173 Input query 174 Predicate func(NodeNavigator) bool 175} 176 177func (c *childQuery) Select(t iterator) NodeNavigator { 178 for { 179 if c.iterator == nil { 180 c.posit = 0 181 node := c.Input.Select(t) 182 if node == nil { 183 return nil 184 } 185 node = node.Copy() 186 first := true 187 c.iterator = func() NodeNavigator { 188 for { 189 if (first && !node.MoveToChild()) || (!first && !node.MoveToNext()) { 190 return nil 191 } 192 first = false 193 if c.Predicate(node) { 194 return node 195 } 196 } 197 } 198 } 199 200 if node := c.iterator(); node != nil { 201 c.posit++ 202 return node 203 } 204 c.iterator = nil 205 } 206} 207 208func (c *childQuery) Evaluate(t iterator) interface{} { 209 c.Input.Evaluate(t) 210 c.iterator = nil 211 return c 212} 213 214func (c *childQuery) Test(n NodeNavigator) bool { 215 return c.Predicate(n) 216} 217 218func (c *childQuery) Clone() query { 219 return &childQuery{Input: c.Input.Clone(), Predicate: c.Predicate} 220} 221 222// position returns a position of current NodeNavigator. 223func (c *childQuery) position() int { 224 return c.posit 225} 226 227// descendantQuery is an XPath descendant node query.(descendant::* | descendant-or-self::*) 228type descendantQuery struct { 229 iterator func() NodeNavigator 230 posit int 231 level int 232 233 Self bool 234 Input query 235 Predicate func(NodeNavigator) bool 236} 237 238func (d *descendantQuery) Select(t iterator) NodeNavigator { 239 for { 240 if d.iterator == nil { 241 d.posit = 0 242 node := d.Input.Select(t) 243 if node == nil { 244 return nil 245 } 246 node = node.Copy() 247 d.level = 0 248 positmap := make(map[int]int) 249 first := true 250 d.iterator = func() NodeNavigator { 251 if first && d.Self { 252 first = false 253 if d.Predicate(node) { 254 d.posit = 1 255 positmap[d.level] = 1 256 return node 257 } 258 } 259 260 for { 261 if node.MoveToChild() { 262 d.level = d.level + 1 263 positmap[d.level] = 0 264 } else { 265 for { 266 if d.level == 0 { 267 return nil 268 } 269 if node.MoveToNext() { 270 break 271 } 272 node.MoveToParent() 273 d.level = d.level - 1 274 } 275 } 276 if d.Predicate(node) { 277 positmap[d.level]++ 278 d.posit = positmap[d.level] 279 return node 280 } 281 } 282 } 283 } 284 285 if node := d.iterator(); node != nil { 286 return node 287 } 288 d.iterator = nil 289 } 290} 291 292func (d *descendantQuery) Evaluate(t iterator) interface{} { 293 d.Input.Evaluate(t) 294 d.iterator = nil 295 return d 296} 297 298func (d *descendantQuery) Test(n NodeNavigator) bool { 299 return d.Predicate(n) 300} 301 302// position returns a position of current NodeNavigator. 303func (d *descendantQuery) position() int { 304 return d.posit 305} 306 307func (d *descendantQuery) depth() int { 308 return d.level 309} 310 311func (d *descendantQuery) Clone() query { 312 return &descendantQuery{Self: d.Self, Input: d.Input.Clone(), Predicate: d.Predicate} 313} 314 315// followingQuery is an XPath following node query.(following::*|following-sibling::*) 316type followingQuery struct { 317 posit int 318 iterator func() NodeNavigator 319 320 Input query 321 Sibling bool // The matching sibling node of current node. 322 Predicate func(NodeNavigator) bool 323} 324 325func (f *followingQuery) Select(t iterator) NodeNavigator { 326 for { 327 if f.iterator == nil { 328 f.posit = 0 329 node := f.Input.Select(t) 330 if node == nil { 331 return nil 332 } 333 node = node.Copy() 334 if f.Sibling { 335 f.iterator = func() NodeNavigator { 336 for { 337 if !node.MoveToNext() { 338 return nil 339 } 340 if f.Predicate(node) { 341 f.posit++ 342 return node 343 } 344 } 345 } 346 } else { 347 var q *descendantQuery // descendant query 348 f.iterator = func() NodeNavigator { 349 for { 350 if q == nil { 351 for !node.MoveToNext() { 352 if !node.MoveToParent() { 353 return nil 354 } 355 } 356 q = &descendantQuery{ 357 Self: true, 358 Input: &contextQuery{}, 359 Predicate: f.Predicate, 360 } 361 t.Current().MoveTo(node) 362 } 363 if node := q.Select(t); node != nil { 364 f.posit = q.posit 365 return node 366 } 367 q = nil 368 } 369 } 370 } 371 } 372 373 if node := f.iterator(); node != nil { 374 return node 375 } 376 f.iterator = nil 377 } 378} 379 380func (f *followingQuery) Evaluate(t iterator) interface{} { 381 f.Input.Evaluate(t) 382 return f 383} 384 385func (f *followingQuery) Test(n NodeNavigator) bool { 386 return f.Predicate(n) 387} 388 389func (f *followingQuery) Clone() query { 390 return &followingQuery{Input: f.Input.Clone(), Sibling: f.Sibling, Predicate: f.Predicate} 391} 392 393func (f *followingQuery) position() int { 394 return f.posit 395} 396 397// precedingQuery is an XPath preceding node query.(preceding::*) 398type precedingQuery struct { 399 iterator func() NodeNavigator 400 posit int 401 Input query 402 Sibling bool // The matching sibling node of current node. 403 Predicate func(NodeNavigator) bool 404} 405 406func (p *precedingQuery) Select(t iterator) NodeNavigator { 407 for { 408 if p.iterator == nil { 409 p.posit = 0 410 node := p.Input.Select(t) 411 if node == nil { 412 return nil 413 } 414 node = node.Copy() 415 if p.Sibling { 416 p.iterator = func() NodeNavigator { 417 for { 418 for !node.MoveToPrevious() { 419 return nil 420 } 421 if p.Predicate(node) { 422 p.posit++ 423 return node 424 } 425 } 426 } 427 } else { 428 var q query 429 p.iterator = func() NodeNavigator { 430 for { 431 if q == nil { 432 for !node.MoveToPrevious() { 433 if !node.MoveToParent() { 434 return nil 435 } 436 p.posit = 0 437 } 438 q = &descendantQuery{ 439 Self: true, 440 Input: &contextQuery{}, 441 Predicate: p.Predicate, 442 } 443 t.Current().MoveTo(node) 444 } 445 if node := q.Select(t); node != nil { 446 p.posit++ 447 return node 448 } 449 q = nil 450 } 451 } 452 } 453 } 454 if node := p.iterator(); node != nil { 455 return node 456 } 457 p.iterator = nil 458 } 459} 460 461func (p *precedingQuery) Evaluate(t iterator) interface{} { 462 p.Input.Evaluate(t) 463 return p 464} 465 466func (p *precedingQuery) Test(n NodeNavigator) bool { 467 return p.Predicate(n) 468} 469 470func (p *precedingQuery) Clone() query { 471 return &precedingQuery{Input: p.Input.Clone(), Sibling: p.Sibling, Predicate: p.Predicate} 472} 473 474func (p *precedingQuery) position() int { 475 return p.posit 476} 477 478// parentQuery is an XPath parent node query.(parent::*) 479type parentQuery struct { 480 Input query 481 Predicate func(NodeNavigator) bool 482} 483 484func (p *parentQuery) Select(t iterator) NodeNavigator { 485 for { 486 node := p.Input.Select(t) 487 if node == nil { 488 return nil 489 } 490 node = node.Copy() 491 if node.MoveToParent() && p.Predicate(node) { 492 return node 493 } 494 } 495} 496 497func (p *parentQuery) Evaluate(t iterator) interface{} { 498 p.Input.Evaluate(t) 499 return p 500} 501 502func (p *parentQuery) Clone() query { 503 return &parentQuery{Input: p.Input.Clone(), Predicate: p.Predicate} 504} 505 506func (p *parentQuery) Test(n NodeNavigator) bool { 507 return p.Predicate(n) 508} 509 510// selfQuery is an Self node query.(self::*) 511type selfQuery struct { 512 Input query 513 Predicate func(NodeNavigator) bool 514} 515 516func (s *selfQuery) Select(t iterator) NodeNavigator { 517 for { 518 node := s.Input.Select(t) 519 if node == nil { 520 return nil 521 } 522 523 if s.Predicate(node) { 524 return node 525 } 526 } 527} 528 529func (s *selfQuery) Evaluate(t iterator) interface{} { 530 s.Input.Evaluate(t) 531 return s 532} 533 534func (s *selfQuery) Test(n NodeNavigator) bool { 535 return s.Predicate(n) 536} 537 538func (s *selfQuery) Clone() query { 539 return &selfQuery{Input: s.Input.Clone(), Predicate: s.Predicate} 540} 541 542// filterQuery is an XPath query for predicate filter. 543type filterQuery struct { 544 Input query 545 Predicate query 546 posit int 547 positmap map[int]int 548} 549 550func (f *filterQuery) do(t iterator) bool { 551 val := reflect.ValueOf(f.Predicate.Evaluate(t)) 552 switch val.Kind() { 553 case reflect.Bool: 554 return val.Bool() 555 case reflect.String: 556 return len(val.String()) > 0 557 case reflect.Float64: 558 pt := getNodePosition(f.Input) 559 return int(val.Float()) == pt 560 default: 561 if q, ok := f.Predicate.(query); ok { 562 return q.Select(t) != nil 563 } 564 } 565 return false 566} 567 568func (f *filterQuery) position() int { 569 return f.posit 570} 571 572func (f *filterQuery) Select(t iterator) NodeNavigator { 573 if f.positmap == nil { 574 f.positmap = make(map[int]int) 575 } 576 for { 577 578 node := f.Input.Select(t) 579 if node == nil { 580 return node 581 } 582 node = node.Copy() 583 584 t.Current().MoveTo(node) 585 if f.do(t) { 586 // fix https://github.com/antchfx/htmlquery/issues/26 587 // Calculate and keep the each of matching node's position in the same depth. 588 level := getNodeDepth(f.Input) 589 f.positmap[level]++ 590 f.posit = f.positmap[level] 591 return node 592 } 593 } 594} 595 596func (f *filterQuery) Evaluate(t iterator) interface{} { 597 f.Input.Evaluate(t) 598 return f 599} 600 601func (f *filterQuery) Clone() query { 602 return &filterQuery{Input: f.Input.Clone(), Predicate: f.Predicate.Clone()} 603} 604 605// functionQuery is an XPath function that returns a computed value for 606// the Evaluate call of the current NodeNavigator node. Select call isn't 607// applicable for functionQuery. 608type functionQuery struct { 609 Input query // Node Set 610 Func func(query, iterator) interface{} // The xpath function. 611} 612 613func (f *functionQuery) Select(t iterator) NodeNavigator { 614 return nil 615} 616 617// Evaluate call a specified function that will returns the 618// following value type: number,string,boolean. 619func (f *functionQuery) Evaluate(t iterator) interface{} { 620 return f.Func(f.Input, t) 621} 622 623func (f *functionQuery) Clone() query { 624 return &functionQuery{Input: f.Input.Clone(), Func: f.Func} 625} 626 627// transformFunctionQuery diffs from functionQuery where the latter computes a scalar 628// value (number,string,boolean) for the current NodeNavigator node while the former 629// (transformFunctionQuery) performs a mapping or transform of the current NodeNavigator 630// and returns a new NodeNavigator. It is used for non-scalar XPath functions such as 631// reverse(), remove(), subsequence(), unordered(), etc. 632type transformFunctionQuery struct { 633 Input query 634 Func func(query, iterator) func() NodeNavigator 635 iterator func() NodeNavigator 636} 637 638func (f *transformFunctionQuery) Select(t iterator) NodeNavigator { 639 if f.iterator == nil { 640 f.iterator = f.Func(f.Input, t) 641 } 642 return f.iterator() 643} 644 645func (f *transformFunctionQuery) Evaluate(t iterator) interface{} { 646 f.Input.Evaluate(t) 647 f.iterator = nil 648 return f 649} 650 651func (f *transformFunctionQuery) Clone() query { 652 return &transformFunctionQuery{Input: f.Input.Clone(), Func: f.Func} 653} 654 655// constantQuery is an XPath constant operand. 656type constantQuery struct { 657 Val interface{} 658} 659 660func (c *constantQuery) Select(t iterator) NodeNavigator { 661 return nil 662} 663 664func (c *constantQuery) Evaluate(t iterator) interface{} { 665 return c.Val 666} 667 668func (c *constantQuery) Clone() query { 669 return c 670} 671 672type groupQuery struct { 673 posit int 674 675 Input query 676} 677 678func (g *groupQuery) Select(t iterator) NodeNavigator { 679 for { 680 node := g.Input.Select(t) 681 if node == nil { 682 return nil 683 } 684 g.posit++ 685 return node.Copy() 686 } 687} 688 689func (g *groupQuery) Evaluate(t iterator) interface{} { 690 return g.Input.Evaluate(t) 691} 692 693func (g *groupQuery) Clone() query { 694 return &groupQuery{Input: g.Input} 695} 696 697func (g *groupQuery) position() int { 698 return g.posit 699} 700 701// logicalQuery is an XPath logical expression. 702type logicalQuery struct { 703 Left, Right query 704 705 Do func(iterator, interface{}, interface{}) interface{} 706} 707 708func (l *logicalQuery) Select(t iterator) NodeNavigator { 709 // When a XPath expr is logical expression. 710 node := t.Current().Copy() 711 val := l.Evaluate(t) 712 switch val.(type) { 713 case bool: 714 if val.(bool) == true { 715 return node 716 } 717 } 718 return nil 719} 720 721func (l *logicalQuery) Evaluate(t iterator) interface{} { 722 m := l.Left.Evaluate(t) 723 n := l.Right.Evaluate(t) 724 return l.Do(t, m, n) 725} 726 727func (l *logicalQuery) Clone() query { 728 return &logicalQuery{Left: l.Left.Clone(), Right: l.Right.Clone(), Do: l.Do} 729} 730 731// numericQuery is an XPath numeric operator expression. 732type numericQuery struct { 733 Left, Right query 734 735 Do func(interface{}, interface{}) interface{} 736} 737 738func (n *numericQuery) Select(t iterator) NodeNavigator { 739 return nil 740} 741 742func (n *numericQuery) Evaluate(t iterator) interface{} { 743 m := n.Left.Evaluate(t) 744 k := n.Right.Evaluate(t) 745 return n.Do(m, k) 746} 747 748func (n *numericQuery) Clone() query { 749 return &numericQuery{Left: n.Left.Clone(), Right: n.Right.Clone(), Do: n.Do} 750} 751 752type booleanQuery struct { 753 IsOr bool 754 Left, Right query 755 iterator func() NodeNavigator 756} 757 758func (b *booleanQuery) Select(t iterator) NodeNavigator { 759 if b.iterator == nil { 760 var list []NodeNavigator 761 i := 0 762 root := t.Current().Copy() 763 if b.IsOr { 764 for { 765 node := b.Left.Select(t) 766 if node == nil { 767 break 768 } 769 node = node.Copy() 770 list = append(list, node) 771 } 772 t.Current().MoveTo(root) 773 for { 774 node := b.Right.Select(t) 775 if node == nil { 776 break 777 } 778 node = node.Copy() 779 list = append(list, node) 780 } 781 } else { 782 var m []NodeNavigator 783 var n []NodeNavigator 784 for { 785 node := b.Left.Select(t) 786 if node == nil { 787 break 788 } 789 node = node.Copy() 790 list = append(m, node) 791 } 792 t.Current().MoveTo(root) 793 for { 794 node := b.Right.Select(t) 795 if node == nil { 796 break 797 } 798 node = node.Copy() 799 list = append(n, node) 800 } 801 for _, k := range m { 802 for _, j := range n { 803 if k == j { 804 list = append(list, k) 805 } 806 } 807 } 808 } 809 810 b.iterator = func() NodeNavigator { 811 if i >= len(list) { 812 return nil 813 } 814 node := list[i] 815 i++ 816 return node 817 } 818 } 819 return b.iterator() 820} 821 822func (b *booleanQuery) Evaluate(t iterator) interface{} { 823 m := b.Left.Evaluate(t) 824 left := asBool(t, m) 825 if b.IsOr && left { 826 return true 827 } else if !b.IsOr && !left { 828 return false 829 } 830 m = b.Right.Evaluate(t) 831 return asBool(t, m) 832} 833 834func (b *booleanQuery) Clone() query { 835 return &booleanQuery{IsOr: b.IsOr, Left: b.Left.Clone(), Right: b.Right.Clone()} 836} 837 838type unionQuery struct { 839 Left, Right query 840 iterator func() NodeNavigator 841} 842 843func (u *unionQuery) Select(t iterator) NodeNavigator { 844 if u.iterator == nil { 845 var list []NodeNavigator 846 var m = make(map[uint64]bool) 847 root := t.Current().Copy() 848 for { 849 node := u.Left.Select(t) 850 if node == nil { 851 break 852 } 853 code := getHashCode(node.Copy()) 854 if _, ok := m[code]; !ok { 855 m[code] = true 856 list = append(list, node.Copy()) 857 } 858 } 859 t.Current().MoveTo(root) 860 for { 861 node := u.Right.Select(t) 862 if node == nil { 863 break 864 } 865 code := getHashCode(node.Copy()) 866 if _, ok := m[code]; !ok { 867 m[code] = true 868 list = append(list, node.Copy()) 869 } 870 } 871 var i int 872 u.iterator = func() NodeNavigator { 873 if i >= len(list) { 874 return nil 875 } 876 node := list[i] 877 i++ 878 return node 879 } 880 } 881 return u.iterator() 882} 883 884func (u *unionQuery) Evaluate(t iterator) interface{} { 885 u.iterator = nil 886 u.Left.Evaluate(t) 887 u.Right.Evaluate(t) 888 return u 889} 890 891func (u *unionQuery) Clone() query { 892 return &unionQuery{Left: u.Left.Clone(), Right: u.Right.Clone()} 893} 894 895func getHashCode(n NodeNavigator) uint64 { 896 var sb bytes.Buffer 897 switch n.NodeType() { 898 case AttributeNode, TextNode, CommentNode: 899 sb.WriteString(fmt.Sprintf("%s=%s", n.LocalName(), n.Value())) 900 // https://github.com/antchfx/htmlquery/issues/25 901 d := 1 902 for n.MoveToPrevious() { 903 d++ 904 } 905 sb.WriteString(fmt.Sprintf("-%d", d)) 906 for n.MoveToParent() { 907 d = 1 908 for n.MoveToPrevious() { 909 d++ 910 } 911 sb.WriteString(fmt.Sprintf("-%d", d)) 912 } 913 case ElementNode: 914 sb.WriteString(n.Prefix() + n.LocalName()) 915 d := 1 916 for n.MoveToPrevious() { 917 d++ 918 } 919 sb.WriteString(fmt.Sprintf("-%d", d)) 920 921 for n.MoveToParent() { 922 d = 1 923 for n.MoveToPrevious() { 924 d++ 925 } 926 sb.WriteString(fmt.Sprintf("-%d", d)) 927 } 928 } 929 h := fnv.New64a() 930 h.Write([]byte(sb.String())) 931 return h.Sum64() 932} 933 934func getNodePosition(q query) int { 935 type Position interface { 936 position() int 937 } 938 if count, ok := q.(Position); ok { 939 return count.position() 940 } 941 return 1 942} 943 944func getNodeDepth(q query) int { 945 type Depth interface { 946 depth() int 947 } 948 if count, ok := q.(Depth); ok { 949 return count.depth() 950 } 951 return 0 952} 953