1# -*- coding: utf-8 -*-
2# cython: language_level=3, always_allow_keywords=True
3
4## Copyright 1999-2018 by LivingLogic AG, Bayreuth/Germany
5## Copyright 1999-2018 by Walter Dörwald
6##
7## All Rights Reserved
8##
9## See ll/xist/__init__.py for the license
10
11
12"""
13This module contains XFind selectors and related classes and functions.
14
15A selector specifies a condition that a node in an XIST tree must satisfy to
16match the selector. For example the method :meth:`Node.walk` will only output
17nodes that match the specified selector.
18
19Selectors can be combined with various operations and form a language comparable
20to XPath__ but implemented as Python expressions.
21
22__ http://www.w3.org/TR/xpath
23"""
24
25
26import builtins, collections
27
28from ll import misc
29from ll.xist import xsc
30
31
32__docformat__ = "reStructuredText"
33
34
35###
36### Function for filtering a :class:`xsc.Cursor` iterator against a :class:`Selector`.
37###
38
def filter(iter, *selectors):
	"""
	Yield those :class:`xsc.Cursor` objects from :obj:`iter` whose path matches
	the selector described by :obj:`selectors`.

	:obj:`selectors` is turned into one :class:`Selector` by handing it to
	:func:`selector`, so without any selector every cursor is passed through
	and with several selectors a cursor is passed through as soon as one of
	them matches. As this is a generator, the selector is only created once
	iteration starts.

	Example::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> [c.node.string() for c in xfind.filter(doc.walk(), html.b, html.title)]
		[
			'<title>Welcome to Python.org</title>',
			'<b>Web Programming</b>',
			'<b>GUI Development</b>',
			'<b>Scientific and Numeric</b>',
			'<b>Software Development</b>',
			'<b>System Administration</b>'
		]
	"""
	matcher = selector(*selectors)
	# Membership in a selector is tested against the cursor's path, not the cursor itself
	yield from (cursor for cursor in iter if cursor.path in matcher)
68
69
70###
71### Function for creating a :class:`Selector` object.
72###
73
def selector(*objs):
	"""
	Create a :class:`Selector` object from :obj:`objs`.

	If :obj:`objs` is empty (i.e. :func:`selector` is called without arguments)
	``any`` is returned (which matches every node).

	If more than one argument is passed the result matches a node if any of
	the arguments matches it: when all arguments are classes a single
	:class:`IsInstanceSelector` for all of them is returned, otherwise an
	:class:`OrCombinator`.

	Otherwise the following steps are taken for the single argument ``obj``:

	*	if ``obj`` already is a :class:`Selector` object it is returned unchanged;

	*	if ``obj`` is a :class:`Node` subclass, an :class:`IsInstanceSelector`
		is returned (which matches if the node is an instance of this class);

	*	if ``obj`` is a tuple, its items are treated as if they had been passed
		as separate arguments to :func:`selector`;

	*	if ``obj`` is a :class:`Node` instance, an :class:`IsSelector` is returned
		(which matches only ``obj``);

	*	if ``obj`` is callable a :class:`CallableSelector` is returned
		(where matching is done by calling ``obj``);

	*	if ``obj`` is ``None`` ``any`` will be returned;

	*	otherwise :func:`selector` will raise a :exc:`TypeError`.
	"""
	if not objs:
		return any
	if len(objs) > 1:
		# Several plain classes collapse into one ``isinstance`` check,
		# everything else becomes an "or" of the individual selectors.
		if all(isinstance(obj, type) for obj in objs):
			return IsInstanceSelector(*objs)
		return OrCombinator(*objs)

	obj = objs[0]
	if isinstance(obj, Selector):
		return obj
	if isinstance(obj, xsc._Node_Meta):
		return IsInstanceSelector(obj)
	if isinstance(obj, tuple):
		return selector(*obj)
	if isinstance(obj, xsc.Node):
		return IsSelector(obj)
	# ``collections.Callable`` no longer exists since Python 3.10, so the
	# builtin test is used. It is spelled via ``builtins`` because this module
	# already hides several builtins (``any``, ``filter``, ``hasattr``).
	if builtins.callable(obj):
		return CallableSelector(obj)
	if obj is None:
		return any
	raise TypeError(f"can't convert {obj!r} to selector")
122
123
124###
125### Selectors for the :meth:`walk` method.
126###
127
class Selector:
	"""
	Base class of all selectors. A selector describes a condition that a node
	in an XIST tree has to fulfill to be matched.

	The condition itself is implemented by overriding :meth:`__contains__` in
	a subclass. The operators defined here build combinators from selectors;
	the right hand operand of the binary operators is converted with
	:func:`selector`, so anything that :func:`selector` accepts can be used
	there.
	"""

	@misc.notimplemented
	def __contains__(self, path):
		"""
		Return whether :obj:`path` matches the selector. :obj:`path` is a list
		of XIST nodes that leads from the root of the tree to the node in
		question (which is the last item).
		"""

	def __truediv__(self, other):
		"""
		``self / other``: return a :class:`ChildCombinator` where :obj:`self`
		is the left hand selector and :obj:`other` the right hand selector.
		"""
		right = selector(other)
		return ChildCombinator(self, right)

	def __floordiv__(self, other):
		"""
		``self // other``: return a :class:`DescendantCombinator` where
		:obj:`self` is the left hand selector and :obj:`other` the right hand
		selector.
		"""
		right = selector(other)
		return DescendantCombinator(self, right)

	def __mul__(self, other):
		"""
		``self * other``: return an :class:`AdjacentSiblingCombinator` where
		:obj:`self` is the left hand selector and :obj:`other` the right hand
		selector.
		"""
		right = selector(other)
		return AdjacentSiblingCombinator(self, right)

	def __pow__(self, other):
		"""
		``self ** other``: return a :class:`GeneralSiblingCombinator` where
		:obj:`self` is the left hand selector and :obj:`other` the right hand
		selector.
		"""
		right = selector(other)
		return GeneralSiblingCombinator(self, right)

	def __and__(self, other):
		"""
		``self & other``: return an :class:`AndCombinator` of :obj:`self` and
		:obj:`other`.
		"""
		right = selector(other)
		return AndCombinator(self, right)

	def __or__(self, other):
		"""
		``self | other``: return an :class:`OrCombinator` of :obj:`self` and
		:obj:`other`.
		"""
		right = selector(other)
		return OrCombinator(self, right)

	def __invert__(self):
		"""
		``~self``: return a :class:`NotCombinator` that inverts :obj:`self`.
		"""
		return NotCombinator(self)
190
191
192
class AnySelector(Selector):
	"""
	Selector that matches every node.

	The module global ``any`` is an instance of this class, so it is available
	as ``xfind.any``.
	"""

	def __contains__(self, path):
		# There is no condition: each path matches
		return True

	def __or__(self, other):
		# "everything or ``other``" still is everything
		return self

	def __and__(self, other):
		# "everything and ``other``" is the same as ``other`` alone
		return selector(other)


# Hides the builtin ``any`` inside this module; code in this module that needs
# the builtin spells it ``builtins.any``.
any = AnySelector()
212
213
class IsInstanceSelector(Selector):
	"""
	Selector that matches every node that is an instance of (one of) the
	specified type(s). An :class:`IsInstanceSelector` can be created directly,
	but it is enough to pass a class to a function that expects a selector
	(the class is then wrapped in an :class:`IsInstanceSelector`
	automatically)::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(html.a):
		... 	print(node.attrs.href, node.attrs.title)
		...
		https://www.python.org/#content Skip to content
		https://www.python.org/#python-network
		https://www.python.org/ The Python Programming Language
		https://www.python.org/psf-landing/ The Python Software Foundation
		...
	"""
	def __init__(self, *types):
		# Kept as a tuple, so it can be handed to ``isinstance`` as is
		self.types = types

	def __contains__(self, path):
		node = path[-1]
		return isinstance(node, self.types)

	def __or__(self, other):
		# Another type check is merged into one :class:`IsInstanceSelector`
		# instead of wrapping both in an :class:`OrCombinator`
		if isinstance(other, xsc._Node_Meta):
			extra = (other,)
		elif isinstance(other, IsInstanceSelector):
			extra = other.types
		else:
			return Selector.__or__(self, other)
		return IsInstanceSelector(*(self.types + extra))

	def __getitem__(self, index):
		"""
		``self[index]``: return an :class:`nthoftype` selector for the index
		:obj:`index` and the types ``self.types``.
		"""
		return nthoftype(index, *self.types)

	def __str__(self):
		names = [f"{cls.__module__}.{cls.__name__}" for cls in self.types]
		if len(names) == 1:
			return names[0]
		# Alternatives are shown the way they would be written with ``|``
		joined = " | ".join(names)
		return f"({joined})"
265
266
class element(Selector):
	"""
	Selector that selects all elements that have a specified namespace name and
	element name::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.element(html, "img")):
		... 	print(node.string())
		...
		<img alt="python™" class="python-logo" src="https://www.python.org/static/img/python-logo.png" />
	"""
	def __init__(self, xmlns, xmlname):
		# :obj:`xmlns` is passed through :func:`xsc.nsname`; the example above
		# passes a namespace module instead of a namespace name.
		self.xmlns = xsc.nsname(xmlns)
		self.xmlname = xmlname

	def __contains__(self, path):
		node = path[-1]
		return isinstance(node, xsc.Element) and node.xmlns == self.xmlns and node.xmlname == self.xmlname

	def __str__(self):
		# The element name is stored as ``xmlname`` (there is no ``name``
		# attribute); the arguments are shown in the order of the constructor.
		return f"{self.__class__.__qualname__}({self.xmlns!r}, {self.xmlname!r})"
295
296
class procinst(Selector):
	"""
	Selector that selects all processing instructions that have a specified name.
	"""
	def __init__(self, xmlname):
		self.xmlname = xmlname

	def __contains__(self, path):
		node = path[-1]
		return isinstance(node, xsc.ProcInst) and node.xmlname == self.xmlname

	def __str__(self):
		# The name is stored as ``xmlname`` (there is no ``name`` attribute)
		return f"{self.__class__.__qualname__}({self.xmlname!r})"
310
311
class entity(Selector):
	"""
	Selector that selects all entities that have a specified name.
	"""
	def __init__(self, xmlname):
		self.xmlname = xmlname

	def __contains__(self, path):
		node = path[-1]
		return isinstance(node, xsc.Entity) and node.xmlname == self.xmlname

	def __str__(self):
		# The name is stored as ``xmlname`` (there is no ``name`` attribute)
		return f"{self.__class__.__qualname__}({self.xmlname!r})"
325
326
class IsSelector(Selector):
	"""
	Selector that matches exactly one node object (the test is done by
	identity, not by equality). Combining it with other selectors via
	:class:`ChildCombinator` or :class:`DescendantCombinator` makes it
	possible to select children or descendants of this specific node. An
	:class:`IsSelector` can be created directly, but it is enough to pass a
	node to a function that expects a selector::

		>>> from ll.xist import xsc, parse
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(doc[0]/xsc.Element):
		... 	print(repr(node))
		...
		<element ll.xist.ns.html.head xmlns='http://www.w3.org/1999/xhtml' (89 children/no attrs) location='https://www.python.org/:?:?' at 0x104ad7630>
		<element ll.xist.ns.html.body xmlns='http://www.w3.org/1999/xhtml' (14 children/2 attrs) location='https://www.python.org/:?:?' at 0x104cc1f28>
	"""
	def __init__(self, node):
		self.node = node

	def __contains__(self, path):
		current = path[-1]
		return current is self.node

	def __str__(self):
		cls = self.__class__
		return f"{cls.__qualname__}({self.node!r})"
357
358
class IsRootSelector(Selector):
	"""
	Selector that matches the node the traversal started at.

	The module global ``isroot`` is an instance of this class, so it is
	available as ``xfind.isroot``.
	"""
	def __contains__(self, path):
		# Only the root of the traversal has a path that consists of itself alone
		depth = len(path)
		return depth == 1


# The shared instance, available as ``xfind.isroot``
isroot = IsRootSelector()
371
372
class IsEmptySelector(Selector):
	"""
	Selector that matches all elements and fragments that have no content
	(i.e. whose length is zero). Other node types never match.

	The module global ``empty`` is an instance of this class, so it is
	available as ``xfind.empty``::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.empty):
		... 	print(node.string())
		...
		<meta charset="utf-8" />
		<meta http-equiv="X-UA-Compatible" content="IE=edge" />
		<link href="https://ajax.googleapis.com/" rel="prefetch" />
		<meta name="application-name" content="Python.org" />
		...
	"""

	def __contains__(self, path):
		node = path[-1]
		# The length is only asked for node types that are containers
		return isinstance(node, (xsc.Element, xsc.Frag)) and len(node) == 0


# The shared instance, available as ``xfind.empty``
empty = IsEmptySelector()
406
407
class OnlyChildSelector(Selector):
	"""
	Selector that matches all nodes that are the one and only child of their
	parent (which must be an element or a fragment).

	The module global ``onlychild`` is an instance of this class, so it is
	available as ``xfind.onlychild``::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.onlychild & html.a):
		... 	print(node.string())
		...
		<a class="text-shrink" href="javascript:;" title="Make Text Smaller">Smaller</a>
		<a class="text-grow" href="javascript:;" title="Make Text Larger">Larger</a>
		<a class="text-reset" href="javascript:;" title="Reset any font size changes I have made">Reset</a>
		<a href="http://plus.google.com/+Python"><span aria-hidden="true" class="icon-google-plus"></span>Google+</a>
		...
	"""

	def __contains__(self, path):
		# Without a parent in the path the node can't be anybody's child
		if len(path) < 2:
			return False
		parent = path[-2]
		node = path[-1]
		if not isinstance(parent, (xsc.Frag, xsc.Element)):
			return False
		return len(parent) == 1 and parent[0] is node

	def __str__(self):
		return "onlychild"


# The shared instance, available as ``xfind.onlychild``
onlychild = OnlyChildSelector()
445
446
class OnlyOfTypeSelector(Selector):
	"""
	Selector that matches all nodes that have no sibling of their own type,
	i.e. where the parent (which must be an element or a fragment) contains no
	other node that is an instance of the class of the node.

	The module global ``onlyoftype`` is an instance of this class, so it is
	available as ``xfind.onlyoftype``::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.onlyoftype & xsc.Element):
		... 	print(repr(node))
		...
		<element ll.xist.ns.html.html xmlns='http://www.w3.org/1999/xhtml' (7 children/3 attrs) location='https://www.python.org/:?:?' at 0x108858d30>
		<element ll.xist.ns.html.head xmlns='http://www.w3.org/1999/xhtml' (89 children/no attrs) location='https://www.python.org/:?:?' at 0x108858630>
		<element ll.xist.ns.html.title xmlns='http://www.w3.org/1999/xhtml' (1 child/no attrs) location='https://www.python.org/:?:?' at 0x108c547b8>
		<element ll.xist.ns.html.body xmlns='http://www.w3.org/1999/xhtml' (14 children/2 attrs) location='https://www.python.org/:?:?' at 0x108c54eb8>
		...
	"""

	def __contains__(self, path):
		# Without a parent in the path there are no siblings to compare with
		if len(path) < 2:
			return False
		node = path[-1]
		parent = path[-2]
		if not isinstance(parent, (xsc.Frag, xsc.Element)):
			return False
		nodetype = node.__class__
		# A single *other* child of the same type is enough to rule the node out
		return not builtins.any(
			isinstance(child, nodetype) and child is not node
			for child in parent
		)

	def __str__(self):
		return "onlyoftype"


# The shared instance, available as ``xfind.onlyoftype``
onlyoftype = OnlyOfTypeSelector()
490
491
class hasattr(Selector):
	"""
	Selector that selects all element nodes that have an attribute with one of
	the specified names. (Names can be strings, (attribute name, namespace name)
	tuples or attribute classes or instances)::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.hasattr("id")):
		... 	print(node.xmlname, node.attrs.id)
		...
		body homepage
		div touchnav-wrapper
		div top
		a close-python-network
		...
	"""

	def __init__(self, *attrnames):
		self.attrnames = attrnames

	def __contains__(self, path):
		node = path[-1]
		# Only elements have attributes
		if not isinstance(node, xsc.Element):
			return False
		# ``any`` is a selector in this module, so the builtin has to be
		# spelled out explicitly
		return builtins.any(attrname in node.attrs for attrname in self.attrnames)

	def __str__(self):
		# Show the names the way they were passed to the constructor
		attrnames = ", ".join(repr(attrname) for attrname in self.attrnames)
		return f"{self.__class__.__qualname__}({attrnames})"
530
531
class attrhasvalue(Selector):
	"""
	Selector that matches all element nodes where the attribute with the
	specified name has one of the specified values. (Names can be strings,
	(attribute name, namespace name) tuples or attribute classes or instances).
	Note that "fancy" attributes (i.e. those containing non-text) never
	match::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.attrhasvalue("rel", "stylesheet")):
		... 	print(node.attrs.href)
		...
		https://www.python.org/static/stylesheets/style.css
		https://www.python.org/static/stylesheets/mq.css

	At least one value must be passed, otherwise a :exc:`ValueError` is raised.
	"""

	def __init__(self, attrname, *attrvalues):
		self.attrname = attrname
		if not attrvalues:
			raise ValueError("need at least one attribute value")
		self.attrvalues = attrvalues

	def __contains__(self, path):
		node = path[-1]
		if isinstance(node, xsc.Element):
			attr = node.attrs.get(self.attrname)
			# Fancy attributes are never compared, they simply don't match
			return not attr.isfancy() and str(attr) in self.attrvalues
		return False

	def __str__(self):
		# Strip the parentheses from the ``repr`` of the tuple of values
		values = repr(self.attrvalues)[1:-1]
		name = self.__class__.__qualname__
		return f"{name}({self.attrname!r}, {values})"
572
573
class attrcontains(Selector):
	"""
	Selector that matches all element nodes where the value of the attribute
	with the specified name contains one of the specified substrings. (Names
	can be strings, (attribute name, namespace name) tuples or attribute
	classes or instances). Note that "fancy" attributes (i.e. those containing
	non-text) never match::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.attrcontains("rel", "stylesheet")):
		... 	print(node.attrs.rel, node.attrs.href)
		...
		stylesheet https://www.python.org/static/stylesheets/style.css
		stylesheet https://www.python.org/static/stylesheets/mq.css

	At least one substring must be passed, otherwise a :exc:`ValueError` is
	raised.
	"""

	def __init__(self, attrname, *attrvalues):
		self.attrname = attrname
		if not attrvalues:
			raise ValueError("need at least one attribute value")
		self.attrvalues = attrvalues

	def __contains__(self, path):
		node = path[-1]
		if not isinstance(node, xsc.Element):
			return False
		attr = node.attrs.get(self.attrname)
		# Fancy attributes are never searched, they simply don't match
		if attr.isfancy():
			return False
		text = str(attr)
		return builtins.any(fragment in text for fragment in self.attrvalues)

	def __str__(self):
		# Strip the parentheses from the ``repr`` of the tuple of values
		values = repr(self.attrvalues)[1:-1]
		name = self.__class__.__qualname__
		return f"{name}({self.attrname!r}, {values})"
614
615
class attrstartswith(Selector):
	"""
	Selector that matches all element nodes where the value of the attribute
	with the specified name starts with one of the specified strings. (Names
	can be strings, (attribute name, namespace name) tuples or attribute
	classes or instances). Note that "fancy" attributes (i.e. those containing
	non-text) never match::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.attrstartswith("class", "icon-")):
		... 	print(node.string())
		...
		<span aria-hidden="true" class="icon-arrow-down"><span>▼</span></span>
		<span aria-hidden="true" class="icon-arrow-up"><span>▲</span></span>
		<span aria-hidden="true" class="icon-search"></span>
		<span aria-hidden="true" class="icon-google-plus"></span>
		...

	At least one string must be passed, otherwise a :exc:`ValueError` is raised.
	"""

	def __init__(self, attrname, *attrvalues):
		self.attrname = attrname
		if not attrvalues:
			raise ValueError("need at least one attribute value")
		self.attrvalues = attrvalues

	def __contains__(self, path):
		node = path[-1]
		if not isinstance(node, xsc.Element):
			return False
		attr = node.attrs.get(self.attrname)
		# Fancy attributes are never tested, they simply don't match
		if attr.isfancy():
			return False
		text = str(attr)
		return builtins.any(text.startswith(prefix) for prefix in self.attrvalues)

	def __str__(self):
		# Strip the parentheses from the ``repr`` of the tuple of values
		values = repr(self.attrvalues)[1:-1]
		name = self.__class__.__qualname__
		return f"{name}({self.attrname!r}, {values})"
659
660
class attrendswith(Selector):
	"""
	Selector that matches all element nodes where the value of the attribute
	with the specified name ends with one of the specified strings. (Names can
	be strings, (attribute name, namespace name) tuples or attribute classes
	or instances). Note that "fancy" attributes (i.e. those containing
	non-text) never match::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.attrendswith("href", ".css")):
		... 	print(node.attrs.href)
		...
		https://www.python.org/static/stylesheets/style.css
		https://www.python.org/static/stylesheets/mq.css

	At least one string must be passed, otherwise a :exc:`ValueError` is raised.
	"""

	def __init__(self, attrname, *attrvalues):
		self.attrname = attrname
		if not attrvalues:
			raise ValueError("need at least one attribute value")
		self.attrvalues = attrvalues

	def __contains__(self, path):
		node = path[-1]
		if not isinstance(node, xsc.Element):
			return False
		attr = node.attrs.get(self.attrname)
		# Fancy attributes are never tested, they simply don't match
		if attr.isfancy():
			return False
		text = str(attr)
		return builtins.any(text.endswith(suffix) for suffix in self.attrvalues)

	def __str__(self):
		# Strip the parentheses from the ``repr`` of the tuple of values
		values = repr(self.attrvalues)[1:-1]
		name = self.__class__.__qualname__
		return f"{name}({self.attrname!r}, {values})"
701
702
class hasid(Selector):
	"""
	Selector that matches all element nodes where the ``id`` attribute has one
	of the specified values. "Fancy" ``id`` attributes never match::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.hasid("id-search-field")):
		... 	print(node.string())
		...
		<input class="search-field" id="id-search-field" name="q" placeholder="Search" role="textbox" tabindex="1" type="search" />

	At least one id must be passed, otherwise a :exc:`ValueError` is raised.
	"""

	def __init__(self, *ids):
		if not ids:
			raise ValueError("need at least one id")
		self.ids = ids

	def __contains__(self, path):
		node = path[-1]
		if isinstance(node, xsc.Element):
			attr = node.attrs.get("id")
			# Fancy attributes are never compared, they simply don't match
			return not attr.isfancy() and str(attr) in self.ids
		return False

	def __str__(self):
		# Strip the parentheses from the ``repr`` of the tuple of ids
		values = repr(self.ids)[1:-1]
		name = self.__class__.__qualname__
		return f"{name}({values})"
738
739
class hasclass(Selector):
	"""
	Selector that matches all element nodes where the whitespace separated
	``class`` attribute contains one of the specified class names. "Fancy"
	``class`` attributes never match::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.hasclass("tier-1")/html.a):
		... 	print(node.string())
		...
		A A
		Socialize
		Sign In
		About
		Downloads
		...

	At least one class name must be passed, otherwise a :exc:`ValueError` is
	raised.
	"""

	def __init__(self, *classnames):
		if not classnames:
			raise ValueError("need at least one classname")
		self.classnames = classnames

	def __contains__(self, path):
		node = path[-1]
		if not isinstance(node, xsc.Element):
			return False
		attr = node.attrs.get("class")
		# Fancy attributes are never tested, they simply don't match
		if attr.isfancy():
			return False
		# Compare against the individual class names, not against substrings
		present = str(attr).split()
		return builtins.any(classname in present for classname in self.classnames)

	def __str__(self):
		# Strip the parentheses from the ``repr`` of the tuple of class names
		values = repr(self.classnames)[1:-1]
		name = self.__class__.__qualname__
		return f"{name}({values})"
780
781
class InAttrSelector(Selector):
	"""
	Selector that matches all attribute nodes and all nodes inside of
	attributes, i.e. every node where the path contains an attribute. The
	module global ``inattr`` is an instance of this class::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for path in doc.walkpaths(xfind.inattr & xsc.Text, enterattrs=True, enterattr=True):
		... 	print(path[-3].xmlname, path[-2].xmlname, path[-1].string())
		...
		html class no-js
		html dir ltr
		html lang en
		meta charset utf-8
		meta content IE=edge
		meta http-equiv X-UA-Compatible
		...
	"""
	def __contains__(self, path):
		# The node itself or any of its ancestors may be the attribute
		for node in path:
			if isinstance(node, xsc.Attr):
				return True
		return False

	def __str__(self):
		return "inattr"


# The shared instance, available as ``xfind.inattr``
inattr = InAttrSelector()
813
814
class Combinator(Selector):
	"""
	A :class:`Combinator` is a selector that transforms one or combines two or
	more other selectors in a certain way.

	This class adds no behaviour of its own. It is the common base class of
	:class:`BinaryCombinator` and :class:`ChainedCombinator`, whose
	:meth:`__str__` methods test operands against it to decide whether an
	operand has to be put in parentheses.
	"""
820
821
class BinaryCombinator(Combinator):
	"""
	A :class:`BinaryCombinator` is a combinator that combines exactly two
	selectors: the left hand selector (``self.left``) and the right hand
	selector (``self.right``).

	Subclasses set the class attribute ``symbol`` to the operator text that
	:meth:`__str__` puts between both operands.
	"""
	symbol = None

	def __init__(self, left, right):
		self.left = left
		self.right = right

	def __str__(self):
		def render(operand):
			# A combinator of a different kind gets parentheses, so that the
			# grouping stays visible in the output
			text = str(operand)
			if isinstance(operand, Combinator) and not isinstance(operand, self.__class__):
				return f"({text})"
			return text

		lhs = render(self.left)
		rhs = render(self.right)
		return f"{lhs}{self.symbol}{rhs}"
841
842
class ChildCombinator(BinaryCombinator):
	"""
	A :class:`ChildCombinator` is a :class:`BinaryCombinator` that matches a
	node if the node itself matches the right hand selector and its immediate
	parent matches the left hand selector (i.e. it works similar to the ``>``
	combinator in CSS or the ``/`` combinator in XPath).

	:class:`ChildCombinator` objects can be created via the division operator
	(``/``)::

		>>> from ll.xist import xsc, parse
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(html.a/html.img):
		... 	print(node.string())
		...
		<img alt="python™" class="python-logo" src="https://www.python.org/static/img/python-logo.png" />
	"""
	symbol = " / "

	def __contains__(self, path):
		# A path without a parent can never match
		if len(path) <= 1:
			return False
		if path not in self.right:
			return False
		# The path of the parent is the path without its last node
		return path[:-1] in self.left
872
873
class DescendantCombinator(BinaryCombinator):
	"""
	A :class:`DescendantCombinator` is a :class:`BinaryCombinator` that matches
	a node if the node itself matches the right hand selector and any of its
	ancestors matches the left hand selector (i.e. it works similar to the
	descendant combinator in CSS or the ``//`` combinator in XPath).

	:class:`DescendantCombinator` objects can be created via the floor division
	operator (``//``)::

		>>> from ll.xist import xsc, parse
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(html.div//html.img):
		... 	print(node.string())
		...
		<img alt="python™" class="python-logo" src="https://www.python.org/static/img/python-logo.png" />
	"""
	symbol = " // "

	def __contains__(self, path):
		if path not in self.right:
			return False
		# Try the ancestors from the parent up to the root; the path of an
		# ancestor is the corresponding prefix of the path
		for depth in range(len(path) - 1, 0, -1):
			if path[:depth] in self.left:
				return True
		return False
907
908
class AdjacentSiblingCombinator(BinaryCombinator):
	"""
	An :class:`AdjacentSiblingCombinator` is a :class:`BinaryCombinator` that
	matches a node if the node itself matches the right hand selector and the
	sibling immediately before it matches the left hand selector.

	:class:`AdjacentSiblingCombinator` objects can be created via the
	multiplication operator (``*``). The following example outputs all
	:class:`span` elements that immediately follow a :class:`form` element::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(html.form*html.span):
		... 	print(node.string())
		...
		<span class="breaker"></span>
	"""

	symbol = " * "

	def __contains__(self, path):
		if len(path) <= 1 or path not in self.right:
			return False
		node = path[-1]
		# Remember the child seen last until the node itself turns up
		# NOTE(review): if the node is not found among the children of its
		# parent, the last child is treated as the preceding sibling -- confirm
		# that this is intended.
		previous = None
		for child in path[-2]:
			if child is node:
				break
			previous = child
		if previous is None:
			# The node is the first child, so there's nothing before it
			return False
		return path[:-1] + [previous] in self.left
948
949
class GeneralSiblingCombinator(BinaryCombinator):
	"""
	A :class:`GeneralSiblingCombinator` is a :class:`BinaryCombinator` that
	matches a node if the node itself matches the right hand selector and any
	of the siblings before it matches the left hand selector.

	:class:`GeneralSiblingCombinator` objects can be created via the
	exponentiation operator (``**``). The following example outputs all
	:class:`meta` elements that come after a :class:`link` element::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(html.link**html.meta):
		... 	print(node.string())
		...
		<meta name="application-name" content="Python.org" />
		<meta name="msapplication-tooltip" content="The official home of the Python Programming Language" />
		<meta name="apple-mobile-web-app-title" content="Python.org" />
		<meta name="apple-mobile-web-app-capable" content="yes" />
		<meta name="apple-mobile-web-app-status-bar-style" content="black" />
		...
	"""

	symbol = " ** "

	def __contains__(self, path):
		if len(path) <= 1 or path not in self.right:
			return False
		node = path[-1]
		parentpath = path[:-1]
		# NOTE(review): if the node is not found among the children of its
		# parent, all children are treated as preceding siblings -- confirm
		# that this is intended.
		for child in path[-2]:
			if child is node:
				# Reached the node itself: none of the siblings before it matched
				return False
			if parentpath + [child] in self.left:
				return True
		return False
991
992
class ChainedCombinator(Combinator):
	"""
	Base class for combinators that join an arbitrary number of selectors.

	Each object passed to the constructor is converted with :func:`selector`
	and the results are stored as a tuple in the attribute ``selectors``.
	Subclasses provide the operator string used by :meth:`__str__` in the
	class attribute ``symbol``.
	"""

	symbol = None

	def __init__(self, *selectors):
		self.selectors = tuple(map(selector, selectors))

	def __str__(self):
		def render(sel):
			# Other kinds of combinators are put in parentheses; selectors
			# of our own class and plain selectors are output as they are.
			nested = isinstance(sel, Combinator) and not isinstance(sel, self.__class__)
			return f"({sel})" if nested else str(sel)

		parts = [render(sel) for sel in self.selectors]
		return self.symbol.join(parts)
1012
1013
class OrCombinator(ChainedCombinator):
	"""
	An :class:`OrCombinator` is a :class:`ChainedCombinator` that matches as
	soon as one of its selectors matches. The binary or operator (``|``)
	creates an :class:`OrCombinator`::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.hasattr("href") | xfind.hasattr("src")):
		... 	print(node.attrs.href if "href" in node.Attrs else node.attrs.src)
		...
		https://ajax.googleapis.com/
		https://www.python.org/static/js/libs/modernizr.js
		https://www.python.org/static/stylesheets/style.css
		https://www.python.org/static/stylesheets/mq.css
		https://www.python.org/static/favicon.ico
		...
	"""

	symbol = " | "

	def __contains__(self, path):
		# The first selector that contains the path decides the result.
		for sel in self.selectors:
			if path in sel:
				return True
		return False

	def __or__(self, other):
		# Extend the chain instead of nesting a new combinator.
		return OrCombinator(*self.selectors, selector(other))
1046
1047
class AndCombinator(ChainedCombinator):
	"""
	An :class:`AndCombinator` is a :class:`ChainedCombinator` that matches only
	if every one of its selectors matches. The binary and operator (``&``)
	creates an :class:`AndCombinator`::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(html.input & xfind.hasattr("id")):
		... 	print(node.string())
		...
		<input class="search-field" id="id-search-field" name="q" placeholder="Search" role="textbox" tabindex="1" type="search" />
	"""

	symbol = " & "

	def __contains__(self, path):
		# The first selector that does not contain the path decides the result.
		for sel in self.selectors:
			if path not in sel:
				return False
		return True

	def __and__(self, other):
		# Extend the chain instead of nesting a new combinator.
		return AndCombinator(*self.selectors, selector(other))
1076
1077
class NotCombinator(Combinator):
	# The docstring is a raw string because the example output contains a
	# literal ``\/``, which is not a valid escape sequence in a normal string.
	r"""
	A :class:`NotCombinator` inverts the selection logic of the underlying
	selector, i.e. a node matches only if it does not match the underlying
	selector. A :class:`NotCombinator` can be created with the unary inversion
	operator (``~``).

	The following example outputs all internal scripts::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(html.script & ~xfind.hasattr("src")):
		... 	print(node.string())
		...
		<script type="text/javascript">
		    var _gaq = _gaq || [];
		    _gaq.push(['_setAccount', 'UA-39055973-1']);
		    _gaq.push(['_trackPageview']);

		    (function() {
		        var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
		        ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
		        var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
		    })();
		    </script>
		<script>window.jQuery || document.write('&lt;script src="/static/js/libs/jquery-1.8.2.min.js"&gt;&lt;\/script&gt;')</script>
	"""

	def __init__(self, selector):
		"""
		Create a :class:`NotCombinator` that inverts :obj:`selector`.

		:obj:`selector` is stored unchanged in the attribute ``selector``.
		"""
		self.selector = selector

	def __contains__(self, path):
		"""
		Return ``True`` if :obj:`path` is not matched by the underlying
		selector and ``False`` if it is.
		"""
		return path not in self.selector

	def __str__(self):
		"""
		Return ``~`` followed by the string form of the underlying selector.

		The underlying selector is put in parentheses if it is a
		:class:`Combinator` other than a :class:`NotCombinator`.
		"""
		if isinstance(self.selector, Combinator) and not isinstance(self.selector, NotCombinator):
			return f"~({self.selector})"
		else:
			return f"~{self.selector}"
1123
1124
class CallableSelector(Selector):
	"""
	A :class:`CallableSelector` delegates the decision whether a path is
	selected to a user specified callable. The callable is called with the
	path and its return value specifies whether the path is selected.
	A :class:`CallableSelector` is created implicitly whenever a callable is
	passed to a method that expects a selector.

	The following example outputs all links that point outside the ``python.org``
	domain::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> def isextlink(path):
		... 	return isinstance(path[-1], html.a) and not str(path[-1].attrs.href).startswith("https://www.python.org")
		...
		>>> for node in doc.walknodes(isextlink):
		... 	print(node.string())
		...
		<a href="http://docs.python.org/" title="Python Documentation">Docs</a>
		<a href="https://pypi.python.org/" title="Python Package Index">PyPI</a>
		<a class="text-shrink" href="javascript:;" title="Make Text Smaller">Smaller</a>
		<a class="text-grow" href="javascript:;" title="Make Text Larger">Larger</a>
		...
	"""

	def __init__(self, func):
		# The callable is kept as it is and only called in ``__contains__``.
		self.func = func

	def __contains__(self, path):
		# The wrapped callable alone decides whether the path is selected.
		selected = self.func(path)
		return selected

	def __str__(self):
		classname = self.__class__.__qualname__
		return f"{classname}({self.func!r})"
1165
1166
class nthchild(Selector):
	"""
	An :class:`nthchild` object selects each node that sits at a given
	position among the children of its parent. ``nthchild(0)`` selects every
	first child and ``nthchild(-1)`` every last child. Instead of an index the
	strings ``"even"`` and ``"odd"`` can be passed: ``nthchild("even")``
	selects each first, third, fifth, ... child and ``nthchild("odd")`` each
	second, fourth, sixth, ... child.
	"""

	def __init__(self, index):
		self.index = index

	def __contains__(self, path):
		# A node without a parent in the path is never selected.
		if len(path) < 2:
			return False
		if self.index in ("even", "odd"):
			# Locate the node among its parent's children and compare the
			# parity of its (zero based) position with the requested one.
			wantodd = self.index == "odd"
			for (position, child) in enumerate(path[-2]):
				if child is path[-1]:
					return (position % 2) == wantodd
			return False
		try:
			return path[-2][self.index] is path[-1]
		except IndexError:
			# The parent has no child at this index.
			return False

	def __str__(self):
		classname = self.__class__.__qualname__
		return f"{classname}({self.index!r})"
1194
1195
class nthoftype(Selector):
	"""
	An :class:`nthoftype` object selects each node that is the n-th node of a
	specified type among its siblings. Like :class:`nthchild`,
	:class:`nthoftype` supports negative and positive indices as well as
	``"even"`` and ``"odd"``. The types to check can be passed explicitly
	after the index. If no types are passed, the type of the node itself is
	used::

		>>> from ll.xist import xsc, parse, xfind
		>>> from ll.xist.ns import xml, html, chars
		>>> doc = parse.tree(
		... 	parse.URL("https://www.python.org/"),
		... 	parse.Tidy(),
		... 	parse.NS(html),
		... 	parse.Node(pool=xsc.Pool(xml, html, chars))
		... )
		>>> for node in doc.walknodes(xfind.nthoftype(0, html.h2)):
		... 	print(node.string())
		...
		<h2 class="widget-title"><span aria-hidden="true" class="icon-get-started"></span>Get Started</h2>
		<h2 class="widget-title"><span aria-hidden="true" class="icon-download"></span>Download</h2>
		<h2 class="widget-title"><span aria-hidden="true" class="icon-documentation"></span>Docs</h2>
		<h2 class="widget-title"><span aria-hidden="true" class="icon-jobs"></span>Jobs</h2>
		...
	"""

	def __init__(self, index, *types):
		self.index = index
		self.types = types

	def _find(self, path):
		# Generate the children of the parent that are instances of the
		# requested types (or of the node's own class if none were given).
		wanted = self.types or path[-1].__class__
		yield from (child for child in path[-2] if isinstance(child, wanted))

	def __contains__(self, path):
		# A node without a parent in the path is never selected.
		if len(path) < 2:
			return False
		if self.index in ("even", "odd"):
			# Compare the parity of the node's (zero based) position among
			# the type-filtered siblings with the requested one.
			wantodd = self.index == "odd"
			for (position, child) in enumerate(self._find(path)):
				if child is path[-1]:
					return (position % 2) == wantodd
			return False
		try:
			return misc.item(self._find(path), self.index) is path[-1]
		except IndexError:
			return False

	def __str__(self):
		classname = self.__class__.__qualname__
		if not self.types:
			return f"{classname}({self.index!r})"
		typenames = ", ".join(f"{cls.__module__}.{cls.__qualname__}" for cls in self.types)
		return f"{classname}({self.index!r}, {typenames})"
1251