1"""CSS matcher."""
2from datetime import datetime
3from . import util
4import re
5from . import css_types as ct
6import unicodedata
7import bs4  # type: ignore[import]
8from typing import Iterator, Iterable, List, Any, Optional, Tuple, Union, Dict, Callable, Sequence, cast
9
# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')

# A run of one or more characters that are not space, tab, CR, LF, or form feed.
# `get_classes` uses it to split a `class` attribute string into individual class names.
RE_NOT_WS = re.compile('[^ \t\r\n\f]+')

# Relationships
REL_PARENT = ' '
REL_CLOSE_PARENT = '>'
REL_SIBLING = '~'
REL_CLOSE_SIBLING = '+'

# Relationships for :has() (forward looking)
# These are the same combinators prefixed with `:`; `match_relations` uses that prefix
# to decide between forward looking and backward looking matching.
REL_HAS_PARENT = ': '
REL_HAS_CLOSE_PARENT = ':>'
REL_HAS_SIBLING = ':~'
REL_HAS_CLOSE_SIBLING = ':+'

# Namespace URIs for XHTML and XML.
NS_XHTML = 'http://www.w3.org/1999/xhtml'
NS_XML = 'http://www.w3.org/XML/1998/namespace'

# Combined selector flag masks (both directionality flags, both range flags).
DIR_FLAGS = ct.SEL_DIR_LTR | ct.SEL_DIR_RTL
RANGES = ct.SEL_IN_RANGE | ct.SEL_OUT_OF_RANGE

# Maps a `dir` attribute value to its selector flag; `auto` carries no explicit direction flag.
DIR_MAP = {
    'ltr': ct.SEL_DIR_LTR,
    'rtl': ct.SEL_DIR_RTL,
    'auto': 0
}

# Patterns used by `Inputs.parse_value` to recognize the textual form of each input type.
# They only check the shape of the value; numeric ranges are validated separately.
RE_NUM = re.compile(r"^(?P<value>-?(?:[0-9]{1,}(\.[0-9]+)?|\.[0-9]+))$")
RE_TIME = re.compile(r'^(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$')
RE_MONTH = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})$')
RE_WEEK = re.compile(r'^(?P<year>[0-9]{4,})-W(?P<week>[0-9]{2})$')
RE_DATE = re.compile(r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})$')
RE_DATETIME = re.compile(
    r'^(?P<year>[0-9]{4,})-(?P<month>[0-9]{2})-(?P<day>[0-9]{2})T(?P<hour>[0-9]{2}):(?P<minutes>[0-9]{2})$'
)
# Matches `-*-` (optionally followed by further `*-` / `*` subtags) or a trailing `-*`;
# `extended_language_filter` replaces each match with a single `-` before comparing subtags.
RE_WILD_STRIP = re.compile(r'(?:(?:-\*-)(?:\*(?:-|$))*|-\*$)')

# Calendar constants used by `Inputs.validate_day`.
MONTHS_30 = (4, 6, 9, 11)  # April, June, September, and November
FEB = 2
SHORT_MONTH = 30
LONG_MONTH = 31
FEB_MONTH = 28
FEB_LEAP_MONTH = 29
# NOTE(review): not referenced in the visible part of the file - confirm usage elsewhere.
DAYS_IN_WEEK = 7
56
57
class _FakeParent:
    """
    Fake parent class.

    When we have a fragment with no `BeautifulSoup` document object,
    we can't evaluate `nth` selectors properly.  Create a temporary
    fake parent so we can traverse the root element as a child.

    The fake parent exposes only what the traversal code needs: a `contents`
    list holding the single wrapped element, and a length.
    """

    def __init__(self, element: 'bs4.Tag') -> None:
        """Wrap `element` as the only child of this fake parent."""

        self.contents = [element]

    def __len__(self) -> int:
        """Return the number of children (the size of `contents`)."""

        return len(self.contents)
76
77
78class _DocumentNav:
79    """Navigate a Beautiful Soup document."""
80
81    @classmethod
82    def assert_valid_input(cls, tag: Any) -> None:
83        """Check if valid input tag or document."""
84
85        # Fail on unexpected types.
86        if not cls.is_tag(tag):
87            raise TypeError("Expected a BeautifulSoup 'Tag', but instead recieved type {}".format(type(tag)))
88
89    @staticmethod
90    def is_doc(obj: 'bs4.Tag') -> bool:
91        """Is `BeautifulSoup` object."""
92        return isinstance(obj, bs4.BeautifulSoup)
93
94    @staticmethod
95    def is_tag(obj: 'bs4.PageElement') -> bool:
96        """Is tag."""
97        return isinstance(obj, bs4.Tag)
98
99    @staticmethod
100    def is_declaration(obj: 'bs4.PageElement') -> bool:  # pragma: no cover
101        """Is declaration."""
102        return isinstance(obj, bs4.Declaration)
103
104    @staticmethod
105    def is_cdata(obj: 'bs4.PageElement') -> bool:
106        """Is CDATA."""
107        return isinstance(obj, bs4.CData)
108
109    @staticmethod
110    def is_processing_instruction(obj: 'bs4.PageElement') -> bool:  # pragma: no cover
111        """Is processing instruction."""
112        return isinstance(obj, bs4.ProcessingInstruction)
113
114    @staticmethod
115    def is_navigable_string(obj: 'bs4.PageElement') -> bool:
116        """Is navigable string."""
117        return isinstance(obj, bs4.NavigableString)
118
119    @staticmethod
120    def is_special_string(obj: 'bs4.PageElement') -> bool:
121        """Is special string."""
122        return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
123
124    @classmethod
125    def is_content_string(cls, obj: 'bs4.PageElement') -> bool:
126        """Check if node is content string."""
127
128        return cls.is_navigable_string(obj) and not cls.is_special_string(obj)
129
130    @staticmethod
131    def create_fake_parent(el: 'bs4.Tag') -> _FakeParent:
132        """Create fake parent for a given element."""
133
134        return _FakeParent(el)
135
136    @staticmethod
137    def is_xml_tree(el: 'bs4.Tag') -> bool:
138        """Check if element (or document) is from a XML tree."""
139
140        return bool(el._is_xml)
141
142    def is_iframe(self, el: 'bs4.Tag') -> bool:
143        """Check if element is an `iframe`."""
144
145        return bool(
146            ((el.name if self.is_xml_tree(el) else util.lower(el.name)) == 'iframe') and
147            self.is_html_tag(el)  # type: ignore[attr-defined]
148        )
149
150    def is_root(self, el: 'bs4.Tag') -> bool:
151        """
152        Return whether element is a root element.
153
154        We check that the element is the root of the tree (which we have already pre-calculated),
155        and we check if it is the root element under an `iframe`.
156        """
157
158        root = self.root and self.root is el  # type: ignore[attr-defined]
159        if not root:
160            parent = self.get_parent(el)
161            root = parent is not None and self.is_html and self.is_iframe(parent)  # type: ignore[attr-defined]
162        return root
163
164    def get_contents(self, el: 'bs4.Tag', no_iframe: bool = False) -> Iterator['bs4.PageElement']:
165        """Get contents or contents in reverse."""
166        if not no_iframe or not self.is_iframe(el):
167            for content in el.contents:
168                yield content
169
170    def get_children(
171        self,
172        el: 'bs4.Tag',
173        start: Optional[int] = None,
174        reverse: bool = False,
175        tags: bool = True,
176        no_iframe: bool = False
177    ) -> Iterator['bs4.PageElement']:
178        """Get children."""
179
180        if not no_iframe or not self.is_iframe(el):
181            last = len(el.contents) - 1
182            if start is None:
183                index = last if reverse else 0
184            else:
185                index = start
186            end = -1 if reverse else last + 1
187            incr = -1 if reverse else 1
188
189            if 0 <= index <= last:
190                while index != end:
191                    node = el.contents[index]
192                    index += incr
193                    if not tags or self.is_tag(node):
194                        yield node
195
196    def get_descendants(
197        self,
198        el: 'bs4.Tag',
199        tags: bool = True,
200        no_iframe: bool = False
201    ) -> Iterator['bs4.PageElement']:
202        """Get descendants."""
203
204        if not no_iframe or not self.is_iframe(el):
205            next_good = None
206            for child in el.descendants:
207
208                if next_good is not None:
209                    if child is not next_good:
210                        continue
211                    next_good = None
212
213                is_tag = self.is_tag(child)
214
215                if no_iframe and is_tag and self.is_iframe(child):
216                    if child.next_sibling is not None:
217                        next_good = child.next_sibling
218                    else:
219                        last_child = child
220                        while self.is_tag(last_child) and last_child.contents:
221                            last_child = last_child.contents[-1]
222                        next_good = last_child.next_element
223                    yield child
224                    if next_good is None:
225                        break
226                    # Coverage isn't seeing this even though it's executed
227                    continue  # pragma: no cover
228
229                if not tags or is_tag:
230                    yield child
231
232    def get_parent(self, el: 'bs4.Tag', no_iframe: bool = False) -> 'bs4.Tag':
233        """Get parent."""
234
235        parent = el.parent
236        if no_iframe and parent is not None and self.is_iframe(parent):
237            parent = None
238        return parent
239
240    @staticmethod
241    def get_tag_name(el: 'bs4.Tag') -> Optional[str]:
242        """Get tag."""
243
244        return cast(Optional[str], el.name)
245
246    @staticmethod
247    def get_prefix_name(el: 'bs4.Tag') -> Optional[str]:
248        """Get prefix."""
249
250        return cast(Optional[str], el.prefix)
251
252    @staticmethod
253    def get_uri(el: 'bs4.Tag') -> Optional[str]:
254        """Get namespace `URI`."""
255
256        return cast(Optional[str], el.namespace)
257
258    @classmethod
259    def get_next(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
260        """Get next sibling tag."""
261
262        sibling = el.next_sibling
263        while tags and not cls.is_tag(sibling) and sibling is not None:
264            sibling = sibling.next_sibling
265        return sibling
266
267    @classmethod
268    def get_previous(cls, el: 'bs4.Tag', tags: bool = True) -> 'bs4.PageElement':
269        """Get previous sibling tag."""
270
271        sibling = el.previous_sibling
272        while tags and not cls.is_tag(sibling) and sibling is not None:
273            sibling = sibling.previous_sibling
274        return sibling
275
276    @staticmethod
277    def has_html_ns(el: 'bs4.Tag') -> bool:
278        """
279        Check if element has an HTML namespace.
280
281        This is a bit different than whether a element is treated as having an HTML namespace,
282        like we do in the case of `is_html_tag`.
283        """
284
285        ns = getattr(el, 'namespace') if el else None
286        return bool(ns and ns == NS_XHTML)
287
288    @staticmethod
289    def split_namespace(el: 'bs4.Tag', attr_name: str) -> Tuple[Optional[str], Optional[str]]:
290        """Return namespace and attribute name without the prefix."""
291
292        return getattr(attr_name, 'namespace', None), getattr(attr_name, 'name', None)
293
294    @classmethod
295    def normalize_value(cls, value: Any) -> Union[str, Sequence[str]]:
296        """Normalize the value to be a string or list of strings."""
297
298        # Treat `None` as empty string.
299        if value is None:
300            return ''
301
302        # Pass through strings
303        if (isinstance(value, str)):
304            return value
305
306        # If it's a byte string, convert it to Unicode, treating it as UTF-8.
307        if isinstance(value, bytes):
308            return value.decode("utf8")
309
310        # BeautifulSoup supports sequences of attribute values, so make sure the children are strings.
311        if isinstance(value, Sequence):
312            new_value = []
313            for v in value:
314                if not isinstance(v, (str, bytes)) and isinstance(v, Sequence):
315                    # This is most certainly a user error and will crash and burn later.
316                    # To keep things working, we'll do what we do with all objects,
317                    # And convert them to strings.
318                    new_value.append(str(v))
319                else:
320                    # Convert the child to a string
321                    new_value.append(cast(str, cls.normalize_value(v)))
322            return new_value
323
324        # Try and make anything else a string
325        return str(value)
326
327    @classmethod
328    def get_attribute_by_name(
329        cls,
330        el: 'bs4.Tag',
331        name: str,
332        default: Optional[Union[str, Sequence[str]]] = None
333    ) -> Optional[Union[str, Sequence[str]]]:
334        """Get attribute by name."""
335
336        value = default
337        if el._is_xml:
338            try:
339                value = cls.normalize_value(el.attrs[name])
340            except KeyError:
341                pass
342        else:
343            for k, v in el.attrs.items():
344                if util.lower(k) == name:
345                    value = cls.normalize_value(v)
346                    break
347        return value
348
349    @classmethod
350    def iter_attributes(cls, el: 'bs4.Tag') -> Iterator[Tuple[str, Optional[Union[str, Sequence[str]]]]]:
351        """Iterate attributes."""
352
353        for k, v in el.attrs.items():
354            yield k, cls.normalize_value(v)
355
356    @classmethod
357    def get_classes(cls, el: 'bs4.Tag') -> Sequence[str]:
358        """Get classes."""
359
360        classes = cls.get_attribute_by_name(el, 'class', [])
361        if isinstance(classes, str):
362            classes = RE_NOT_WS.findall(classes)
363        return cast(Sequence[str], classes)
364
365    def get_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> str:
366        """Get text."""
367
368        return ''.join(
369            [node for node in self.get_descendants(el, tags=False, no_iframe=no_iframe) if self.is_content_string(node)]
370        )
371
372    def get_own_text(self, el: 'bs4.Tag', no_iframe: bool = False) -> List[str]:
373        """Get Own Text."""
374
375        return [node for node in self.get_contents(el, no_iframe=no_iframe) if self.is_content_string(node)]
376
377
378class Inputs:
379    """Class for parsing and validating input items."""
380
381    @staticmethod
382    def validate_day(year: int, month: int, day: int) -> bool:
383        """Validate day."""
384
385        max_days = LONG_MONTH
386        if month == FEB:
387            max_days = FEB_LEAP_MONTH if ((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0) else FEB_MONTH
388        elif month in MONTHS_30:
389            max_days = SHORT_MONTH
390        return 1 <= day <= max_days
391
392    @staticmethod
393    def validate_week(year: int, week: int) -> bool:
394        """Validate week."""
395
396        max_week = datetime.strptime("{}-{}-{}".format(12, 31, year), "%m-%d-%Y").isocalendar()[1]
397        if max_week == 1:
398            max_week = 53
399        return 1 <= week <= max_week
400
401    @staticmethod
402    def validate_month(month: int) -> bool:
403        """Validate month."""
404
405        return 1 <= month <= 12
406
407    @staticmethod
408    def validate_year(year: int) -> bool:
409        """Validate year."""
410
411        return 1 <= year
412
413    @staticmethod
414    def validate_hour(hour: int) -> bool:
415        """Validate hour."""
416
417        return 0 <= hour <= 23
418
419    @staticmethod
420    def validate_minutes(minutes: int) -> bool:
421        """Validate minutes."""
422
423        return 0 <= minutes <= 59
424
425    @classmethod
426    def parse_value(cls, itype: str, value: Optional[str]) -> Optional[Tuple[float, ...]]:
427        """Parse the input value."""
428
429        parsed = None  # type: Optional[Tuple[float, ...]]
430        if value is None:
431            return value
432        if itype == "date":
433            m = RE_DATE.match(value)
434            if m:
435                year = int(m.group('year'), 10)
436                month = int(m.group('month'), 10)
437                day = int(m.group('day'), 10)
438                if cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day):
439                    parsed = (year, month, day)
440        elif itype == "month":
441            m = RE_MONTH.match(value)
442            if m:
443                year = int(m.group('year'), 10)
444                month = int(m.group('month'), 10)
445                if cls.validate_year(year) and cls.validate_month(month):
446                    parsed = (year, month)
447        elif itype == "week":
448            m = RE_WEEK.match(value)
449            if m:
450                year = int(m.group('year'), 10)
451                week = int(m.group('week'), 10)
452                if cls.validate_year(year) and cls.validate_week(year, week):
453                    parsed = (year, week)
454        elif itype == "time":
455            m = RE_TIME.match(value)
456            if m:
457                hour = int(m.group('hour'), 10)
458                minutes = int(m.group('minutes'), 10)
459                if cls.validate_hour(hour) and cls.validate_minutes(minutes):
460                    parsed = (hour, minutes)
461        elif itype == "datetime-local":
462            m = RE_DATETIME.match(value)
463            if m:
464                year = int(m.group('year'), 10)
465                month = int(m.group('month'), 10)
466                day = int(m.group('day'), 10)
467                hour = int(m.group('hour'), 10)
468                minutes = int(m.group('minutes'), 10)
469                if (
470                    cls.validate_year(year) and cls.validate_month(month) and cls.validate_day(year, month, day) and
471                    cls.validate_hour(hour) and cls.validate_minutes(minutes)
472                ):
473                    parsed = (year, month, day, hour, minutes)
474        elif itype in ("number", "range"):
475            m = RE_NUM.match(value)
476            if m:
477                parsed = (float(m.group('value')),)
478        return parsed
479
480
481class CSSMatch(_DocumentNav):
482    """Perform CSS matching."""
483
484    def __init__(
485        self,
486        selectors: ct.SelectorList,
487        scope: 'bs4.Tag',
488        namespaces: Optional[ct.Namespaces],
489        flags: int
490    ) -> None:
491        """Initialize."""
492
493        self.assert_valid_input(scope)
494        self.tag = scope
495        self.cached_meta_lang = []  # type: List[Tuple[str, str]]
496        self.cached_default_forms = []  # type: List[Tuple['bs4.Tag', 'bs4.Tag']]
497        self.cached_indeterminate_forms = []  # type: List[Tuple['bs4.Tag', str, bool]]
498        self.selectors = selectors
499        self.namespaces = {} if namespaces is None else namespaces  # type: Union[ct.Namespaces, Dict[str, str]]
500        self.flags = flags
501        self.iframe_restrict = False
502
503        # Find the root element for the whole tree
504        doc = scope
505        parent = self.get_parent(doc)
506        while parent:
507            doc = parent
508            parent = self.get_parent(doc)
509        root = None
510        if not self.is_doc(doc):
511            root = doc
512        else:
513            for child in self.get_children(doc):
514                root = child
515                break
516
517        self.root = root
518        self.scope = scope if scope is not doc else root
519        self.has_html_namespace = self.has_html_ns(root)
520
521        # A document can be both XML and HTML (XHTML)
522        self.is_xml = self.is_xml_tree(doc)
523        self.is_html = not self.is_xml or self.has_html_namespace
524
525    def supports_namespaces(self) -> bool:
526        """Check if namespaces are supported in the HTML type."""
527
528        return self.is_xml or self.has_html_namespace
529
530    def get_tag_ns(self, el: 'bs4.Tag') -> str:
531        """Get tag namespace."""
532
533        if self.supports_namespaces():
534            namespace = ''
535            ns = self.get_uri(el)
536            if ns:
537                namespace = ns
538        else:
539            namespace = NS_XHTML
540        return namespace
541
542    def is_html_tag(self, el: 'bs4.Tag') -> bool:
543        """Check if tag is in HTML namespace."""
544
545        return self.get_tag_ns(el) == NS_XHTML
546
547    def get_tag(self, el: 'bs4.Tag') -> Optional[str]:
548        """Get tag."""
549
550        name = self.get_tag_name(el)
551        return util.lower(name) if name is not None and not self.is_xml else name
552
553    def get_prefix(self, el: 'bs4.Tag') -> Optional[str]:
554        """Get prefix."""
555
556        prefix = self.get_prefix_name(el)
557        return util.lower(prefix) if prefix is not None and not self.is_xml else prefix
558
559    def find_bidi(self, el: 'bs4.Tag') -> Optional[int]:
560        """Get directionality from element text."""
561
562        for node in self.get_children(el, tags=False):
563
564            # Analyze child text nodes
565            if self.is_tag(node):
566
567                # Avoid analyzing certain elements specified in the specification.
568                direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(node, 'dir', '')), None)
569                if (
570                    self.get_tag(node) in ('bdi', 'script', 'style', 'textarea', 'iframe') or
571                    not self.is_html_tag(node) or
572                    direction is not None
573                ):
574                    continue  # pragma: no cover
575
576                # Check directionality of this node's text
577                value = self.find_bidi(node)
578                if value is not None:
579                    return value
580
581                # Direction could not be determined
582                continue  # pragma: no cover
583
584            # Skip `doctype` comments, etc.
585            if self.is_special_string(node):
586                continue
587
588            # Analyze text nodes for directionality.
589            for c in node:
590                bidi = unicodedata.bidirectional(c)
591                if bidi in ('AL', 'R', 'L'):
592                    return ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
593        return None
594
595    def extended_language_filter(self, lang_range: str, lang_tag: str) -> bool:
596        """Filter the language tags."""
597
598        match = True
599        lang_range = RE_WILD_STRIP.sub('-', lang_range).lower()
600        ranges = lang_range.split('-')
601        subtags = lang_tag.lower().split('-')
602        length = len(ranges)
603        rindex = 0
604        sindex = 0
605        r = ranges[rindex]
606        s = subtags[sindex]
607
608        # Primary tag needs to match
609        if r != '*' and r != s:
610            match = False
611
612        rindex += 1
613        sindex += 1
614
615        # Match until we run out of ranges
616        while match and rindex < length:
617            r = ranges[rindex]
618            try:
619                s = subtags[sindex]
620            except IndexError:
621                # Ran out of subtags,
622                # but we still have ranges
623                match = False
624                continue
625
626            # Empty range
627            if not r:
628                match = False
629                continue
630
631            # Matched range
632            elif s == r:
633                rindex += 1
634
635            # Implicit wildcard cannot match
636            # singletons
637            elif len(s) == 1:
638                match = False
639                continue
640
641            # Implicitly matched, so grab next subtag
642            sindex += 1
643
644        return match
645
646    def match_attribute_name(
647        self,
648        el: 'bs4.Tag',
649        attr: str,
650        prefix: Optional[str]
651    ) -> Optional[Union[str, Sequence[str]]]:
652        """Match attribute name and return value if it exists."""
653
654        value = None
655        if self.supports_namespaces():
656            value = None
657            # If we have not defined namespaces, we can't very well find them, so don't bother trying.
658            if prefix:
659                ns = self.namespaces.get(prefix)
660                if ns is None and prefix != '*':
661                    return None
662            else:
663                ns = None
664
665            for k, v in self.iter_attributes(el):
666
667                # Get attribute parts
668                namespace, name = self.split_namespace(el, k)
669
670                # Can't match a prefix attribute as we haven't specified one to match
671                # Try to match it normally as a whole `p:a` as selector may be trying `p\:a`.
672                if ns is None:
673                    if (self.is_xml and attr == k) or (not self.is_xml and util.lower(attr) == util.lower(k)):
674                        value = v
675                        break
676                    # Coverage is not finding this even though it is executed.
677                    # Adding a print statement before this (and erasing coverage) causes coverage to find the line.
678                    # Ignore the false positive message.
679                    continue  # pragma: no cover
680
681                # We can't match our desired prefix attribute as the attribute doesn't have a prefix
682                if namespace is None or ns != namespace and prefix != '*':
683                    continue
684
685                # The attribute doesn't match.
686                if (util.lower(attr) != util.lower(name)) if not self.is_xml else (attr != name):
687                    continue
688
689                value = v
690                break
691        else:
692            for k, v in self.iter_attributes(el):
693                if util.lower(attr) != util.lower(k):
694                    continue
695                value = v
696                break
697        return value
698
699    def match_namespace(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
700        """Match the namespace of the element."""
701
702        match = True
703        namespace = self.get_tag_ns(el)
704        default_namespace = self.namespaces.get('')
705        tag_ns = '' if tag.prefix is None else self.namespaces.get(tag.prefix)
706        # We must match the default namespace if one is not provided
707        if tag.prefix is None and (default_namespace is not None and namespace != default_namespace):
708            match = False
709        # If we specified `|tag`, we must not have a namespace.
710        elif (tag.prefix is not None and tag.prefix == '' and namespace):
711            match = False
712        # Verify prefix matches
713        elif (
714            tag.prefix and
715            tag.prefix != '*' and (tag_ns is None or namespace != tag_ns)
716        ):
717            match = False
718        return match
719
720    def match_attributes(self, el: 'bs4.Tag', attributes: Tuple[ct.SelectorAttribute, ...]) -> bool:
721        """Match attributes."""
722
723        match = True
724        if attributes:
725            for a in attributes:
726                temp = self.match_attribute_name(el, a.attribute, a.prefix)
727                pattern = a.xml_type_pattern if self.is_xml and a.xml_type_pattern else a.pattern
728                if temp is None:
729                    match = False
730                    break
731                value = temp if isinstance(temp, str) else ' '.join(temp)
732                if pattern is None:
733                    continue
734                elif pattern.match(value) is None:
735                    match = False
736                    break
737        return match
738
739    def match_tagname(self, el: 'bs4.Tag', tag: ct.SelectorTag) -> bool:
740        """Match tag name."""
741
742        name = (util.lower(tag.name) if not self.is_xml and tag.name is not None else tag.name)
743        return not (
744            name is not None and
745            name not in (self.get_tag(el), '*')
746        )
747
748    def match_tag(self, el: 'bs4.Tag', tag: Optional[ct.SelectorTag]) -> bool:
749        """Match the tag."""
750
751        match = True
752        if tag is not None:
753            # Verify namespace
754            if not self.match_namespace(el, tag):
755                match = False
756            if not self.match_tagname(el, tag):
757                match = False
758        return match
759
760    def match_past_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
761        """Match past relationship."""
762
763        found = False
764        # I don't think this can ever happen, but it makes `mypy` happy
765        if isinstance(relation[0], ct.SelectorNull):  # pragma: no cover
766            return found
767
768        if relation[0].rel_type == REL_PARENT:
769            parent = self.get_parent(el, no_iframe=self.iframe_restrict)
770            while not found and parent:
771                found = self.match_selectors(parent, relation)
772                parent = self.get_parent(parent, no_iframe=self.iframe_restrict)
773        elif relation[0].rel_type == REL_CLOSE_PARENT:
774            parent = self.get_parent(el, no_iframe=self.iframe_restrict)
775            if parent:
776                found = self.match_selectors(parent, relation)
777        elif relation[0].rel_type == REL_SIBLING:
778            sibling = self.get_previous(el)
779            while not found and sibling:
780                found = self.match_selectors(sibling, relation)
781                sibling = self.get_previous(sibling)
782        elif relation[0].rel_type == REL_CLOSE_SIBLING:
783            sibling = self.get_previous(el)
784            if sibling and self.is_tag(sibling):
785                found = self.match_selectors(sibling, relation)
786        return found
787
788    def match_future_child(self, parent: 'bs4.Tag', relation: ct.SelectorList, recursive: bool = False) -> bool:
789        """Match future child."""
790
791        match = False
792        if recursive:
793            children = self.get_descendants  # type: Callable[..., Iterator['bs4.Tag']]
794        else:
795            children = self.get_children
796        for child in children(parent, no_iframe=self.iframe_restrict):
797            match = self.match_selectors(child, relation)
798            if match:
799                break
800        return match
801
802    def match_future_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
803        """Match future relationship."""
804
805        found = False
806        # I don't think this can ever happen, but it makes `mypy` happy
807        if isinstance(relation[0], ct.SelectorNull):  # pragma: no cover
808            return found
809
810        if relation[0].rel_type == REL_HAS_PARENT:
811            found = self.match_future_child(el, relation, True)
812        elif relation[0].rel_type == REL_HAS_CLOSE_PARENT:
813            found = self.match_future_child(el, relation)
814        elif relation[0].rel_type == REL_HAS_SIBLING:
815            sibling = self.get_next(el)
816            while not found and sibling:
817                found = self.match_selectors(sibling, relation)
818                sibling = self.get_next(sibling)
819        elif relation[0].rel_type == REL_HAS_CLOSE_SIBLING:
820            sibling = self.get_next(el)
821            if sibling and self.is_tag(sibling):
822                found = self.match_selectors(sibling, relation)
823        return found
824
825    def match_relations(self, el: 'bs4.Tag', relation: ct.SelectorList) -> bool:
826        """Match relationship to other elements."""
827
828        found = False
829
830        if isinstance(relation[0], ct.SelectorNull) or relation[0].rel_type is None:
831            return found
832
833        if relation[0].rel_type.startswith(':'):
834            found = self.match_future_relations(el, relation)
835        else:
836            found = self.match_past_relations(el, relation)
837
838        return found
839
840    def match_id(self, el: 'bs4.Tag', ids: Tuple[str, ...]) -> bool:
841        """Match element's ID."""
842
843        found = True
844        for i in ids:
845            if i != self.get_attribute_by_name(el, 'id', ''):
846                found = False
847                break
848        return found
849
850    def match_classes(self, el: 'bs4.Tag', classes: Tuple[str, ...]) -> bool:
851        """Match element's classes."""
852
853        current_classes = self.get_classes(el)
854        found = True
855        for c in classes:
856            if c not in current_classes:
857                found = False
858                break
859        return found
860
861    def match_root(self, el: 'bs4.Tag') -> bool:
862        """Match element as root."""
863
864        is_root = self.is_root(el)
865        if is_root:
866            sibling = self.get_previous(el, tags=False)
867            while is_root and sibling is not None:
868                if (
869                    self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
870                    self.is_cdata(sibling)
871                ):
872                    is_root = False
873                else:
874                    sibling = self.get_previous(sibling, tags=False)
875        if is_root:
876            sibling = self.get_next(el, tags=False)
877            while is_root and sibling is not None:
878                if (
879                    self.is_tag(sibling) or (self.is_content_string(sibling) and sibling.strip()) or
880                    self.is_cdata(sibling)
881                ):
882                    is_root = False
883                else:
884                    sibling = self.get_next(sibling, tags=False)
885        return is_root
886
887    def match_scope(self, el: 'bs4.Tag') -> bool:
888        """Match element as scope."""
889
890        return self.scope is el
891
892    def match_nth_tag_type(self, el: 'bs4.Tag', child: 'bs4.Tag') -> bool:
893        """Match tag type for `nth` matches."""
894
895        return(
896            (self.get_tag(child) == self.get_tag(el)) and
897            (self.get_tag_ns(child) == self.get_tag_ns(el))
898        )
899
    def match_nth(self, el: 'bs4.Tag', nth: 'bs4.Tag') -> bool:
        """
        Match `nth` elements.

        Every entry in `nth` must match for the element to match; an empty `nth`
        collection matches trivially. For each entry the siblings of `el` are
        walked (forwards, or backwards when the entry's `last` flag is set) and
        the qualifying ones are counted until the computed position is reached.

        NOTE(review): `nth` is iterated and each item exposes `selectors`, `last`,
        `a`, `b`, `n`, and `of_type`, so the `bs4.Tag` annotation looks inaccurate;
        it is presumably a tuple of `nth` selector objects from `css_types` - confirm.
        """

        # With nothing to check, the element matches.
        matched = True

        for n in nth:
            matched = False
            # The element itself must satisfy the `of S` selector list, if one was given.
            if n.selectors and not self.match_selectors(el, n.selectors):
                break
            parent = self.get_parent(el)
            if parent is None:
                # No real parent: use a temporary stand-in so the element can be walked as a child.
                parent = self.create_fake_parent(el)
            last = n.last
            last_index = len(parent) - 1
            # Child index where the walk starts: the far end when counting from the last child.
            index = last_index if last else 0
            # Number of qualifying siblings seen so far (compared against the 1-based `idx`).
            relative_index = 0
            a = n.a
            b = n.b
            var = n.n
            count = 0
            count_incr = 1
            # Direction of the walk over the parent's children.
            factor = -1 if last else 1
            # Target position: `a * count + b` when the index is variable, otherwise the fixed value `a`.
            idx = last_idx = a * count + b if var else a

            # We can only adjust bounds within a variable index
            if var:
                # Abort if our nth index is out of bounds and only getting further out of bounds as we increment.
                # Otherwise, increment to try to get in bounds.
                adjust = None
                while idx < 1 or idx > last_index:
                    if idx < 0:
                        diff_low = 0 - idx
                        # We previously overshot on the high side; flipping back would just oscillate.
                        if adjust is not None and adjust == 1:
                            break
                        adjust = -1
                        count += count_incr
                        idx = last_idx = a * count + b if var else a
                        diff = 0 - idx
                        # Not getting any closer to the lower bound, so stop trying.
                        if diff >= diff_low:
                            break
                    else:
                        diff_high = idx - last_index
                        # We previously undershot on the low side; flipping back would just oscillate.
                        if adjust is not None and adjust == -1:
                            break
                        adjust = 1
                        count += count_incr
                        idx = last_idx = a * count + b if var else a
                        diff = idx - last_index
                        # Not getting any closer to the upper bound, so stop trying.
                        if diff >= diff_high:
                            break
                        diff_high = diff

                # If a < 0, our count is working backwards, so floor the index by increasing the count.
                # Find the count that yields the lowest, in bound value and use that.
                # Lastly reverse count increment so that we'll increase our index.
                lowest = count
                if a < 0:
                    while idx >= 1:
                        lowest = count
                        count += count_incr
                        idx = last_idx = a * count + b if var else a
                    count_incr = -1
                count = lowest
                idx = last_idx = a * count + b if var else a

            # Evaluate elements while our calculated nth index is still in range
            while 1 <= idx <= last_index + 1:
                child = None
                # Evaluate while our child index is still in range.
                # `index` and `relative_index` persist across passes, so each pass resumes where the last stopped.
                for child in self.get_children(parent, start=index, reverse=factor < 0, tags=False):
                    index += factor
                    if not self.is_tag(child):
                        continue
                    # Handle `of S` in `nth-child`
                    if n.selectors and not self.match_selectors(child, n.selectors):
                        continue
                    # Handle `of-type`
                    if n.of_type and not self.match_nth_tag_type(el, child):
                        continue
                    relative_index += 1
                    if relative_index == idx:
                        if child is el:
                            matched = True
                        else:
                            # Target position reached by a different element; compute the next position.
                            break
                    # Once our element has been passed there is nothing further to learn.
                    if child is el:
                        break
                if child is el:
                    break
                last_idx = idx
                count += count_incr
                if count < 0:
                    # Count is counting down and has now ventured into invalid territory.
                    break
                idx = a * count + b if var else a
                # The position did not change (fixed index, or `a` is 0), so further passes are pointless.
                if last_idx == idx:
                    break
            # One failed `nth` entry fails the whole match.
            if not matched:
                break
        return matched
1000
1001    def match_empty(self, el: 'bs4.Tag') -> bool:
1002        """Check if element is empty (if requested)."""
1003
1004        is_empty = True
1005        for child in self.get_children(el, tags=False):
1006            if self.is_tag(child):
1007                is_empty = False
1008                break
1009            elif self.is_content_string(child) and RE_NOT_EMPTY.search(child):
1010                is_empty = False
1011                break
1012        return is_empty
1013
1014    def match_subselectors(self, el: 'bs4.Tag', selectors: Tuple[ct.SelectorList, ...]) -> bool:
1015        """Match selectors."""
1016
1017        match = True
1018        for sel in selectors:
1019            if not self.match_selectors(el, sel):
1020                match = False
1021        return match
1022
1023    def match_contains(self, el: 'bs4.Tag', contains: Tuple[ct.SelectorContains, ...]) -> bool:
1024        """Match element if it contains text."""
1025
1026        match = True
1027        content = None  # type: Optional[Union[str, Sequence[str]]]
1028        for contain_list in contains:
1029            if content is None:
1030                if contain_list.own:
1031                    content = self.get_own_text(el, no_iframe=self.is_html)
1032                else:
1033                    content = self.get_text(el, no_iframe=self.is_html)
1034            found = False
1035            for text in contain_list.text:
1036                if contain_list.own:
1037                    for c in content:
1038                        if text in c:
1039                            found = True
1040                            break
1041                    if found:
1042                        break
1043                else:
1044                    if text in content:
1045                        found = True
1046                        break
1047            if not found:
1048                match = False
1049        return match
1050
1051    def match_default(self, el: 'bs4.Tag') -> bool:
1052        """Match default."""
1053
1054        match = False
1055
1056        # Find this input's form
1057        form = None
1058        parent = self.get_parent(el, no_iframe=True)
1059        while parent and form is None:
1060            if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
1061                form = parent
1062            else:
1063                parent = self.get_parent(parent, no_iframe=True)
1064
1065        # Look in form cache to see if we've already located its default button
1066        found_form = False
1067        for f, t in self.cached_default_forms:
1068            if f is form:
1069                found_form = True
1070                if t is el:
1071                    match = True
1072                break
1073
1074        # We didn't have the form cached, so look for its default button
1075        if not found_form:
1076            for child in self.get_descendants(form, no_iframe=True):
1077                name = self.get_tag(child)
1078                # Can't do nested forms (haven't figured out why we never hit this)
1079                if name == 'form':  # pragma: no cover
1080                    break
1081                if name in ('input', 'button'):
1082                    v = self.get_attribute_by_name(child, 'type', '')
1083                    if v and util.lower(v) == 'submit':
1084                        self.cached_default_forms.append((form, child))
1085                        if el is child:
1086                            match = True
1087                        break
1088        return match
1089
1090    def match_indeterminate(self, el: 'bs4.Tag') -> bool:
1091        """Match default."""
1092
1093        match = False
1094        name = cast(str, self.get_attribute_by_name(el, 'name'))
1095
1096        def get_parent_form(el: 'bs4.Tag') -> Optional['bs4.Tag']:
1097            """Find this input's form."""
1098            form = None
1099            parent = self.get_parent(el, no_iframe=True)
1100            while form is None:
1101                if self.get_tag(parent) == 'form' and self.is_html_tag(parent):
1102                    form = parent
1103                    break
1104                last_parent = parent
1105                parent = self.get_parent(parent, no_iframe=True)
1106                if parent is None:
1107                    form = last_parent
1108                    break
1109            return form
1110
1111        form = get_parent_form(el)
1112
1113        # Look in form cache to see if we've already evaluated that its fellow radio buttons are indeterminate
1114        found_form = False
1115        for f, n, i in self.cached_indeterminate_forms:
1116            if f is form and n == name:
1117                found_form = True
1118                if i is True:
1119                    match = True
1120                break
1121
1122        # We didn't have the form cached, so validate that the radio button is indeterminate
1123        if not found_form:
1124            checked = False
1125            for child in self.get_descendants(form, no_iframe=True):
1126                if child is el:
1127                    continue
1128                tag_name = self.get_tag(child)
1129                if tag_name == 'input':
1130                    is_radio = False
1131                    check = False
1132                    has_name = False
1133                    for k, v in self.iter_attributes(child):
1134                        if util.lower(k) == 'type' and util.lower(v) == 'radio':
1135                            is_radio = True
1136                        elif util.lower(k) == 'name' and v == name:
1137                            has_name = True
1138                        elif util.lower(k) == 'checked':
1139                            check = True
1140                        if is_radio and check and has_name and get_parent_form(child) is form:
1141                            checked = True
1142                            break
1143                if checked:
1144                    break
1145            if not checked:
1146                match = True
1147            self.cached_indeterminate_forms.append((form, name, match))
1148
1149        return match
1150
    def match_lang(self, el: 'bs4.Tag', langs: Tuple[ct.SelectorLang, ...]) -> bool:
        """
        Match languages.

        The element's language is taken from the nearest `lang` (or, for namespaced
        non-HTML elements, `xml:lang`) attribute found on the element or its ancestors.
        When none is found and the document is treated as HTML, a
        `<meta http-equiv="content-language" content="...">` in the document head is
        consulted, and the outcome is remembered in `cached_meta_lang`.

        Each entry in `langs` is a group of patterns; the element matches when, for
        every group, at least one pattern passes `extended_language_filter`.
        Without a determined language nothing matches.
        """

        match = False
        has_ns = self.supports_namespaces()
        root = self.root
        has_html_namespace = self.has_html_namespace

        # Walk parents looking for `lang` (HTML) or `xml:lang` XML property.
        parent = el
        found_lang = None
        last = None
        while not found_lang:
            has_html_ns = self.has_html_ns(parent)
            for k, v in self.iter_attributes(parent):
                attr_ns, attr = self.split_namespace(parent, k)
                # Attribute names are compared case-insensitively unless the document is XML.
                if (
                    ((not has_ns or has_html_ns) and (util.lower(k) if not self.is_xml else k) == 'lang') or
                    (
                        has_ns and not has_html_ns and attr_ns == NS_XML and
                        (util.lower(attr) if not self.is_xml and attr is not None else attr) == 'lang'
                    )
                ):
                    found_lang = v
                    break
            last = parent
            parent = self.get_parent(parent, no_iframe=self.is_html)

            # Ran out of ancestors: treat the last node reached as the root for the meta lookup below.
            if parent is None:
                root = last
                has_html_namespace = self.has_html_ns(root)
                parent = last
                break

        # Use cached meta language.
        # A cached empty string means "already searched, nothing found"; it is falsy but not `None`,
        # so it also suppresses the meta search below.
        if not found_lang and self.cached_meta_lang:
            for cache in self.cached_meta_lang:
                if root is cache[0]:
                    found_lang = cache[1]

        # If we couldn't find a language, and the document is HTML, look to meta to determine language.
        if found_lang is None and (not self.is_xml or (has_html_namespace and root.name == 'html')):
            # Find head
            # Descend from `parent` into `html` and then `head`; `parent` ends up as the `head` element.
            found = False
            for tag in ('html', 'head'):
                found = False
                for child in self.get_children(parent, no_iframe=self.is_html):
                    if self.get_tag(child) == tag and self.is_html_tag(child):
                        found = True
                        parent = child
                        break
                if not found:  # pragma: no cover
                    break

            # Search meta tags
            if found:
                for child in parent:
                    # NOTE(review): the HTML check is applied to `parent` (the head), not to `child` - confirm intended.
                    if self.is_tag(child) and self.get_tag(child) == 'meta' and self.is_html_tag(parent):
                        c_lang = False
                        content = None
                        # Both `http-equiv="content-language"` and a non-empty `content` are needed, in any order.
                        for k, v in self.iter_attributes(child):
                            if util.lower(k) == 'http-equiv' and util.lower(v) == 'content-language':
                                c_lang = True
                            if util.lower(k) == 'content':
                                content = v
                            if c_lang and content:
                                found_lang = content
                                self.cached_meta_lang.append((cast(str, root), cast(str, found_lang)))
                                break
                    if found_lang:
                        break
                # Remember that this root has no usable meta language so the search is not repeated.
                if not found_lang:
                    self.cached_meta_lang.append((cast(str, root), ''))

        # If we determined a language, compare.
        if found_lang:
            for patterns in langs:
                match = False
                for pattern in patterns:
                    if self.extended_language_filter(pattern, cast(str, found_lang)):
                        match = True
                # Every group of patterns must produce at least one hit.
                if not match:
                    break

        return match
1236
1237    def match_dir(self, el: 'bs4.Tag', directionality: int) -> bool:
1238        """Check directionality."""
1239
1240        # If we have to match both left and right, we can't match either.
1241        if directionality & ct.SEL_DIR_LTR and directionality & ct.SEL_DIR_RTL:
1242            return False
1243
1244        if el is None or not self.is_html_tag(el):
1245            return False
1246
1247        # Element has defined direction of left to right or right to left
1248        direction = DIR_MAP.get(util.lower(self.get_attribute_by_name(el, 'dir', '')), None)
1249        if direction not in (None, 0):
1250            return direction == directionality
1251
1252        # Element is the document element (the root) and no direction assigned, assume left to right.
1253        is_root = self.is_root(el)
1254        if is_root and direction is None:
1255            return ct.SEL_DIR_LTR == directionality
1256
1257        # If `input[type=telephone]` and no direction is assigned, assume left to right.
1258        name = self.get_tag(el)
1259        is_input = name == 'input'
1260        is_textarea = name == 'textarea'
1261        is_bdi = name == 'bdi'
1262        itype = util.lower(self.get_attribute_by_name(el, 'type', '')) if is_input else ''
1263        if is_input and itype == 'tel' and direction is None:
1264            return ct.SEL_DIR_LTR == directionality
1265
1266        # Auto handling for text inputs
1267        if ((is_input and itype in ('text', 'search', 'tel', 'url', 'email')) or is_textarea) and direction == 0:
1268            if is_textarea:
1269                temp = []
1270                for node in self.get_contents(el, no_iframe=True):
1271                    if self.is_content_string(node):
1272                        temp.append(node)
1273                value = ''.join(temp)
1274            else:
1275                value = cast(str, self.get_attribute_by_name(el, 'value', ''))
1276            if value:
1277                for c in value:
1278                    bidi = unicodedata.bidirectional(c)
1279                    if bidi in ('AL', 'R', 'L'):
1280                        direction = ct.SEL_DIR_LTR if bidi == 'L' else ct.SEL_DIR_RTL
1281                        return direction == directionality
1282                # Assume left to right
1283                return ct.SEL_DIR_LTR == directionality
1284            elif is_root:
1285                return ct.SEL_DIR_LTR == directionality
1286            return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
1287
1288        # Auto handling for `bdi` and other non text inputs.
1289        if (is_bdi and direction is None) or direction == 0:
1290            direction = self.find_bidi(el)
1291            if direction is not None:
1292                return direction == directionality
1293            elif is_root:
1294                return ct.SEL_DIR_LTR == directionality
1295            return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
1296
1297        # Match parents direction
1298        return self.match_dir(self.get_parent(el, no_iframe=True), directionality)
1299
1300    def match_range(self, el: 'bs4.Tag', condition: int) -> bool:
1301        """
1302        Match range.
1303
1304        Behavior is modeled after what we see in browsers. Browsers seem to evaluate
1305        if the value is out of range, and if not, it is in range. So a missing value
1306        will not evaluate out of range; therefore, value is in range. Personally, I
1307        feel like this should evaluate as neither in or out of range.
1308        """
1309
1310        out_of_range = False
1311
1312        itype = util.lower(self.get_attribute_by_name(el, 'type'))
1313        mn = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'min', None)))
1314        mx = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'max', None)))
1315
1316        # There is no valid min or max, so we cannot evaluate a range
1317        if mn is None and mx is None:
1318            return False
1319
1320        value = Inputs.parse_value(itype, cast(str, self.get_attribute_by_name(el, 'value', None)))
1321        if value is not None:
1322            if itype in ("date", "datetime-local", "month", "week", "number", "range"):
1323                if mn is not None and value < mn:
1324                    out_of_range = True
1325                if not out_of_range and mx is not None and value > mx:
1326                    out_of_range = True
1327            elif itype == "time":
1328                if mn is not None and mx is not None and mn > mx:
1329                    # Time is periodic, so this is a reversed/discontinuous range
1330                    if value < mn and value > mx:
1331                        out_of_range = True
1332                else:
1333                    if mn is not None and value < mn:
1334                        out_of_range = True
1335                    if not out_of_range and mx is not None and value > mx:
1336                        out_of_range = True
1337
1338        return not out_of_range if condition & ct.SEL_IN_RANGE else out_of_range
1339
1340    def match_defined(self, el: 'bs4.Tag') -> bool:
1341        """
1342        Match defined.
1343
1344        `:defined` is related to custom elements in a browser.
1345
1346        - If the document is XML (not XHTML), all tags will match.
1347        - Tags that are not custom (don't have a hyphen) are marked defined.
1348        - If the tag has a prefix (without or without a namespace), it will not match.
1349
1350        This is of course requires the parser to provide us with the proper prefix and namespace info,
1351        if it doesn't, there is nothing we can do.
1352        """
1353
1354        name = self.get_tag(el)
1355        return (
1356            name is not None and (
1357                name.find('-') == -1 or
1358                name.find(':') != -1 or
1359                self.get_prefix(el) is not None
1360            )
1361        )
1362
1363    def match_placeholder_shown(self, el: 'bs4.Tag') -> bool:
1364        """
1365        Match placeholder shown according to HTML spec.
1366
1367        - text area should be checked if they have content. A single newline does not count as content.
1368
1369        """
1370
1371        match = False
1372        content = self.get_text(el)
1373        if content in ('', '\n'):
1374            match = True
1375
1376        return match
1377
1378    def match_selectors(self, el: 'bs4.Tag', selectors: ct.SelectorList) -> bool:
1379        """Check if element matches one of the selectors."""
1380
1381        match = False
1382        is_not = selectors.is_not
1383        is_html = selectors.is_html
1384
1385        # Internal selector lists that use the HTML flag, will automatically get the `html` namespace.
1386        if is_html:
1387            namespaces = self.namespaces
1388            iframe_restrict = self.iframe_restrict
1389            self.namespaces = {'html': NS_XHTML}
1390            self.iframe_restrict = True
1391
1392        if not is_html or self.is_html:
1393            for selector in selectors:
1394                match = is_not
1395                # We have a un-matchable situation (like `:focus` as you can focus an element in this environment)
1396                if isinstance(selector, ct.SelectorNull):
1397                    continue
1398                # Verify tag matches
1399                if not self.match_tag(el, selector.tag):
1400                    continue
1401                # Verify tag is defined
1402                if selector.flags & ct.SEL_DEFINED and not self.match_defined(el):
1403                    continue
1404                # Verify element is root
1405                if selector.flags & ct.SEL_ROOT and not self.match_root(el):
1406                    continue
1407                # Verify element is scope
1408                if selector.flags & ct.SEL_SCOPE and not self.match_scope(el):
1409                    continue
1410                # Verify element has placeholder shown
1411                if selector.flags & ct.SEL_PLACEHOLDER_SHOWN and not self.match_placeholder_shown(el):
1412                    continue
1413                # Verify `nth` matches
1414                if not self.match_nth(el, selector.nth):
1415                    continue
1416                if selector.flags & ct.SEL_EMPTY and not self.match_empty(el):
1417                    continue
1418                # Verify id matches
1419                if selector.ids and not self.match_id(el, selector.ids):
1420                    continue
1421                # Verify classes match
1422                if selector.classes and not self.match_classes(el, selector.classes):
1423                    continue
1424                # Verify attribute(s) match
1425                if not self.match_attributes(el, selector.attributes):
1426                    continue
1427                # Verify ranges
1428                if selector.flags & RANGES and not self.match_range(el, selector.flags & RANGES):
1429                    continue
1430                # Verify language patterns
1431                if selector.lang and not self.match_lang(el, selector.lang):
1432                    continue
1433                # Verify pseudo selector patterns
1434                if selector.selectors and not self.match_subselectors(el, selector.selectors):
1435                    continue
1436                # Verify relationship selectors
1437                if selector.relation and not self.match_relations(el, selector.relation):
1438                    continue
1439                # Validate that the current default selector match corresponds to the first submit button in the form
1440                if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
1441                    continue
1442                # Validate that the unset radio button is among radio buttons with the same name in a form that are
1443                # also not set.
1444                if selector.flags & ct.SEL_INDETERMINATE and not self.match_indeterminate(el):
1445                    continue
1446                # Validate element directionality
1447                if selector.flags & DIR_FLAGS and not self.match_dir(el, selector.flags & DIR_FLAGS):
1448                    continue
1449                # Validate that the tag contains the specified text.
1450                if selector.contains and not self.match_contains(el, selector.contains):
1451                    continue
1452                match = not is_not
1453                break
1454
1455        # Restore actual namespaces being used for external selector lists
1456        if is_html:
1457            self.namespaces = namespaces
1458            self.iframe_restrict = iframe_restrict
1459
1460        return match
1461
1462    def select(self, limit: int = 0) -> Iterator['bs4.Tag']:
1463        """Match all tags under the targeted tag."""
1464
1465        lim = None if limit < 1 else limit
1466
1467        for child in self.get_descendants(self.tag):
1468            if self.match(child):
1469                yield child
1470                if lim is not None:
1471                    lim -= 1
1472                    if lim < 1:
1473                        break
1474
1475    def closest(self) -> Optional['bs4.Tag']:
1476        """Match closest ancestor."""
1477
1478        current = self.tag
1479        closest = None
1480        while closest is None and current is not None:
1481            if self.match(current):
1482                closest = current
1483            else:
1484                current = self.get_parent(current)
1485        return closest
1486
1487    def filter(self) -> List['bs4.Tag']:  # noqa A001
1488        """Filter tag's children."""
1489
1490        return [tag for tag in self.get_contents(self.tag) if not self.is_navigable_string(tag) and self.match(tag)]
1491
1492    def match(self, el: 'bs4.Tag') -> bool:
1493        """Match."""
1494
1495        return not self.is_doc(el) and self.is_tag(el) and self.match_selectors(el, self.selectors)
1496
1497
class SoupSieve(ct.Immutable):
    """
    Compiled Soup Sieve selector matching object.

    Holds the original pattern, its compiled selector list, and the namespaces,
    custom selectors, and flags it was compiled with. Each operation creates a
    fresh `CSSMatch` for the tag it is given.
    """

    pattern: str
    selectors: ct.SelectorList
    namespaces: Optional[ct.Namespaces]
    custom: Dict[str, str]
    flags: int

    __slots__ = ("pattern", "selectors", "namespaces", "custom", "flags", "_hash")

    def __init__(
        self,
        pattern: str,
        selectors: ct.SelectorList,
        namespaces: Optional[ct.Namespaces],
        custom: Optional[ct.CustomSelectors],
        flags: int
    ):
        """Initialize."""

        super().__init__(
            pattern=pattern,
            selectors=selectors,
            namespaces=namespaces,
            custom=custom,
            flags=flags
        )

    def match(self, tag: 'bs4.Tag') -> bool:
        """Match."""

        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).match(tag)

    def closest(self, tag: 'bs4.Tag') -> Optional['bs4.Tag']:
        """Match closest ancestor (the tag itself included); returns `None` when nothing matches."""

        return CSSMatch(self.selectors, tag, self.namespaces, self.flags).closest()

    def filter(self, iterable: Iterable['bs4.Tag']) -> List['bs4.Tag']:  # noqa A001
        """
        Filter.

        `CSSMatch` can cache certain searches for tags of the same document,
        so if we are given a tag, all tags are from the same document,
        and we can take advantage of the optimization.

        Any other kind of iterable could have tags from different documents or detached tags,
        so for those, we use a new `CSSMatch` for each item in the iterable.
        """

        if CSSMatch.is_tag(iterable):
            return CSSMatch(self.selectors, iterable, self.namespaces, self.flags).filter()
        else:
            return [node for node in iterable if not CSSMatch.is_navigable_string(node) and self.match(node)]

    def select_one(self, tag: 'bs4.Tag') -> Optional['bs4.Tag']:
        """Select a single tag; returns `None` when nothing matches."""

        tags = self.select(tag, limit=1)
        return tags[0] if tags else None

    def select(self, tag: 'bs4.Tag', limit: int = 0) -> List['bs4.Tag']:
        """Select the specified tags."""

        return list(self.iselect(tag, limit))

    def iselect(self, tag: 'bs4.Tag', limit: int = 0) -> Iterator['bs4.Tag']:
        """Iterate the specified tags."""

        yield from CSSMatch(self.selectors, tag, self.namespaces, self.flags).select(limit)

    def __repr__(self) -> str:  # pragma: no cover
        """Representation."""

        return "SoupSieve(pattern={!r}, namespaces={!r}, custom={!r}, flags={!r})".format(
            self.pattern,
            self.namespaces,
            self.custom,
            self.flags
        )

    __str__ = __repr__
1582
1583
# Register `SoupSieve` with the `css_types` pickle helper
# (presumably so compiled selector objects can be pickled - confirm in `css_types.pickle_register`).
ct.pickle_register(SoupSieve)
1585