1#!/usr/bin/env python
2import re
3from collections.abc import MutableMapping, Iterable
4from deepdiff.helper import OrderedSetPlus
5import logging
6
7from deepdiff.helper import (
8    strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE
9)
10
# Module-level logger named after this module; DeepSearch uses it to warn
# when it walks into a set/frozenset.
logger = logging.getLogger(__name__)


# Result of get_doc() for 'search_doc.rst'; assigned to grep.__doc__ below.
doc = get_doc('search_doc.rst')
15
16
class DeepSearch(dict):
    r"""
    **DeepSearch**

    Deep Search inside objects to find the item matching your criteria.

    The instance is itself a dictionary. After construction it may contain
    the keys ``matched_paths`` (the item matched a dictionary key or an
    attribute name that is part of the path), ``matched_values`` (the item
    matched a value) and ``unprocessed`` (paths of objects that could not be
    inspected). Keys whose content is empty are removed.

    **Parameters**

    obj : The object to search within

    item : The item to search for

    verbose_level : int >= 0, default = 1.
        Verbose level one shows the paths of found items.
        Verbose level 2 shows the path and value of the found items.

    exclude_paths: list, default = empty.
        List of paths to exclude from the report. None means no exclusion.

    exclude_regex_paths: list, default = empty.
        List of regular expressions (strings or compiled patterns). Any path
        in which one of them is found is excluded from the report.
        None means no exclusion.

    exclude_types: list, default = empty.
        List of object types to exclude from the report. None means no exclusion.

    case_sensitive: Boolean, default = False
        Only applies when the item is a string; any other item is always
        compared case sensitively.

    match_string: Boolean, default = False
        If True, the value of the object or its children have to exactly match the item.
        If False, the value of the item can be a part of the value of the object or its children

    use_regexp: Boolean, default = False
        If True, the item is compiled as a regular expression and searched for
        in paths and string values. When the search is case insensitive the
        expression is compiled with re.IGNORECASE.

    strict_checking: Boolean, default = True
        If True, it will check the type of the object to match, so when searching for '1234',
        it will NOT match the int 1234. Currently this only affects the numeric values searching.

    **Returns**

        A DeepSearch object that has the matched paths and matched values.

    **Raises**

        ValueError if an unknown keyword argument is passed.
        TypeError if use_regexp is True and the item can not be compiled.

    **Supported data types**

    int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects!

    **Examples**

    Importing
        >>> from deepdiff import DeepSearch
        >>> from pprint import pprint

    Search in list for string
        >>> obj = ["long somewhere", "string", 0, "somewhere great!"]
        >>> item = "somewhere"
        >>> ds = DeepSearch(obj, item, verbose_level=2)
        >>> print(ds)
        {'matched_values': {'root[0]': 'long somewhere', 'root[3]': 'somewhere great!'}}

    Search in nested data for string
        >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}]
        >>> item = "somewhere"
        >>> ds = DeepSearch(obj, item, verbose_level=2)
        >>> pprint(ds, indent=2)
        { 'matched_paths': {"root[1]['somewhere']": 'around'},
          'matched_values': { 'root[0]': 'something somewhere',
                              "root[1]['long']": 'somewhere'}}

    """

    # Number of "set detected" warnings emitted so far; capped in __search.
    warning_num = 0

    def __init__(self,
                 obj,
                 item,
                 exclude_paths=OrderedSetPlus(),
                 exclude_regex_paths=OrderedSetPlus(),
                 exclude_types=OrderedSetPlus(),
                 verbose_level=1,
                 case_sensitive=False,
                 match_string=False,
                 use_regexp=False,
                 strict_checking=True,
                 **kwargs):
        if kwargs:
            raise ValueError((
                "The following parameter(s) are not valid: %s\n"
                "The valid parameters are obj, item, exclude_paths, exclude_regex_paths,\n"
                "exclude_types, verbose_level, case_sensitive, match_string, use_regexp\n"
                "and strict_checking."
            ) % ', '.join(kwargs.keys()))

        # None is accepted as "nothing excluded" instead of failing on
        # iteration further down.
        if exclude_paths is None:
            exclude_paths = ()
        if exclude_regex_paths is None:
            exclude_regex_paths = ()
        if exclude_types is None:
            exclude_types = ()

        self.obj = obj
        # Case folding is only meaningful for string needles.
        self.case_sensitive = case_sensitive if isinstance(item, strings) else True
        self.exclude_paths = OrderedSetPlus(exclude_paths)
        self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in exclude_regex_paths]
        self.exclude_types = OrderedSetPlus(exclude_types)
        self.exclude_types_tuple = tuple(
            exclude_types)  # we need tuple for checking isinstance
        self.verbose_level = verbose_level
        self.update(
            matched_paths=self.__set_or_dict(),
            matched_values=self.__set_or_dict(),
            unprocessed=[])
        self.use_regexp = use_regexp
        self.strict_checking = strict_checking

        # Cases where user wants to match exact string item
        self.match_string = match_string

        if not strict_checking and isinstance(item, numbers):
            item = str(item)

        if self.use_regexp:
            # Do NOT lower-case the pattern text: that would turn escapes such
            # as \D, \S, \W or \B into their opposites. Ask the regex engine
            # for case-insensitive matching instead.
            flags = 0 if self.case_sensitive else re.IGNORECASE
            try:
                item = re.compile(item, flags)
            except TypeError as e:
                raise TypeError(f"The passed item of {item} is not usable for regex: {e}") from None
        elif not self.case_sensitive:
            # Plain-text search: candidates are lower-cased before comparing,
            # so the needle has to be lower-cased as well.
            item = item.lower()

        self.__search(obj, item, parents_ids=frozenset({id(obj)}))

        # Drop the report sections that ended up empty.
        empty_keys = [k for k, v in self.items() if not v]
        for k in empty_keys:
            del self[k]

    def __set_or_dict(self):
        """Return an empty report container: a dict (path -> value) for
        verbose_level >= 2, otherwise an ordered set of paths."""
        return dict_() if self.verbose_level >= 2 else OrderedSetPlus()

    def __report(self, report_key, key, value):
        """Record the path `key` under self[report_key]; the value is stored
        too when verbose_level >= 2."""
        if self.verbose_level >= 2:
            self[report_key][key] = value
        else:
            self[report_key].add(key)

    def __search_obj(self,
                     obj,
                     item,
                     parent,
                     parents_ids=frozenset(),
                     is_namedtuple=False):
        """Search a custom object (or namedtuple) through its attributes.

        The object is reported when it equals the item. Its attributes are
        then collected into a dictionary and searched with attribute-style
        paths. When the attributes can not be collected, the path is added to
        the 'unprocessed' list unless the object itself already matched.
        """
        found = False
        if obj == item:
            found = True
            # We report the match but also continue inside the match to see if there are
            # further matches inside the `looped` object.
            self.__report(report_key='matched_values', key=parent, value=obj)

        try:
            if is_namedtuple:
                obj = obj._asdict()
            else:
                # Skip magic methods. Slightly hacky, but unless people are defining
                # new magic methods they want to search, it should work fine.
                obj = {i: getattr(obj, i) for i in dir(obj)
                       if not (i.startswith('__') and i.endswith('__'))}
        except AttributeError:
            # Reading an attribute failed; fall back to the declared slots.
            try:
                obj = {i: getattr(obj, i) for i in obj.__slots__}
            except AttributeError:
                if not found:
                    self['unprocessed'].append("%s" % parent)

                return

        self.__search_dict(
            obj, item, parent, parents_ids, print_as_attribute=True)

    def __skip_this(self, item, parent):
        """Return True when the visited object must be ignored.

        `item` is the object currently being visited (not the needle) and
        `parent` is its path. The object is skipped when its path is listed
        in exclude_paths, when any exclude_regex_paths pattern is found in
        the path, or when the object is an instance of an excluded type.
        """
        if parent in self.exclude_paths:
            return True
        if any(exclude_regex_path.search(parent)
               for exclude_regex_path in self.exclude_regex_paths):
            return True
        return isinstance(item, self.exclude_types_tuple)

    def __search_dict(self,
                      obj,
                      item,
                      parent,
                      parents_ids=frozenset(),
                      print_as_attribute=False):
        """Search dictionaries.

        Every key is turned into a path (``parent[key]``, or ``parent.key``
        when print_as_attribute is True). A path that matches the item is
        reported under 'matched_paths', and the value is then searched
        recursively. Children whose id is already in parents_ids are skipped
        to avoid following reference cycles.
        """
        if print_as_attribute:
            parent_text = "%s.%s"
        else:
            parent_text = "%s[%s]"

        # In regex mode the compiled pattern is searched for directly, so the
        # textual form of the needle is only needed for plain-text matching.
        str_item = None if self.use_regexp else str(item)

        obj_keys = OrderedSetPlus(obj.keys())

        for item_key in obj_keys:
            if not print_as_attribute and isinstance(item_key, strings):
                item_key_str = "'%s'" % item_key
            else:
                item_key_str = item_key

            obj_child = obj[item_key]

            item_id = id(obj_child)

            if parents_ids and item_id in parents_ids:
                continue

            parents_ids_added = add_to_frozen_set(parents_ids, item_id)

            new_parent = parent_text % (parent, item_key_str)

            # Apply the exclusions before anything is reported, the same way
            # __search_iterable does, so an excluded path never shows up in
            # 'matched_paths'.
            if self.__skip_this(obj_child, new_parent):
                continue

            new_parent_cased = new_parent if self.case_sensitive else new_parent.lower()

            if self.use_regexp:
                path_matched = item.search(new_parent_cased)
            elif self.match_string:
                path_matched = str_item == new_parent_cased
            else:
                path_matched = str_item in new_parent_cased

            if path_matched:
                self.__report(
                    report_key='matched_paths',
                    key=new_parent,
                    value=obj_child)

            self.__search(
                obj_child,
                item,
                parent=new_parent,
                parents_ids=parents_ids_added)

    def __search_iterable(self,
                          obj,
                          item,
                          parent="root",
                          parents_ids=frozenset()):
        """Search iterables except dictionaries and strings.

        Members are addressed by their enumeration index (``parent[i]``).
        A member equal to the item is reported directly (only outside regex
        mode); any other member is searched recursively.
        """
        for i, thing in enumerate(obj):
            new_parent = "{}[{}]".format(parent, i)
            if self.__skip_this(thing, parent=new_parent):
                continue

            if self.case_sensitive or not isinstance(thing, strings):
                thing_cased = thing
            else:
                thing_cased = thing.lower()

            if not self.use_regexp and thing_cased == item:
                self.__report(
                    report_key='matched_values', key=new_parent, value=thing)
            else:
                item_id = id(thing)
                # Already on the current path: following it again would loop.
                if parents_ids and item_id in parents_ids:
                    continue
                parents_ids_added = add_to_frozen_set(parents_ids, item_id)
                self.__search(thing, item, new_parent, parents_ids_added)

    def __search_str(self, obj, item, parent):
        """Compare strings.

        Regex mode searches the pattern in the text. Otherwise match_string
        selects between equality and substring containment.
        """
        obj_text = obj if self.case_sensitive else obj.lower()

        if self.use_regexp:
            is_matched = item.search(obj_text)
        elif self.match_string:
            is_matched = item == obj_text
        else:
            is_matched = item in obj_text

        if is_matched:
            self.__report(report_key='matched_values', key=parent, value=obj)

    def __search_numbers(self, obj, item, parent):
        """Compare a number with the item.

        Equal values always match. When strict_checking is off, the textual
        form of the number is also compared with the item, or searched with
        the pattern in regex mode.
        """
        if (
            item == obj or (
                not self.strict_checking and (
                    item == str(obj) or (
                        self.use_regexp and item.search(str(obj))
                    )
                )
            )
        ):
            self.__report(report_key='matched_values', key=parent, value=obj)

    def __search_tuple(self, obj, item, parent, parents_ids):
        """Search a tuple, treating it as a namedtuple when it has `_asdict`."""
        # Checking to see if it has _asdict. Which probably means it is a named
        # tuple.
        try:
            obj._asdict
        # It must be a normal tuple
        except AttributeError:
            self.__search_iterable(obj, item, parent, parents_ids)
        # We assume it is a namedtuple then
        else:
            self.__search_obj(
                obj, item, parent, parents_ids, is_namedtuple=True)

    def __search(self, obj, item, parent="root", parents_ids=frozenset()):
        """The main search method: dispatch on the type of `obj`."""
        # The exclusions apply to the object being visited, not to the needle.
        if self.__skip_this(obj, parent):
            return

        elif isinstance(obj, strings) and isinstance(item, (strings, RE_COMPILED_TYPE)):
            self.__search_str(obj, item, parent)

        elif isinstance(obj, strings) and isinstance(item, numbers):
            # A numeric needle never matches inside a string.
            return

        elif isinstance(obj, numbers):
            self.__search_numbers(obj, item, parent)

        elif isinstance(obj, MutableMapping):
            self.__search_dict(obj, item, parent, parents_ids)

        elif isinstance(obj, tuple):
            self.__search_tuple(obj, item, parent, parents_ids)

        elif isinstance(obj, (set, frozenset)):
            if self.warning_num < 10:
                logger.warning(
                    "Set item detected in the path. "
                    "'set' objects do NOT support indexing. But DeepSearch will still report a path."
                )
                self.warning_num += 1
            self.__search_iterable(obj, item, parent, parents_ids)

        elif isinstance(obj, Iterable) and not isinstance(obj, strings):
            self.__search_iterable(obj, item, parent, parents_ids)

        else:
            self.__search_obj(obj, item, parent, parents_ids)
338
339
class grep:
    # The documentation of this helper is the text held in the module-level
    # `doc` variable.
    __doc__ = doc

    def __init__(self, item, **kwargs):
        # Only remember the needle and the search options here; the search
        # itself runs once the instance is used on the right of `|`.
        self.item, self.kwargs = item, kwargs

    def __ror__(self, other):
        # `other | grep(item, **options)` ends up here: search `other` for
        # the stored item, forwarding the stored options to DeepSearch.
        search_options = self.kwargs
        return DeepSearch(obj=other, item=self.item, **search_options)
351
352
if __name__ == "__main__":  # pragma: no cover
    # Executed as a script: run the doctest examples found in this module's
    # docstrings.
    import doctest
    doctest.testmod()
356