#!/usr/bin/env python
import re
from collections.abc import MutableMapping, Iterable
from deepdiff.helper import OrderedSetPlus
import logging

from deepdiff.helper import (
    strings, numbers, add_to_frozen_set, get_doc, dict_, RE_COMPILED_TYPE
)

logger = logging.getLogger(__name__)


doc = get_doc('search_doc.rst')


class DeepSearch(dict):
    r"""
    **DeepSearch**

    Deep Search inside objects to find the item matching your criteria.

    **Parameters**

    obj : The object to search within

    item : The item to search for

    verbose_level : int >= 0, default = 1.
        Verbose level one shows the paths of found items.
        Verbose level 2 shows the path and value of the found items.

    exclude_paths: iterable of str, default = empty.
        List of paths to exclude from the report.

    exclude_regex_paths: iterable of regex patterns, default = empty.
        Every entry is compiled with ``re.compile``; any path for which one of
        the patterns finds a match is excluded from the report.

    exclude_types: iterable of types, default = empty.
        List of object types to exclude from the report.

    case_sensitive: Boolean, default = False
        Only honoured when the item is a string; any other kind of item is
        always compared case sensitively.

    match_string: Boolean, default = False
        If True, the value of the object or its children have to exactly match the item.
        If False, the value of the item can be a part of the value of the object or its children

    use_regexp: Boolean, default = False
        If True, the item is compiled with ``re.compile`` and matched with
        ``search`` against string values and paths.

    strict_checking: Boolean, default = True
        If True, it will check the type of the object to match, so when searching for '1234',
        it will NOT match the int 1234. Currently this only affects the numeric values searching.

    **Returns**

        A DeepSearch object that has the matched paths and matched values.

    **Raises**

        ValueError if an unknown keyword argument is passed.
        TypeError if use_regexp is True and the item cannot be compiled as a regex.

    **Supported data types**

    int, string, unicode, dictionary, list, tuple, set, frozenset, OrderedDict, NamedTuple and custom objects!

    **Examples**

    Importing
        >>> from deepdiff import DeepSearch
        >>> from pprint import pprint

    Search in list for string
        >>> obj = ["long somewhere", "string", 0, "somewhere great!"]
        >>> item = "somewhere"
        >>> ds = DeepSearch(obj, item, verbose_level=2)
        >>> print(ds)
        {'matched_values': {'root[3]': 'somewhere great!', 'root[0]': 'long somewhere'}}

    Search in nested data for string
        >>> obj = ["something somewhere", {"long": "somewhere", "string": 2, 0: 0, "somewhere": "around"}]
        >>> item = "somewhere"
        >>> ds = DeepSearch(obj, item, verbose_level=2)
        >>> pprint(ds, indent=2)
        { 'matched_paths': {"root[1]['somewhere']": 'around'},
          'matched_values': { 'root[0]': 'something somewhere',
                              "root[1]['long']": 'somewhere'}}

    """

    # Number of "set detected" warnings emitted so far. The first increment on
    # an instance shadows this class attribute, so the cap applies per instance.
    warning_num = 0

    def __init__(self,
                 obj,
                 item,
                 exclude_paths=OrderedSetPlus(),
                 exclude_regex_paths=OrderedSetPlus(),
                 exclude_types=OrderedSetPlus(),
                 verbose_level=1,
                 case_sensitive=False,
                 match_string=False,
                 use_regexp=False,
                 strict_checking=True,
                 **kwargs):
        """Configure the search, run it on ``obj`` and keep only non-empty result keys."""
        if kwargs:
            raise ValueError((
                "The following parameter(s) are not valid: %s\n"
                "The valid parameters are obj, item, exclude_paths, exclude_regex_paths,\n"
                "exclude_types, verbose_level, case_sensitive, match_string, use_regexp\n"
                "and strict_checking."
            ) % ', '.join(kwargs.keys()))

        self.obj = obj
        # Case folding only makes sense for string items; everything else is
        # compared as-is.
        self.case_sensitive = case_sensitive if isinstance(item, strings) else True
        item = item if self.case_sensitive else item.lower()
        # The default arguments are shared objects, so they are copied here and
        # never mutated.
        self.exclude_paths = OrderedSetPlus(exclude_paths)
        self.exclude_regex_paths = [re.compile(exclude_regex_path) for exclude_regex_path in exclude_regex_paths]
        self.exclude_types = OrderedSetPlus(exclude_types)
        self.exclude_types_tuple = tuple(
            exclude_types)  # we need tuple for checking isinstance
        # verbose_level must be set before __set_or_dict() is called below.
        self.verbose_level = verbose_level
        self.update(
            matched_paths=self.__set_or_dict(),
            matched_values=self.__set_or_dict(),
            unprocessed=[])
        self.use_regexp = use_regexp
        if not strict_checking and isinstance(item, numbers):
            # Loose mode compares numbers through their string form.
            item = str(item)
        if self.use_regexp:
            try:
                item = re.compile(item)
            except TypeError as e:
                raise TypeError(f"The passed item of {item} is not usable for regex: {e}") from None
        self.strict_checking = strict_checking

        # Cases where user wants to match exact string item
        self.match_string = match_string

        self.__search(obj, item, parents_ids=frozenset({id(obj)}))

        # Drop result categories that ended up empty so that a search with no
        # hits compares equal to {}.
        empty_keys = [k for k, v in self.items() if not v]

        for k in empty_keys:
            del self[k]

    def __set_or_dict(self):
        """Return an empty result container: a mapping (path -> value) for
        verbose_level >= 2, otherwise an ordered set of paths."""
        return dict_() if self.verbose_level >= 2 else OrderedSetPlus()

    def __report(self, report_key, key, value):
        """Record a match under ``self[report_key]``.

        ``key`` is the path; ``value`` is stored only when verbose_level >= 2.
        """
        if self.verbose_level >= 2:
            self[report_key][key] = value
        else:
            self[report_key].add(key)

    def __search_obj(self,
                     obj,
                     item,
                     parent,
                     parents_ids=frozenset(),
                     is_namedtuple=False):
        """Search a custom object (or namedtuple) through its attributes."""
        found = False
        if obj == item:
            found = True
            # We report the match but also continue inside the match to see if there are
            # further matches inside the `looped` object.
            self.__report(report_key='matched_values', key=parent, value=obj)

        try:
            if is_namedtuple:
                obj = obj._asdict()
            else:
                # Skip magic methods. Slightly hacky, but unless people are defining
                # new magic methods they want to search, it should work fine.
                obj = {i: getattr(obj, i) for i in dir(obj)
                       if not (i.startswith('__') and i.endswith('__'))}
        except AttributeError:
            try:
                obj = {i: getattr(obj, i) for i in obj.__slots__}
            except AttributeError:
                # Attributes could not be enumerated either way; note the path
                # as unprocessed unless the object itself already matched.
                if not found:
                    self['unprocessed'].append("%s" % parent)

                return

        self.__search_dict(
            obj, item, parent, parents_ids, print_as_attribute=True)

    def __skip_this(self, item, parent):
        """Return True when ``parent`` is an excluded path (exact or regex) or
        ``item`` is an instance of one of the excluded types."""
        if parent in self.exclude_paths:
            return True
        # any() over a generator stops at the first matching pattern.
        if any(exclude_regex_path.search(parent)
               for exclude_regex_path in self.exclude_regex_paths):
            return True
        # isinstance() against an empty tuple is always False.
        return isinstance(item, self.exclude_types_tuple)

    def __search_dict(self,
                      obj,
                      item,
                      parent,
                      parents_ids=frozenset(),
                      print_as_attribute=False):
        """Search dictionaries.

        Every key contributes a child path (``parent.key`` when
        print_as_attribute is True, ``parent[key]`` otherwise). The path itself
        is matched against the item and reported under 'matched_paths'; the
        value is then searched recursively.
        """
        if print_as_attribute:
            parent_text = "%s.%s"
        else:
            parent_text = "%s[%s]"

        obj_keys = OrderedSetPlus(obj.keys())

        # Loop-invariant: textual form of the item used for path matching.
        str_item = str(item)

        for item_key in obj_keys:
            if not print_as_attribute and isinstance(item_key, strings):
                item_key_str = "'%s'" % item_key
            else:
                item_key_str = item_key

            obj_child = obj[item_key]

            item_id = id(obj_child)

            # Already on the current path: skip to avoid infinite recursion.
            if parents_ids and item_id in parents_ids:
                continue

            # presumably returns parents_ids extended with item_id - see deepdiff.helper
            parents_ids_added = add_to_frozen_set(parents_ids, item_id)

            new_parent = parent_text % (parent, item_key_str)
            new_parent_cased = new_parent if self.case_sensitive else new_parent.lower()

            if self.use_regexp:
                # item is a compiled pattern here; comparing its repr against
                # the path would be meaningless, so only the regex is used.
                path_matched = bool(item.search(new_parent_cased))
            elif self.match_string:
                path_matched = str_item == new_parent_cased
            else:
                path_matched = str_item in new_parent_cased

            if path_matched:
                self.__report(
                    report_key='matched_paths',
                    key=new_parent,
                    value=obj_child)

            self.__search(
                obj_child,
                item,
                parent=new_parent,
                parents_ids=parents_ids_added)

    def __search_iterable(self,
                          obj,
                          item,
                          parent="root",
                          parents_ids=frozenset()):
        """Search iterables except dictionaries, sets and strings."""
        for i, thing in enumerate(obj):
            new_parent = "{}[{}]".format(parent, i)
            if self.__skip_this(thing, parent=new_parent):
                continue

            if self.case_sensitive or not isinstance(thing, strings):
                thing_cased = thing
            else:
                thing_cased = thing.lower()

            if not self.use_regexp and thing_cased == item:
                self.__report(
                    report_key='matched_values', key=new_parent, value=thing)
            else:
                item_id = id(thing)
                # Already on the current path: skip to avoid infinite recursion.
                if parents_ids and item_id in parents_ids:
                    continue
                parents_ids_added = add_to_frozen_set(parents_ids, item_id)
                self.__search(thing, item, new_parent, parents_ids_added)

    def __search_str(self, obj, item, parent):
        """Compare strings"""
        obj_text = obj if self.case_sensitive else obj.lower()

        is_matched = False
        if self.use_regexp:
            # A match object is truthy, None is falsy.
            is_matched = item.search(obj_text)
        elif (self.match_string and item == obj_text) or (not self.match_string and item in obj_text):
            is_matched = True
        if is_matched:
            self.__report(report_key='matched_values', key=parent, value=obj)

    def __search_numbers(self, obj, item, parent):
        """Compare a number against the item.

        Equality always counts. When strict_checking is off, the string form of
        the number is also compared with the item or, in regex mode, searched
        with the compiled pattern.
        """
        if (
            item == obj or (
                not self.strict_checking and (
                    item == str(obj) or (
                        self.use_regexp and item.search(str(obj))
                    )
                )
            )
        ):
            self.__report(report_key='matched_values', key=parent, value=obj)

    def __search_tuple(self, obj, item, parent, parents_ids):
        """Search a tuple, treating it as a namedtuple when it has ``_asdict``."""
        # Checking to see if it has _asdict. Which probably means it is a named
        # tuple.
        try:
            obj._asdict
        # It must be a normal tuple
        except AttributeError:
            self.__search_iterable(obj, item, parent, parents_ids)
        # We assume it is a namedtuple then
        else:
            self.__search_obj(
                obj, item, parent, parents_ids, is_namedtuple=True)

    def __search(self, obj, item, parent="root", parents_ids=frozenset()):
        """The main search method: dispatch on the type of ``obj``."""
        # The item check is kept for backward compatibility; the obj check makes
        # exclude_types apply to dict values and attributes the same way
        # __search_iterable already applies it to list elements.
        if self.__skip_this(item, parent) or isinstance(obj, self.exclude_types_tuple):
            return

        elif isinstance(obj, strings) and isinstance(item, (strings, RE_COMPILED_TYPE)):
            self.__search_str(obj, item, parent)

        elif isinstance(obj, strings) and isinstance(item, numbers):
            # A numeric item never matches a string object.
            return

        elif isinstance(obj, numbers):
            self.__search_numbers(obj, item, parent)

        elif isinstance(obj, MutableMapping):
            self.__search_dict(obj, item, parent, parents_ids)

        elif isinstance(obj, tuple):
            self.__search_tuple(obj, item, parent, parents_ids)

        elif isinstance(obj, (set, frozenset)):
            if self.warning_num < 10:
                logger.warning(
                    "Set item detected in the path. "
                    "'set' objects do NOT support indexing. But DeepSearch will still report a path."
                )
                self.warning_num += 1
            self.__search_iterable(obj, item, parent, parents_ids)

        elif isinstance(obj, Iterable) and not isinstance(obj, strings):
            self.__search_iterable(obj, item, parent, parents_ids)

        else:
            self.__search_obj(obj, item, parent, parents_ids)


class grep:
    __doc__ = doc

    def __init__(self,
                 item,
                 **kwargs):
        # Store the item and the DeepSearch keyword arguments until an object
        # is piped in with ``obj | grep(item, ...)``.
        self.item = item
        self.kwargs = kwargs

    def __ror__(self, other):
        # ``other | grep(...)`` runs a DeepSearch on ``other``.
        return DeepSearch(obj=other, item=self.item, **self.kwargs)


if __name__ == "__main__":  # pragma: no cover
    import doctest
    doctest.testmod()