"""
Approximate the Unix find(1) command and return a list of paths that
meet the specified criteria.

The options include match criteria:
    name    = file-glob                 # case sensitive
    iname   = file-glob                 # case insensitive
    regex   = file-regex                # case sensitive
    iregex  = file-regex                # case insensitive
    type    = file-types                # match any listed type
    owner   = users                     # match any listed user ('user' is an alias)
    group   = groups                    # match any listed group
    size    = [+-]number[size-unit]     # default unit = byte
    mtime   = interval                  # modified since date
    grep    = regex                     # search file contents
and/or actions:
    delete [= file-types]               # default type = 'f'
    exec    = command [arg ...]         # where {} is replaced by pathname
    print  [= print-opts]
and/or depth criteria:
    maxdepth = maximum depth to traverse in path
    mindepth = minimum depth to traverse before checking files or directories

The default action is 'print=path'.

file-glob:
    *                = match zero or more chars
    ?                = match any char
    [abc]            = match a, b, or c
    [!abc] or [^abc] = match anything except a, b, and c
    [x-y]            = match chars x through y
    [!x-y] or [^x-y] = match anything except chars x through y
    {a,b,c}          = match a or b or c

file-regex:
    a Python re (regular expression) pattern

file-types: a string of one or more of the following:
    a: all file types
    b: block device
    c: character device
    d: directory
    p: FIFO (named pipe)
    f: plain file
    l: symlink
    s: socket

users:
    a space and/or comma separated list of user names and/or uids

groups:
    a space and/or comma separated list of group names and/or gids

size-unit:
    b: bytes
    k: kilobytes
    m: megabytes
    g: gigabytes
    t: terabytes

interval:
    [<num>w] [<num>[d]] [<num>h] [<num>m] [<num>s]

    where:
        w: week
        d: day
        h: hour
        m: minute
        s: second

print-opts: a comma and/or space separated list of one or more of
the following:

    group: group name
    md5:   MD5 digest of file contents
    mode:  file permissions (as integer)
    mtime: last modification time (as time_t)
    name:  file basename
    path:  file absolute path
    size:  file size in bytes
    type:  file type
    user:  user name
"""


import logging
import os
import re
import shutil
import stat
import sys
import time
from subprocess import PIPE, Popen

import salt.defaults.exitcodes
import salt.utils.args
import salt.utils.hashutils
import salt.utils.path
import salt.utils.stringutils
from salt.utils.filebuffer import BufferedReader

try:
    import grp
    import pwd

    # TODO: grp and pwd are both used in the code, we better make sure that
    # that code never gets run if importing them does not succeed
except ImportError:
    pass


# Set up logger
log = logging.getLogger(__name__)

# Bit flags returned by Option.requires() describing what an option needs
# in order to be evaluated; Finder uses them to run cheap checks first.
_REQUIRES_PATH = 1
_REQUIRES_STAT = 2
_REQUIRES_CONTENTS = 4

# Two-way mapping between the one-letter file type codes and the
# corresponding stat.S_IF* format constants.
_FILE_TYPES = {
    "b": stat.S_IFBLK,
    "c": stat.S_IFCHR,
    "d": stat.S_IFDIR,
    "f": stat.S_IFREG,
    "l": stat.S_IFLNK,
    "p": stat.S_IFIFO,
    "s": stat.S_IFSOCK,
    stat.S_IFBLK: "b",
    stat.S_IFCHR: "c",
    stat.S_IFDIR: "d",
    stat.S_IFREG: "f",
    stat.S_IFLNK: "l",
    stat.S_IFIFO: "p",
    stat.S_IFSOCK: "s",
}

# Every component is optional, so an empty string (or a bare sign) matches
# this pattern with all unit groups set to None.
_INTERVAL_REGEX = re.compile(
    r"""
    ^\s*
    (?P<modifier>[+-]?)
    (?: (?P<week>   \d+ (?:\.\d*)? ) \s* [wW]  )? \s*
    (?: (?P<day>    \d+ (?:\.\d*)? ) \s* [dD]  )? \s*
    (?: (?P<hour>   \d+ (?:\.\d*)? ) \s* [hH]  )? \s*
    (?: (?P<minute> \d+ (?:\.\d*)? ) \s* [mM]  )? \s*
    (?: (?P<second> \d+ (?:\.\d*)? ) \s* [sS]  )? \s*
    $
    """,
    flags=re.VERBOSE,
)

# Path components that do not count towards the depth of a path.
_PATH_DEPTH_IGNORED = (os.path.sep, os.path.curdir, os.path.pardir)


def _parse_interval(value):
    """
    Convert an interval string like 1w3d6h into the number of seconds, time
    resolution (1 unit of the smallest specified time unit) and the modifier(
    '+', '-', or '').
        w = week
        d = day
        h = hour
        m = minute
        s = second

    When the value contains no unit component at all (for example an empty
    string), the returned seconds are 0 and the resolution is None.

    Raises ValueError when the value does not match the interval syntax.
    """
    match = _INTERVAL_REGEX.match(str(value))
    if match is None:
        raise ValueError("invalid time interval: '{}'".format(value))

    result = 0
    resolution = None
    # Iterate from the smallest unit upwards so that the first unit found
    # becomes the resolution.
    for name, multiplier in [
        ("second", 1),
        ("minute", 60),
        ("hour", 60 * 60),
        ("day", 60 * 60 * 24),
        ("week", 60 * 60 * 24 * 7),
    ]:
        if match.group(name) is not None:
            result += float(match.group(name)) * multiplier
            if resolution is None:
                resolution = multiplier

    return result, resolution, match.group("modifier")


def _parse_size(value):
    """
    Convert a size specification such as '+10k' into a (min_size, max_size)
    tuple of byte counts.

    A leading '-' yields the range 0..size, a leading '+' yields
    size..sys.maxsize, and no prefix yields the range covered by one unit
    starting at size (size..size + unit - 1).  The optional, case-insensitive
    unit suffix is one of b, k, m, g or t; the default unit is bytes.

    Raises ValueError when the numeric part cannot be parsed.
    """
    # str() lets numeric values (e.g. an int parsed from the CLI) through,
    # mirroring what _parse_interval does.
    scalar = str(value).strip()

    if scalar.startswith(("-", "+")):
        style = scalar[0]
        scalar = scalar[1:]
    else:
        style = "="

    if scalar:
        multiplier = {
            "b": 2 ** 0,
            "k": 2 ** 10,
            "m": 2 ** 20,
            "g": 2 ** 30,
            "t": 2 ** 40,
        }.get(scalar[-1].lower())
        if multiplier:
            scalar = scalar[:-1].strip()
        else:
            multiplier = 1
    else:
        multiplier = 1

    try:
        num = int(scalar) * multiplier
    except ValueError:
        # Not an integer; fall back to a fractional number of units.
        try:
            num = int(float(scalar) * multiplier)
        except ValueError:
            raise ValueError('invalid size: "{}"'.format(value))

    if style == "-":
        min_size = 0
        max_size = num
    elif style == "+":
        min_size = num
        max_size = sys.maxsize
    else:
        min_size = num
        max_size = num + multiplier - 1

    return min_size, max_size


def _glob_to_regex(pattern):
    """
    Translate the simple glob wildcards of a filename pattern into an
    anchored regular expression string.

    '.' is escaped, '?' matches exactly one character and '*' matches zero
    or more characters.  All other characters are passed to the regular
    expression unchanged.
    """
    # Dots must be escaped first so the dots introduced by the wildcard
    # translations below are left alone.
    return pattern.replace(".", "\\.").replace("?", ".").replace("*", ".*") + "$"


class Option:
    """
    Abstract base class for all find options.
    """

    def requires(self):
        """
        Return the _REQUIRES_* flags this option needs to be evaluated.
        """
        return _REQUIRES_PATH


class NameOption(Option):
    """
    Match files with a case-sensitive glob filename pattern.
    Note: this is the 'basename' portion of a pathname.
    The option name is 'name', e.g. {'name' : '*.txt'}.
    """

    def __init__(self, key, value):
        self.regex = re.compile(_glob_to_regex(value))

    def match(self, dirname, filename, fstat):
        return self.regex.match(filename)


class InameOption(Option):
    """
    Match files with a case-insensitive glob filename pattern.
    Note: this is the 'basename' portion of a pathname.
    The option name is 'iname', e.g. {'iname' : '*.TXT'}.
    """

    def __init__(self, key, value):
        self.regex = re.compile(_glob_to_regex(value), re.IGNORECASE)

    def match(self, dirname, filename, fstat):
        return self.regex.match(filename)


class RegexOption(Option):
    """
    Match files with a case-sensitive regular expression.
    Note: this is the 'basename' portion of a pathname.
    The option name is 'regex', e.g. {'regex' : '.*\\.txt'}.
    """

    def __init__(self, key, value):
        try:
            self.regex = re.compile(value)
        except re.error:
            raise ValueError('invalid regular expression: "{}"'.format(value))

    def match(self, dirname, filename, fstat):
        return self.regex.match(filename)


class IregexOption(Option):
    """
    Match files with a case-insensitive regular expression.
    Note: this is the 'basename' portion of a pathname.
    The option name is 'iregex', e.g. {'iregex' : '.*\\.txt'}.
    """

    def __init__(self, key, value):
        try:
            self.regex = re.compile(value, re.IGNORECASE)
        except re.error:
            raise ValueError('invalid regular expression: "{}"'.format(value))

    def match(self, dirname, filename, fstat):
        return self.regex.match(filename)


class TypeOption(Option):
    """
    Match files by their file type(s).
    The file type(s) are specified as an optionally comma and/or space
    separated list of letters.
        b = block device
        c = character device
        d = directory
        f = regular (plain) file
        l = symbolic link
        p = FIFO (named pipe)
        s = socket
    The option name is 'type', e.g. {'type' : 'd'} or {'type' : 'bc'}.
    """

    def __init__(self, key, value):
        # remove whitespace and commas
        value = "".join(value.strip().replace(",", "").split())
        self.ftypes = set()
        for ftype in value:
            try:
                self.ftypes.add(_FILE_TYPES[ftype])
            except KeyError:
                raise ValueError('invalid file type "{}"'.format(ftype))

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        return stat.S_IFMT(fstat[stat.ST_MODE]) in self.ftypes


class OwnerOption(Option):
    """
    Match files by their owner name(s) and/or uid(s), e.g. 'root'.
    The names are a space and/or comma separated list of names and/or integers.
    A match occurs when the file's uid matches any user specified.
    The option name is 'owner', e.g. {'owner' : 'root'}.
    """

    def __init__(self, key, value):
        self.uids = set()
        for name in value.replace(",", " ").split():
            if name.isdigit():
                self.uids.add(int(name))
            else:
                try:
                    # Look up each individual name, not the whole list string.
                    self.uids.add(pwd.getpwnam(name).pw_uid)
                except KeyError:
                    raise ValueError('no such user "{}"'.format(name))

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        return fstat[stat.ST_UID] in self.uids


class UserOption(OwnerOption):
    """
    Alias of OwnerOption, so the option name 'user' is accepted as well,
    e.g. {'user' : 'root'}.
    """


class GroupOption(Option):
    """
    Match files by their group name(s) and/or gid(s), e.g. 'admin'.
    The names are a space and/or comma separated list of names and/or integers.
    A match occurs when the file's gid matches any group specified.
    The option name is 'group', e.g. {'group' : 'admin'}.
    """

    def __init__(self, key, value):
        self.gids = set()
        for name in value.replace(",", " ").split():
            if name.isdigit():
                self.gids.add(int(name))
            else:
                try:
                    self.gids.add(grp.getgrnam(name).gr_gid)
                except KeyError:
                    raise ValueError('no such group "{}"'.format(name))

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        return fstat[stat.ST_GID] in self.gids


class SizeOption(Option):
    """
    Match files by their size.
    Prefix the size with '-' to find files the specified size and smaller.
    Prefix the size with '+' to find files the specified size and larger.
    Without the +/- prefix, match files whose size falls within one unit
    starting at the given size (an exact match when the unit is bytes).
    The size can be suffixed with (case-insensitive) suffixes:
        b = bytes
        k = kilobytes
        m = megabytes
        g = gigabytes
        t = terabytes
    The option name is 'size', e.g. {'size' : '+1G'}.
    """

    def __init__(self, key, value):
        self.min_size, self.max_size = _parse_size(value)

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        return self.min_size <= fstat[stat.ST_SIZE] <= self.max_size


class MtimeOption(Option):
    """
    Match files by their modification time relative to now.
    With a '-' prefix, match files modified within the given interval;
    otherwise match files last modified at least that long ago.
    The option name is 'mtime', e.g. {'mtime' : '3d'}.
    The value format is [<num>w] [<num>[d]] [<num>h] [<num>m] [<num>s]
    where num is an integer or float and the case-insensitive suffixes are:
        w = week
        d = day
        h = hour
        m = minute
        s = second
    Whitespace is ignored in the value.
    """

    def __init__(self, key, value):
        secs, resolution, modifier = _parse_interval(value)
        if resolution is None:
            # No unit component was given (e.g. '' or a bare sign); without
            # this check the division below fails with a TypeError.
            raise ValueError("invalid time interval: '{}'".format(value))
        # Truncate the interval to a whole number of its smallest unit.
        self.mtime = time.time() - int(secs / resolution) * resolution
        self.modifier = modifier

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        if self.modifier == "-":
            return fstat[stat.ST_MTIME] >= self.mtime
        else:
            return fstat[stat.ST_MTIME] <= self.mtime


class GrepOption(Option):
    """Match files when a pattern occurs within the file.
    The option name is 'grep', e.g. {'grep' : '(foo)|(bar)'}.
    """

    def __init__(self, key, value):
        try:
            self.regex = re.compile(value)
        except re.error:
            raise ValueError('invalid regular expression: "{}"'.format(value))

    def requires(self):
        return _REQUIRES_CONTENTS | _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        # Only the contents of regular files are searched.
        if not stat.S_ISREG(fstat[stat.ST_MODE]):
            return None
        dfilename = os.path.join(dirname, filename)
        with BufferedReader(dfilename, mode="rb") as bread:
            for chunk in bread:
                if self.regex.search(chunk):
                    return dfilename
        return None


class PrintOption(Option):
    """
    Return information about a matched file.
    Print options are specified as a comma and/or space separated list of
    one or more of the following:
        group  = group name
        md5    = MD5 digest of file contents
        mode   = file mode (as integer)
        mtime  = last modification time (as time_t)
        name   = file basename
        path   = file absolute path
        size   = file size in bytes
        type   = file type
        user   = user name
    """

    def __init__(self, key, value):
        self.need_stat = False
        self.print_title = False
        self.fmt = []
        for arg in value.replace(",", " ").split():
            self.fmt.append(arg)
            # Everything except the name and path comes from the stat result.
            if arg not in ["name", "path"]:
                self.need_stat = True
        if not self.fmt:
            self.fmt.append("path")

    def requires(self):
        return _REQUIRES_STAT if self.need_stat else _REQUIRES_PATH

    def execute(self, fullpath, fstat, test=False):
        """
        Return the requested fields for fullpath: a single value when one
        field was requested, otherwise a list in the requested order.
        Unrecognised field names are skipped.
        """
        result = []
        for arg in self.fmt:
            if arg == "path":
                result.append(fullpath)
            elif arg == "name":
                result.append(os.path.basename(fullpath))
            elif arg == "size":
                result.append(fstat[stat.ST_SIZE])
            elif arg == "type":
                result.append(_FILE_TYPES.get(stat.S_IFMT(fstat[stat.ST_MODE]), "?"))
            elif arg == "mode":
                # Keep only the rwx permission bits for user, group and other.
                result.append(fstat[stat.ST_MODE] & 0o777)
            elif arg == "mtime":
                result.append(fstat[stat.ST_MTIME])
            elif arg == "user":
                uid = fstat[stat.ST_UID]
                try:
                    result.append(pwd.getpwuid(uid).pw_name)
                except KeyError:
                    # No passwd entry for this uid; report the number.
                    result.append(uid)
            elif arg == "group":
                gid = fstat[stat.ST_GID]
                try:
                    result.append(grp.getgrgid(gid).gr_name)
                except KeyError:
                    # No group entry for this gid; report the number.
                    result.append(gid)
            elif arg == "md5":
                if stat.S_ISREG(fstat[stat.ST_MODE]):
                    md5digest = salt.utils.hashutils.get_hash(fullpath, "md5")
                    result.append(md5digest)
                else:
                    result.append("")

        if len(result) == 1:
            return result[0]
        else:
            return result


class DeleteOption(TypeOption):
    """
    Deletes matched file.
    Delete options are one or more of the following:
        a: all file types
        b: block device
        c: character device
        d: directory
        p: FIFO (named pipe)
        f: plain file
        l: symlink
        s: socket
    """

    def __init__(self, key, value):
        if "a" in value:
            value = "bcdpfls"
        super().__init__(key, value)

    def execute(self, fullpath, fstat, test=False):
        """
        Delete fullpath and return it, or return None when deletion fails.
        In test mode nothing is deleted and fullpath is returned.
        """
        if test:
            return fullpath
        try:
            if os.path.isfile(fullpath) or os.path.islink(fullpath):
                os.remove(fullpath)
            elif os.path.isdir(fullpath):
                shutil.rmtree(fullpath)
        except OSError as exc:
            # Best effort: a failed deletion is simply not reported as a result.
            log.debug("Unable to delete %s: %s", fullpath, exc)
            return None
        return fullpath


class ExecOption(Option):
    """
    Execute the given command, {} replaced by filename.
    Quote the {} if commands might include whitespace.
    """

    def __init__(self, key, value):
        self.command = value

    def execute(self, fullpath, fstat, test=False):
        """
        Run the command for fullpath and return a string containing the
        command and its standard output, or '<fullpath>: Failed' when the
        command could not be run.  The test flag is not consulted here.
        """
        # Bound before the try block so the except handler can always log it.
        command = self.command
        try:
            command = self.command.replace("{}", fullpath)
            argv = salt.utils.args.shlex_split(command)
            log.debug("Executing command: %s", argv)
            p = Popen(argv, stdout=PIPE, stderr=PIPE)
            (out, err) = p.communicate()
            if err:
                log.error(
                    "Error running command: %s\n\n%s",
                    command,
                    salt.utils.stringutils.to_str(err),
                )
            return "{}:\n{}\n".format(command, salt.utils.stringutils.to_str(out))

        except Exception as e:  # pylint: disable=broad-except
            log.error('Exception while executing command "%s":\n\n%s', command, e)
            return "{}: Failed".format(fullpath)


class Finder:
    """
    Walk a directory tree and apply match criteria and actions to its entries.
    """

    def __init__(self, options):
        """
        Build the criteria and actions from a mapping of option names to
        values.  'mindepth', 'maxdepth' and 'test' are consumed directly;
        keys starting with '_' are ignored; every other key must name an
        option class in this module ('<Key>Option').

        Raises ValueError for an unknown option, a missing value, or a value
        the option rejects.
        """
        self.actions = []
        criteria = {
            _REQUIRES_PATH: list(),
            _REQUIRES_STAT: list(),
            _REQUIRES_CONTENTS: list(),
        }
        # Work on a copy so the caller's mapping is left untouched.
        options = dict(options)
        self.mindepth = options.pop("mindepth", 0)
        self.maxdepth = options.pop("maxdepth", None)
        self.test = options.pop("test", False)
        for key, value in options.items():
            if key.startswith("_"):
                # this is a passthrough object, continue
                continue
            if not value:
                raise ValueError('missing value for "{}" option'.format(key))
            option_class = globals().get(key.title() + "Option")
            # Only concrete Option subclasses are valid; this also rejects an
            # empty key, which would otherwise resolve to the base class.
            if not (
                isinstance(option_class, type)
                and issubclass(option_class, Option)
                and option_class is not Option
            ):
                raise ValueError('invalid option "{}"'.format(key))
            obj = option_class(key, value)
            if hasattr(obj, "match"):
                requires = obj.requires()
                if requires & _REQUIRES_CONTENTS:
                    criteria[_REQUIRES_CONTENTS].append(obj)
                elif requires & _REQUIRES_STAT:
                    criteria[_REQUIRES_STAT].append(obj)
                else:
                    criteria[_REQUIRES_PATH].append(obj)
            if hasattr(obj, "execute"):
                self.actions.append(obj)
        if not self.actions:
            self.actions.append(PrintOption("print", ""))
        # order criteria so that least expensive checks are done first
        self.criteria = (
            criteria[_REQUIRES_PATH]
            + criteria[_REQUIRES_STAT]
            + criteria[_REQUIRES_CONTENTS]
        )

    def find(self, path):
        """
        Generate filenames in path that satisfy criteria specified in
        the constructor.
        This method is a generator and should be repeatedly called
        until there are no more results.
        """
        # The starting path itself is at depth 0.
        if self.mindepth < 1:
            dirpath, name = os.path.split(path)
            match, fstat = self._check_criteria(dirpath, name, path)
            if match:
                yield from self._perform_actions(path, fstat=fstat)

        for dirpath, dirs, files in salt.utils.path.os_walk(path):
            relpath = os.path.relpath(dirpath, path)
            # Entries directly inside the starting path are at depth 1.
            depth = path_depth(relpath) + 1
            if depth >= self.mindepth and (
                self.maxdepth is None or self.maxdepth >= depth
            ):
                for name in dirs + files:
                    fullpath = os.path.join(dirpath, name)
                    match, fstat = self._check_criteria(dirpath, name, fullpath)
                    if match:
                        yield from self._perform_actions(fullpath, fstat=fstat)

            if self.maxdepth is not None and depth > self.maxdepth:
                # Too deep already: do not descend any further.
                dirs[:] = []

    def _check_criteria(self, dirpath, name, fullpath, fstat=None):
        """
        Return (match, fstat): whether every criterion accepts the entry,
        and the stat result if one had to be obtained (otherwise the value
        passed in).
        """
        match = True
        for criterion in self.criteria:
            if fstat is None and criterion.requires() & _REQUIRES_STAT:
                try:
                    fstat = os.stat(fullpath)
                except OSError:
                    # Fall back to the link itself when the path cannot be
                    # followed.
                    fstat = os.lstat(fullpath)
            if not criterion.match(dirpath, name, fstat):
                match = False
                break
        return match, fstat

    def _perform_actions(self, fullpath, fstat=None):
        """
        Run every action on fullpath and yield each result that is not None.
        """
        for action in self.actions:
            if fstat is None and action.requires() & _REQUIRES_STAT:
                try:
                    fstat = os.stat(fullpath)
                except OSError:
                    fstat = os.lstat(fullpath)
            result = action.execute(fullpath, fstat, test=self.test)
            if result is not None:
                yield result


def path_depth(path):
    """
    Return the number of components in path, not counting the path
    separator, the current directory or the parent directory entries.
    """
    depth = 0
    head = path
    while True:
        head, tail = os.path.split(head)
        if not tail and (not head or head in _PATH_DEPTH_IGNORED):
            break
        if tail and tail not in _PATH_DEPTH_IGNORED:
            depth += 1
    return depth


def find(path, options):
    """
    Generate the results of applying the given find options to path.

    path is the file or directory to start from and options is a mapping of
    option names to values as described in the module documentation.
    Raises ValueError (on first iteration) when the options are invalid.
    """
    finder = Finder(options)
    yield from finder.find(path)


def _main():
    """
    Command line entry point: '<prog> path [key=value ...]'.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: {} path [options]\n".format(sys.argv[0]))
        sys.exit(salt.defaults.exitcodes.EX_USAGE)

    path = sys.argv[1]
    criteria = {}

    for arg in sys.argv[2:]:
        # Split on the first '=' only so values may contain '='; an argument
        # without '=' gets an empty value, which Finder reports as an error.
        key, _, value = arg.partition("=")
        criteria[key] = value
    try:
        finder = Finder(criteria)
    except ValueError as ex:
        sys.stderr.write("error: {}\n".format(ex))
        sys.exit(salt.defaults.exitcodes.EX_GENERIC)

    for result in finder.find(path):
        print(result)


if __name__ == "__main__":
    _main()