1"""
2Approximate the Unix find(1) command and return a list of paths that
3meet the specified criteria.
4
5The options include match criteria:
6    name    = file-glob                 # case sensitive
7    iname   = file-glob                 # case insensitive
8    regex   = file-regex                # case sensitive
9    iregex  = file-regex                # case insensitive
10    type    = file-types                # match any listed type
    owner   = users                     # match any listed user
12    group   = groups                    # match any listed group
13    size    = [+-]number[size-unit]     # default unit = byte
14    mtime   = interval                  # modified since date
15    grep    = regex                     # search file contents
16and/or actions:
17    delete [= file-types]               # default type = 'f'
18    exec    = command [arg ...]         # where {} is replaced by pathname
19    print  [= print-opts]
20and/or depth criteria:
   maxdepth = maximum depth to traverse in path
   mindepth = minimum depth to traverse before checking files or directories
23
24The default action is 'print=path'.
25
26file-glob:
27    *                = match zero or more chars
28    ?                = match any char
29    [abc]            = match a, b, or c
30    [!abc] or [^abc] = match anything except a, b, and c
31    [x-y]            = match chars x through y
32    [!x-y] or [^x-y] = match anything except chars x through y
33    {a,b,c}          = match a or b or c
34
35file-regex:
36    a Python re (regular expression) pattern
37
38file-types: a string of one or more of the following:
39    a: all file types
40    b: block device
41    c: character device
42    d: directory
43    p: FIFO (named pipe)
44    f: plain file
45    l: symlink
46    s: socket
47
48users:
49    a space and/or comma separated list of user names and/or uids
50
51groups:
52    a space and/or comma separated list of group names and/or gids
53
54size-unit:
55    b: bytes
56    k: kilobytes
57    m: megabytes
58    g: gigabytes
59    t: terabytes
60
61interval:
    [<num>w] [<num>d] [<num>h] [<num>m] [<num>s]
63
64    where:
65        w: week
66        d: day
67        h: hour
68        m: minute
69        s: second
70
71print-opts: a comma and/or space separated list of one or more of
72the following:
73
74    group: group name
75    md5:   MD5 digest of file contents
    mode:  file permissions (as an integer)
77    mtime: last modification time (as time_t)
78    name:  file basename
79    path:  file absolute path
80    size:  file size in bytes
81    type:  file type
82    user:  user name
83"""
84
85
86import logging
87import os
88import re
89import shutil
90import stat
91import sys
92import time
93from subprocess import PIPE, Popen
94
95import salt.defaults.exitcodes
96import salt.utils.args
97import salt.utils.hashutils
98import salt.utils.path
99import salt.utils.stringutils
100from salt.utils.filebuffer import BufferedReader
101
102try:
103    import grp
104    import pwd
105
106    # TODO: grp and pwd are both used in the code, we better make sure that
107    # that code never gets run if importing them does not succeed
108except ImportError:
109    pass
110
111
# Set up logger
log = logging.getLogger(__name__)

# Bit flags returned by each option's requires() method. Finder tests them
# with "&" to decide whether a stat result must be fetched and to order the
# criteria from cheapest to most expensive.
_REQUIRES_PATH = 1
_REQUIRES_STAT = 2
_REQUIRES_CONTENTS = 4

# Two-way lookup table: the single-letter type codes map to the stat.S_IF*
# file-type constants, and those constants map back to their letters.
_FILE_TYPES = {
    "b": stat.S_IFBLK,
    "c": stat.S_IFCHR,
    "d": stat.S_IFDIR,
    "f": stat.S_IFREG,
    "l": stat.S_IFLNK,
    "p": stat.S_IFIFO,
    "s": stat.S_IFSOCK,
    stat.S_IFBLK: "b",
    stat.S_IFCHR: "c",
    stat.S_IFDIR: "d",
    stat.S_IFREG: "f",
    stat.S_IFLNK: "l",
    stat.S_IFIFO: "p",
    stat.S_IFSOCK: "s",
}
135
# Pattern for interval strings such as "1w3d6h": an optional "+" or "-"
# modifier followed by optional week, day, hour, minute and second parts, in
# that fixed order. Each part is a decimal number followed by its
# (case-insensitive) unit letter; whitespace between parts is allowed.
_INTERVAL_REGEX = re.compile(
    r"""
                             ^\s*
                             (?P<modifier>[+-]?)
                             (?: (?P<week>   \d+ (?:\.\d*)? ) \s* [wW]  )? \s*
                             (?: (?P<day>    \d+ (?:\.\d*)? ) \s* [dD]  )? \s*
                             (?: (?P<hour>   \d+ (?:\.\d*)? ) \s* [hH]  )? \s*
                             (?: (?P<minute> \d+ (?:\.\d*)? ) \s* [mM]  )? \s*
                             (?: (?P<second> \d+ (?:\.\d*)? ) \s* [sS]  )? \s*
                             $
                             """,
    flags=re.VERBOSE,
)

# Path components that path_depth() does not count as a directory level.
_PATH_DEPTH_IGNORED = (os.path.sep, os.path.curdir, os.path.pardir)
151
152
def _parse_interval(value):
    """
    Parse an interval string such as 1w3d6h.

    Returns a 3-tuple of:
        - the interval length in seconds,
        - the resolution, i.e. the number of seconds in the smallest unit
          that was given (None when no unit was given at all),
        - the modifier ('+', '-', or '').

    Recognised units:
        w = week
        d = day
        h = hour
        m = minute
        s = second

    Raises ValueError when the string does not match the interval syntax.
    """
    parsed = _INTERVAL_REGEX.match(str(value))
    if parsed is None:
        raise ValueError("invalid time interval: '{}'".format(value))

    # Listed smallest unit first, so the first unit that is present also
    # determines the resolution.
    unit_seconds = (
        ("second", 1),
        ("minute", 60),
        ("hour", 60 * 60),
        ("day", 60 * 60 * 24),
        ("week", 60 * 60 * 24 * 7),
    )
    fields = parsed.groupdict()

    total = 0
    resolution = None
    for unit, seconds in unit_seconds:
        amount = fields[unit]
        if amount is None:
            continue
        total += float(amount) * seconds
        if resolution is None:
            resolution = seconds

    return total, resolution, fields["modifier"]
183
184
185def _parse_size(value):
186    scalar = value.strip()
187
188    if scalar.startswith(("-", "+")):
189        style = scalar[0]
190        scalar = scalar[1:]
191    else:
192        style = "="
193
194    if scalar:
195        multiplier = {
196            "b": 2 ** 0,
197            "k": 2 ** 10,
198            "m": 2 ** 20,
199            "g": 2 ** 30,
200            "t": 2 ** 40,
201        }.get(scalar[-1].lower())
202        if multiplier:
203            scalar = scalar[:-1].strip()
204        else:
205            multiplier = 1
206    else:
207        multiplier = 1
208
209    try:
210        num = int(scalar) * multiplier
211    except ValueError:
212        try:
213            num = int(float(scalar) * multiplier)
214        except ValueError:
215            raise ValueError('invalid size: "{}"'.format(value))
216
217    if style == "-":
218        min_size = 0
219        max_size = num
220    elif style == "+":
221        min_size = num
222        max_size = sys.maxsize
223    else:
224        min_size = num
225        max_size = num + multiplier - 1
226
227    return min_size, max_size
228
229
class Option:
    """
    Abstract base class for all find options.

    Subclasses in this module add a match(dirname, filename, fstat) method
    (match criteria), an execute(fullpath, fstat, test=False) method
    (actions), or both.
    """

    def requires(self):
        """
        Return the _REQUIRES_* flag(s) describing what this option needs.
        The base implementation reports that only the path is needed.
        """
        return _REQUIRES_PATH
237
238
class NameOption(Option):
    """
    Match files with a case-sensitive glob filename pattern.
    Note: this is the 'basename' portion of a pathname.
    The option name is 'name', e.g. {'name' : '*.txt'}.

    The glob is translated into a regular expression: '.' is taken
    literally, '?' matches exactly one character and '*' matches any run of
    characters (including none). All other characters are handed to the
    regular expression engine unchanged.
    """

    def __init__(self, key, value):
        """
        key:   the option name (unused)
        value: the glob pattern
        """
        # Escape literal dots first so that the dots introduced by the '?'
        # and '*' translations keep their regular expression meaning.
        self.regex = re.compile(
            value.replace(".", "\\.").replace("?", ".").replace("*", ".*") + "$"
        )

    def match(self, dirname, filename, fstat):
        """
        Return a match object when filename matches the pattern, else None.
        """
        return self.regex.match(filename)
253
254
class InameOption(Option):
    """
    Match files with a case-insensitive glob filename pattern.
    Note: this is the 'basename' portion of a pathname.
    The option name is 'iname', e.g. {'iname' : '*.TXT'}.

    The glob is translated into a regular expression: '.' is taken
    literally, '?' matches exactly one character and '*' matches any run of
    characters (including none). All other characters are handed to the
    regular expression engine unchanged.
    """

    def __init__(self, key, value):
        """
        key:   the option name (unused)
        value: the glob pattern
        """
        # Escape literal dots first so that the dots introduced by the '?'
        # and '*' translations keep their regular expression meaning.
        self.regex = re.compile(
            value.replace(".", "\\.").replace("?", ".").replace("*", ".*") + "$",
            re.IGNORECASE,
        )

    def match(self, dirname, filename, fstat):
        """
        Return a match object when filename matches the pattern, else None.
        """
        return self.regex.match(filename)
270
271
class RegexOption(Option):
    """
    Match files whose basename matches a case-sensitive regular expression.
    The expression is anchored at the start of the basename only.
    The option name is 'regex', e.g. {'regex' : '.*\\.txt'}.
    """

    def __init__(self, key, value):
        """
        Compile the expression; raises ValueError when it is not valid.
        """
        try:
            compiled = re.compile(value)
        except re.error:
            message = 'invalid regular expression: "{}"'.format(value)
            raise ValueError(message)
        self.regex = compiled

    def match(self, dirname, filename, fstat):
        """
        Return a match object when filename matches, else None.
        """
        found = self.regex.match(filename)
        return found
287
288
class IregexOption(Option):
    """
    Match files whose basename matches a case-insensitive regular expression.
    The expression is anchored at the start of the basename only.
    The option name is 'iregex', e.g. {'iregex' : '.*\\.txt'}.
    """

    def __init__(self, key, value):
        """
        Compile the expression; raises ValueError when it is not valid.
        """
        try:
            compiled = re.compile(value, re.IGNORECASE)
        except re.error:
            message = 'invalid regular expression: "{}"'.format(value)
            raise ValueError(message)
        self.regex = compiled

    def match(self, dirname, filename, fstat):
        """
        Return a match object when filename matches, else None.
        """
        found = self.regex.match(filename)
        return found
304
305
class TypeOption(Option):
    """
    Match files by file type.
    The wanted types are given as a string of type letters, optionally
    separated by commas and/or whitespace:
        b = block device
        c = character device
        d = directory
        f = regular (plain) file
        l = symbolic link
        p = FIFO (named pipe)
        s = socket
    A file matches when its type is any one of the listed types.
    The option name is 'type', e.g. {'type' : 'd'} or {'type' : 'bc'}.
    """

    def __init__(self, key, value):
        """
        Collect the stat file-type constants for the given letters.
        Raises ValueError on an unknown type letter.
        """
        # Drop commas and all whitespace, leaving only the type letters.
        letters = "".join(value.strip().replace(",", "").split())
        self.ftypes = set()
        for letter in letters:
            try:
                mode = _FILE_TYPES[letter]
            except KeyError:
                raise ValueError('invalid file type "{}"'.format(letter))
            self.ftypes.add(mode)

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        """
        Return True when the file type in fstat is one of the wanted types.
        """
        file_type = stat.S_IFMT(fstat[stat.ST_MODE])
        return file_type in self.ftypes
336
337
class OwnerOption(Option):
    """
    Match files by their owner name(s) and/or uid(s), e.g. 'root'.
    The names are a space and/or comma separated list of names and/or integers.
    A match occurs when the file's uid matches any user specified.
    The option name is 'owner', e.g. {'owner' : 'root'}.
    """

    def __init__(self, key, value):
        """
        Resolve every listed user to a uid.
        Raises ValueError when a user name is unknown.
        """
        self.uids = set()
        for name in value.replace(",", " ").split():
            if name.isdigit():
                self.uids.add(int(name))
            else:
                try:
                    # Look up the individual name, not the whole list string.
                    self.uids.add(pwd.getpwnam(name).pw_uid)
                except KeyError:
                    raise ValueError('no such user "{}"'.format(name))

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        """
        Return True when the file's uid is one of the wanted uids.
        """
        return fstat[stat.ST_UID] in self.uids
362
363
class GroupOption(Option):
    """
    Match files by their group name(s) and/or gid(s), e.g. 'admin'.
    The groups are given as a space and/or comma separated list of names
    and/or integers.
    A file matches when its gid is any one of the listed groups.
    The option name is 'group', e.g. {'group' : 'admin'}.
    """

    def __init__(self, key, value):
        """
        Resolve every listed group to a gid.
        Raises ValueError when a group name is unknown.
        """
        self.gids = set()
        for token in value.replace(",", " ").split():
            if not token.isdigit():
                try:
                    gid = grp.getgrnam(token).gr_gid
                except KeyError:
                    raise ValueError('no such group "{}"'.format(token))
            else:
                gid = int(token)
            self.gids.add(gid)

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        """
        Return True when the file's gid is one of the wanted gids.
        """
        file_gid = fstat[stat.ST_GID]
        return file_gid in self.gids
388
389
class SizeOption(Option):
    """
    Match files by their size.
    Prefix the size with '-' to find files the specified size and smaller.
    Prefix the size with '+' to find files the specified size and larger.
    Without a +/- prefix, the bounds returned by _parse_size for the plain
    value are used.
    The size can be suffixed with (case-insensitive) suffixes:
        b = bytes
        k = kilobytes
        m = megabytes
        g = gigabytes
        t = terabytes
    The option name is 'size', e.g. {'size' : '+1G'}.
    """

    def __init__(self, key, value):
        """
        Store the inclusive size bounds computed by _parse_size.
        """
        lower, upper = _parse_size(value)
        self.min_size = lower
        self.max_size = upper

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        """
        Return True when the file size lies within the stored bounds.
        """
        nbytes = fstat[stat.ST_SIZE]
        return self.min_size <= nbytes and nbytes <= self.max_size
413
414
class MtimeOption(Option):
    """
    Match files by their last modification time.
    The option name is 'mtime', e.g. {'mtime' : '3d'}.
    The value format is [+-][<num>w] [<num>d] [<num>h] [<num>m] [<num>s]
    where num is an integer or float and the case-insensitive suffixes are:
        w = week
        d = day
        h = hour
        m = minute
        s = second
    At least one <num><suffix> part is required.
    With a '-' prefix, files modified within the interval match; otherwise
    files last modified at least that long ago match.
    Whitespace is ignored in the value.
    """

    def __init__(self, key, value):
        """
        Compute the cut-off timestamp from the interval.
        Raises ValueError when the interval is invalid or has no time unit.
        """
        secs, resolution, modifier = _parse_interval(value)
        if resolution is None:
            # Only a modifier (or nothing) was given, so there is no
            # interval to measure against.
            raise ValueError("invalid time interval: '{}'".format(value))
        # Truncate the interval to a whole multiple of its smallest unit.
        self.mtime = time.time() - int(secs / resolution) * resolution
        self.modifier = modifier

    def requires(self):
        return _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        """
        Compare the file's mtime against the cut-off timestamp.
        """
        if self.modifier == "-":
            return fstat[stat.ST_MTIME] >= self.mtime
        else:
            return fstat[stat.ST_MTIME] <= self.mtime
442
443
class GrepOption(Option):
    """Match regular files whose contents contain a pattern.
    The option name is 'grep', e.g. {'grep' : '(foo)|(bar)'}.
    """

    def __init__(self, key, value):
        """
        Compile the pattern; raises ValueError when it is not valid.
        """
        try:
            compiled = re.compile(value)
        except re.error:
            message = 'invalid regular expression: "{}"'.format(value)
            raise ValueError(message)
        self.regex = compiled

    def requires(self):
        return _REQUIRES_CONTENTS | _REQUIRES_STAT

    def match(self, dirname, filename, fstat):
        """
        Return the file's joined path when the pattern is found in any chunk
        of a regular file; return None otherwise.
        """
        if stat.S_ISREG(fstat[stat.ST_MODE]):
            target = os.path.join(dirname, filename)
            with BufferedReader(target, mode="rb") as reader:
                for block in reader:
                    if self.regex.search(block):
                        return target
        return None
467
468
class PrintOption(Option):
    """
    Return information about a matched file.
    Print options are specified as a comma and/or space separated list of
    one or more of the following:
        group  = group name (the gid when the name is unknown)
        md5    = MD5 digest of file contents ('' unless a regular file)
        mode   = file mode (as integer)
        mtime  = last modification time (as time_t)
        name   = file basename
        path   = file path as built from the search path
        size   = file size in bytes
        type   = file type
        user   = user name (the uid when the name is unknown)
    When the list is empty, 'path' is used.
    """

    def __init__(self, key, value):
        """
        Record the requested fields and whether any of them needs a stat.
        """
        requested = value.replace(",", " ").split()
        self.print_title = False
        self.fmt = requested or ["path"]
        # Everything other than the name and the path comes from the stat.
        self.need_stat = any(opt not in ("name", "path") for opt in requested)

    def requires(self):
        if self.need_stat:
            return _REQUIRES_STAT
        return _REQUIRES_PATH

    def execute(self, fullpath, fstat, test=False):
        """
        Return the requested fields for fullpath: the bare value when exactly
        one field was produced, otherwise a list of values in request order.
        Unrecognised field names produce no value.
        """
        fields = []
        for wanted in self.fmt:
            if wanted == "path":
                item = fullpath
            elif wanted == "name":
                item = os.path.basename(fullpath)
            elif wanted == "size":
                item = fstat[stat.ST_SIZE]
            elif wanted == "type":
                item = _FILE_TYPES.get(stat.S_IFMT(fstat[stat.ST_MODE]), "?")
            elif wanted == "mode":
                # Keep only the last three octal digits of the mode.
                item = int(oct(fstat[stat.ST_MODE])[-3:], 8)
            elif wanted == "mtime":
                item = fstat[stat.ST_MTIME]
            elif wanted == "user":
                item = fstat[stat.ST_UID]
                try:
                    item = pwd.getpwuid(item).pw_name
                except KeyError:
                    # No passwd entry: report the numeric uid instead.
                    pass
            elif wanted == "group":
                item = fstat[stat.ST_GID]
                try:
                    item = grp.getgrgid(item).gr_name
                except KeyError:
                    # No group entry: report the numeric gid instead.
                    pass
            elif wanted == "md5":
                if stat.S_ISREG(fstat[stat.ST_MODE]):
                    item = salt.utils.hashutils.get_hash(fullpath, "md5")
                else:
                    item = ""
            else:
                continue
            fields.append(item)

        return fields[0] if len(fields) == 1 else fields
538
539
class DeleteOption(TypeOption):
    """
    Deletes matched file.
    Delete options are one or more of the following:
        a: all file types
        b: block device
        c: character device
        d: directory
        p: FIFO (named pipe)
        f: plain file
        l: symlink
        s: socket

    The inherited match() restricts deletion to the listed file types.
    """

    def __init__(self, key, value):
        """
        Expand 'a' to every known type letter, then parse as a type list.
        """
        if "a" in value:
            value = "bcdpfls"
        super().__init__(key, value)

    def execute(self, fullpath, fstat, test=False):
        """
        Delete fullpath and return it, or return None when deletion fails.
        With test=True nothing is deleted and fullpath is returned.
        """
        if test:
            return fullpath
        try:
            if os.path.isdir(fullpath) and not os.path.islink(fullpath):
                shutil.rmtree(fullpath)
            elif os.path.lexists(fullpath):
                # Covers plain files, symlinks (including broken ones and
                # links to directories) and special files such as FIFOs,
                # sockets and device nodes.
                os.remove(fullpath)
        except OSError as exc:
            log.error("Unable to delete %s: %s", fullpath, exc)
            return None
        return fullpath
570
571
class ExecOption(Option):
    """
    Execute the given command, {} replaced by filename.
    Quote the {} if commands might include whitespace.
    """

    def __init__(self, key, value):
        """
        key:   the option name (unused)
        value: the command line template
        """
        self.command = value

    def execute(self, fullpath, fstat, test=False):
        """
        Run the command for fullpath and return "<command>:\n<stdout>\n".
        Anything written to stderr is logged as an error. When the command
        cannot be run, the failure is logged and "<fullpath>: Failed" is
        returned.

        NOTE(review): the test flag is not honoured here; the command runs
        even when test=True.
        """
        # Bound before the try block so the except handler can always log it.
        command = self.command
        try:
            command = command.replace("{}", fullpath)
            argv = salt.utils.args.shlex_split(command)
            proc = Popen(argv, stdout=PIPE, stderr=PIPE)
            out, err = proc.communicate()
            if err:
                log.error(
                    "Error running command: %s\n\n%s",
                    command,
                    salt.utils.stringutils.to_str(err),
                )
            return "{}:\n{}\n".format(command, salt.utils.stringutils.to_str(out))

        except Exception as e:  # pylint: disable=broad-except
            log.error('Exception while executing command "%s":\n\n%s', command, e)
            return "{}: Failed".format(fullpath)
598
599
class Finder:
    """
    Walk a directory tree and run actions on the entries that satisfy all
    of the configured match criteria.
    """

    def __init__(self, options):
        """
        Build the criteria and actions from a dict of option name -> value.

        'mindepth', 'maxdepth' and 'test' are handled here; keys starting
        with '_' are ignored; every other key <name> is handled by the class
        <Name>Option defined in this module. When no action is given, the
        path of each match is printed.

        The options dict is not modified.

        Raises ValueError for an unknown option, a missing value or a depth
        that is not an integer.
        """
        self.actions = []
        self.maxdepth = None
        self.mindepth = 0
        self.test = False
        criteria = {
            _REQUIRES_PATH: [],
            _REQUIRES_STAT: [],
            _REQUIRES_CONTENTS: [],
        }
        # Work on a shallow copy so the caller's dict is left untouched.
        options = dict(options)
        if "mindepth" in options:
            self.mindepth = self._depth_value("mindepth", options.pop("mindepth"))
        if "maxdepth" in options:
            self.maxdepth = self._depth_value("maxdepth", options.pop("maxdepth"))
        if "test" in options:
            self.test = options.pop("test")
        for key, value in options.items():
            if key.startswith("_"):
                # this is a passthrough object, continue
                continue
            if not value:
                raise ValueError('missing value for "{}" option'.format(key))
            # Only the class lookup is guarded, so a KeyError raised inside
            # an option constructor is not misreported as an invalid option.
            try:
                option_class = globals()[key.title() + "Option"]
            except KeyError:
                raise ValueError('invalid option "{}"'.format(key))
            obj = option_class(key, value)
            if hasattr(obj, "match"):
                requires = obj.requires()
                if requires & _REQUIRES_CONTENTS:
                    criteria[_REQUIRES_CONTENTS].append(obj)
                elif requires & _REQUIRES_STAT:
                    criteria[_REQUIRES_STAT].append(obj)
                else:
                    criteria[_REQUIRES_PATH].append(obj)
            if hasattr(obj, "execute"):
                self.actions.append(obj)
        if not self.actions:
            self.actions.append(PrintOption("print", ""))
        # order criteria so that least expensive checks are done first
        self.criteria = (
            criteria[_REQUIRES_PATH]
            + criteria[_REQUIRES_STAT]
            + criteria[_REQUIRES_CONTENTS]
        )

    @staticmethod
    def _depth_value(name, value):
        """
        Convert a depth given as a string (e.g. from the command line) to an
        int; any other value is returned unchanged.
        """
        if isinstance(value, str):
            try:
                return int(value)
            except ValueError:
                raise ValueError(
                    'invalid value for "{}" option: "{}"'.format(name, value)
                )
        return value

    @staticmethod
    def _stat(fullpath):
        """
        Stat fullpath, falling back to lstat when that fails (e.g. for a
        broken symlink).
        """
        try:
            return os.stat(fullpath)
        except OSError:
            return os.lstat(fullpath)

    def find(self, path):
        """
        Generate the action results for path itself (when mindepth < 1) and
        for every entry below it that satisfies the criteria specified in
        the constructor.
        This method is a generator; iterate over it to obtain the results.
        """
        if self.mindepth < 1:
            dirpath, name = os.path.split(path)
            match, fstat = self._check_criteria(dirpath, name, path)
            if match:
                yield from self._perform_actions(path, fstat=fstat)

        for dirpath, dirs, files in salt.utils.path.os_walk(path):
            relpath = os.path.relpath(dirpath, path)
            # Entries directly inside path are at depth 1.
            depth = path_depth(relpath) + 1
            if depth >= self.mindepth and (
                self.maxdepth is None or self.maxdepth >= depth
            ):
                for name in dirs + files:
                    fullpath = os.path.join(dirpath, name)
                    match, fstat = self._check_criteria(dirpath, name, fullpath)
                    if match:
                        yield from self._perform_actions(fullpath, fstat=fstat)

            # Entries one level down would be at depth + 1; once that
            # exceeds maxdepth there is no need to descend any further.
            if self.maxdepth is not None and depth >= self.maxdepth:
                dirs[:] = []

    def _check_criteria(self, dirpath, name, fullpath, fstat=None):
        """
        Return (match, fstat): whether every criterion matches, and the stat
        result when one had to be fetched (otherwise the fstat passed in).
        """
        match = True
        for criterion in self.criteria:
            if fstat is None and criterion.requires() & _REQUIRES_STAT:
                fstat = self._stat(fullpath)
            if not criterion.match(dirpath, name, fstat):
                match = False
                break
        return match, fstat

    def _perform_actions(self, fullpath, fstat=None):
        """
        Run every action on fullpath and yield each result that is not None.
        """
        for action in self.actions:
            if fstat is None and action.requires() & _REQUIRES_STAT:
                fstat = self._stat(fullpath)
            result = action.execute(fullpath, fstat, test=self.test)
            if result is not None:
                yield result
700
701
def path_depth(path):
    """
    Return the number of components in path, not counting the separator,
    '.' and '..' (the entries of _PATH_DEPTH_IGNORED).
    """
    depth = 0
    head = path
    while True:
        previous = head
        head, tail = os.path.split(head)
        # Stop once nothing is left to split off. The "head == previous"
        # test catches roots that os.path.split returns unchanged (such as
        # "//"), which would otherwise loop forever.
        if not tail and (
            not head or head in _PATH_DEPTH_IGNORED or head == previous
        ):
            break
        if tail and tail not in _PATH_DEPTH_IGNORED:
            depth += 1
    return depth
712
713
def find(path, options):
    """
    Build a Finder from the options dict and yield each result it generates
    for path.
    """
    yield from Finder(options).find(path)
721
722
def _main():
    """
    Command line entry point: find.py path [option=value ...]

    Prints every result for the given path. Exits with EX_USAGE when no path
    is given and with EX_GENERIC when an option is invalid.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("usage: {} path [options]\n".format(sys.argv[0]))
        sys.exit(salt.defaults.exitcodes.EX_USAGE)

    path = sys.argv[1]
    criteria = {}

    for arg in sys.argv[2:]:
        # Split on the first "=" only, so values may contain "=" themselves.
        # An argument without "=" yields an empty value, which Finder
        # rejects with a ValueError that is reported below.
        key, _, value = arg.partition("=")
        criteria[key] = value
    try:
        finder = Finder(criteria)
    except ValueError as ex:
        sys.stderr.write("error: {}\n".format(ex))
        sys.exit(salt.defaults.exitcodes.EX_GENERIC)

    for result in finder.find(path):
        print(result)


if __name__ == "__main__":
    _main()
746