1"""Helpers for other modules."""
2
3from __future__ import unicode_literals
4
5import yaml
6from dvc.utils.compat import str, builtin_str, open, cast_bytes_py2
7
8import os
9import sys
10import stat
11import math
12import json
13import shutil
14import hashlib
15import nanotime
16import time
17
# Number of bytes requested per read() when hashing or copying a file (1 MiB).
LOCAL_CHUNK_SIZE = 1024 ** 2
# Files of at least this many bytes (1 GiB) get a progress bar while hashing.
LARGE_FILE_SIZE = 1024 ** 3
# NOTE(review): not referenced in this part of the module; presumably the
# entry count above which a directory is considered large -- confirm at callers.
LARGE_DIR_SIZE = 100
21
22
def dos2unix(data):
    """ Return the bytes `data` with every CRLF pair replaced by a single LF """
    windows_eol = b"\r\n"
    unix_eol = b"\n"
    return data.replace(windows_eol, unix_eol)
25
26
def file_md5(fname):
    """ Compute the md5 of a file as a (hexdigest, digest) pair.

    Returns (None, None) when `fname` does not exist. Files that
    `istextfile` reports as text have CRLF line endings converted to LF
    before hashing; other files are hashed as-is. Files of at least
    LARGE_FILE_SIZE bytes are announced in the log and tracked with a
    progress bar.
    """
    import dvc.logger as logger
    from dvc.progress import progress
    from dvc.istextfile import istextfile

    if not os.path.exists(fname):
        return (None, None)

    digest = hashlib.md5()
    is_text = istextfile(fname)
    size = os.path.getsize(fname)

    show_progress = size >= LARGE_FILE_SIZE
    if show_progress:
        msg = "Computing md5 for a large file {}. This is only done once."
        logger.info(msg.format(os.path.relpath(fname)))
        name = os.path.relpath(fname)
        done = 0

    with open(fname, "rb") as fobj:
        # Read until an empty chunk signals end of file.
        for data in iter(lambda: fobj.read(LOCAL_CHUNK_SIZE), b""):
            if show_progress:
                done += len(data)
                progress.update_target(name, done, size)

            # NOTE: line endings are normalized chunk by chunk, so a CRLF
            # pair that straddles a chunk boundary is hashed unchanged.
            digest.update(dos2unix(data) if is_text else data)

    if show_progress:
        progress.finish_target(name)

    return (digest.hexdigest(), digest.digest())
68
69
def bytes_md5(byts):
    """ Return the md5 hexdigest of the bytes `byts` """
    return hashlib.md5(byts).hexdigest()
74
75
def dict_filter(d, exclude=()):
    """
    Exclude specified keys from a nested dict

    Lists are filtered element by element and dicts are filtered
    recursively at every nesting level; any other value is returned
    unchanged. The input is never modified: new lists/dicts are built.

    `d` is the value to filter and `exclude` is a container of key names
    to drop (anything supporting `in`). The default is an immutable empty
    tuple rather than a shared mutable list.
    """

    if isinstance(d, list):
        return [dict_filter(e, exclude) for e in d]

    if isinstance(d, dict):
        ret = {}
        for k, v in d.items():
            # Normalize native-str keys to the compat `str` type first, so
            # the type check and the `exclude` lookup see a single key type.
            if isinstance(k, builtin_str):
                k = str(k)

            assert isinstance(k, str)
            if k in exclude:
                continue
            ret[k] = dict_filter(v, exclude)
        return ret

    return d
99
100
def dict_md5(d, exclude=()):
    """ Return the md5 hexdigest of `d` with the `exclude` keys removed.

    The filtered structure is serialized as JSON with sorted keys and
    encoded as UTF-8 before hashing, so the insertion order of dict keys
    does not affect the result. `exclude` defaults to an immutable empty
    tuple rather than a shared mutable list.
    """
    filtered = dict_filter(d, exclude)
    serialized = json.dumps(filtered, sort_keys=True)
    return bytes_md5(serialized.encode("utf-8"))
105
106
def copyfile(src, dest, no_progress_bar=False, name=None):
    """Copy file with progress bar

    `src` is the path of the file to copy. `dest` is the destination path;
    when it is an existing directory, the file is written inside it under
    the basename of `src`. `no_progress_bar` disables progress reporting
    when true. `name` is the label used for the progress target and
    defaults to the basename of `dest`.

    Both files are opened in a `with` block so the handles are released
    even when opening the destination, reading or writing raises.
    """
    from dvc.progress import progress

    copied = 0
    name = name if name else os.path.basename(dest)
    total = os.stat(src).st_size

    if os.path.isdir(dest):
        dest_path = os.path.join(dest, os.path.basename(src))
    else:
        dest_path = dest

    with open(src, "rb") as fsrc, open(dest_path, "wb+") as fdest:
        while True:
            buf = fsrc.read(LOCAL_CHUNK_SIZE)
            if not buf:
                break
            fdest.write(buf)
            copied += len(buf)
            if not no_progress_bar:
                progress.update_target(name, copied, total)

    if not no_progress_bar:
        progress.finish_target(name)
136
137
def move(src, dst):
    """ Move `src` to `dst`, creating the parent directory of `dst` if needed.

    When `src` is a symbolic link, the file it points to is copied to `dst`
    and only the link itself is removed; the link target is left in place.
    Anything else is handed to `shutil.move`.
    """
    dst = os.path.abspath(dst)
    dname = os.path.dirname(dst)
    if not os.path.exists(dname):
        os.makedirs(dname)

    if os.path.islink(src):
        target = os.readlink(src)
        # A relative link target is relative to the directory holding the
        # link, not to the current working directory.
        if not os.path.isabs(target):
            target = os.path.join(os.path.dirname(src), target)
        shutil.copy(target, dst)
        os.unlink(src)
        return

    shutil.move(src, dst)
150
151
def remove(path):
    """ Delete the file or directory tree at `path`, if it exists.

    A file gets the write bit added before it is unlinked. A directory is
    removed with `shutil.rmtree`; whenever that fails on an entry, the
    write bit is added to the entry and the failed operation is retried.
    """
    import dvc.logger as logger

    if not os.path.exists(path):
        return

    logger.debug("Removing '{}'".format(os.path.relpath(path)))

    def _make_writable_and_retry(func, p, excinfo):
        # Signature follows the `onerror` callback of shutil.rmtree.
        os.chmod(p, os.stat(p).st_mode | stat.S_IWRITE)
        func(p)

    if not os.path.isfile(path):
        shutil.rmtree(path, onerror=_make_writable_and_retry)
        return

    _make_writable_and_retry(os.unlink, path, None)
170
171
def to_chunks(l, jobs):
    """ Split the sequence `l` into consecutive slices for `jobs` workers.

    Each slice holds ceil(len(l) / jobs) items, except possibly the last,
    so a positive `jobs` never yields more than `jobs` slices. An empty
    sequence gives an empty list; a one-item sequence is returned as
    `[l]` without slicing.
    """
    # Exact integer ceiling division. `len(l) / jobs` is floor division on
    # Python 2 (this module does not import `division`), which made the
    # math.ceil() around it a no-op and produced more slices than `jobs`.
    n = -(-len(l) // jobs)

    if len(l) == 1:
        return [l]

    if n == 0:
        n = 1

    return [l[x : x + n] for x in range(0, len(l), n)]
182
183
# NOTE: Tell whether we are running from a bundle (`sys.frozen` is set)
# https://pythonhosted.org/PyInstaller/runtime-information.html
def is_binary():
    """ Return `sys.frozen` if that attribute exists, otherwise False """
    try:
        return sys.frozen
    except AttributeError:
        return False
188
189
# NOTE: Restore env variables that PyInstaller has modified
# http://pyinstaller.readthedocs.io/en/stable/runtime-information.html
def fix_env(env=None):
    """ Return a copy of `env` (default: os.environ) with LD_LIBRARY_PATH fixed.

    Outside a bundle the copy is returned untouched. Inside a bundle,
    LD_LIBRARY_PATH is reset to the value of LD_LIBRARY_PATH_ORIG when
    that variable is present, and dropped otherwise. The mapping passed
    in is never modified.
    """
    env = (os.environ if env is None else env).copy()

    if not is_binary():
        return env

    lp_key = "LD_LIBRARY_PATH"
    lp_orig = env.get(lp_key + "_ORIG", None)
    if lp_orig is None:
        env.pop(lp_key, None)
    else:
        # NOTE: py2 doesn't like unicode strings in environ
        env[cast_bytes_py2(lp_key)] = cast_bytes_py2(lp_orig)

    return env
208
209
def convert_to_unicode(data):
    """ Recursively convert native strings inside `data` to the compat `str`.

    Dicts (keys and values), lists and tuples are rebuilt with their
    contents converted; lists and tuples keep their own type, dicts come
    back as plain dicts. Any other value is returned unchanged.
    """
    if isinstance(data, builtin_str):
        return str(data)

    if isinstance(data, dict):
        return {
            convert_to_unicode(key): convert_to_unicode(value)
            for key, value in data.items()
        }

    if isinstance(data, (list, tuple)):
        return type(data)(convert_to_unicode(item) for item in data)

    return data
219
220
def tmp_fname(fname):
    """ Build a unique temporary name for a partial download of `fname` """
    from uuid import uuid4

    # A fresh random UUID keeps each generated name distinct.
    suffix = "." + str(uuid4()) + ".tmp"
    return fname + suffix
226
227
def current_timestamp():
    """ Return the current time as an int, converted through nanotime """
    now = time.time()
    stamp = nanotime.timestamp(now)
    return int(stamp)
230
231
def load_stage_file(path):
    """ Parse the YAML file at `path` with the safe loader.

    Returns the parsed content, or an empty dict when the parsed content
    is falsy (e.g. an empty file).
    """
    with open(path, "r") as fobj:
        content = yaml.safe_load(fobj)

    return content if content else {}
235