1#!/usr/bin/env python
2###############################################################################
3#                                                                             #
4#    manifestManager.py                                                       #
5#                                                                             #
6#    Work with online data manifests (creating / syncing / validating)        #
7#                                                                             #
8#    Copyright (C) Michael Imelfort                                           #
9#                                                                             #
10###############################################################################
11#                                                                             #
12#    This program is free software: you can redistribute it and/or modify     #
13#    it under the terms of the GNU General Public License as published by     #
14#    the Free Software Foundation, either version 3 of the License, or        #
15#    (at your option) any later version.                                      #
16#                                                                             #
17#    This program is distributed in the hope that it will be useful,          #
18#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
19#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
20#    GNU General Public License for more details.                             #
21#                                                                             #
22#    You should have received a copy of the GNU General Public License        #
23#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
24#                                                                             #
25###############################################################################
26
# module metadata
__author__ = "Michael Imelfort"
__copyright__ = "Copyright 2014"
__credits__ = ["Michael Imelfort"]
__license__ = "GPLv3"
__maintainer__ = "Michael Imelfort"
__email__ = "mike@mikeimelfort.com"
# the version is also written into the header line of each manifest that
# ManifestManager.createManifest produces
__version__ = "0.2.3"
34
35###############################################################################
36###############################################################################
37###############################################################################
38###############################################################################
39
# default manifest file name, used whenever a caller does not supply one
__MANIFEST__ = ".dmanifest"
41
42###############################################################################
43###############################################################################
44###############################################################################
45###############################################################################
46
47# system includes
48import os
49import hashlib
50import urllib2
51import urllib
52import shutil
53import errno
54
55# local includes
56from checkm.fileEntity import FileEntity as FE
57
58###############################################################################
59###############################################################################
60###############################################################################
61###############################################################################
62
class ManifestManager(object):
    """Create, diff and update manifests describing the files under a directory

    As read by diffManifests, a manifest is a text file whose first line is a
    "##<type>##" header and whose other lines are tab separated records with
    the path in the first column, the hash in the second ("-" marks a
    directory) and the size in the third.
    """
    def __init__(self, manType=None, timeout=30):
        """Set up an empty manager

        manType -- type label written into the manifest header,
                   "generic" is used when None
        timeout -- passed as the timeout argument of urllib2.urlopen when a
                   source manifest is opened
        """
        self.timeout = timeout

        # file entities collected by createManifest / walk
        self.files = []
        self.type = manType if manType is not None else "generic"

    def createManifest(self, path, manifestName=None):
        """Inventory all files in path and create a manifest file

        path         -- directory to inventory, the manifest is written into it
        manifestName -- file name of the manifest (__MANIFEST__ when None),
                        files with this name are left out of the inventory
        """
        if manifestName is None:
            manifestName = __MANIFEST__

        # start from a clean slate, otherwise a second call on the same
        # instance would write all previously collected entries again
        self.files = []

        # make the root file entity
        root_path = os.path.abspath(path)
        root_fe = FE('root', ".", None, "-", 0)
        self.files.append(root_fe)

        # now make all the ones below
        dirs, files = self.listdir(path)[:2]
        self.walk([root_fe], root_path, '', dirs, files, skipFile=manifestName)

        with open(os.path.join(path, manifestName), 'w') as man_fh:
            # print the header
            man_fh.write("##%s##\tData manifest created by ScreamingBackpack version %s\n" % (self.type, __version__))
            # entities without a parent (presumably only the root entity made
            # above) are not written
            for f in self.files:
                if f.parent is not None:
                    man_fh.write("%s\n" % f)

    def diffManifests(self,
                      localManifestLocation,
                      sourceManifestLocation,
                      localManifestName=None,
                      sourceManifestName=None,
                      printDiffs=False):
        """Check for any differences between a local and a source manifest

        sourceManifestLocation is first tried as a URL; when urllib2 rejects
        it with a ValueError it is opened as a local directory instead.

        localManifestLocation  -- directory holding the local manifest
        sourceManifestLocation -- URL or directory holding the source manifest
        localManifestName      -- local manifest name (__MANIFEST__ when None)
        sourceManifestName     -- source manifest name (__MANIFEST__ when None)
        printDiffs             -- also print a summary of the differences

        Returns (source, addedFiles, addedDirs, deleted, modified) where
        source is the source location with a trailing separator, deleted is a
        list of paths and the other three are lists of
        (path, [hash, size, seenLocal]) tuples taken from the source manifest.
        Returns a tuple of five Nones if the source can not be reached, if its
        first line is not a header or if the manifest types do not match.
        """
        if localManifestName is None:
            localManifestName = __MANIFEST__
        if sourceManifestName is None:
            sourceManifestName = __MANIFEST__

        failed = (None, None, None, None, None)
        local_manifest = os.path.join(localManifestLocation, localManifestName)

        # get the "type" of the local manifest, only the first line is checked
        l_type = "generic"
        with open(local_manifest) as l_man:
            header = l_man.readline()
            if header.startswith("#"):
                l_type = self.getManType(header)

        # open the source manifest, first we assume it is remote
        try:
            s_man = urllib2.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
            source = sourceManifestLocation + "/"
        except ValueError:
            # then it is probably a file
            s_man = open(os.path.join(sourceManifestLocation, sourceManifestName))
            source = sourceManifestLocation + os.path.sep
        except urllib2.URLError:
            # problems connecting to server, perhaps user is behind a proxy or firewall
            print("Error: failed to connect to server.")
            return failed

        # load the source manifest as {path => [hash, size, seenLocal]}
        source_man = {}
        try:
            for line_number, line in enumerate(s_man):
                is_comment = line.startswith("#")
                if line_number == 0:
                    if not is_comment:
                        # no type specified
                        print("Error: type of source manifest is not specified. Is this a valid manifest file?")
                        return failed
                    # get the type of the manifest
                    s_type = self.getManType(line)
                    if s_type != l_type:
                        print("Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type))
                        return failed
                    self.type = l_type
                if is_comment or not line.strip():
                    # blank lines carry no record
                    continue
                fields = line.rstrip().split("\t")
                source_man[fields[0]] = [fields[1], fields[2], False]
        finally:
            # release the connection / file handle on every exit path
            s_man.close()

        # keep lists of modifications
        deleted = []
        modified = []
        with open(local_manifest) as l_man:
            for line in l_man:
                if line.startswith("#") or not line.strip():
                    continue
                fields = line.rstrip().split("\t")
                entry = source_man.get(fields[0])
                if entry is None:
                    # this file has been deleted from the source manifest
                    deleted.append(fields[0])
                    continue
                if entry[0] != fields[1]:
                    # hashes don't match
                    modified.append(fields[0])
                # seen this file
                entry[2] = True

        # anything not seen locally is new, a hash of '-' marks a directory
        addedDirs = []
        addedFiles = []
        for f in sorted(source_man):
            if not source_man[f][2]:
                if source_man[f][0] == '-':
                    addedDirs.append(f)
                else:
                    addedFiles.append(f)

        if printDiffs:
            self._printDiffs(source_man, addedFiles, addedDirs, modified, deleted)

        # the same tuple is returned whether or not the summary was printed
        return (source,
                [(a, source_man[a]) for a in addedFiles],
                [(a, source_man[a]) for a in addedDirs],
                deleted,
                [(m, source_man[m]) for m in modified])

    def _printDiffs(self, source_man, addedFiles, addedDirs, modified, deleted):
        """Print a summary of the differences found by diffManifests"""
        separator = "#------------------------------------------------------"

        if len(addedFiles) > 0:
            new_size = sum(int(source_man[f][1]) for f in addedFiles)
            print(separator)
            print("# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size)))
            for f in addedFiles:
                print("\t".join([self.formatData(int(source_man[f][1])), f]))

        if len(addedDirs) > 0:
            print(separator)
            print("# Source contains %d new folders(s)" % (len(addedDirs)))
            for f in addedDirs:
                print(f)

        if len(modified) > 0:
            modified_size = sum(int(source_man[f][1]) for f in modified)
            print(separator)
            print("# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size)))
            for f in modified:
                print(f)

        if len(deleted) > 0:
            print(separator)
            print("# %d files have been deleted in the source:" % len(deleted))
            for f in deleted:
                print(f)

    def updateManifest(self,
                       localManifestLocation,
                       sourceManifestLocation,
                       localManifestName=None,
                       sourceManifestName=None,
                       prompt=True):
        """Update local files based on remote changes

        New directories are created, new and modified files are fetched from
        the source and the local manifest is then re-created. Files that were
        deleted at the source are not removed locally.

        prompt -- ask for confirmation before downloading; when False the
                  download goes ahead without asking

        Returns False if the manifests could not be compared, True otherwise
        (also when there was nothing to do or the user declined).
        """
        # get the diffs
        source, added_files, added_dirs, deleted, modified = self.diffManifests(localManifestLocation,
                                                                                sourceManifestLocation,
                                                                                localManifestName,
                                                                                sourceManifestName)
        # bail if the diff failed
        if source is None:
            return False

        # no changes by default
        do_down = False
        if added_dirs or added_files or modified:
            if prompt:
                total_size = sum(int(f[1][1]) for f in added_files)
                total_size += sum(int(f[1][1]) for f in modified)
                print("****************************************************************")
                print("%d new file(s) to be downloaded from source" % len(added_files))
                print("%d existing file(s) to be updated" % len(modified))
                print("%s will need to be downloaded" % self.formatData(total_size))
                do_down = self.promptUserDownload()
                if not do_down:
                    print("Download aborted")
            else:
                # the caller asked not to be prompted, so just go ahead
                do_down = True

        if do_down:
            # make the dirs first
            for add in added_dirs:
                self.makeSurePathExists(os.path.abspath(os.path.join(localManifestLocation, add[0])))
            # NOTE(review): paths come straight from the source manifest and
            # are not checked for escaping localManifestLocation
            for entry in added_files + modified:
                full_path = os.path.abspath(os.path.join(localManifestLocation, entry[0]))
                # the target folder may be missing locally even when the
                # manifests agree on it
                self.makeSurePathExists(os.path.dirname(full_path))
                urllib.urlretrieve(source + entry[0], full_path)

            print("(re) creating manifest file (please be patient)")
            self.createManifest(localManifestLocation, manifestName=localManifestName)

        return True

    def getManType(self, line):
        """Work out the manifest type from the first line of the file

        Returns the text between the first pair of "##" markers; an IndexError
        is raised if the line contains no "##".
        """
        return line.rstrip().split("##")[1]

    def formatData(self, amount):
        """Pretty print file sizes

        Amounts below 1024*1024 are printed as whole bytes, larger ones with
        two decimals as MB, GB or TB. Anything of a TB or more is given in TB
        so that a string is always returned.
        """
        if amount < 1024 ** 2:
            return "%d B" % amount
        if amount < 1024 ** 3:
            return "%0.2f MB" % (float(amount) / 1024 ** 2)
        if amount < 1024 ** 4:
            return "%0.2f GB" % (float(amount) / 1024 ** 3)
        return "%0.2f TB" % (float(amount) / 1024 ** 4)

#-----------------------------------------------------------------------------
# FS utilities

    def makeSurePathExists(self, path):
        """Create path (and any missing parents), ignoring "already exists"

        Any OSError other than EEXIST is re-raised.
        """
        try:
            os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

    def promptUserDownload(self):
        """Check that the user is OK with making changes

        Keeps asking until the answer is Y or N (case insensitive) and
        returns True for Y, False for N.
        """
        valid_responses = {'Y':True,'N':False}
        vrs = ",".join([x.lower() for x in valid_responses.keys()])
        # the long explanation is only shown the first time around
        minimal = False
        while True:
            if minimal:
                option = raw_input("Download? ("+vrs+") : ").upper()
            else:
                option = raw_input("Confirm you want to download this data\n" \
                                   "Changes *WILL* be permanent\n" \
                                   "Continue? ("+vrs+") : ").upper()
            if option in valid_responses:
                print("****************************************************************")
                return valid_responses[option]
            print("ERROR: unrecognised choice '"+option+"'")
            minimal = True

    def walk(self, parents, full_path, rel_path, dirs, files, skipFile=__MANIFEST__):
        """Recursive walk through directory tree

        Appends one file entity to self.files for every file and directory.

        parents   -- stack of directory entities, the last one is the parent
                     of everything found at this level
        full_path -- path of the directory being processed
        rel_path  -- the same directory relative to the root of the walk
        dirs      -- names of the sub directories of full_path
        files     -- names of the files in full_path
        skipFile  -- file name to leave out at every level of the walk
        """
        # first do files here
        for f in files:
            if f == skipFile:
                continue
            path = os.path.join(full_path, f)
            self.files.append(FE(f,
                                 rel_path,
                                 parents[-1],
                                 self.hashfile(path),
                                 os.path.getsize(path)
                                 )
                              )
        for d in dirs:
            # directories are recorded with "-" as hash and 0 as size
            tmp_fe = FE(d, rel_path, parents[-1], "-", 0)
            self.files.append(tmp_fe)
            # everything below d gets tmp_fe as its parent
            parents.append(tmp_fe)
            new_full_path = os.path.join(full_path, d)
            new_dirs, new_files = self.listdir(new_full_path)[:2]
            # hand skipFile down so a custom manifest name is honoured below
            # the root as well
            self.walk(parents,
                      new_full_path,
                      os.path.join(rel_path, d),
                      new_dirs,
                      new_files,
                      skipFile=skipFile)
            parents.pop()

    def listdir(self, path):
        """List dirs, files etc in path (one dir deep)

        Returns (dirs, files, links) as lists of names. A name only ends up
        in links if it is a link for which both os.path.isdir and
        os.path.isfile are false; other names that match none of the three
        tests are dropped.
        """
        dirs, files, links = [], [], []
        for name in os.listdir(path):
            path_name = os.path.join(path, name)
            if os.path.isdir(path_name):
                dirs.append(name)
            elif os.path.isfile(path_name):
                files.append(name)
            elif os.path.islink(path_name):
                links.append(name)
        return dirs, files, links

    def hashfile(self, fileName, blocksize=65536):
        """Hash a file and return the digest

        The file is read in chunks of blocksize bytes and the hex encoded
        SHA-256 digest of its content is returned.
        """
        hasher = hashlib.sha256()
        # binary mode: the digest must be computed over the raw bytes, text
        # mode may translate line endings on some platforms
        with open(fileName, 'rb') as fh:
            for buf in iter(lambda: fh.read(blocksize), b''):
                hasher.update(buf)
        return hasher.hexdigest()
368
369###############################################################################
370###############################################################################
371###############################################################################
372###############################################################################
373