#!/usr/bin/env python
###############################################################################
#                                                                             #
#    manifestManager.py                                                       #
#                                                                             #
#    Work with online data manifests (creating / syncing / validating)        #
#                                                                             #
#    Copyright (C) Michael Imelfort                                           #
#                                                                             #
###############################################################################
#                                                                             #
#    This program is free software: you can redistribute it and/or modify     #
#    it under the terms of the GNU General Public License as published by     #
#    the Free Software Foundation, either version 3 of the License, or        #
#    (at your option) any later version.                                      #
#                                                                             #
#    This program is distributed in the hope that it will be useful,          #
#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
#    GNU General Public License for more details.                             #
#                                                                             #
#    You should have received a copy of the GNU General Public License        #
#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
#                                                                             #
###############################################################################

__author__ = "Michael Imelfort"
__copyright__ = "Copyright 2014"
__credits__ = ["Michael Imelfort"]
__license__ = "GPLv3"
__maintainer__ = "Michael Imelfort"
__email__ = "mike@mikeimelfort.com"
__version__ = "0.2.3"

###############################################################################
###############################################################################
###############################################################################
###############################################################################

# default file name of a manifest inside the directory it describes
__MANIFEST__ = ".dmanifest"

###############################################################################
###############################################################################
###############################################################################
###############################################################################

# system includes
import os
import hashlib
import urllib2
import urllib
import shutil
import errno

# local includes
from checkm.fileEntity import FileEntity as FE

###############################################################################
###############################################################################
###############################################################################
###############################################################################

class ManifestManager(object):
    """Use this interface for storing and managing file and paths

    A manifest is a tab separated text file. Its first line is a header of
    the form "##<type>##\t<description>"; every following line describes one
    file or folder as "<path>\t<hash>\t<size>" where folders carry the hash
    "-". Manifests can be created from a directory tree, compared against a
    source manifest (local path or URL) and used to pull changed files.
    """
    def __init__(self, manType=None, timeout=30):
        """Set up an empty manager.

        manType - label written into / expected in the manifest header,
                  "generic" when not given
        timeout - timeout in seconds passed to urllib2.urlopen when a
                  source manifest is fetched
        """
        self.timeout = timeout

        self.files = []
        if manType is not None:
            self.type = manType
        else:
            self.type = "generic"

    def createManifest(self, path, manifestName=None):
        """inventory all files in path and create a manifest file

        path         - root folder to inventory; the manifest is written here
        manifestName - file name of the manifest, defaults to __MANIFEST__
        """
        if manifestName is None:
            manifestName = __MANIFEST__

        # start from a clean inventory so that calling this method more than
        # once on the same instance does not write duplicated entries
        self.files = []

        # make the root file entity
        root_path = os.path.abspath(path)
        root_fe = FE('root', ".", None, "-", 0)
        self.files.append(root_fe)

        # now make all the ones below
        parents = [root_fe]
        dirs, files = self.listdir(path)[:2]
        self.walk(parents, root_path, '', dirs, files, skipFile=manifestName)

        with open(os.path.join(path, manifestName), 'w') as man_fh:
            # print the header
            man_fh.write("##%s##\tData manifest created by ScreamingBackpack version %s\n" % (self.type, __version__))
            for f in self.files:
                # the root entity is the only one without a parent; it is
                # not written to the manifest
                if f.parent is not None:
                    man_fh.write("%s\n" % f)

    def diffManifests(self,
                      localManifestLocation,
                      sourceManifestLocation,
                      localManifestName=None,
                      sourceManifestName=None,
                      printDiffs=False):
        """check for any differences between two manifests

        sourceManifestLocation is first tried as a URL; when urllib2 rejects
        it with a ValueError it is treated as a local path instead.

        When printDiffs is False returns the tuple
            (source, addedFiles, addedDirs, deleted, modified)
        where source is the URL / path prefix the files live under, deleted
        is a list of paths and the other lists hold (path, [hash, size, seen])
        pairs taken from the source manifest. On any error a tuple of five
        None values is returned.

        When printDiffs is True a summary is printed instead and nothing
        (None) is returned.
        """
        if localManifestName is None:
            localManifestName = __MANIFEST__
        if sourceManifestName is None:
            sourceManifestName = __MANIFEST__

        # get the "type" of the local manifest
        l_type = "generic"
        with open(os.path.join(localManifestLocation, localManifestName)) as l_man:
            for line in l_man:
                if line[0] == "#":
                    l_type = self.getManType(line)
                break

        # load the source manifest
        s_type = "generic"
        source_man = {}
        source = ""
        # first we assume it is remote
        try:
            s_man = urllib2.urlopen(sourceManifestLocation + "/" + sourceManifestName, None, self.timeout)
            source = sourceManifestLocation + "/"
        except ValueError:
            # then it is probably a file
            s_man = open(os.path.join(sourceManifestLocation, sourceManifestName))
            source = os.path.join(sourceManifestLocation) + os.path.sep
        except urllib2.URLError:
            # problems connecting to server, perhaps user is behind a proxy or firewall
            print("Error: failed to connect to server.")
            return (None, None, None, None, None)

        # the handle is a network connection or an open file: make sure it
        # is released on every path out of the parsing loop
        try:
            first_line = True
            for line in s_man:
                if first_line:
                    first_line = False
                    if line[0] == "#":
                        # get the type of the manifest
                        s_type = self.getManType(line)
                        if s_type != l_type:
                            print("Error: type of source manifest (%s) does not match type of local manifest (%s)" % (s_type, l_type))
                            return (None, None, None, None, None)
                    else:
                        # no type specified
                        print("Error: type of source manifest is not specified. Is this a valid manifest file?")
                        return (None, None, None, None, None)
                    self.type = l_type

                if not line.strip():
                    # tolerate blank lines rather than failing on the split
                    continue

                if line[0] != "#":
                    fields = line.rstrip().split("\t")
                    # set the dict up as {path => [hash, size, seenLocal]
                    source_man[fields[0]] = [fields[1], fields[2], False]
        finally:
            s_man.close()

        # keep lists of modifications
        deleted = []
        addedDirs = []
        addedFiles = []
        modified = []

        with open(os.path.join(localManifestLocation, localManifestName)) as l_man:
            for line in l_man:
                if not line.strip() or line[0] == "#":
                    continue
                fields = line.rstrip().split("\t")
                try:
                    if source_man[fields[0]][0] != fields[1]:
                        # hashes don't match
                        modified.append(fields[0])
                    # seen this file
                    source_man[fields[0]][2] = True
                except KeyError:
                    # this file has been deleted from the source manifest
                    deleted.append(fields[0])

        # check for new files: anything in the source never seen locally
        for f in source_man:
            if not source_man[f][2]:
                if source_man[f][0] == '-':
                    addedDirs.append(f)
                else:
                    addedFiles.append(f)

        if printDiffs:
            new_size = 0
            modified_size = 0
            for f in addedFiles:
                new_size += int(source_man[f][1])
            for f in modified:
                modified_size += int(source_man[f][1])

            if addedFiles:
                print("#------------------------------------------------------")
                print("# Source contains %d new file(s) (%s)" % (len(addedFiles), self.formatData(new_size)))
                for f in addedFiles:
                    print("\t".join([self.formatData(int(source_man[f][1])), f]))

            if addedDirs:
                print("#------------------------------------------------------")
                print("# Source contains %d new folders(s)" % (len(addedDirs)))
                for f in addedDirs:
                    print(f)

            if modified:
                print("#------------------------------------------------------")
                print("# Source contains %d modified file(s) (%s)" % (len(modified), self.formatData(modified_size)))
                for f in modified:
                    print(f)

            if deleted:
                print("#------------------------------------------------------")
                print("# %d files have been deleted in the source:" % len(deleted))
                for f in deleted:
                    print(f)
        else:
            return (source,
                    [(a, source_man[a]) for a in addedFiles],
                    [(a, source_man[a]) for a in addedDirs],
                    deleted,
                    [(m, source_man[m]) for m in modified])

    def updateManifest(self,
                       localManifestLocation,
                       sourceManifestLocation,
                       localManifestName=None,
                       sourceManifestName=None,
                       prompt=True):
        """Update local files based on remote changes

        New folders are created, new and modified files are fetched from the
        source and the local manifest is rebuilt afterwards. Files deleted
        in the source are left untouched locally.

        prompt - ask the user for confirmation before downloading; when
                 False the download goes ahead without asking

        Returns False if the manifests could not be compared, True otherwise
        (including when there was nothing to do or the user declined).
        """
        # get the diffs
        source, added_files, added_dirs, deleted, modified = self.diffManifests(localManifestLocation,
                                                                                sourceManifestLocation,
                                                                                localManifestName,
                                                                                sourceManifestName)
        # bail if the diff failed
        if source is None:
            return False

        total_size = 0
        for f in added_files:
            total_size += int(f[1][1])
        for f in modified:
            total_size += int(f[1][1])

        # only act when there is something to fetch; without a prompt the
        # download is implicitly confirmed
        do_down = total_size != 0
        if do_down and prompt:
            print("****************************************************************")
            print("%d new file(s) to be downloaded from source" % len(added_files))
            print("%d existing file(s) to be updated" % len(modified))
            print("%s will need to be downloaded" % self.formatData(total_size))
            do_down = self.promptUserDownload()
            if not do_down:
                print("Download aborted")

        if do_down:
            for add in added_dirs:
                # make the dirs first
                full_path = os.path.abspath(os.path.join(localManifestLocation, add[0]))
                self.makeSurePathExists(full_path)
            for entry in list(added_files) + list(modified):
                full_path = os.path.abspath(os.path.join(localManifestLocation, entry[0]))
                # the target folder may be missing on disk even though it is
                # listed in the local manifest
                self.makeSurePathExists(os.path.dirname(full_path))
                urllib.urlretrieve(source+entry[0], full_path)

            print("(re) creating manifest file (please be patient)")
            self.createManifest(localManifestLocation, manifestName=localManifestName)

        return True

    def getManType(self, line):
        """Work out the manifest type from the first line of the file

        The header looks like "##<type>##\t<description>", so the type is
        the second piece when splitting on "##".
        """
        return line.rstrip().split("##")[1]

    def formatData(self, amount):
        """Pretty print file sizes

        Amounts below 1 MB are reported in bytes, then MB, GB and finally
        TB for everything larger.
        """
        if amount < 1024*1024:
            return "%d B" % amount
        elif amount < 1024*1024*1024:
            return "%0.2f MB" % (float(amount)/(1024.*1024.))
        elif amount < 1024*1024*1024*1024:
            return "%0.2f GB" % (float(amount)/(1024.*1024.*1024.))
        # no upper bound: very large amounts are still reported in TB
        # instead of silently returning None
        return "%0.2f TB" % (float(amount)/(1024.*1024.*1024.*1024.))

#-----------------------------------------------------------------------------
# FS utilities

    def makeSurePathExists(self, path):
        """Create path (and any missing parents), ignoring 'already exists'"""
        try:
            os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                raise

    def promptUserDownload(self):
        """Check that the user is OK with making changes

        Keeps asking until the answer is one of the valid responses and
        returns True for yes, False for no.
        """
        minimal = False
        valid_responses = {'Y':True,'N':False}
        vrs = ",".join([x.lower() for x in valid_responses.keys()])
        while True:
            if minimal:
                # short form of the question after a bad answer
                option = raw_input("Download? ("+vrs+") : ").upper()
            else:
                option = raw_input("Confirm you want to download this data\n" \
                                   "Changes *WILL* be permanent\n" \
                                   "Continue? ("+vrs+") : ").upper()
            if option in valid_responses:
                print("****************************************************************")
                return valid_responses[option]
            print("ERROR: unrecognised choice '"+option+"'")
            minimal = True

    def walk(self, parents, full_path, rel_path, dirs, files, skipFile=__MANIFEST__):
        """recursive walk through directory tree

        parents   - stack of FileEntity objects, the last one is the folder
                    currently being inventoried
        full_path - absolute path of the current folder
        rel_path  - path of the current folder relative to the root
        dirs      - names of the sub folders of the current folder
        files     - names of the files in the current folder
        skipFile  - file name to leave out of the inventory (the manifest)

        Every file and folder found is appended to self.files.
        """
        # first do files here
        for f in files:
            if f != skipFile:
                path = os.path.join(full_path, f)
                self.files.append(FE(f,
                                     rel_path,
                                     parents[-1],
                                     self.hashfile(path),
                                     os.path.getsize(path)
                                     )
                                  )
        for d in dirs:
            # the walk will go into these dirs first
            tmp_fe = FE(d, rel_path, parents[-1], "-", 0)
            self.files.append(tmp_fe)
            parents.append(tmp_fe)
            new_full_path = os.path.join(full_path, d)
            new_rel_path = os.path.join(rel_path, d)
            new_dirs, new_files = self.listdir(new_full_path)[:2]
            # hand skipFile down so a custom manifest name is treated the
            # same way at every level of the tree
            self.walk(parents, new_full_path, new_rel_path, new_dirs, new_files, skipFile=skipFile)
            parents.pop()

    def listdir(self, path):
        """List dirs, files etc in path (one dir deep)

        Returns the tuple (dirs, files, links) of entry names. isdir/isfile
        are tested before islink, so only links matching neither test end
        up in links.
        """
        dirs, files, links = [], [], []
        for name in os.listdir(path):
            path_name = os.path.join(path, name)
            if os.path.isdir(path_name):
                dirs.append(name)
            elif os.path.isfile(path_name):
                files.append(name)
            elif os.path.islink(path_name):
                links.append(name)
        return dirs, files, links

    def hashfile(self, fileName, blocksize=65536):
        """Hash a file and return the digest

        fileName  - path of the file to hash
        blocksize - number of bytes read per chunk

        Returns the hex encoded SHA-256 digest of the file contents.
        """
        hasher = hashlib.sha256()
        # binary mode: the digest must cover the raw bytes, untouched by
        # any newline translation
        with open(fileName, 'rb') as fh:
            buf = fh.read(blocksize)
            while len(buf) > 0:
                hasher.update(buf)
                buf = fh.read(blocksize)
        return hasher.hexdigest()

###############################################################################
###############################################################################
###############################################################################
###############################################################################