"""
Resource management methods.
"""
import sys
import os
import re
import zipfile
import tarfile
import io

__all__ = ["open_zipfile", "open_tarfile", "open_url", "Resources"]

# Python 3.x workarounds for the changed urllib modules.
if sys.version_info[0] >= 3:
    import urllib.parse as urlparse
    import urllib.request as urllib2
else:
    import urlparse
    import urllib2


def open_zipfile(archive, filename, directory=None):
    """Opens and reads a certain file from a ZIP archive.

    Opens and reads a certain file from a ZIP archive. The result is
    returned as io.BytesIO stream. archive can be a path to a ZIP file
    or an already opened zipfile.ZipFile; only archives opened by this
    function are closed by it. filename can be a relative or absolute
    path within the ZIP archive. The optional directory argument can be
    used to supply a relative directory path, under which filename will
    be searched.

    If the filename could not be found, a KeyError will be raised.
    Raises a TypeError, if archive is not a valid ZIP archive.
    """
    opened = False

    if not isinstance(archive, zipfile.ZipFile):
        if not zipfile.is_zipfile(archive):
            raise TypeError("passed file does not seem to be a ZIP archive")
        archive = zipfile.ZipFile(archive, 'r')
        opened = True

    apath = filename
    if directory:
        apath = "%s/%s" % (directory, filename)

    try:
        # ZipFile.read() opens, reads and closes the member in one step, so
        # no member handle is left dangling.
        return io.BytesIO(archive.read(apath))
    finally:
        if opened:
            archive.close()


def open_tarfile(archive, filename, directory=None, ftype=None):
    """Opens and reads a certain file from a TAR archive.

    Opens and reads a certain file from a TAR archive. The result is
    returned as io.BytesIO stream. archive can be a path to a TAR file
    or an already opened tarfile.TarFile; only archives opened by this
    function are closed by it. filename can be a relative or absolute
    path within the TAR archive. The optional directory argument can be
    used to supply a relative directory path, under which filename will
    be searched.

    ftype is used to supply additional compression information, in case
    the system cannot determine the compression type itself, and can be
    either 'gz' for gzip compression or 'bz2' for bzip2 compression.

    Note:

      If ftype is supplied, the compression mode will be enforced for
      opening and reading. It is ignored, if archive is an already
      opened tarfile.TarFile.

    If the filename could not be found or does not denote a readable
    file (e.g. it is a directory entry), a KeyError will be raised.

    Raises a TypeError, if archive is not a valid TAR archive or if ftype
    is not a valid value of ('gz', 'bz2').
    """
    opened = False

    mode = 'r'
    if ftype:
        if ftype not in ('gz', 'bz2'):
            raise TypeError("invalid TAR compression type")
        mode = "r:%s" % ftype

    if not isinstance(archive, tarfile.TarFile):
        if not tarfile.is_tarfile(archive):
            raise TypeError("passed file does not seem to be a TAR archive")
        archive = tarfile.open(archive, mode)
        opened = True

    apath = filename
    if directory:
        apath = "%s/%s" % (directory, filename)

    try:
        member = archive.extractfile(apath)
        if member is None:
            # extractfile() yields None for members without file content
            # (directories, devices, ...); report that as a lookup failure
            # instead of crashing on None.read().
            raise KeyError("'%s' is not a readable file in the archive"
                           % apath)
        try:
            return io.BytesIO(member.read())
        finally:
            member.close()
    finally:
        if opened:
            archive.close()


def open_url(filename, basepath=None):
    """Opens and reads a certain file from a web or remote location.

    Opens and reads a certain file from a web or remote location. This
    function utilizes the urllib2 module (urllib.request on Python 3.x),
    which means that it is restricted to the types of remote locations
    supported by it. The object returned by urlopen() is passed through
    unchanged.

    basepath can be used to supply an additional location prefix, which
    is combined with filename via urljoin().
    """
    url = filename
    if basepath:
        url = urlparse.urljoin(basepath, filename)
    return urllib2.urlopen(url)


def _open_archive_member(archive, ftype, pathname):
    """Reads pathname from the archive, choosing the reader by the
    archive type tag stored in Resources.files.

    Raises a ValueError, if ftype is not a known archive type tag.
    """
    if ftype == 'zip':
        return open_zipfile(archive, pathname)
    if ftype == 'tar':
        return open_tarfile(archive, pathname)
    if ftype == 'tarbz2':
        return open_tarfile(archive, pathname, ftype='bz2')
    if ftype == 'targz':
        return open_tarfile(archive, pathname, ftype='gz')
    raise ValueError("unsupported archive type")


class Resources(object):
    """The Resources class manages a set of file resources and eases
    accessing them by using relative paths, scanning archives
    automatically and so on.

    Resources are indexed by their base name (without any directory
    part) in the files dictionary. Each value is a tuple of the form
    (archive path or None, archive type or None, path of the file).
    Adding a resource with an already known base name replaces the
    earlier entry.
    """
    def __init__(self, path=None, subdir=None, excludepattern=None):
        """Creates a new resource container instance.

        If path is provided, the resource container will scan the path
        and add all found files to itself by invoking
        scan(path, subdir, excludepattern).
        """
        self.files = {}
        if path:
            self.scan(path, subdir, excludepattern)

    def _scanzip(self, filename):
        """Scans the passed ZIP archive and indexes all the files
        contained by it.

        Entries without a base name (directory entries ending with a
        slash) are skipped.

        Raises a TypeError, if filename is not a valid ZIP archive.
        """
        if not zipfile.is_zipfile(filename):
            raise TypeError("file '%s' is not a valid ZIP archive" % filename)
        archname = os.path.abspath(filename)
        zipf = zipfile.ZipFile(filename, 'r')
        try:
            for path in zipf.namelist():
                fname = os.path.split(path)[1]
                if fname:
                    self.files[fname] = (archname, 'zip', path)
        finally:
            zipf.close()

    def _scantar(self, filename, ftype=None):
        """Scans the passed TAR archive and indexes all the files
        contained by it.

        Directory entries are skipped, since they cannot be read as
        files later on.

        ftype can be 'gz' or 'bz2' to enforce the compression mode for
        opening the archive.

        Raises a TypeError, if filename is not a valid TAR archive or if
        ftype is not a valid value of ('gz', 'bz2').
        """
        if not tarfile.is_tarfile(filename):
            raise TypeError("file '%s' is not a valid TAR archive" % filename)
        mode = 'r'
        archtype = 'tar'
        if ftype:
            if ftype not in ('gz', 'bz2'):
                raise TypeError("invalid TAR compression type")
            mode = "r:%s" % ftype
            # The type tag records the enforced compression, so that get()
            # reopens the archive the same way.
            archtype = 'tar%s' % ftype
        archname = os.path.abspath(filename)
        tar = tarfile.open(filename, mode)
        try:
            for member in tar.getmembers():
                if member.isdir():
                    continue
                fname = os.path.split(member.name)[1]
                if fname:
                    self.files[fname] = (archname, archtype, member.name)
        finally:
            tar.close()

    def add(self, filename):
        """Adds a file to the Resources container.

        If the file is recognized as a ZIP or TAR archive (determined by
        inspecting the file itself), its contents will be scanned and
        indexed. Otherwise the file itself will be added.

        Raises a ValueError, if filename does not exist.
        """
        if not os.path.exists(filename):
            raise ValueError("invalid file path")
        if zipfile.is_zipfile(filename):
            self.add_archive(filename)
        elif tarfile.is_tarfile(filename):
            self.add_archive(filename, 'tar')
        else:
            self.add_file(filename)

    def add_file(self, filename):
        """Adds a file to the Resources container.

        This will only add the passed file and does not scan an archive
        or check a stream for availability.

        Raises a ValueError, if filename does not exist or has no base
        name.
        """
        if not os.path.exists(filename):
            raise ValueError("invalid file path")
        abspath = os.path.abspath(filename)
        fname = os.path.split(abspath)[1]
        if not fname:
            raise ValueError("invalid file path")
        self.files[fname] = (None, None, abspath)

    def add_archive(self, filename, typehint='zip'):
        """Adds an archive file to the Resources container.

        This will scan the passed archive and add its contents to the
        list of available resources. typehint denotes the archive type
        and can be one of 'zip', 'tar', 'tarbz2' or 'targz'.

        Raises a ValueError, if filename does not exist or if typehint
        is not a supported archive type.
        """
        if not os.path.exists(filename):
            raise ValueError("invalid file path")
        if typehint == 'zip':
            self._scanzip(filename)
        elif typehint == 'tar':
            self._scantar(filename)
        elif typehint == 'tarbz2':
            self._scantar(filename, 'bz2')
        elif typehint == 'targz':
            self._scantar(filename, 'gz')
        else:
            raise ValueError("unsupported archive type")

    def get(self, filename):
        """Gets a specific file from the Resources.

        The contents of the file are read completely and returned as
        io.BytesIO stream.

        Raises a KeyError, if filename could not be found.
        """
        archive, ftype, pathname = self.files[filename]
        if archive:
            return _open_archive_member(archive, ftype, pathname)
        with open(pathname, 'rb') as stream:
            return io.BytesIO(stream.read())

    def get_filelike(self, filename):
        """Like get(), but tries to return the original file handle, if
        possible.

        If the passed filename is only available within an archive, an
        io.BytesIO instance will be returned. Otherwise the file is
        opened in binary read mode and the caller is responsible for
        closing the returned handle.

        Raises a KeyError, if filename could not be found.
        """
        archive, ftype, pathname = self.files[filename]
        if archive:
            return _open_archive_member(archive, ftype, pathname)
        return open(pathname, 'rb')

    def get_path(self, filename):
        """Gets the path of the passed filename.

        If filename is only available within an archive, a string in
        the form 'filename@archivename' will be returned.

        Raises a KeyError, if filename could not be found.
        """
        archive, ftype, pathname = self.files[filename]
        if archive:
            return '%s@%s' % (pathname, archive)
        return pathname

    def scan(self, path, subdir=None, excludepattern=None):
        """Scans a path and adds all found files to the Resources
        container.

        Scans a path and adds all found files to the Resources
        container. If a file is a supported (ZIP or TAR) archive, its
        contents will be indexed and added automatically.

        The method will consider the directory part (os.path.dirname) of
        the provided path as path to scan, if the path is not a
        directory. If subdir is provided, it will be appended to the
        path and used as starting point for adding files to the
        Resources container.

        excludepattern can be a regular expression to skip directories,
        which match the pattern. The pattern is applied with re.match()
        to the absolute path of each visited directory. Only the files
        directly within a matching directory are skipped; its
        subdirectories are still visited and checked on their own.

        Raises a ValueError, if the path to scan does not exist.
        """
        match = None
        if excludepattern:
            match = re.compile(excludepattern).match
        join = os.path.join
        add = self.add
        abspath = os.path.abspath(path)
        if not os.path.exists(abspath):
            raise ValueError("invalid path '%s'" % abspath)
        if not os.path.isdir(abspath):
            abspath = os.path.dirname(abspath)
        if subdir is not None:
            abspath = join(abspath, subdir)
        if not os.path.exists(abspath):
            raise ValueError("invalid path '%s'" % abspath)
        for (pdir, dirnames, filenames) in os.walk(abspath):
            if match and match(pdir) is not None:
                continue
            for fname in filenames:
                add(join(pdir, fname))