1"""
2Resource management methods.
3"""
4import sys
5import os
6import re
7import zipfile
8import tarfile
9import io
10
11__all__ = ["open_zipfile", "open_tarfile", "open_url", "Resources"]
12
13# Python 3.x workarounds for the changed urllib modules.
14if sys.version_info[0] >= 3:
15    import urllib.parse as urlparse
16    import urllib.request as urllib2
17else:
18    import urlparse
19    import urllib2
20
21
def open_zipfile(archive, filename, directory=None):
    """Opens and reads a certain file from a ZIP archive.

    Opens and reads a certain file from a ZIP archive. The whole member
    is read into memory and returned as an io.BytesIO stream. filename
    can be a relative or absolute path within the ZIP archive. The
    optional directory argument can be used to supply a relative
    directory path, under which filename will be searched; it is joined
    with filename using a forward slash.

    archive can either be an already opened zipfile.ZipFile instance or
    whatever zipfile.is_zipfile() and zipfile.ZipFile() accept. An
    archive that is opened by this function is closed again before
    returning; an already opened zipfile.ZipFile is left open for the
    caller.

    If the filename could not be found, a KeyError will be raised.
    Raises a TypeError, if archive is not a valid ZIP archive.
    """
    opened = False

    if not isinstance(archive, zipfile.ZipFile):
        if not zipfile.is_zipfile(archive):
            raise TypeError("passed file does not seem to be a ZIP archive")
        archive = zipfile.ZipFile(archive, 'r')
        opened = True

    apath = filename
    if directory:
        apath = "%s/%s" % (directory, filename)

    try:
        member = archive.open(apath)
        try:
            # Copy the content, so the result stays usable after the
            # member stream and the archive have been closed.
            return io.BytesIO(member.read())
        finally:
            member.close()
    finally:
        # Only close what was opened here.
        if opened:
            archive.close()
55
56
def open_tarfile(archive, filename, directory=None, ftype=None):
    """Opens and reads a certain file from a TAR archive.

    Opens and reads a certain file from a TAR archive. The whole member
    is read into memory and returned as an io.BytesIO stream. filename
    can be a relative or absolute path within the TAR archive. The
    optional directory argument can be used to supply a relative
    directory path, under which filename will be searched; it is joined
    with filename using a forward slash.

    archive can either be an already opened tarfile.TarFile instance or
    whatever tarfile.is_tarfile() and tarfile.open() accept. An archive
    that is opened by this function is closed again before returning;
    an already opened tarfile.TarFile is left open for the caller.

    ftype is used to supply additional compression information, in case
    the system cannot determine the compression type itself, and can be
    either 'gz' for gzip compression or 'bz2' for bzip2 compression.

    Note:

      If ftype is supplied, the compression mode will be enforced for
      opening and reading. It has no effect, if archive is an already
      opened tarfile.TarFile.

    If the archive member exists, but does not provide any readable
    data (tarfile's extractfile() returns None, e.g. for a directory
    entry), None will be returned.

    If the filename could not be found, a KeyError will be raised.
    Raises a TypeError, if archive is not a valid TAR archive or if
    ftype is not a valid value of ('gz', 'bz2').
    """
    opened = False

    mode = 'r'
    if ftype:
        if ftype not in ('gz', 'bz2'):
            raise TypeError("invalid TAR compression type")
        mode = "r:%s" % ftype

    if not isinstance(archive, tarfile.TarFile):
        if not tarfile.is_tarfile(archive):
            raise TypeError("passed file does not seem to be a TAR archive")
        archive = tarfile.open(archive, mode)
        opened = True

    apath = filename
    if directory:
        apath = "%s/%s" % (directory, filename)

    try:
        member = archive.extractfile(apath)
        if member is None:
            # Nothing to read from this member; do not crash on
            # calling read() on None.
            return None
        try:
            # Copy the content, so the result stays usable after the
            # member stream and the archive have been closed.
            return io.BytesIO(member.read())
        finally:
            member.close()
    finally:
        # Only close what was opened here.
        if opened:
            archive.close()
108
109
def open_url(filename, basepath=None):
    """Opens a certain file from a web or remote location.

    The location is opened through urllib2.urlopen(), so only the types
    of remote locations supported by that module can be used. The
    object returned by urlopen() is handed back to the caller as is.

    If basepath is provided, it is used as location prefix and combined
    with filename via urlparse.urljoin(). Otherwise filename is used as
    the complete location.
    """
    target = urlparse.urljoin(basepath, filename) if basepath else filename
    return urllib2.urlopen(target)
123
124
class Resources(object):
    """The Resources class manages a set of file resources and eases
    accessing them by using relative paths, scanning archives
    automatically and so on.

    All known resources are kept in the files dictionary. Its keys are
    the plain file names (without any directory part), its values are
    (archive, type, path) tuples. For files on disk, archive and type
    are None and path is the absolute file path. For files within an
    archive, archive is the absolute path of the archive, type is one
    of 'zip', 'tar', 'targz' or 'tarbz2' and path is the member path
    within the archive. A file name that is added more than once
    replaces the previously added entry.
    """
    def __init__(self, path=None, subdir=None, excludepattern=None):
        """Creates a new resource container instance.

        If path is provided, the resource container will scan the path
        and add all found files to itself by invoking
        scan(path, subdir, excludepattern).
        """
        self.files = {}
        if path:
            self.scan(path, subdir, excludepattern)

    def _scanzip(self, filename):
        """Scans the passed ZIP archive and indexes all the files
        contained by it.

        Entries without a file name part (directory entries ending with
        a slash) are skipped.

        Raises a TypeError, if filename is not a valid ZIP archive.
        """
        if not zipfile.is_zipfile(filename):
            raise TypeError("file '%s' is not a valid ZIP archive" % filename)
        archname = os.path.abspath(filename)
        zipf = zipfile.ZipFile(filename, 'r')
        try:
            for path in zipf.namelist():
                fname = os.path.split(path)[1]
                if fname:
                    self.files[fname] = (archname, 'zip', path)
        finally:
            # Release the archive, even if indexing failed.
            zipf.close()

    def _scantar(self, filename, ftype=None):
        """Scans the passed TAR archive and indexes all the files
        contained by it.

        ftype can be 'gz' or 'bz2' to enforce the compression mode for
        opening the archive. Directory entries are skipped, so that
        only entries, which can be retrieved later on, are indexed.

        Raises a TypeError, if filename is not a valid TAR archive or
        if ftype is not a valid value of ('gz', 'bz2').
        """
        if not tarfile.is_tarfile(filename):
            raise TypeError("file '%s' is not a valid TAR archive" % filename)
        mode = 'r'
        archtype = 'tar'
        if ftype:
            if ftype not in ('gz', 'bz2'):
                raise TypeError("invalid TAR compression type")
            mode = "r:%s" % ftype
            archtype = 'tar%s' % ftype
        archname = os.path.abspath(filename)
        tar = tarfile.open(filename, mode)
        try:
            for member in tar.getmembers():
                # TAR directory names do not end with a slash, so they
                # have to be filtered explicitly to match _scanzip().
                if member.isdir():
                    continue
                fname = os.path.split(member.name)[1]
                if fname:
                    self.files[fname] = (archname, archtype, member.name)
        finally:
            # Release the archive, even if indexing failed.
            tar.close()

    def _open_archived(self, archive, ftype, pathname):
        """Reads pathname from the passed archive of the type ftype and
        returns the result of the matching open_*file() function.

        Raises a ValueError, if ftype is not a supported archive type.
        """
        if ftype == 'zip':
            return open_zipfile(archive, pathname)
        if ftype == 'tar':
            return open_tarfile(archive, pathname)
        if ftype == 'tarbz2':
            return open_tarfile(archive, pathname, ftype='bz2')
        if ftype == 'targz':
            return open_tarfile(archive, pathname, ftype='gz')
        raise ValueError("unsupported archive type")

    def add(self, filename):
        """Adds a file to the Resources container.

        If the file is detected to be a ZIP or TAR archive, its
        contents will be scanned and indexed instead of the archive
        itself. Any other file is added directly.

        Raises a ValueError, if filename does not exist.
        """
        if not os.path.exists(filename):
            raise ValueError("invalid file path")
        if zipfile.is_zipfile(filename):
            self.add_archive(filename)
        elif tarfile.is_tarfile(filename):
            self.add_archive(filename, 'tar')
        else:
            self.add_file(filename)

    def add_file(self, filename):
        """Adds a file to the Resources container.

        This will only add the passed file and do not scan an archive or
        check a stream for availability.

        Raises a ValueError, if filename does not exist or does not
        have a file name part.
        """
        if not os.path.exists(filename):
            raise ValueError("invalid file path")
        abspath = os.path.abspath(filename)
        fname = os.path.split(abspath)[1]
        if not fname:
            raise ValueError("invalid file path")
        self.files[fname] = (None, None, abspath)

    def add_archive(self, filename, typehint='zip'):
        """Adds an archive file to the Resources container.

        This will scan the passed archive and add its contents to the
        list of available resources. typehint denotes the archive type
        and can be one of 'zip', 'tar', 'tarbz2' or 'targz'.

        Raises a ValueError, if filename does not exist or if typehint
        is not a supported archive type.
        """
        if not os.path.exists(filename):
            raise ValueError("invalid file path")
        if typehint == 'zip':
            self._scanzip(filename)
        elif typehint == 'tar':
            self._scantar(filename)
        elif typehint == 'tarbz2':
            self._scantar(filename, 'bz2')
        elif typehint == 'targz':
            self._scantar(filename, 'gz')
        else:
            raise ValueError("unsupported archive type")

    def get(self, filename):
        """Gets a specific file from the Resources.

        The content of a file on disk is read completely and returned
        as io.BytesIO stream. Files within an archive are retrieved via
        open_zipfile() or open_tarfile().

        Raises a KeyError, if filename could not be found.
        """
        archive, ftype, pathname = self.files[filename]
        if archive:
            return self._open_archived(archive, ftype, pathname)
        # The with block closes the file, even if reading fails.
        with open(pathname, 'rb') as stream:
            return io.BytesIO(stream.read())

    def get_filelike(self, filename):
        """Like get(), but tries to return the original file handle, if
        possible.

        For a file on disk, a file object opened in binary read mode is
        returned; closing it is up to the caller. If the passed
        filename is only available within an archive, the result of
        open_zipfile() or open_tarfile() will be returned.

        Raises a KeyError, if filename could not be found.
        """
        archive, ftype, pathname = self.files[filename]
        if archive:
            return self._open_archived(archive, ftype, pathname)
        return open(pathname, 'rb')

    def get_path(self, filename):
        """Gets the path of the passed filename.

        If filename is only available within an archive, a string in
        the form 'filename@archivename' will be returned.

        Raises a KeyError, if filename could not be found.
        """
        archive, ftype, pathname = self.files[filename]
        if archive:
            return '%s@%s' % (pathname, archive)
        return pathname

    def scan(self, path, subdir=None, excludepattern=None):
        """Scans a path and adds all found files to the Resources
        container.

        Scans a path and adds all found files to the Resources
        container. If a file is a supported (ZIP or TAR) archive, its
        contents will be indexed and added automatically.

        The method will consider the directory part (os.path.dirname) of
        the provided path as path to scan, if the path is not a
        directory. If subdir is provided, it will be appended to the
        path and used as starting point for adding files to the
        Resources container.

        excludepattern can be a regular expression to skip directories.
        It is matched (re.match) against the path of each directory
        visited; the files of a matching directory are not added.

        Raises a ValueError, if the path or the path combined with
        subdir does not exist.
        """
        match = None
        if excludepattern:
            match = re.compile(excludepattern).match
        abspath = os.path.abspath(path)
        if not os.path.exists(abspath):
            raise ValueError("invalid path '%s'" % abspath)
        if not os.path.isdir(abspath):
            abspath = os.path.dirname(abspath)
        if subdir is not None:
            abspath = os.path.join(abspath, subdir)
        if not os.path.exists(abspath):
            raise ValueError("invalid path '%s'" % abspath)
        for (pdir, dirnames, filenames) in os.walk(abspath):
            if match and match(pdir) is not None:
                continue
            for fname in filenames:
                self.add(os.path.join(pdir, fname))
319