1# -*- coding: utf-8 -*-
2# Copyright (C) 2006  Joe Wreschnig
3#
4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation; either version 2 of the License, or
7# (at your option) any later version.
8
9"""Utility classes for Mutagen.
10
11You should not rely on the interfaces here being stable. They are
12intended for internal use in Mutagen only.
13"""
14
15import sys
16import struct
17import codecs
18import errno
19import decimal
20from io import BytesIO
21
22try:
23    import mmap
24except ImportError:
25    # Google App Engine has no mmap:
26    #   https://github.com/quodlibet/mutagen/issues/286
27    mmap = None
28
29from collections import namedtuple
30from contextlib import contextmanager
31from functools import wraps
32from fnmatch import fnmatchcase
33
34from ._compat import chr_, PY2, iteritems, iterbytes, integer_types, xrange, \
35    izip, text_type, reraise
36
37
def intround(value):
    """Round a float to the nearest int using banker's rounding.

    Decimal with ROUND_HALF_EVEN is used so the result is identical on
    both Python 2 and 3.
    """

    as_decimal = decimal.Decimal.from_float(value)
    return int(as_decimal.to_integral_value(decimal.ROUND_HALF_EVEN))
45
46
def is_fileobj(fileobj):
    """Returns:
        bool: whether an argument passed to mutagen should be treated as a
            file object
    """

    # Filenames are str/bytes or path-like objects; anything else is
    # treated as a file object.
    if isinstance(fileobj, (text_type, bytes)):
        return False
    return not hasattr(fileobj, "__fspath__")
55
56
def verify_fileobj(fileobj, writable=False):
    """Check that the passed object is a file-like object we can use.

    Args:
        writable (bool): verify that the file object is writable as well

    Raises:
        ValueError: In case the object is not a file object that is readable
            (or writable if required) or is not opened in bytes mode.
    """

    try:
        data = fileobj.read(0)
    except Exception:
        # Distinguish "no read() at all" from "read() failed"
        if not hasattr(fileobj, "read"):
            raise ValueError("%r not a valid file object" % fileobj)
        raise ValueError("Can't read from file object %r" % fileobj)

    if not isinstance(data, bytes):
        raise ValueError(
            "file object %r not opened in binary mode" % fileobj)

    if not writable:
        return

    try:
        fileobj.write(b"")
    except Exception:
        if not hasattr(fileobj, "write"):
            raise ValueError("%r not a valid file object" % fileobj)
        raise ValueError("Can't write to file object %r" % fileobj)
87
88
def verify_filename(filename):
    """Check that the passed value has a type usable as a filename.

    Raises:
        ValueError: if not a filename
    """

    if not is_fileobj(filename):
        return
    raise ValueError("%r not a filename" % filename)
98
99
def fileobj_name(fileobj):
    """
    Returns:
        text: A potential filename for a file object. Always a valid
            path type, but might be empty or non-existent.
    """

    name = getattr(fileobj, "name", u"")
    if isinstance(name, (text_type, bytes)):
        return name
    # Coerce non-path values (e.g. integer fds) to text
    return text_type(name)
111
112
def loadfile(method=True, writable=False, create=False):
    """A decorator for functions taking a `filething` as a first argument.

    Passes a FileThing instance as the first argument to the wrapped function.

    Args:
        method (bool): If the wrapped functions is a method
        writable (bool): If a filename is passed opens the file readwrite, if
            passed a file object verifies that it is writable.
        create (bool): If passed a filename that does not exist will create
            a new empty file.
    """

    def split_args(args, kwargs):
        # The first positional argument (if any) is the filething;
        # "filename" and "fileobj" may also arrive as keyword arguments.
        target = args[0] if args else None
        return (target, kwargs.pop("filename", None),
                kwargs.pop("fileobj", None), args[1:], kwargs)

    def wrap(func):

        @wraps(func)
        def method_wrapper(self, *args, **kwargs):
            filething, filename, fileobj, args, kwargs = \
                split_args(args, kwargs)
            with _openfile(self, filething, filename, fileobj,
                           writable, create) as h:
                return func(self, h, *args, **kwargs)

        @wraps(func)
        def func_wrapper(*args, **kwargs):
            filething, filename, fileobj, args, kwargs = \
                split_args(args, kwargs)
            with _openfile(None, filething, filename, fileobj,
                           writable, create) as h:
                return func(h, *args, **kwargs)

        if method:
            return method_wrapper
        return func_wrapper

    return wrap
153
154
def convert_error(exc_src, exc_dest):
    """A decorator for reraising exceptions with a different type.
    Mostly useful for IOError.

    Args:
        exc_src (type): The source exception type
        exc_dest (type): The target exception type.
    """

    def wrap(func):

        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except exc_dest:
                # Already the destination type (or subclass): pass it
                # through unchanged so it isn't wrapped twice.
                raise
            except exc_src as err:
                # Re-raise as exc_dest, preserving the original traceback.
                reraise(exc_dest, err, sys.exc_info()[2])

        return wrapper

    return wrap
178
179
# Bundles what _openfile()/loadfile() hand to wrapped functions.
FileThing = namedtuple("FileThing", ["fileobj", "filename", "name"])
"""filename is None if the source is not a filename. name is a filename which
can be used for file type detection
"""
184
185
@contextmanager
def _openfile(instance, filething, filename, fileobj, writable, create):
    """yields a FileThing

    Args:
        instance: The FileType instance being loaded/saved, or None
        filething: Either a file name, a file object or None
        filename: Either a file name or None
        fileobj: Either a file object or None
        writable (bool): if the file should be opened read/write
        create (bool): if the file should be created if it doesn't exist.
            implies writable
    Raises:
        MutagenError: In case opening the file failed
        TypeError: in case neither a file name or a file object is passed
    """

    assert not create or writable

    # to allow stacked context managers, just pass the result through
    if isinstance(filething, FileThing):
        filename = filething.filename
        fileobj = filething.fileobj
        filething = None

    if filething is not None:
        if is_fileobj(filething):
            fileobj = filething
        elif hasattr(filething, "__fspath__"):
            # os.PathLike support
            filename = filething.__fspath__()
            if not isinstance(filename, (bytes, text_type)):
                raise TypeError("expected __fspath__() to return a filename")
        else:
            filename = filething

    if instance is not None:
        # XXX: take "not writable" as loading the file..
        if not writable:
            instance.filename = filename
        elif filename is None:
            # fall back to the filename remembered from loading
            filename = getattr(instance, "filename", None)

    if fileobj is not None:
        verify_fileobj(fileobj, writable=writable)
        yield FileThing(fileobj, filename, filename or fileobj_name(fileobj))
    elif filename is not None:
        verify_filename(filename)

        inmemory_fileobj = False
        try:
            fileobj = open(filename, "rb+" if writable else "rb")
        except IOError as e:
            if writable and e.errno == errno.EOPNOTSUPP:
                # Some file systems (gvfs over fuse) don't support opening
                # files read/write. To make things still work read the whole
                # file into an in-memory file like object and write it back
                # later.
                # https://github.com/quodlibet/mutagen/issues/300
                try:
                    with open(filename, "rb") as fileobj:
                        fileobj = BytesIO(fileobj.read())
                except IOError as e2:
                    raise MutagenError(e2)
                inmemory_fileobj = True
            elif create and e.errno == errno.ENOENT:
                assert writable
                try:
                    fileobj = open(filename, "wb+")
                except IOError as e2:
                    raise MutagenError(e2)
            else:
                raise MutagenError(e)

        with fileobj as fileobj:
            yield FileThing(fileobj, filename, filename)

            if inmemory_fileobj:
                # write the buffered data back to the real file (see the
                # EOPNOTSUPP case above)
                assert writable
                data = fileobj.getvalue()
                try:
                    with open(filename, "wb") as fileobj:
                        fileobj.write(data)
                except IOError as e:
                    raise MutagenError(e)
    else:
        raise TypeError("Missing filename or fileobj argument")
271
272
class MutagenError(Exception):
    """The base class of all custom exceptions raised by mutagen.

    .. versionadded:: 1.25
    """

    # Present the exception as living in the public "mutagen" package.
    __module__ = "mutagen"
280
281
def total_ordering(cls):
    """Class decorator adding all rich comparison methods.

    The class must define a working __eq__ and __lt__; __le__, __gt__,
    __ge__ and __ne__ are derived from them.
    """

    assert "__eq__" in cls.__dict__
    assert "__lt__" in cls.__dict__

    def le(self, other):
        return self == other or self < other

    def gt(self, other):
        return not (self == other or self < other)

    def ge(self, other):
        return not self < other

    def ne(self, other):
        return not self.__eq__(other)

    cls.__le__ = le
    cls.__gt__ = gt
    cls.__ge__ = ge
    cls.__ne__ = ne

    return cls
297
298
def hashable(cls):
    """Class decorator ensuring the class stays hashable.

    The class must define a working __eq__ and __hash__; a matching
    __ne__ is added.
    """

    # py2: __hash__ must be defined on the class itself
    assert "__hash__" in cls.__dict__
    # py3: defining __eq__ sets __hash__ to None unless overridden
    assert cls.__dict__["__hash__"] is not None
    assert "__eq__" in cls.__dict__

    def ne(self, other):
        return not self.__eq__(other)

    cls.__ne__ = ne

    return cls
314
315
def enum(cls):
    """A decorator for creating an int enum class.

    Makes the values a subclass of the type and implements repr/str.
    The new class will be a subclass of int.

    Args:
        cls (type): The class to convert to an enum

    Returns:
        type: A new class

    ::

        @enum
        class Foo(object):
            FOO = 1
            BAR = 2
    """

    assert cls.__bases__ == (object,)

    body = dict(cls.__dict__)
    new_type = type(cls.__name__, (int,), body)
    new_type.__module__ = cls.__module__

    # value -> constant name, for the ALL_CAPS int attributes
    names = {}
    for attr, val in iteritems(body):
        if attr == attr.upper() and isinstance(val, integer_types):
            setattr(new_type, attr, new_type(val))
            names[val] = attr

    def str_(self):
        try:
            return "%s.%s" % (type(self).__name__, names[self])
        except KeyError:
            return "%d" % int(self)

    def repr_(self):
        try:
            return "<%s.%s: %d>" % (type(self).__name__, names[self],
                                    int(self))
        except KeyError:
            return "%d" % int(self)

    new_type.__str__ = str_
    new_type.__repr__ = repr_

    return new_type
363
364
def flags(cls):
    """A decorator for creating an int flags class.

    Makes the values a subclass of the type and implements repr/str.
    The new class will be a subclass of int.

    Args:
        cls (type): The class to convert to a flags class

    Returns:
        type: A new class

    ::

        @flags
        class Foo(object):
            FOO = 1
            BAR = 2
    """

    assert cls.__bases__ == (object,)

    body = dict(cls.__dict__)
    new_type = type(cls.__name__, (int,), body)
    new_type.__module__ = cls.__module__

    # flag value -> constant name, for the ALL_CAPS int attributes
    names = {}
    for attr, val in iteritems(body):
        if attr == attr.upper() and isinstance(val, integer_types):
            setattr(new_type, attr, new_type(val))
            names[val] = attr

    def str_(self):
        remaining = int(self)
        parts = []
        for bit, attr in names.items():
            if remaining & bit:
                parts.append("%s.%s" % (type(self).__name__, attr))
                remaining &= ~bit
        # leftover bits (or no match at all) are shown numerically
        if remaining != 0 or not parts:
            parts.append(text_type(remaining))

        return " | ".join(parts)

    def repr_(self):
        return "<%s: %d>" % (str(self), int(self))

    new_type.__str__ = str_
    new_type.__repr__ = repr_

    return new_type
417
418
@total_ordering
class DictMixin(object):
    """Implement the dict API using keys() and __*item__ methods.

    Similar to UserDict.DictMixin, this takes a class that defines
    __getitem__, __setitem__, __delitem__, and keys(), and turns it
    into a full dict-like object.

    UserDict.DictMixin is not suitable for this purpose because it's
    an old-style class.

    This class is not optimized for very large dictionaries; many
    functions have linear memory requirements. I recommend you
    override some of these functions if speed is required.
    """

    def __iter__(self):
        # Iterating a mapping yields its keys.
        return iter(self.keys())

    def __has_key(self, key):
        # EAFP membership test built on __getitem__.
        try:
            self[key]
        except KeyError:
            return False
        else:
            return True

    if PY2:
        has_key = __has_key

    __contains__ = __has_key

    if PY2:
        iterkeys = lambda self: iter(self.keys())

    def values(self):
        # Linear: performs a lookup for every key.
        return [self[k] for k in self.keys()]

    if PY2:
        itervalues = lambda self: iter(self.values())

    def items(self):
        return list(izip(self.keys(), self.values()))

    if PY2:
        iteritems = lambda s: iter(s.items())

    def clear(self):
        # Copy the key list first since we delete while iterating.
        for key in list(self.keys()):
            self.__delitem__(key)

    def pop(self, key, *args):
        # Mirrors dict.pop(key[, default]).
        if len(args) > 1:
            raise TypeError("pop takes at most two arguments")
        try:
            value = self[key]
        except KeyError:
            if args:
                return args[0]
            else:
                raise
        del(self[key])
        return value

    def popitem(self):
        # Remove and return an arbitrary (key, value) pair.
        for key in self.keys():
            break
        else:
            raise KeyError("dictionary is empty")
        return key, self.pop(key)

    def update(self, other=None, **kwargs):
        if other is None:
            self.update(kwargs)
            other = {}

        try:
            # mapping-like source
            for key, value in other.items():
                self.__setitem__(key, value)
        except AttributeError:
            # iterable of (key, value) pairs
            for key, value in other:
                self[key] = value

    def setdefault(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            self[key] = default
            return default

    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default

    def __repr__(self):
        return repr(dict(self.items()))

    def __eq__(self, other):
        return dict(self.items()) == other

    def __lt__(self, other):
        return dict(self.items()) < other

    __hash__ = object.__hash__

    def __len__(self):
        return len(self.keys())
528
529
class DictProxy(DictMixin):
    """A DictMixin backed by a plain dict for item storage."""

    def __init__(self, *args, **kwargs):
        self.__dict = {}
        super(DictProxy, self).__init__(*args, **kwargs)

    def __getitem__(self, key):
        return self.__dict[key]

    def __setitem__(self, key, value):
        self.__dict[key] = value

    def __delitem__(self, key):
        del self.__dict[key]

    def keys(self):
        return self.__dict.keys()
546
547
def _fill_cdata(cls):
    """Add struct pack/unpack functions to *cls* as static methods.

    For every combination of size (char/short/int/longlong), signedness
    and endianness this registers e.g. uint32_le(data) -> int,
    uint32_le_from(data, offset=0) -> (int, next_offset),
    to_uint32_le(int) -> bytes, the C-type-named aliases
    (ushort_le, ...) and the *_min/*_max bound constants.
    """

    funcs = {}
    for key, name in [("b", "char"), ("h", "short"),
                      ("i", "int"), ("q", "longlong")]:
        for echar, esuffix in [("<", "le"), (">", "be")]:
            esuffix = "_" + esuffix
            for unsigned in [True, False]:
                s = struct.Struct(echar + (key.upper() if unsigned else key))
                # wrap so a single value is returned instead of a 1-tuple
                get_wrapper = lambda f: lambda *a, **k: f(*a, **k)[0]
                unpack = get_wrapper(s.unpack)

                def get_unpack_from(s):
                    # bind the Struct per iteration (avoids late binding)
                    def unpack_from(data, offset=0):
                        return s.unpack_from(data, offset)[0], offset + s.size
                    return unpack_from

                unpack_from = get_unpack_from(s)
                pack = s.pack

                prefix = "u" if unsigned else ""
                if s.size == 1:
                    # single bytes have no endianness; drop the suffix
                    esuffix = ""
                bits = str(s.size * 8)

                if unsigned:
                    max_ = 2 ** (s.size * 8) - 1
                    min_ = 0
                else:
                    max_ = 2 ** (s.size * 8 - 1) - 1
                    min_ = - 2 ** (s.size * 8 - 1)

                funcs["%s%s_min" % (prefix, name)] = min_
                funcs["%s%s_max" % (prefix, name)] = max_
                funcs["%sint%s_min" % (prefix, bits)] = min_
                funcs["%sint%s_max" % (prefix, bits)] = max_

                funcs["%s%s%s" % (prefix, name, esuffix)] = unpack
                funcs["%sint%s%s" % (prefix, bits, esuffix)] = unpack
                funcs["%s%s%s_from" % (prefix, name, esuffix)] = unpack_from
                funcs["%sint%s%s_from" % (prefix, bits, esuffix)] = unpack_from
                funcs["to_%s%s%s" % (prefix, name, esuffix)] = pack
                funcs["to_%sint%s%s" % (prefix, bits, esuffix)] = pack

    for key, func in iteritems(funcs):
        setattr(cls, key, staticmethod(func))
596
597
class cdata(object):
    """C character buffer to Python numeric type conversions.

    For each size/sign/endianness:
    uint32_le(data)/to_uint32_le(num)/uint32_le_from(data, offset=0)
    """

    # struct.error, re-exported so callers can catch cdata.error
    from struct import error
    error = error

    # 256-entry translation table mapping each byte to its bit-reversed
    # value, e.g. 0b10000000 -> 0b00000001.
    bitswap = b''.join(
        chr_(sum(((val >> i) & 1) << (7 - i) for i in xrange(8)))
        for val in xrange(256))

    # test_bit(value, n) -> bool: whether bit n (n=0 is the LSB) is set.
    test_bit = staticmethod(lambda value, n: bool((value >> n) & 1))
613
614
# Attach all generated pack/unpack helpers to the cdata class.
_fill_cdata(cdata)
616
617
def get_size(fileobj):
    """Returns the size of the file.
    The position when passed in will be preserved if no error occurs.

    Args:
        fileobj (fileobj)
    Returns:
        int: The size of the file
    Raises:
        IOError
    """

    pos = fileobj.tell()
    try:
        # measure by seeking to the end
        fileobj.seek(0, 2)
        size = fileobj.tell()
    finally:
        fileobj.seek(pos, 0)
    return size
636
637
def read_full(fileobj, size):
    """Like fileobj.read but raises IOError if not all requested data is
    returned.

    If you want to distinguish IOError and the EOS case, better handle
    the error yourself instead of using this.

    Args:
        fileobj (fileobj)
        size (int): amount of bytes to read
    Returns:
        bytes: exactly `size` bytes of data
    Raises:
        IOError: In case read fails or not enough data is read
        ValueError: If a negative size is passed
    """

    if size < 0:
        raise ValueError("size must not be negative")

    data = fileobj.read(size)
    if len(data) != size:
        # Include a message so short reads produce a useful traceback
        # (the original raised a bare IOError with no context).
        raise IOError("read returned %d bytes, expected %d" %
                      (len(data), size))
    return data
659
660
def seek_end(fileobj, offset):
    """Like fileobj.seek(-offset, 2), but will not try to go beyond the start

    Needed since file objects from BytesIO will not raise IOError and
    file objects from open() will raise IOError if going to a negative offset.
    To make things easier for custom implementations, instead of allowing
    both behaviors, we just don't do it.

    Args:
        fileobj (fileobj)
        offset (int): how many bytes away from the end backwards to seek to

    Raises:
        IOError
    """

    if offset < 0:
        raise ValueError

    # clamp at the start if the file is shorter than the offset
    if get_size(fileobj) >= offset:
        fileobj.seek(-offset, 2)
    else:
        fileobj.seek(0, 0)
684
685
def mmap_move(fileobj, dest, src, count):
    """Mmaps the file object if possible and moves 'count' data
    from 'src' to 'dest'. All data has to be inside the file size
    (enlarging the file through this function isn't possible)

    Will adjust the file offset.

    Args:
        fileobj (fileobj)
        dest (int): The destination offset
        src (int): The source offset
        count (int) The amount of data to move
    Raises:
        mmap.error: In case move failed
        IOError: In case an operation on the fileobj fails
        ValueError: In case invalid parameters were given
    """

    assert mmap is not None, "no mmap support"

    if dest < 0 or src < 0 or count < 0:
        raise ValueError("Invalid parameters")

    try:
        fileno = fileobj.fileno()
    except (AttributeError, IOError):
        # e.g. BytesIO has no fd; callers are expected to fall back to
        # fallback_move() on mmap.error
        raise mmap.error(
            "File object does not expose/support a file descriptor")

    fileobj.seek(0, 2)
    filesize = fileobj.tell()
    length = max(dest, src) + count

    if length > filesize:
        raise ValueError("Not in file size boundary")

    # mmap offsets must be multiples of ALLOCATIONGRANULARITY, so map
    # starting from the aligned offset at or below both positions.
    offset = ((min(dest, src) // mmap.ALLOCATIONGRANULARITY) *
              mmap.ALLOCATIONGRANULARITY)
    assert dest >= offset
    assert src >= offset
    assert offset % mmap.ALLOCATIONGRANULARITY == 0

    # Windows doesn't handle empty mappings, add a fast path here instead
    if count == 0:
        return

    # fast path
    if src == dest:
        return

    # flush Python-level buffers before touching the file via its fd
    fileobj.flush()
    file_map = mmap.mmap(fileno, length - offset, offset=offset)
    try:
        file_map.move(dest - offset, src - offset, count)
    finally:
        file_map.close()
742
743
def resize_file(fobj, diff, BUFFER_SIZE=2 ** 16):
    """Resize a file by `diff`.

    New space will be filled with zeros.

    Args:
        fobj (fileobj)
        diff (int): amount of size to change
    Raises:
        IOError
    """

    fobj.seek(0, 2)
    filesize = fobj.tell()

    if diff < 0:
        if filesize + diff < 0:
            raise ValueError
        # truncate flushes internally
        fobj.truncate(filesize + diff)
    elif diff > 0:
        try:
            remaining = diff
            while remaining:
                chunk = min(BUFFER_SIZE, remaining)
                fobj.write(b"\x00" * chunk)
                remaining -= chunk
            fobj.flush()
        except IOError as e:
            if e.errno == errno.ENOSPC:
                # To reduce the chance of corrupt files in case of missing
                # space try to revert the file expansion back. Of course
                # in reality every in-file-write can also fail due to COW etc.
                # Note: IOError gets also raised in flush() due to buffering
                fobj.truncate(filesize)
            raise
779
780
def fallback_move(fobj, dest, src, count, BUFFER_SIZE=2 ** 16):
    """Moves data around using read()/write().

    Args:
        fobj (fileobj)
        dest (int): The destination offset
        src (int): The source offset
        count (int) The amount of data to move
    Raises:
        IOError: In case an operation on the fileobj fails
        ValueError: In case invalid parameters were given
    """

    if dest < 0 or src < 0 or count < 0:
        raise ValueError

    fobj.seek(0, 2)
    filesize = fobj.tell()

    if max(dest, src) + count > filesize:
        raise ValueError("area outside of file")

    if src > dest:
        # moving towards the start: copy front-to-back so overlapping
        # regions are never clobbered before being read
        done = 0
        while done < count:
            step = min(BUFFER_SIZE, count - done)
            fobj.seek(src + done)
            chunk = fobj.read(step)
            fobj.seek(dest + done)
            fobj.write(chunk)
            done += step
        fobj.flush()
    else:
        # moving towards the end: copy back-to-front for the same reason
        remaining = count
        while remaining:
            step = min(BUFFER_SIZE, remaining)
            fobj.seek(src + remaining - step)
            chunk = fobj.read(step)
            fobj.seek(dest + remaining - step)
            fobj.write(chunk)
            remaining -= step
        fobj.flush()
822
823
def insert_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16):
    """Insert size bytes of empty space starting at offset.

    fobj must be an open file object, open rb+ or
    equivalent. Mutagen tries to use mmap to resize the file, but
    falls back to a significantly slower method if mmap fails.

    Args:
        fobj (fileobj)
        size (int): The amount of space to insert
        offset (int): The offset at which to insert the space
    Raises:
        IOError
    """

    if size < 0 or offset < 0:
        raise ValueError

    fobj.seek(0, 2)
    filesize = fobj.tell()
    movesize = filesize - offset

    if movesize < 0:
        raise ValueError

    # grow the file first, then shift the tail towards the end
    resize_file(fobj, size, BUFFER_SIZE)

    if mmap is None:
        fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE)
        return

    try:
        mmap_move(fobj, offset + size, offset, movesize)
    except mmap.error:
        fallback_move(fobj, offset + size, offset, movesize, BUFFER_SIZE)
858
859
def delete_bytes(fobj, size, offset, BUFFER_SIZE=2 ** 16):
    """Delete size bytes of empty space starting at offset.

    fobj must be an open file object, open rb+ or
    equivalent. Mutagen tries to use mmap to resize the file, but
    falls back to a significantly slower method if mmap fails.

    Args:
        fobj (fileobj)
        size (int): The amount of space to delete
        offset (int): The start of the space to delete
    Raises:
        IOError
    """

    if size < 0 or offset < 0:
        raise ValueError

    fobj.seek(0, 2)
    filesize = fobj.tell()
    movesize = filesize - offset - size

    if movesize < 0:
        raise ValueError

    # shift the tail towards the start, then shrink the file
    if mmap is None:
        fallback_move(fobj, offset, offset + size, movesize, BUFFER_SIZE)
    else:
        try:
            mmap_move(fobj, offset, offset + size, movesize)
        except mmap.error:
            fallback_move(fobj, offset, offset + size, movesize, BUFFER_SIZE)

    resize_file(fobj, -size, BUFFER_SIZE)
894
895
def resize_bytes(fobj, old_size, new_size, offset):
    """Resize an area in a file adding and deleting at the end of it.
    Does nothing if no resizing is needed.

    Args:
        fobj (fileobj)
        old_size (int): The area starting at offset
        new_size (int): The new size of the area
        offset (int): The start of the area
    Raises:
        IOError
    """

    if new_size == old_size:
        return
    if new_size < old_size:
        # shrink: delete the surplus at the end of the area
        delete_bytes(fobj, old_size - new_size, offset + new_size)
    else:
        # grow: insert the missing space at the end of the area
        insert_bytes(fobj, new_size - old_size, offset + old_size)
917
918
def dict_match(d, key, default=None):
    """Like __getitem__ but works as if the keys() are all filename patterns.
    Returns the value of any dict key that matches the passed key.

    Args:
        d (dict): A dict with filename patterns as keys
        key (str): A key potentially matching any of the keys
        default (object): The object to return if no pattern matched the
            passed in key
    Returns:
        object: The dict value where the dict key matched the passed in key.
            Or default if there was no match.
    """

    # exact lookup wins, unless the key itself contains a glob char class
    if "[" not in key and key in d:
        return d[key]
    for pattern, value in iteritems(d):
        if fnmatchcase(key, pattern):
            return value
    return default
940
941
def encode_endian(text, encoding, errors="strict", le=True):
    """Like text.encode(encoding) but always returns little endian/big endian
    BOMs instead of the system one.

    Args:
        text (text)
        encoding (str)
        errors (str)
        le (boolean): if little endian
    Returns:
        bytes
    Raises:
        UnicodeEncodeError
        LookupError
    """

    # normalize so aliases like "UTF16" compare equal
    name = codecs.lookup(encoding).name

    if name == "utf-16":
        if le:
            return codecs.BOM_UTF16_LE + text.encode("utf-16-le", errors)
        return codecs.BOM_UTF16_BE + text.encode("utf-16-be", errors)

    if name == "utf-32":
        if le:
            return codecs.BOM_UTF32_LE + text.encode("utf-32-le", errors)
        return codecs.BOM_UTF32_BE + text.encode("utf-32-be", errors)

    return text.encode(name, errors)
972
973
def decode_terminated(data, encoding, strict=True):
    """Returns the decoded data until the first NULL terminator
    and all data after it.

    Args:
        data (bytes): data to decode
        encoding (str): The codec to use
        strict (bool): If True will raise ValueError in case no NULL is found
            but the available data decoded successfully.
    Returns:
        Tuple[`text`, `bytes`]: A tuple containing the decoded text and the
            remaining data after the found NULL termination.

    Raises:
        UnicodeError: In case the data can't be decoded.
        LookupError: In case the encoding is not found.
        ValueError: In case the data isn't null terminated (even if it is
            encoded correctly) except if strict is False, then the decoded
            string will be returned anyway.
    """

    codec_info = codecs.lookup(encoding)

    # normalize encoding name so we can compare by name
    encoding = codec_info.name

    # fast path: codecs where a zero byte is unambiguously NULL
    if encoding in ("utf-8", "iso8859-1"):
        index = data.find(b"\x00")
        if index == -1:
            # decode first so UnicodeError wins over ValueError,
            # like in the slow path
            res = data.decode(encoding), b""
            if strict:
                raise ValueError("not null terminated")
            return res
        return data[:index].decode(encoding), data[index + 1:]

    # slow path: feed the incremental decoder one byte at a time
    decoder = codec_info.incrementaldecoder()
    chars = []
    for i, b in enumerate(iterbytes(data)):
        c = decoder.decode(b)
        if c == u"\x00":
            return u"".join(chars), data[i + 1:]
        chars.append(c)

    # flush the decoder; raises if the input ended mid-sequence
    chars.append(decoder.decode(b"", True))
    if strict:
        raise ValueError("not null terminated")
    return u"".join(chars), b""
1026
1027
class BitReaderError(Exception):
    """Raised by BitReader when the requested amount of bits or bytes
    could not be read from the underlying file object.
    """

    pass
1030
1031
class BitReader(object):
    """Reads unsigned integers of arbitrary bit length from a file
    object, MSB first. Data is buffered, so the underlying file position
    only ever advances in whole bytes.
    """

    def __init__(self, fileobj):
        self._fileobj = fileobj
        self._buffer = 0  # pending bits, right-aligned in an int
        self._bits = 0    # number of valid bits in _buffer
        self._pos = fileobj.tell()

    def bits(self, count):
        """Reads `count` bits and returns an uint, MSB read first.

        May raise BitReaderError if not enough data could be read or
        IOError by the underlying file object.
        """

        if count < 0:
            raise ValueError

        if count > self._bits:
            # pull in just enough whole bytes to satisfy the request
            n_bytes = (count - self._bits + 7) // 8
            data = self._fileobj.read(n_bytes)
            if len(data) != n_bytes:
                raise BitReaderError("not enough data")
            for byte in bytearray(data):
                self._buffer = (self._buffer << 8) | byte
            self._bits += n_bytes * 8

        self._bits -= count
        value = self._buffer >> self._bits
        self._buffer &= (1 << self._bits) - 1
        assert self._bits < 8
        return value

    def bytes(self, count):
        """Returns a bytearray of length `count`. Works unaligned."""

        if count < 0:
            raise ValueError

        # fast path: aligned, read straight from the file
        if self._bits == 0:
            data = self._fileobj.read(count)
            if len(data) != count:
                raise BitReaderError("not enough data")
            return data

        # unaligned: assemble byte by byte through the bit buffer
        return bytes(bytearray(self.bits(8) for _ in xrange(count)))

    def skip(self, count):
        """Skip `count` bits.

        Might raise BitReaderError if there wasn't enough data to skip,
        but might also fail on the next bits() instead.
        """

        if count < 0:
            raise ValueError

        if count <= self._bits:
            self.bits(count)
            return

        # drop the buffered bits, seek over whole bytes, read the rest
        count -= self.align()
        n_bytes, count = divmod(count, 8)
        self._fileobj.seek(n_bytes, 1)
        self.bits(count)

    def get_position(self):
        """Returns the amount of bits read or skipped so far"""

        return (self._fileobj.tell() - self._pos) * 8 - self._bits

    def align(self):
        """Align to the next byte, returns the amount of bits skipped"""

        skipped, self._buffer, self._bits = self._bits, 0, 0
        return skipped

    def is_aligned(self):
        """If we are currently aligned to bytes and nothing is buffered"""

        return self._bits == 0
1116