1"""Interface to the libbzip2 compression library.
2
3This module provides a file interface, classes for incremental
4(de)compression, and functions for one-shot (de)compression.
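
A quick usage sketch of the one-shot functions (BZ2File and the
incremental BZ2Compressor/BZ2Decompressor classes are documented below):

    >>> import bz2
    >>> bz2.decompress(bz2.compress(b"data"))
    b'data'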
5"""
6
7__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
8           "open", "compress", "decompress"]
9
10__author__ = "Nadeem Vawda <nadeem.vawda@gmail.com>"
11
12from builtins import open as _builtin_open
13import io
14import os
15import warnings
16import _compression
17from threading import RLock
18
19from _bz2 import BZ2Compressor, BZ2Decompressor
20
21
22_MODE_CLOSED   = 0
23_MODE_READ     = 1
24# Value 2 no longer used
25_MODE_WRITE    = 3
26
27_sentinel = object()
28
29
30class BZ2File(_compression.BaseStream):
31
32    """A file object providing transparent bzip2 (de)compression.
33
34    A BZ2File can act as a wrapper for an existing file object, or refer
35    directly to a named file on disk.
36
37    Note that BZ2File provides a *binary* file interface - data read is
38    returned as bytes, and data to be written should be given as bytes.
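
    A minimal usage sketch (the path "example.bz2" is purely illustrative):

        with BZ2File("example.bz2", "w") as f:
            f.write(b"some bytes")
        with BZ2File("example.bz2") as f:
            data = f.read()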
39    """
40
41    def __init__(self, filename, mode="r", buffering=_sentinel, compresslevel=9):
42        """Open a bzip2-compressed file.
43
44        If filename is a str, bytes, or PathLike object, it gives the
45        name of the file to be opened. Otherwise, it should be a file
46        object, which will be used to read or write the compressed data.
47
48        mode can be 'r' for reading (default), 'w' for (over)writing,
49        'x' for creating exclusively, or 'a' for appending. These can
50        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.
51
52        buffering is ignored since Python 3.0. Its use is deprecated.
53
54        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
55        and 9 specifying the level of compression: 1 produces the least
56        compression, and 9 (default) produces the most compression.
57
58        If mode is 'r', the input file may be the concatenation of
59        multiple compressed streams.
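
        A sketch of wrapping an in-memory file object (io.BytesIO is used
        here purely for illustration):

            buf = io.BytesIO()
            with BZ2File(buf, "w") as f:
                f.write(b"payload")
            buf.seek(0)
            with BZ2File(buf, "r") as f:
                data = f.read()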
60        """
61        # This lock must be recursive, so that BufferedIOBase's
62        # writelines() does not deadlock.
63        self._lock = RLock()
64        self._fp = None
65        self._closefp = False
66        self._mode = _MODE_CLOSED
67
68        if buffering is not _sentinel:
69            warnings.warn("Use of 'buffering' argument is deprecated and ignored "
70                          "since Python 3.0.",
71                          DeprecationWarning,
72                          stacklevel=2)
73
74        if not (1 <= compresslevel <= 9):
75            raise ValueError("compresslevel must be between 1 and 9")
76
77        if mode in ("", "r", "rb"):
78            mode = "rb"
79            mode_code = _MODE_READ
80        elif mode in ("w", "wb"):
81            mode = "wb"
82            mode_code = _MODE_WRITE
83            self._compressor = BZ2Compressor(compresslevel)
84        elif mode in ("x", "xb"):
85            mode = "xb"
86            mode_code = _MODE_WRITE
87            self._compressor = BZ2Compressor(compresslevel)
88        elif mode in ("a", "ab"):
89            mode = "ab"
90            mode_code = _MODE_WRITE
91            self._compressor = BZ2Compressor(compresslevel)
92        else:
93            raise ValueError("Invalid mode: %r" % (mode,))
94
95        if isinstance(filename, (str, bytes, os.PathLike)):
96            self._fp = _builtin_open(filename, mode)
97            self._closefp = True
98            self._mode = mode_code
99        elif hasattr(filename, "read") or hasattr(filename, "write"):
100            self._fp = filename
101            self._mode = mode_code
102        else:
103            raise TypeError("filename must be a str, bytes, file or PathLike object")
104
105        if self._mode == _MODE_READ:
106            raw = _compression.DecompressReader(self._fp,
107                BZ2Decompressor, trailing_error=OSError)
108            self._buffer = io.BufferedReader(raw)
109        else:
110            self._pos = 0
111
112    def close(self):
113        """Flush and close the file.
114
115        May be called more than once without error. Once the file is
116        closed, any other operation on it will raise a ValueError.
117        """
118        with self._lock:
119            if self._mode == _MODE_CLOSED:
120                return
121            try:
122                if self._mode == _MODE_READ:
123                    self._buffer.close()
124                elif self._mode == _MODE_WRITE:
125                    self._fp.write(self._compressor.flush())
126                    self._compressor = None
127            finally:
128                try:
129                    if self._closefp:
130                        self._fp.close()
131                finally:
132                    self._fp = None
133                    self._closefp = False
134                    self._mode = _MODE_CLOSED
135                    self._buffer = None
136
137    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        with self._lock:
            self._check_can_read()
            # Relies on the undocumented fact that BufferedReader.peek()
            # always returns at least one byte (except at EOF), independent
            # of the value of n
            return self._buffer.peek(n)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b'' if the file is at EOF.
        """
        with self._lock:
            self._check_can_read()
            if size < 0:
                size = io.DEFAULT_BUFFER_SIZE
            return self._buffer.read1(size)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        with self._lock:
            self._check_can_read()
            return self._buffer.readinto(b)

    def readline(self, size=-1):
207        """Read a line of uncompressed bytes from the file.
208
209        The terminating newline (if present) is retained. If size is
210        non-negative, no more than size bytes will be read (in which
211        case the line may be incomplete). Returns b'' if already at EOF.
212        """
213        if not isinstance(size, int):
214            if not hasattr(size, "__index__"):
215                raise TypeError("Integer argument expected")
216            size = size.__index__()
217        with self._lock:
218            self._check_can_read()
219            return self._buffer.readline(size)
220
221    def readlines(self, size=-1):
222        """Read a list of lines of uncompressed bytes from the file.
223
224        size can be specified to control the number of lines read: no
225        further lines will be read once the total size of the lines read
226        so far equals or exceeds size.
227        """
228        if not isinstance(size, int):
229            if not hasattr(size, "__index__"):
230                raise TypeError("Integer argument expected")
231            size = size.__index__()
232        with self._lock:
233            self._check_can_read()
234            return self._buffer.readlines(size)
235
236    def write(self, data):
237        """Write a byte string to the file.
238
239        Returns the number of uncompressed bytes written, which is
240        always len(data). Note that due to buffering, the file on disk
241        may not reflect the data written until close() is called.
242        """
243        with self._lock:
244            self._check_can_write()
245            compressed = self._compressor.compress(data)
246            self._fp.write(compressed)
247            self._pos += len(data)
248            return len(data)
249
250    def writelines(self, seq):
251        """Write a sequence of byte strings to the file.
252
253        Returns the number of uncompressed bytes written.
254        seq can be any iterable yielding byte strings.
255
256        Line separators are not added between the written byte strings.
257        """
258        with self._lock:
259            return _compression.BaseStream.writelines(self, seq)
260
261    def seek(self, offset, whence=io.SEEK_SET):
262        """Change the file position.
263
264        The new position is specified by offset, relative to the
265        position indicated by whence. Values for whence are:
266
267            0: start of stream (default); offset must not be negative
268            1: current stream position
269            2: end of stream; offset must not be positive
270
271        Returns the new file position.
272
273        Note that seeking is emulated, so depending on the parameters,
274        this operation may be extremely slow.
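
        A sketch of finding the uncompressed size this way (f is assumed
        to be a BZ2File opened for reading; this can be slow):

            f.seek(0, io.SEEK_END)
            size = f.tell()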
275        """
276        with self._lock:
277            self._check_can_seek()
278            return self._buffer.seek(offset, whence)
279
280    def tell(self):
281        """Return the current file position."""
282        with self._lock:
283            self._check_not_closed()
284            if self._mode == _MODE_READ:
285                return self._buffer.tell()
286            return self._pos
287
288
289def open(filename, mode="rb", compresslevel=9,
290         encoding=None, errors=None, newline=None):
291    """Open a bzip2-compressed file in binary or text mode.
292
293    The filename argument can be an actual filename (a str, bytes, or
294    PathLike object), or an existing file object to read from or write
295    to.
296
297    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
298    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
299    The default mode is "rb", and the default compresslevel is 9.
300
301    For binary mode, this function is equivalent to the BZ2File
302    constructor: BZ2File(filename, mode, compresslevel). In this case,
303    the encoding, errors and newline arguments must not be provided.
304
305    For text mode, a BZ2File object is created, and wrapped in an
306    io.TextIOWrapper instance with the specified encoding, error
307    handling behavior, and line ending(s).
308
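    A text-mode sketch (the file name and encoding are illustrative only):

        with open("example.txt.bz2", "wt", encoding="utf-8") as f:
            f.write("text is encoded, then compressed")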
309    """
310    if "t" in mode:
311        if "b" in mode:
312            raise ValueError("Invalid mode: %r" % (mode,))
313    else:
314        if encoding is not None:
315            raise ValueError("Argument 'encoding' not supported in binary mode")
316        if errors is not None:
317            raise ValueError("Argument 'errors' not supported in binary mode")
318        if newline is not None:
319            raise ValueError("Argument 'newline' not supported in binary mode")
320
321    bz_mode = mode.replace("t", "")
322    binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)
323
324    if "t" in mode:
325        return io.TextIOWrapper(binary_file, encoding, errors, newline)
326    else:
327        return binary_file
328
329
330def compress(data, compresslevel=9):
331    """Compress a block of data.
332
333    compresslevel, if given, must be a number between 1 and 9.
334
335    For incremental compression, use a BZ2Compressor object instead.
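
    A round-trip sketch with decompress():

        >>> decompress(compress(b"hello"))
        b'hello'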
336    """
337    comp = BZ2Compressor(compresslevel)
338    return comp.compress(data) + comp.flush()
339
340
341def decompress(data):
342    """Decompress a block of data.
343
344    For incremental decompression, use a BZ2Decompressor object instead.
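
    Concatenated streams are also handled; a minimal sketch:

        >>> decompress(compress(b"spam") + compress(b"eggs"))
        b'spameggs'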
345    """
346    results = []
347    while data:
348        decomp = BZ2Decompressor()
349        try:
350            res = decomp.decompress(data)
351        except OSError:
352            if results:
353                break  # Leftover data is not a valid bzip2 stream; ignore it.
354            else:
355                raise  # Error on the first iteration; bail out.
356        results.append(res)
357        if not decomp.eof:
358            raise ValueError("Compressed data ended before the "
359                             "end-of-stream marker was reached")
360        data = decomp.unused_data
361    return b"".join(results)
362