1 /*
2 
3 python-bz2 - python bz2 library interface
4 
5 Copyright (c) 2002  Gustavo Niemeyer <niemeyer@conectiva.com>
6 Copyright (c) 2002  Python Software Foundation; All Rights Reserved
7 
8 */
9 
10 #include "Python.h"
11 #include <stdio.h>
12 #include <bzlib.h>
13 #include "structmember.h"
14 
15 #ifdef WITH_THREAD
16 #include "pythread.h"
17 #endif
18 
/* Authorship credit for the module, kept as a plain C string. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    "    Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24 
/* Py_off_t: our own off_t-like offset type, 64-bit when possible.
 * Selection logic copied from Objects/fileobject.c: plain off_t is used
 * when large-file support is absent or off_t is already wide enough,
 * otherwise fpos_t is tried before giving up at compile time. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36 
/* Raw character storage of a Python string object.  The argument is
 * parenthesized so that the cast applies to the whole expression
 * (e.g. BUF(a + 1)), not just to its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0     /* reported as "I/O operation on closed file" */
#define MODE_READ     1     /* reading, end of stream not yet seen */
#define MODE_READ_EOF 2     /* reading, BZ_STREAM_END already seen */
#define MODE_WRITE    3     /* writing */
43 
44 
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
 * entry points used in this file onto the unprefixed names.
 * NOTE(review): presumably this targets older libbz2 releases that
 * exported unprefixed symbols -- confirm against the supported versions. */
#if !defined(BZ_CONFIG_ERROR)

/* file-oriented reading */
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzRead bzRead
#define BZ2_bzReadClose bzReadClose

/* file-oriented writing */
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteClose bzWriteClose

/* stream compression */
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressEnd bzCompressEnd

/* stream decompression */
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressEnd bzDecompressEnd

#endif /* ! BZ_CONFIG_ERROR */
61 
62 
/* Per-object locking helpers.
 *
 * ACQUIRE_LOCK first tries a non-blocking acquire; only if that fails
 * does it release the interpreter lock and block until the object lock
 * becomes available.  Without WITH_THREAD both macros expand to nothing.
 *
 * The macro argument is parenthesized so that any pointer expression
 * (not only a plain identifier) can be passed safely. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
75 
/* Smaller of two values; note that each argument may be evaluated twice. */
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))

/* Bit flags accumulated in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* no newline has been seen so far */
#define NEWLINE_CR      1       /* a bare \r was seen */
#define NEWLINE_LF      2       /* a bare \n was seen */
#define NEWLINE_CRLF    4       /* a \r\n pair was seen */
83 
84 /* ===================================================================== */
85 /* Structure definitions. */
86 
87 typedef struct {
88     PyObject_HEAD
89     PyObject *file;
90 
91     char* f_buf;                /* Allocated readahead buffer */
92     char* f_bufend;             /* Points after last occupied position */
93     char* f_bufptr;             /* Current buffer position */
94 
95     int f_softspace;            /* Flag used by 'print' command */
96 
97     int f_univ_newline;         /* Handle any newline convention */
98     int f_newlinetypes;         /* Types of newlines seen */
99     int f_skipnextlf;           /* Skip next \n */
100 
101     BZFILE *fp;
102     int mode;
103     Py_off_t pos;
104     Py_off_t size;
105 #ifdef WITH_THREAD
106     PyThread_type_lock lock;
107 #endif
108 } BZ2FileObject;
109 
110 typedef struct {
111     PyObject_HEAD
112     bz_stream bzs;
113     int running;
114 #ifdef WITH_THREAD
115     PyThread_type_lock lock;
116 #endif
117 } BZ2CompObject;
118 
119 typedef struct {
120     PyObject_HEAD
121     bz_stream bzs;
122     int running;
123     PyObject *unused_data;
124 #ifdef WITH_THREAD
125     PyThread_type_lock lock;
126 #endif
127 } BZ2DecompObject;
128 
129 /* ===================================================================== */
130 /* Utility functions. */
131 
132 /* Refuse regular I/O if there's data in the iteration-buffer.
133  * Mixing them would cause data to arrive out of order, as the read*
134  * methods don't use the iteration buffer. */
135 static int
check_iterbuffered(BZ2FileObject * f)136 check_iterbuffered(BZ2FileObject *f)
137 {
138     if (f->f_buf != NULL &&
139         (f->f_bufend - f->f_bufptr) > 0 &&
140         f->f_buf[0] != '\0') {
141         PyErr_SetString(PyExc_ValueError,
142             "Mixing iteration and read methods would lose data");
143         return -1;
144     }
145     return 0;
146 }
147 
148 static int
Util_CatchBZ2Error(int bzerror)149 Util_CatchBZ2Error(int bzerror)
150 {
151     int ret = 0;
152     switch(bzerror) {
153         case BZ_OK:
154         case BZ_STREAM_END:
155             break;
156 
157 #ifdef BZ_CONFIG_ERROR
158         case BZ_CONFIG_ERROR:
159             PyErr_SetString(PyExc_SystemError,
160                             "the bz2 library was not compiled "
161                             "correctly");
162             ret = 1;
163             break;
164 #endif
165 
166         case BZ_PARAM_ERROR:
167             PyErr_SetString(PyExc_ValueError,
168                             "the bz2 library has received wrong "
169                             "parameters");
170             ret = 1;
171             break;
172 
173         case BZ_MEM_ERROR:
174             PyErr_NoMemory();
175             ret = 1;
176             break;
177 
178         case BZ_DATA_ERROR:
179         case BZ_DATA_ERROR_MAGIC:
180             PyErr_SetString(PyExc_IOError, "invalid data stream");
181             ret = 1;
182             break;
183 
184         case BZ_IO_ERROR:
185             PyErr_SetString(PyExc_IOError, "unknown IO error");
186             ret = 1;
187             break;
188 
189         case BZ_UNEXPECTED_EOF:
190             PyErr_SetString(PyExc_EOFError,
191                             "compressed file ended before the "
192                             "logical end-of-stream was detected");
193             ret = 1;
194             break;
195 
196         case BZ_SEQUENCE_ERROR:
197             PyErr_SetString(PyExc_RuntimeError,
198                             "wrong sequence of bz2 library "
199                             "commands used");
200             ret = 1;
201             break;
202     }
203     return ret;
204 }
205 
/* SMALLCHUNK: size of the on-stack scratch buffers declared below;
 * BUFSIZ, but never less than 8192 bytes. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
211 
/* Adapted from Python's fileobject.c:new_buffersize().
 *
 * Returns the next buffer size after `currentsize`: the current size
 * plus one eighth of it plus 6.  Growing proportionally keeps repeated
 * resizing amortized linear, while the sub-doubling factor limits
 * over-allocation.  The arithmetic is unsigned and may wrap; callers
 * that care compare the result against the old size. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    const size_t growth = (currentsize >> 3) + 6;

    return currentsize + growth;
}
221 
222 static int
Util_GrowBuffer(PyObject ** buf)223 Util_GrowBuffer(PyObject **buf)
224 {
225     size_t size = PyString_GET_SIZE(*buf);
226     size_t new_size = Util_NewBufferSize(size);
227     if (new_size > size) {
228         return _PyString_Resize(buf, new_size);
229     } else {  /* overflow */
230         PyErr_SetString(PyExc_OverflowError,
231                         "Unable to allocate buffer - output too large");
232         return -1;
233     }
234 }
235 
236 /* This is a hacked version of Python's fileobject.c:get_line(). */
237 static PyObject *
Util_GetLine(BZ2FileObject * f,int n)238 Util_GetLine(BZ2FileObject *f, int n)
239 {
240     char c;
241     char *buf, *end;
242     size_t total_v_size;        /* total # of slots in buffer */
243     size_t used_v_size;         /* # used slots in buffer */
244     size_t increment;       /* amount to increment the buffer */
245     PyObject *v;
246     int bzerror;
247     int bytes_read;
248     int newlinetypes = f->f_newlinetypes;
249     int skipnextlf = f->f_skipnextlf;
250     int univ_newline = f->f_univ_newline;
251 
252     total_v_size = n > 0 ? n : 100;
253     v = PyString_FromStringAndSize((char *)NULL, total_v_size);
254     if (v == NULL)
255         return NULL;
256 
257     buf = BUF(v);
258     end = buf + total_v_size;
259 
260     for (;;) {
261         Py_BEGIN_ALLOW_THREADS
262         while (buf != end) {
263             bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
264             f->pos++;
265             if (bytes_read == 0) break;
266             if (univ_newline) {
267                 if (skipnextlf) {
268                     skipnextlf = 0;
269                     if (c == '\n') {
270                         /* Seeing a \n here with skipnextlf true means we
271                          * saw a \r before.
272                          */
273                         newlinetypes |= NEWLINE_CRLF;
274                         if (bzerror != BZ_OK) break;
275                         bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
276                         f->pos++;
277                         if (bytes_read == 0) break;
278                     } else {
279                         newlinetypes |= NEWLINE_CR;
280                     }
281                 }
282                 if (c == '\r') {
283                     skipnextlf = 1;
284                     c = '\n';
285                 } else if (c == '\n')
286                     newlinetypes |= NEWLINE_LF;
287             }
288             *buf++ = c;
289             if (bzerror != BZ_OK || c == '\n') break;
290         }
291         if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
292             newlinetypes |= NEWLINE_CR;
293         Py_END_ALLOW_THREADS
294         f->f_newlinetypes = newlinetypes;
295         f->f_skipnextlf = skipnextlf;
296         if (bzerror == BZ_STREAM_END) {
297             f->size = f->pos;
298             f->mode = MODE_READ_EOF;
299             break;
300         } else if (bzerror != BZ_OK) {
301             Util_CatchBZ2Error(bzerror);
302             Py_DECREF(v);
303             return NULL;
304         }
305         if (c == '\n')
306             break;
307         /* Must be because buf == end */
308         if (n > 0)
309             break;
310         used_v_size = total_v_size;
311         increment = total_v_size >> 2; /* mild exponential growth */
312         total_v_size += increment;
313         if (total_v_size > INT_MAX) {
314             PyErr_SetString(PyExc_OverflowError,
315                 "line is longer than a Python string can hold");
316             Py_DECREF(v);
317             return NULL;
318         }
319         if (_PyString_Resize(&v, total_v_size) < 0)
320             return NULL;
321         buf = BUF(v) + used_v_size;
322         end = BUF(v) + total_v_size;
323     }
324 
325     used_v_size = buf - BUF(v);
326     if (used_v_size != total_v_size)
327         _PyString_Resize(&v, used_v_size);
328     return v;
329 }
330 
331 /* This is a hacked version of Python's
332  * fileobject.c:Py_UniversalNewlineFread(). */
333 size_t
Util_UnivNewlineRead(int * bzerror,BZFILE * stream,char * buf,size_t n,BZ2FileObject * f)334 Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
335                      char* buf, size_t n, BZ2FileObject *f)
336 {
337     char *dst = buf;
338     int newlinetypes, skipnextlf;
339 
340     assert(buf != NULL);
341     assert(stream != NULL);
342 
343     if (!f->f_univ_newline)
344         return BZ2_bzRead(bzerror, stream, buf, n);
345 
346     newlinetypes = f->f_newlinetypes;
347     skipnextlf = f->f_skipnextlf;
348 
349     /* Invariant:  n is the number of bytes remaining to be filled
350      * in the buffer.
351      */
352     while (n) {
353         size_t nread;
354         int shortread;
355         char *src = dst;
356 
357         nread = BZ2_bzRead(bzerror, stream, dst, n);
358         assert(nread <= n);
359         n -= nread; /* assuming 1 byte out for each in; will adjust */
360         shortread = n != 0;             /* true iff EOF or error */
361         while (nread--) {
362             char c = *src++;
363             if (c == '\r') {
364                 /* Save as LF and set flag to skip next LF. */
365                 *dst++ = '\n';
366                 skipnextlf = 1;
367             }
368             else if (skipnextlf && c == '\n') {
369                 /* Skip LF, and remember we saw CR LF. */
370                 skipnextlf = 0;
371                 newlinetypes |= NEWLINE_CRLF;
372                 ++n;
373             }
374             else {
375                 /* Normal char to be stored in buffer.  Also
376                  * update the newlinetypes flag if either this
377                  * is an LF or the previous char was a CR.
378                  */
379                 if (c == '\n')
380                     newlinetypes |= NEWLINE_LF;
381                 else if (skipnextlf)
382                     newlinetypes |= NEWLINE_CR;
383                 *dst++ = c;
384                 skipnextlf = 0;
385             }
386         }
387         if (shortread) {
388             /* If this is EOF, update type flags. */
389             if (skipnextlf && *bzerror == BZ_STREAM_END)
390                 newlinetypes |= NEWLINE_CR;
391             break;
392         }
393     }
394     f->f_newlinetypes = newlinetypes;
395     f->f_skipnextlf = skipnextlf;
396     return dst - buf;
397 }
398 
399 /* This is a hacked version of Python's fileobject.c:drop_readahead(). */
400 static void
Util_DropReadAhead(BZ2FileObject * f)401 Util_DropReadAhead(BZ2FileObject *f)
402 {
403     if (f->f_buf != NULL) {
404         PyMem_Free(f->f_buf);
405         f->f_buf = NULL;
406     }
407 }
408 
409 /* This is a hacked version of Python's fileobject.c:readahead(). */
410 static int
Util_ReadAhead(BZ2FileObject * f,int bufsize)411 Util_ReadAhead(BZ2FileObject *f, int bufsize)
412 {
413     int chunksize;
414     int bzerror;
415 
416     if (f->f_buf != NULL) {
417         if((f->f_bufend - f->f_bufptr) >= 1)
418             return 0;
419         else
420             Util_DropReadAhead(f);
421     }
422     if (f->mode == MODE_READ_EOF) {
423         f->f_bufptr = f->f_buf;
424         f->f_bufend = f->f_buf;
425         return 0;
426     }
427     if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
428         PyErr_NoMemory();
429         return -1;
430     }
431     Py_BEGIN_ALLOW_THREADS
432     chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
433                                      bufsize, f);
434     Py_END_ALLOW_THREADS
435     f->pos += chunksize;
436     if (bzerror == BZ_STREAM_END) {
437         f->size = f->pos;
438         f->mode = MODE_READ_EOF;
439     } else if (bzerror != BZ_OK) {
440         Util_CatchBZ2Error(bzerror);
441         Util_DropReadAhead(f);
442         return -1;
443     }
444     f->f_bufptr = f->f_buf;
445     f->f_bufend = f->f_buf + chunksize;
446     return 0;
447 }
448 
449 /* This is a hacked version of Python's
450  * fileobject.c:readahead_get_line_skip(). */
451 static PyStringObject *
Util_ReadAheadGetLineSkip(BZ2FileObject * f,int skip,int bufsize)452 Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
453 {
454     PyStringObject* s;
455     char *bufptr;
456     char *buf;
457     int len;
458 
459     if (f->f_buf == NULL)
460         if (Util_ReadAhead(f, bufsize) < 0)
461             return NULL;
462 
463     len = f->f_bufend - f->f_bufptr;
464     if (len == 0)
465         return (PyStringObject *)
466             PyString_FromStringAndSize(NULL, skip);
467     bufptr = memchr(f->f_bufptr, '\n', len);
468     if (bufptr != NULL) {
469         bufptr++;                               /* Count the '\n' */
470         len = bufptr - f->f_bufptr;
471         s = (PyStringObject *)
472             PyString_FromStringAndSize(NULL, skip+len);
473         if (s == NULL)
474             return NULL;
475         memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
476         f->f_bufptr = bufptr;
477         if (bufptr == f->f_bufend)
478             Util_DropReadAhead(f);
479     } else {
480         bufptr = f->f_bufptr;
481         buf = f->f_buf;
482         f->f_buf = NULL;                /* Force new readahead buffer */
483         s = Util_ReadAheadGetLineSkip(f, skip+len,
484                                       bufsize + (bufsize>>2));
485         if (s == NULL) {
486             PyMem_Free(buf);
487             return NULL;
488         }
489         memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
490         PyMem_Free(buf);
491     }
492     return s;
493 }
494 
495 /* ===================================================================== */
496 /* Methods of BZ2File. */
497 
498 PyDoc_STRVAR(BZ2File_read__doc__,
499 "read([size]) -> string\n\
500 \n\
501 Read at most size uncompressed bytes, returned as a string. If the size\n\
502 argument is negative or omitted, read until EOF is reached.\n\
503 ");
504 
505 /* This is a hacked version of Python's fileobject.c:file_read(). */
506 static PyObject *
BZ2File_read(BZ2FileObject * self,PyObject * args)507 BZ2File_read(BZ2FileObject *self, PyObject *args)
508 {
509     long bytesrequested = -1;
510     size_t bytesread, buffersize, chunksize;
511     int bzerror;
512     PyObject *ret = NULL;
513 
514     if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
515         return NULL;
516 
517     ACQUIRE_LOCK(self);
518     switch (self->mode) {
519         case MODE_READ:
520             break;
521         case MODE_READ_EOF:
522             ret = PyString_FromString("");
523             goto cleanup;
524         case MODE_CLOSED:
525             PyErr_SetString(PyExc_ValueError,
526                             "I/O operation on closed file");
527             goto cleanup;
528         default:
529             PyErr_SetString(PyExc_IOError,
530                             "file is not ready for reading");
531             goto cleanup;
532     }
533 
534     /* refuse to mix with f.next() */
535     if (check_iterbuffered(self))
536         goto cleanup;
537 
538     if (bytesrequested < 0)
539         buffersize = Util_NewBufferSize((size_t)0);
540     else
541         buffersize = bytesrequested;
542     if (buffersize > INT_MAX) {
543         PyErr_SetString(PyExc_OverflowError,
544                         "requested number of bytes is "
545                         "more than a Python string can hold");
546         goto cleanup;
547     }
548     ret = PyString_FromStringAndSize((char *)NULL, buffersize);
549     if (ret == NULL)
550         goto cleanup;
551     bytesread = 0;
552 
553     for (;;) {
554         Py_BEGIN_ALLOW_THREADS
555         chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
556                                          BUF(ret)+bytesread,
557                                          buffersize-bytesread,
558                                          self);
559         self->pos += chunksize;
560         Py_END_ALLOW_THREADS
561         bytesread += chunksize;
562         if (bzerror == BZ_STREAM_END) {
563             self->size = self->pos;
564             self->mode = MODE_READ_EOF;
565             break;
566         } else if (bzerror != BZ_OK) {
567             Util_CatchBZ2Error(bzerror);
568             Py_DECREF(ret);
569             ret = NULL;
570             goto cleanup;
571         }
572         if (bytesrequested < 0) {
573             buffersize = Util_NewBufferSize(buffersize);
574             if (_PyString_Resize(&ret, buffersize) < 0)
575                 goto cleanup;
576         } else {
577             break;
578         }
579     }
580     if (bytesread != buffersize)
581         _PyString_Resize(&ret, bytesread);
582 
583 cleanup:
584     RELEASE_LOCK(self);
585     return ret;
586 }
587 
588 PyDoc_STRVAR(BZ2File_readline__doc__,
589 "readline([size]) -> string\n\
590 \n\
591 Return the next line from the file, as a string, retaining newline.\n\
592 A non-negative size argument will limit the maximum number of bytes to\n\
593 return (an incomplete line may be returned then). Return an empty\n\
594 string at EOF.\n\
595 ");
596 
597 static PyObject *
BZ2File_readline(BZ2FileObject * self,PyObject * args)598 BZ2File_readline(BZ2FileObject *self, PyObject *args)
599 {
600     PyObject *ret = NULL;
601     int sizehint = -1;
602 
603     if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
604         return NULL;
605 
606     ACQUIRE_LOCK(self);
607     switch (self->mode) {
608         case MODE_READ:
609             break;
610         case MODE_READ_EOF:
611             ret = PyString_FromString("");
612             goto cleanup;
613         case MODE_CLOSED:
614             PyErr_SetString(PyExc_ValueError,
615                             "I/O operation on closed file");
616             goto cleanup;
617         default:
618             PyErr_SetString(PyExc_IOError,
619                             "file is not ready for reading");
620             goto cleanup;
621     }
622 
623     /* refuse to mix with f.next() */
624     if (check_iterbuffered(self))
625         goto cleanup;
626 
627     if (sizehint == 0)
628         ret = PyString_FromString("");
629     else
630         ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
631 
632 cleanup:
633     RELEASE_LOCK(self);
634     return ret;
635 }
636 
637 PyDoc_STRVAR(BZ2File_readlines__doc__,
638 "readlines([size]) -> list\n\
639 \n\
640 Call readline() repeatedly and return a list of lines read.\n\
641 The optional size argument, if given, is an approximate bound on the\n\
642 total number of bytes in the lines returned.\n\
643 ");
644 
645 /* This is a hacked version of Python's fileobject.c:file_readlines(). */
646 static PyObject *
BZ2File_readlines(BZ2FileObject * self,PyObject * args)647 BZ2File_readlines(BZ2FileObject *self, PyObject *args)
648 {
649     long sizehint = 0;
650     PyObject *list = NULL;
651     PyObject *line;
652     char small_buffer[SMALLCHUNK];
653     char *buffer = small_buffer;
654     size_t buffersize = SMALLCHUNK;
655     PyObject *big_buffer = NULL;
656     size_t nfilled = 0;
657     size_t nread;
658     size_t totalread = 0;
659     char *p, *q, *end;
660     int err;
661     int shortread = 0;
662     int bzerror;
663 
664     if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
665         return NULL;
666 
667     ACQUIRE_LOCK(self);
668     switch (self->mode) {
669         case MODE_READ:
670             break;
671         case MODE_READ_EOF:
672             list = PyList_New(0);
673             goto cleanup;
674         case MODE_CLOSED:
675             PyErr_SetString(PyExc_ValueError,
676                             "I/O operation on closed file");
677             goto cleanup;
678         default:
679             PyErr_SetString(PyExc_IOError,
680                             "file is not ready for reading");
681             goto cleanup;
682     }
683 
684     /* refuse to mix with f.next() */
685     if (check_iterbuffered(self))
686         goto cleanup;
687 
688     if ((list = PyList_New(0)) == NULL)
689         goto cleanup;
690 
691     for (;;) {
692         Py_BEGIN_ALLOW_THREADS
693         nread = Util_UnivNewlineRead(&bzerror, self->fp,
694                                      buffer+nfilled,
695                                      buffersize-nfilled, self);
696         self->pos += nread;
697         Py_END_ALLOW_THREADS
698         if (bzerror == BZ_STREAM_END) {
699             self->size = self->pos;
700             self->mode = MODE_READ_EOF;
701             if (nread == 0) {
702                 sizehint = 0;
703                 break;
704             }
705             shortread = 1;
706         } else if (bzerror != BZ_OK) {
707             Util_CatchBZ2Error(bzerror);
708           error:
709             Py_DECREF(list);
710             list = NULL;
711             goto cleanup;
712         }
713         totalread += nread;
714         p = memchr(buffer+nfilled, '\n', nread);
715         if (!shortread && p == NULL) {
716             /* Need a larger buffer to fit this line */
717             nfilled += nread;
718             buffersize *= 2;
719             if (buffersize > INT_MAX) {
720                 PyErr_SetString(PyExc_OverflowError,
721                 "line is longer than a Python string can hold");
722                 goto error;
723             }
724             if (big_buffer == NULL) {
725                 /* Create the big buffer */
726                 big_buffer = PyString_FromStringAndSize(
727                     NULL, buffersize);
728                 if (big_buffer == NULL)
729                     goto error;
730                 buffer = PyString_AS_STRING(big_buffer);
731                 memcpy(buffer, small_buffer, nfilled);
732             }
733             else {
734                 /* Grow the big buffer */
735                 if (_PyString_Resize(&big_buffer, buffersize))
736                     goto error;
737                 buffer = PyString_AS_STRING(big_buffer);
738             }
739             continue;
740         }
741         end = buffer+nfilled+nread;
742         q = buffer;
743         while (p != NULL) {
744             /* Process complete lines */
745             p++;
746             line = PyString_FromStringAndSize(q, p-q);
747             if (line == NULL)
748                 goto error;
749             err = PyList_Append(list, line);
750             Py_DECREF(line);
751             if (err != 0)
752                 goto error;
753             q = p;
754             p = memchr(q, '\n', end-q);
755         }
756         /* Move the remaining incomplete line to the start */
757         nfilled = end-q;
758         memmove(buffer, q, nfilled);
759         if (sizehint > 0)
760             if (totalread >= (size_t)sizehint)
761                 break;
762         if (shortread) {
763             sizehint = 0;
764             break;
765         }
766     }
767     if (nfilled != 0) {
768         /* Partial last line */
769         line = PyString_FromStringAndSize(buffer, nfilled);
770         if (line == NULL)
771             goto error;
772         if (sizehint > 0) {
773             /* Need to complete the last line */
774             PyObject *rest = Util_GetLine(self, 0);
775             if (rest == NULL) {
776                 Py_DECREF(line);
777                 goto error;
778             }
779             PyString_Concat(&line, rest);
780             Py_DECREF(rest);
781             if (line == NULL)
782                 goto error;
783         }
784         err = PyList_Append(list, line);
785         Py_DECREF(line);
786         if (err != 0)
787             goto error;
788     }
789 
790   cleanup:
791     RELEASE_LOCK(self);
792     if (big_buffer) {
793         Py_DECREF(big_buffer);
794     }
795     return list;
796 }
797 
798 PyDoc_STRVAR(BZ2File_xreadlines__doc__,
799 "xreadlines() -> self\n\
800 \n\
801 For backward compatibility. BZ2File objects now include the performance\n\
802 optimizations previously implemented in the xreadlines module.\n\
803 ");
804 
805 PyDoc_STRVAR(BZ2File_write__doc__,
806 "write(data) -> None\n\
807 \n\
808 Write the 'data' string to file. Note that due to buffering, close() may\n\
809 be needed before the file on disk reflects the data written.\n\
810 ");
811 
812 /* This is a hacked version of Python's fileobject.c:file_write(). */
813 static PyObject *
BZ2File_write(BZ2FileObject * self,PyObject * args)814 BZ2File_write(BZ2FileObject *self, PyObject *args)
815 {
816     PyObject *ret = NULL;
817     Py_buffer pbuf;
818     char *buf;
819     int len;
820     int bzerror;
821 
822     if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
823         return NULL;
824     buf = pbuf.buf;
825     len = pbuf.len;
826 
827     ACQUIRE_LOCK(self);
828     switch (self->mode) {
829         case MODE_WRITE:
830             break;
831 
832         case MODE_CLOSED:
833             PyErr_SetString(PyExc_ValueError,
834                             "I/O operation on closed file");
835             goto cleanup;
836 
837         default:
838             PyErr_SetString(PyExc_IOError,
839                             "file is not ready for writing");
840             goto cleanup;
841     }
842 
843     self->f_softspace = 0;
844 
845     Py_BEGIN_ALLOW_THREADS
846     BZ2_bzWrite (&bzerror, self->fp, buf, len);
847     self->pos += len;
848     Py_END_ALLOW_THREADS
849 
850     if (bzerror != BZ_OK) {
851         Util_CatchBZ2Error(bzerror);
852         goto cleanup;
853     }
854 
855     Py_INCREF(Py_None);
856     ret = Py_None;
857 
858 cleanup:
859     PyBuffer_Release(&pbuf);
860     RELEASE_LOCK(self);
861     return ret;
862 }
863 
864 PyDoc_STRVAR(BZ2File_writelines__doc__,
865 "writelines(sequence_of_strings) -> None\n\
866 \n\
867 Write the sequence of strings to the file. Note that newlines are not\n\
868 added. The sequence can be any iterable object producing strings. This is\n\
869 equivalent to calling write() for each string.\n\
870 ");
871 
872 /* This is a hacked version of Python's fileobject.c:file_writelines(). */
873 static PyObject *
BZ2File_writelines(BZ2FileObject * self,PyObject * seq)874 BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
875 {
876 #define CHUNKSIZE 1000
877     PyObject *list = NULL;
878     PyObject *iter = NULL;
879     PyObject *ret = NULL;
880     PyObject *line;
881     int i, j, index, len, islist;
882     int bzerror;
883 
884     ACQUIRE_LOCK(self);
885     switch (self->mode) {
886         case MODE_WRITE:
887             break;
888 
889         case MODE_CLOSED:
890             PyErr_SetString(PyExc_ValueError,
891                             "I/O operation on closed file");
892             goto error;
893 
894         default:
895             PyErr_SetString(PyExc_IOError,
896                             "file is not ready for writing");
897             goto error;
898     }
899 
900     islist = PyList_Check(seq);
901     if  (!islist) {
902         iter = PyObject_GetIter(seq);
903         if (iter == NULL) {
904             PyErr_SetString(PyExc_TypeError,
905                 "writelines() requires an iterable argument");
906             goto error;
907         }
908         list = PyList_New(CHUNKSIZE);
909         if (list == NULL)
910             goto error;
911     }
912 
913     /* Strategy: slurp CHUNKSIZE lines into a private list,
914        checking that they are all strings, then write that list
915        without holding the interpreter lock, then come back for more. */
916     for (index = 0; ; index += CHUNKSIZE) {
917         if (islist) {
918             Py_XDECREF(list);
919             list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
920             if (list == NULL)
921                 goto error;
922             j = PyList_GET_SIZE(list);
923         }
924         else {
925             for (j = 0; j < CHUNKSIZE; j++) {
926                 line = PyIter_Next(iter);
927                 if (line == NULL) {
928                     if (PyErr_Occurred())
929                         goto error;
930                     break;
931                 }
932                 PyList_SetItem(list, j, line);
933             }
934         }
935         if (j == 0)
936             break;
937 
938         /* Check that all entries are indeed strings. If not,
939            apply the same rules as for file.write() and
940            convert the rets to strings. This is slow, but
941            seems to be the only way since all conversion APIs
942            could potentially execute Python code. */
943         for (i = 0; i < j; i++) {
944             PyObject *v = PyList_GET_ITEM(list, i);
945             if (!PyString_Check(v)) {
946                 const char *buffer;
947                 Py_ssize_t len;
948                 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
949                     PyErr_SetString(PyExc_TypeError,
950                                     "writelines() "
951                                     "argument must be "
952                                     "a sequence of "
953                                     "strings");
954                     goto error;
955                 }
956                 line = PyString_FromStringAndSize(buffer,
957                                                   len);
958                 if (line == NULL)
959                     goto error;
960                 Py_DECREF(v);
961                 PyList_SET_ITEM(list, i, line);
962             }
963         }
964 
965         self->f_softspace = 0;
966 
967         /* Since we are releasing the global lock, the
968            following code may *not* execute Python code. */
969         Py_BEGIN_ALLOW_THREADS
970         for (i = 0; i < j; i++) {
971             line = PyList_GET_ITEM(list, i);
972             len = PyString_GET_SIZE(line);
973             BZ2_bzWrite (&bzerror, self->fp,
974                          PyString_AS_STRING(line), len);
975             if (bzerror != BZ_OK) {
976                 Py_BLOCK_THREADS
977                 Util_CatchBZ2Error(bzerror);
978                 goto error;
979             }
980         }
981         Py_END_ALLOW_THREADS
982 
983         if (j < CHUNKSIZE)
984             break;
985     }
986 
987     Py_INCREF(Py_None);
988     ret = Py_None;
989 
990   error:
991     RELEASE_LOCK(self);
992     Py_XDECREF(list);
993     Py_XDECREF(iter);
994     return ret;
995 #undef CHUNKSIZE
996 }
997 
998 PyDoc_STRVAR(BZ2File_seek__doc__,
999 "seek(offset [, whence]) -> None\n\
1000 \n\
1001 Move to new file position. Argument offset is a byte count. Optional\n\
1002 argument whence defaults to 0 (offset from start of file, offset\n\
1003 should be >= 0); other values are 1 (move relative to current position,\n\
1004 positive or negative), and 2 (move relative to end of file, usually\n\
1005 negative, although many platforms allow seeking beyond the end of a file).\n\
1006 \n\
1007 Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1008 the operation may be extremely slow.\n\
1009 ");
1010 
1011 static PyObject *
BZ2File_seek(BZ2FileObject * self,PyObject * args)1012 BZ2File_seek(BZ2FileObject *self, PyObject *args)
1013 {
1014     int where = 0;
1015     PyObject *offobj;
1016     Py_off_t offset;
1017     char small_buffer[SMALLCHUNK];
1018     char *buffer = small_buffer;
1019     size_t buffersize = SMALLCHUNK;
1020     Py_off_t bytesread = 0;
1021     size_t readsize;
1022     int chunksize;
1023     int bzerror;
1024     PyObject *ret = NULL;
1025 
1026     if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1027         return NULL;
1028 #if !defined(HAVE_LARGEFILE_SUPPORT)
1029     offset = PyInt_AsLong(offobj);
1030 #else
1031     offset = PyLong_Check(offobj) ?
1032         PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1033 #endif
1034     if (PyErr_Occurred())
1035         return NULL;
1036 
1037     ACQUIRE_LOCK(self);
1038     Util_DropReadAhead(self);
1039     switch (self->mode) {
1040         case MODE_READ:
1041         case MODE_READ_EOF:
1042             break;
1043 
1044         case MODE_CLOSED:
1045             PyErr_SetString(PyExc_ValueError,
1046                             "I/O operation on closed file");
1047             goto cleanup;
1048 
1049         default:
1050             PyErr_SetString(PyExc_IOError,
1051                             "seek works only while reading");
1052             goto cleanup;
1053     }
1054 
1055     if (where == 2) {
1056         if (self->size == -1) {
1057             assert(self->mode != MODE_READ_EOF);
1058             for (;;) {
1059                 Py_BEGIN_ALLOW_THREADS
1060                 chunksize = Util_UnivNewlineRead(
1061                                 &bzerror, self->fp,
1062                                 buffer, buffersize,
1063                                 self);
1064                 self->pos += chunksize;
1065                 Py_END_ALLOW_THREADS
1066 
1067                 bytesread += chunksize;
1068                 if (bzerror == BZ_STREAM_END) {
1069                     break;
1070                 } else if (bzerror != BZ_OK) {
1071                     Util_CatchBZ2Error(bzerror);
1072                     goto cleanup;
1073                 }
1074             }
1075             self->mode = MODE_READ_EOF;
1076             self->size = self->pos;
1077             bytesread = 0;
1078         }
1079         offset = self->size + offset;
1080     } else if (where == 1) {
1081         offset = self->pos + offset;
1082     }
1083 
1084     /* Before getting here, offset must be the absolute position the file
1085      * pointer should be set to. */
1086 
1087     if (offset >= self->pos) {
1088         /* we can move forward */
1089         offset -= self->pos;
1090     } else {
1091         /* we cannot move back, so rewind the stream */
1092         BZ2_bzReadClose(&bzerror, self->fp);
1093         if (self->fp) {
1094             PyFile_DecUseCount((PyFileObject *)self->file);
1095             self->fp = NULL;
1096         }
1097         if (bzerror != BZ_OK) {
1098             Util_CatchBZ2Error(bzerror);
1099             goto cleanup;
1100         }
1101         ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1102         if (!ret)
1103             goto cleanup;
1104         Py_DECREF(ret);
1105         ret = NULL;
1106         self->pos = 0;
1107         self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1108                                   0, 0, NULL, 0);
1109         if (self->fp)
1110             PyFile_IncUseCount((PyFileObject *)self->file);
1111         if (bzerror != BZ_OK) {
1112             Util_CatchBZ2Error(bzerror);
1113             goto cleanup;
1114         }
1115         self->mode = MODE_READ;
1116     }
1117 
1118     if (offset <= 0 || self->mode == MODE_READ_EOF)
1119         goto exit;
1120 
1121     /* Before getting here, offset must be set to the number of bytes
1122      * to walk forward. */
1123     for (;;) {
1124         if (offset-bytesread > buffersize)
1125             readsize = buffersize;
1126         else
1127             /* offset might be wider that readsize, but the result
1128              * of the subtraction is bound by buffersize (see the
1129              * condition above). buffersize is 8192. */
1130             readsize = (size_t)(offset-bytesread);
1131         Py_BEGIN_ALLOW_THREADS
1132         chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1133                                          buffer, readsize, self);
1134         self->pos += chunksize;
1135         Py_END_ALLOW_THREADS
1136         bytesread += chunksize;
1137         if (bzerror == BZ_STREAM_END) {
1138             self->size = self->pos;
1139             self->mode = MODE_READ_EOF;
1140             break;
1141         } else if (bzerror != BZ_OK) {
1142             Util_CatchBZ2Error(bzerror);
1143             goto cleanup;
1144         }
1145         if (bytesread == offset)
1146             break;
1147     }
1148 
1149 exit:
1150     Py_INCREF(Py_None);
1151     ret = Py_None;
1152 
1153 cleanup:
1154     RELEASE_LOCK(self);
1155     return ret;
1156 }
1157 
1158 PyDoc_STRVAR(BZ2File_tell__doc__,
1159 "tell() -> int\n\
1160 \n\
1161 Return the current file position, an integer (may be a long integer).\n\
1162 ");
1163 
1164 static PyObject *
BZ2File_tell(BZ2FileObject * self,PyObject * args)1165 BZ2File_tell(BZ2FileObject *self, PyObject *args)
1166 {
1167     PyObject *ret = NULL;
1168 
1169     if (self->mode == MODE_CLOSED) {
1170         PyErr_SetString(PyExc_ValueError,
1171                         "I/O operation on closed file");
1172         goto cleanup;
1173     }
1174 
1175 #if !defined(HAVE_LARGEFILE_SUPPORT)
1176     ret = PyInt_FromLong(self->pos);
1177 #else
1178     ret = PyLong_FromLongLong(self->pos);
1179 #endif
1180 
1181 cleanup:
1182     return ret;
1183 }
1184 
1185 PyDoc_STRVAR(BZ2File_close__doc__,
1186 "close() -> None or (perhaps) an integer\n\
1187 \n\
1188 Close the file. Sets data attribute .closed to true. A closed file\n\
1189 cannot be used for further I/O operations. close() may be called more\n\
1190 than once without error.\n\
1191 ");
1192 
1193 static PyObject *
BZ2File_close(BZ2FileObject * self)1194 BZ2File_close(BZ2FileObject *self)
1195 {
1196     PyObject *ret = NULL;
1197     int bzerror = BZ_OK;
1198 
1199     ACQUIRE_LOCK(self);
1200     switch (self->mode) {
1201         case MODE_READ:
1202         case MODE_READ_EOF:
1203             BZ2_bzReadClose(&bzerror, self->fp);
1204             break;
1205         case MODE_WRITE:
1206             BZ2_bzWriteClose(&bzerror, self->fp,
1207                              0, NULL, NULL);
1208             break;
1209     }
1210     if (self->file) {
1211         if (self->fp)
1212             PyFile_DecUseCount((PyFileObject *)self->file);
1213         ret = PyObject_CallMethod(self->file, "close", NULL);
1214     } else {
1215         Py_INCREF(Py_None);
1216         ret = Py_None;
1217     }
1218     self->fp = NULL;
1219     self->mode = MODE_CLOSED;
1220     if (bzerror != BZ_OK) {
1221         Util_CatchBZ2Error(bzerror);
1222         Py_XDECREF(ret);
1223         ret = NULL;
1224     }
1225 
1226     RELEASE_LOCK(self);
1227     return ret;
1228 }
1229 
1230 PyDoc_STRVAR(BZ2File_enter_doc,
1231 "__enter__() -> self.");
1232 
1233 static PyObject *
BZ2File_enter(BZ2FileObject * self)1234 BZ2File_enter(BZ2FileObject *self)
1235 {
1236     if (self->mode == MODE_CLOSED) {
1237         PyErr_SetString(PyExc_ValueError,
1238             "I/O operation on closed file");
1239         return NULL;
1240     }
1241     Py_INCREF(self);
1242     return (PyObject *) self;
1243 }
1244 
1245 PyDoc_STRVAR(BZ2File_exit_doc,
1246 "__exit__(*excinfo) -> None.  Closes the file.");
1247 
1248 static PyObject *
BZ2File_exit(BZ2FileObject * self,PyObject * args)1249 BZ2File_exit(BZ2FileObject *self, PyObject *args)
1250 {
1251     PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1252     if (!ret)
1253         /* If error occurred, pass through */
1254         return NULL;
1255     Py_DECREF(ret);
1256     Py_RETURN_NONE;
1257 }
1258 
1259 
1260 static PyObject *BZ2File_getiter(BZ2FileObject *self);
1261 
1262 static PyMethodDef BZ2File_methods[] = {
1263     {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1264     {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1265     {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1266     {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1267     {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1268     {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1269     {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1270     {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1271     {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1272     {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1273     {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1274     {NULL,              NULL}           /* sentinel */
1275 };
1276 
1277 
1278 /* ===================================================================== */
1279 /* Getters and setters of BZ2File. */
1280 
1281 /* This is a hacked version of Python's fileobject.c:get_newlines(). */
1282 static PyObject *
BZ2File_get_newlines(BZ2FileObject * self,void * closure)1283 BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1284 {
1285     switch (self->f_newlinetypes) {
1286     case NEWLINE_UNKNOWN:
1287         Py_INCREF(Py_None);
1288         return Py_None;
1289     case NEWLINE_CR:
1290         return PyString_FromString("\r");
1291     case NEWLINE_LF:
1292         return PyString_FromString("\n");
1293     case NEWLINE_CR|NEWLINE_LF:
1294         return Py_BuildValue("(ss)", "\r", "\n");
1295     case NEWLINE_CRLF:
1296         return PyString_FromString("\r\n");
1297     case NEWLINE_CR|NEWLINE_CRLF:
1298         return Py_BuildValue("(ss)", "\r", "\r\n");
1299     case NEWLINE_LF|NEWLINE_CRLF:
1300         return Py_BuildValue("(ss)", "\n", "\r\n");
1301     case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1302         return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1303     default:
1304         PyErr_Format(PyExc_SystemError,
1305                      "Unknown newlines value 0x%x\n",
1306                      self->f_newlinetypes);
1307         return NULL;
1308     }
1309 }
1310 
1311 static PyObject *
BZ2File_get_closed(BZ2FileObject * self,void * closure)1312 BZ2File_get_closed(BZ2FileObject *self, void *closure)
1313 {
1314     return PyInt_FromLong(self->mode == MODE_CLOSED);
1315 }
1316 
1317 static PyObject *
BZ2File_get_mode(BZ2FileObject * self,void * closure)1318 BZ2File_get_mode(BZ2FileObject *self, void *closure)
1319 {
1320     return PyObject_GetAttrString(self->file, "mode");
1321 }
1322 
1323 static PyObject *
BZ2File_get_name(BZ2FileObject * self,void * closure)1324 BZ2File_get_name(BZ2FileObject *self, void *closure)
1325 {
1326     return PyObject_GetAttrString(self->file, "name");
1327 }
1328 
1329 static PyGetSetDef BZ2File_getset[] = {
1330     {"closed", (getter)BZ2File_get_closed, NULL,
1331                     "True if the file is closed"},
1332     {"newlines", (getter)BZ2File_get_newlines, NULL,
1333                     "end-of-line convention used in this file"},
1334     {"mode", (getter)BZ2File_get_mode, NULL,
1335                     "file mode ('r', 'w', or 'U')"},
1336     {"name", (getter)BZ2File_get_name, NULL,
1337                     "file name"},
1338     {NULL}      /* Sentinel */
1339 };
1340 
1341 
1342 /* ===================================================================== */
1343 /* Members of BZ2File_Type. */
1344 
1345 #undef OFF
1346 #define OFF(x) offsetof(BZ2FileObject, x)
1347 
1348 static PyMemberDef BZ2File_members[] = {
1349     {"softspace",       T_INT,          OFF(f_softspace), 0,
1350      "flag indicating that a space needs to be printed; used by print"},
1351     {NULL}      /* Sentinel */
1352 };
1353 
1354 /* ===================================================================== */
1355 /* Slot definitions for BZ2File_Type. */
1356 static int
BZ2File_clear(BZ2FileObject * self)1357 BZ2File_clear(BZ2FileObject *self)
1358 {
1359     int bzerror;
1360 
1361     ACQUIRE_LOCK(self);
1362     switch (self->mode) {
1363         case MODE_READ:
1364         case MODE_READ_EOF:
1365             BZ2_bzReadClose(&bzerror, self->fp);
1366             break;
1367         case MODE_WRITE:
1368             BZ2_bzWriteClose(&bzerror, self->fp,
1369                              0, NULL, NULL);
1370             break;
1371     }
1372     if (self->fp != NULL && self->file != NULL)
1373         PyFile_DecUseCount((PyFileObject *)self->file);
1374     self->fp = NULL;
1375     Util_DropReadAhead(self);
1376     Py_CLEAR(self->file);
1377     RELEASE_LOCK(self);
1378     return 0;
1379 }
1380 
1381 static int
BZ2File_init(BZ2FileObject * self,PyObject * args,PyObject * kwargs)1382 BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1383 {
1384     static char *kwlist[] = {"filename", "mode", "buffering",
1385                                    "compresslevel", 0};
1386     PyObject *name;
1387     char *mode = "r";
1388     int buffering = -1;
1389     int compresslevel = 9;
1390     int bzerror;
1391     int mode_char = 0;
1392 
1393     self->size = -1;
1394 
1395     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1396                                      kwlist, &name, &mode, &buffering,
1397                                      &compresslevel))
1398         return -1;
1399 
1400     if (compresslevel < 1 || compresslevel > 9) {
1401         PyErr_SetString(PyExc_ValueError,
1402                         "compresslevel must be between 1 and 9");
1403         return -1;
1404     }
1405 
1406     for (;;) {
1407         int error = 0;
1408         switch (*mode) {
1409             case 'r':
1410             case 'w':
1411                 if (mode_char)
1412                     error = 1;
1413                 mode_char = *mode;
1414                 break;
1415 
1416             case 'b':
1417                 break;
1418 
1419             case 'U':
1420 #ifdef __VMS
1421                 self->f_univ_newline = 0;
1422 #else
1423                 self->f_univ_newline = 1;
1424 #endif
1425                 break;
1426 
1427             default:
1428                 error = 1;
1429                 break;
1430         }
1431         if (error) {
1432             PyErr_Format(PyExc_ValueError,
1433                          "invalid mode char %c", *mode);
1434             return -1;
1435         }
1436         mode++;
1437         if (*mode == '\0')
1438             break;
1439     }
1440 
1441     if (mode_char == 0) {
1442         mode_char = 'r';
1443     }
1444 
1445     mode = (mode_char == 'r') ? "rb" : "wb";
1446 
1447 #ifdef WITH_THREAD
1448     if (!self->lock) {
1449         self->lock = PyThread_allocate_lock();
1450     }
1451 
1452     if (!self->lock) {
1453         PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1454         goto error;
1455     }
1456 #endif
1457 
1458     BZ2File_clear(self);
1459 
1460     self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1461                                        name, mode, buffering);
1462     if (self->file == NULL)
1463         return -1;
1464 
1465     /* From now on, we have stuff to dealloc, so jump to error label
1466      * instead of returning */
1467 
1468     if (mode_char == 'r')
1469         self->fp = BZ2_bzReadOpen(&bzerror,
1470                                   PyFile_AsFile(self->file),
1471                                   0, 0, NULL, 0);
1472     else
1473         self->fp = BZ2_bzWriteOpen(&bzerror,
1474                                    PyFile_AsFile(self->file),
1475                                    compresslevel, 0, 0);
1476 
1477     if (bzerror != BZ_OK) {
1478         Util_CatchBZ2Error(bzerror);
1479         goto error;
1480     }
1481     PyFile_IncUseCount((PyFileObject *)self->file);
1482 
1483     self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1484 
1485     return 0;
1486 
1487 error:
1488     Py_CLEAR(self->file);
1489 #ifdef WITH_THREAD
1490     if (self->lock) {
1491         PyThread_free_lock(self->lock);
1492         self->lock = NULL;
1493     }
1494 #endif
1495     return -1;
1496 }
1497 
1498 static void
BZ2File_dealloc(BZ2FileObject * self)1499 BZ2File_dealloc(BZ2FileObject *self)
1500 {
1501     BZ2File_clear(self);
1502 #ifdef WITH_THREAD
1503     if (self->lock)
1504         PyThread_free_lock(self->lock);
1505 #endif
1506     Py_TYPE(self)->tp_free((PyObject *)self);
1507 }
1508 
1509 /* This is a hacked version of Python's fileobject.c:file_getiter(). */
1510 static PyObject *
BZ2File_getiter(BZ2FileObject * self)1511 BZ2File_getiter(BZ2FileObject *self)
1512 {
1513     if (self->mode == MODE_CLOSED) {
1514         PyErr_SetString(PyExc_ValueError,
1515                         "I/O operation on closed file");
1516         return NULL;
1517     }
1518     Py_INCREF((PyObject*)self);
1519     return (PyObject *)self;
1520 }
1521 
1522 /* This is a hacked version of Python's fileobject.c:file_iternext(). */
1523 #define READAHEAD_BUFSIZE 8192
1524 static PyObject *
BZ2File_iternext(BZ2FileObject * self)1525 BZ2File_iternext(BZ2FileObject *self)
1526 {
1527     PyStringObject* ret;
1528     ACQUIRE_LOCK(self);
1529     if (self->mode == MODE_CLOSED) {
1530         RELEASE_LOCK(self);
1531         PyErr_SetString(PyExc_ValueError,
1532                         "I/O operation on closed file");
1533         return NULL;
1534     }
1535     ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1536     RELEASE_LOCK(self);
1537     if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1538         Py_XDECREF(ret);
1539         return NULL;
1540     }
1541     return (PyObject *)ret;
1542 }
1543 
1544 /* ===================================================================== */
1545 /* BZ2File_Type definition. */
1546 
1547 PyDoc_VAR(BZ2File__doc__) =
1548 PyDoc_STR(
1549 "BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1550 \n\
1551 Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1552 writing. When opened for writing, the file will be created if it doesn't\n\
1553 exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1554 unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1555 is given, must be a number between 1 and 9.\n\
1556 ")
1557 PyDoc_STR(
1558 "\n\
1559 Add a 'U' to mode to open the file for input with universal newline\n\
1560 support. Any line ending in the input file will be seen as a '\\n' in\n\
1561 Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1562 for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1563 '\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1564 newlines are available only when reading.\n\
1565 ")
1566 ;
1567 
1568 static PyTypeObject BZ2File_Type = {
1569     PyVarObject_HEAD_INIT(NULL, 0)
1570     "bz2.BZ2File",              /*tp_name*/
1571     sizeof(BZ2FileObject),      /*tp_basicsize*/
1572     0,                          /*tp_itemsize*/
1573     (destructor)BZ2File_dealloc, /*tp_dealloc*/
1574     0,                          /*tp_print*/
1575     0,                          /*tp_getattr*/
1576     0,                          /*tp_setattr*/
1577     0,                          /*tp_compare*/
1578     0,                          /*tp_repr*/
1579     0,                          /*tp_as_number*/
1580     0,                          /*tp_as_sequence*/
1581     0,                          /*tp_as_mapping*/
1582     0,                          /*tp_hash*/
1583     0,                      /*tp_call*/
1584     0,                      /*tp_str*/
1585     PyObject_GenericGetAttr,/*tp_getattro*/
1586     PyObject_GenericSetAttr,/*tp_setattro*/
1587     0,                      /*tp_as_buffer*/
1588     Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1589     BZ2File__doc__,         /*tp_doc*/
1590     0,                      /*tp_traverse*/
1591     (inquiry)BZ2File_clear, /*tp_clear*/
1592     0,                      /*tp_richcompare*/
1593     0,                      /*tp_weaklistoffset*/
1594     (getiterfunc)BZ2File_getiter, /*tp_iter*/
1595     (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1596     BZ2File_methods,        /*tp_methods*/
1597     BZ2File_members,        /*tp_members*/
1598     BZ2File_getset,         /*tp_getset*/
1599     0,                      /*tp_base*/
1600     0,                      /*tp_dict*/
1601     0,                      /*tp_descr_get*/
1602     0,                      /*tp_descr_set*/
1603     0,                      /*tp_dictoffset*/
1604     (initproc)BZ2File_init, /*tp_init*/
1605     PyType_GenericAlloc,    /*tp_alloc*/
1606     PyType_GenericNew,      /*tp_new*/
1607     _PyObject_Del,          /*tp_free*/
1608     0,                      /*tp_is_gc*/
1609 };
1610 
1611 
1612 /* ===================================================================== */
1613 /* Methods of BZ2Comp. */
1614 
1615 PyDoc_STRVAR(BZ2Comp_compress__doc__,
1616 "compress(data) -> string\n\
1617 \n\
1618 Provide more data to the compressor object. It will return chunks of\n\
1619 compressed data whenever possible. When you've finished providing data\n\
1620 to compress, call the flush() method to finish the compression process,\n\
1621 and return what is left in the internal buffers.\n\
1622 ");
1623 
1624 static PyObject *
BZ2Comp_compress(BZ2CompObject * self,PyObject * args)1625 BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1626 {
1627     Py_buffer pdata;
1628     size_t input_left;
1629     size_t output_size = 0;
1630     PyObject *ret = NULL;
1631     bz_stream *bzs = &self->bzs;
1632     int bzerror;
1633 
1634     if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1635         return NULL;
1636 
1637     if (pdata.len == 0) {
1638         PyBuffer_Release(&pdata);
1639         return PyString_FromString("");
1640     }
1641 
1642     ACQUIRE_LOCK(self);
1643     if (!self->running) {
1644         PyErr_SetString(PyExc_ValueError,
1645                         "this object was already flushed");
1646         goto error;
1647     }
1648 
1649     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1650     if (!ret)
1651         goto error;
1652 
1653     bzs->next_in = pdata.buf;
1654     bzs->avail_in = MIN(pdata.len, UINT_MAX);
1655     input_left = pdata.len - bzs->avail_in;
1656 
1657     bzs->next_out = BUF(ret);
1658     bzs->avail_out = PyString_GET_SIZE(ret);
1659 
1660     for (;;) {
1661         char *saved_next_out;
1662 
1663         Py_BEGIN_ALLOW_THREADS
1664         saved_next_out = bzs->next_out;
1665         bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1666         output_size += bzs->next_out - saved_next_out;
1667         Py_END_ALLOW_THREADS
1668 
1669         if (bzerror != BZ_RUN_OK) {
1670             Util_CatchBZ2Error(bzerror);
1671             goto error;
1672         }
1673         if (bzs->avail_in == 0) {
1674             if (input_left == 0)
1675                 break; /* no more input data */
1676             bzs->avail_in = MIN(input_left, UINT_MAX);
1677             input_left -= bzs->avail_in;
1678         }
1679         if (bzs->avail_out == 0) {
1680             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1681             if (buffer_left == 0) {
1682                 if (Util_GrowBuffer(&ret) < 0) {
1683                     BZ2_bzCompressEnd(bzs);
1684                     goto error;
1685                 }
1686                 bzs->next_out = BUF(ret) + output_size;
1687                 buffer_left = PyString_GET_SIZE(ret) - output_size;
1688             }
1689             bzs->avail_out = MIN(buffer_left, UINT_MAX);
1690         }
1691     }
1692 
1693     if (_PyString_Resize(&ret, output_size) < 0)
1694         goto error;
1695 
1696     RELEASE_LOCK(self);
1697     PyBuffer_Release(&pdata);
1698     return ret;
1699 
1700 error:
1701     RELEASE_LOCK(self);
1702     PyBuffer_Release(&pdata);
1703     Py_XDECREF(ret);
1704     return NULL;
1705 }
1706 
1707 PyDoc_STRVAR(BZ2Comp_flush__doc__,
1708 "flush() -> string\n\
1709 \n\
1710 Finish the compression process and return what is left in internal buffers.\n\
1711 You must not use the compressor object after calling this method.\n\
1712 ");
1713 
1714 static PyObject *
BZ2Comp_flush(BZ2CompObject * self)1715 BZ2Comp_flush(BZ2CompObject *self)
1716 {
1717     size_t output_size = 0;
1718     PyObject *ret = NULL;
1719     bz_stream *bzs = &self->bzs;
1720     int bzerror;
1721 
1722     ACQUIRE_LOCK(self);
1723     if (!self->running) {
1724         PyErr_SetString(PyExc_ValueError, "object was already flushed");
1725         goto error;
1726     }
1727     self->running = 0;
1728 
1729     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1730     if (!ret)
1731         goto error;
1732 
1733     bzs->next_out = BUF(ret);
1734     bzs->avail_out = PyString_GET_SIZE(ret);
1735 
1736     for (;;) {
1737         char *saved_next_out;
1738 
1739         Py_BEGIN_ALLOW_THREADS
1740         saved_next_out = bzs->next_out;
1741         bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1742         output_size += bzs->next_out - saved_next_out;
1743         Py_END_ALLOW_THREADS
1744 
1745         if (bzerror == BZ_STREAM_END) {
1746             break;
1747         } else if (bzerror != BZ_FINISH_OK) {
1748             Util_CatchBZ2Error(bzerror);
1749             goto error;
1750         }
1751         if (bzs->avail_out == 0) {
1752             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1753             if (buffer_left == 0) {
1754                 if (Util_GrowBuffer(&ret) < 0)
1755                     goto error;
1756                 bzs->next_out = BUF(ret) + output_size;
1757                 buffer_left = PyString_GET_SIZE(ret) - output_size;
1758             }
1759             bzs->avail_out = MIN(buffer_left, UINT_MAX);
1760         }
1761     }
1762 
1763     if (output_size != PyString_GET_SIZE(ret))
1764         if (_PyString_Resize(&ret, output_size) < 0)
1765             goto error;
1766 
1767     RELEASE_LOCK(self);
1768     return ret;
1769 
1770 error:
1771     RELEASE_LOCK(self);
1772     Py_XDECREF(ret);
1773     return NULL;
1774 }
1775 
1776 static PyMethodDef BZ2Comp_methods[] = {
1777     {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1778      BZ2Comp_compress__doc__},
1779     {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1780      BZ2Comp_flush__doc__},
1781     {NULL,              NULL}           /* sentinel */
1782 };
1783 
1784 
1785 /* ===================================================================== */
1786 /* Slot definitions for BZ2Comp_Type. */
1787 
1788 static int
BZ2Comp_init(BZ2CompObject * self,PyObject * args,PyObject * kwargs)1789 BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1790 {
1791     int compresslevel = 9;
1792     int bzerror;
1793     static char *kwlist[] = {"compresslevel", 0};
1794 
1795     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1796                                      kwlist, &compresslevel))
1797         return -1;
1798 
1799     if (compresslevel < 1 || compresslevel > 9) {
1800         PyErr_SetString(PyExc_ValueError,
1801                         "compresslevel must be between 1 and 9");
1802         goto error;
1803     }
1804 
1805 #ifdef WITH_THREAD
1806     self->lock = PyThread_allocate_lock();
1807     if (!self->lock) {
1808         PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1809         goto error;
1810     }
1811 #endif
1812 
1813     memset(&self->bzs, 0, sizeof(bz_stream));
1814     bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1815     if (bzerror != BZ_OK) {
1816         Util_CatchBZ2Error(bzerror);
1817         goto error;
1818     }
1819 
1820     self->running = 1;
1821 
1822     return 0;
1823 error:
1824 #ifdef WITH_THREAD
1825     if (self->lock) {
1826         PyThread_free_lock(self->lock);
1827         self->lock = NULL;
1828     }
1829 #endif
1830     return -1;
1831 }
1832 
1833 static void
BZ2Comp_dealloc(BZ2CompObject * self)1834 BZ2Comp_dealloc(BZ2CompObject *self)
1835 {
1836 #ifdef WITH_THREAD
1837     if (self->lock)
1838         PyThread_free_lock(self->lock);
1839 #endif
1840     BZ2_bzCompressEnd(&self->bzs);
1841     Py_TYPE(self)->tp_free((PyObject *)self);
1842 }
1843 
1844 
1845 /* ===================================================================== */
1846 /* BZ2Comp_Type definition. */
1847 
/* Docstring for the bz2.BZ2Compressor type; installed below as tp_doc. */
PyDoc_STRVAR(BZ2Comp__doc__,
"BZ2Compressor([compresslevel=9]) -> compressor object\n\
\n\
Create a new compressor object. This object may be used to compress\n\
data sequentially. If you want to compress data in one shot, use the\n\
compress() function instead. The compresslevel parameter, if given,\n\
must be a number between 1 and 9.\n\
");

/* Type object for bz2.BZ2Compressor.
 *
 * The initializer is positional: each entry fills the slot named in its
 * trailing comment, so unused slots are written as 0 to keep the later
 * entries aligned.  Non-default slots:
 *   - tp_dealloc / tp_init: BZ2Comp_dealloc and BZ2Comp_init from this file;
 *   - tp_getattro / tp_setattro: the generic attribute helpers;
 *   - tp_flags: Py_TPFLAGS_BASETYPE is set in addition to the defaults;
 *   - tp_methods: BZ2Comp_methods; no tp_members or tp_getset are exposed;
 *   - tp_alloc / tp_new / tp_free: the generic allocator, the generic
 *     constructor and _PyObject_Del.
 * initbz2() passes this object to PyType_Ready() before publishing it.
 */
static PyTypeObject BZ2Comp_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "bz2.BZ2Compressor",        /*tp_name*/
    sizeof(BZ2CompObject),      /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Comp__doc__,         /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Comp_methods,        /*tp_methods*/
    0,                      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Comp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1899 
1900 
1901 /* ===================================================================== */
1902 /* Members of BZ2Decomp. */
1903 
/* OFF(x) is the byte offset of field x inside BZ2DecompObject.  The #undef
   drops any earlier definition of OFF before it is redefined for this type. */
#undef OFF
#define OFF(x) offsetof(BZ2DecompObject, x)

/* Attributes served straight from fields of the BZ2DecompObject struct. */
static PyMemberDef BZ2Decomp_members[] = {
    /* Object-valued attribute backed by self->unused_data, flagged RO. */
    {"unused_data", T_OBJECT, OFF(unused_data), RO},
    {NULL}      /* Sentinel */
};
1911 
1912 
1913 /* ===================================================================== */
1914 /* Methods of BZ2Decomp. */
1915 
1916 PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1917 "decompress(data) -> string\n\
1918 \n\
1919 Provide more data to the decompressor object. It will return chunks\n\
1920 of decompressed data whenever possible. If you try to decompress data\n\
1921 after the end of stream is found, EOFError will be raised. If any data\n\
1922 was found after the end of stream, it'll be ignored and saved in\n\
1923 unused_data attribute.\n\
1924 ");
1925 
1926 static PyObject *
BZ2Decomp_decompress(BZ2DecompObject * self,PyObject * args)1927 BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1928 {
1929     Py_buffer pdata;
1930     size_t input_left;
1931     size_t output_size = 0;
1932     PyObject *ret = NULL;
1933     bz_stream *bzs = &self->bzs;
1934     int bzerror;
1935 
1936     if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1937         return NULL;
1938 
1939     ACQUIRE_LOCK(self);
1940     if (!self->running) {
1941         PyErr_SetString(PyExc_EOFError, "end of stream was "
1942                                         "already found");
1943         goto error;
1944     }
1945 
1946     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1947     if (!ret)
1948         goto error;
1949 
1950     bzs->next_in = pdata.buf;
1951     bzs->avail_in = MIN(pdata.len, UINT_MAX);
1952     input_left = pdata.len - bzs->avail_in;
1953 
1954     bzs->next_out = BUF(ret);
1955     bzs->avail_out = PyString_GET_SIZE(ret);
1956 
1957     for (;;) {
1958         char *saved_next_out;
1959 
1960         Py_BEGIN_ALLOW_THREADS
1961         saved_next_out = bzs->next_out;
1962         bzerror = BZ2_bzDecompress(bzs);
1963         output_size += bzs->next_out - saved_next_out;
1964         Py_END_ALLOW_THREADS
1965 
1966         if (bzerror == BZ_STREAM_END) {
1967             self->running = 0;
1968             input_left += bzs->avail_in;
1969             if (input_left != 0) {
1970                 Py_SETREF(self->unused_data,
1971                           PyString_FromStringAndSize(bzs->next_in, input_left));
1972                 if (self->unused_data == NULL)
1973                     goto error;
1974             }
1975             break;
1976         }
1977         if (bzerror != BZ_OK) {
1978             Util_CatchBZ2Error(bzerror);
1979             goto error;
1980         }
1981         if (bzs->avail_in == 0) {
1982             if (input_left == 0)
1983                 break; /* no more input data */
1984             bzs->avail_in = MIN(input_left, UINT_MAX);
1985             input_left -= bzs->avail_in;
1986         }
1987         if (bzs->avail_out == 0) {
1988             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1989             if (buffer_left == 0) {
1990                 if (Util_GrowBuffer(&ret) < 0) {
1991                     BZ2_bzDecompressEnd(bzs);
1992                     goto error;
1993                 }
1994                 bzs->next_out = BUF(ret) + output_size;
1995                 buffer_left = PyString_GET_SIZE(ret) - output_size;
1996             }
1997             bzs->avail_out = MIN(buffer_left, UINT_MAX);
1998         }
1999     }
2000 
2001     if (output_size != PyString_GET_SIZE(ret))
2002         if (_PyString_Resize(&ret, output_size) < 0)
2003             goto error;
2004 
2005     RELEASE_LOCK(self);
2006     PyBuffer_Release(&pdata);
2007     return ret;
2008 
2009 error:
2010     RELEASE_LOCK(self);
2011     PyBuffer_Release(&pdata);
2012     Py_XDECREF(ret);
2013     return NULL;
2014 }
2015 
/* Method table for bz2.BZ2Decompressor (referenced from the type's
   tp_methods slot): a single positional-argument method, decompress(). */
static PyMethodDef BZ2Decomp_methods[] = {
    {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
    {NULL,              NULL}           /* sentinel */
};
2020 
2021 
2022 /* ===================================================================== */
2023 /* Slot definitions for BZ2Decomp_Type. */
2024 
2025 static int
BZ2Decomp_init(BZ2DecompObject * self,PyObject * args,PyObject * kwargs)2026 BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
2027 {
2028     int bzerror;
2029 
2030     if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
2031         return -1;
2032 
2033 #ifdef WITH_THREAD
2034     self->lock = PyThread_allocate_lock();
2035     if (!self->lock) {
2036         PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2037         goto error;
2038     }
2039 #endif
2040 
2041     self->unused_data = PyString_FromString("");
2042     if (!self->unused_data)
2043         goto error;
2044 
2045     memset(&self->bzs, 0, sizeof(bz_stream));
2046     bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2047     if (bzerror != BZ_OK) {
2048         Util_CatchBZ2Error(bzerror);
2049         goto error;
2050     }
2051 
2052     self->running = 1;
2053 
2054     return 0;
2055 
2056 error:
2057 #ifdef WITH_THREAD
2058     if (self->lock) {
2059         PyThread_free_lock(self->lock);
2060         self->lock = NULL;
2061     }
2062 #endif
2063     Py_CLEAR(self->unused_data);
2064     return -1;
2065 }
2066 
2067 static void
BZ2Decomp_dealloc(BZ2DecompObject * self)2068 BZ2Decomp_dealloc(BZ2DecompObject *self)
2069 {
2070 #ifdef WITH_THREAD
2071     if (self->lock)
2072         PyThread_free_lock(self->lock);
2073 #endif
2074     Py_XDECREF(self->unused_data);
2075     BZ2_bzDecompressEnd(&self->bzs);
2076     Py_TYPE(self)->tp_free((PyObject *)self);
2077 }
2078 
2079 
2080 /* ===================================================================== */
2081 /* BZ2Decomp_Type definition. */
2082 
/* Docstring for the bz2.BZ2Decompressor type; installed below as tp_doc. */
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/* Type object for bz2.BZ2Decompressor.
 *
 * The initializer is positional: each entry fills the slot named in its
 * trailing comment, so unused slots are written as 0 to keep the later
 * entries aligned.  Non-default slots:
 *   - tp_dealloc / tp_init: BZ2Decomp_dealloc and BZ2Decomp_init;
 *   - tp_getattro / tp_setattro: the generic attribute helpers;
 *   - tp_flags: Py_TPFLAGS_BASETYPE is set in addition to the defaults;
 *   - tp_methods / tp_members: BZ2Decomp_methods and BZ2Decomp_members
 *     (the latter exposes unused_data);
 *   - tp_alloc / tp_new / tp_free: the generic allocator, the generic
 *     constructor and _PyObject_Del.
 * initbz2() passes this object to PyType_Ready() before publishing it.
 */
static PyTypeObject BZ2Decomp_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "bz2.BZ2Decompressor",      /*tp_name*/
    sizeof(BZ2DecompObject), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
    0,                          /*tp_print*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_compare*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Decomp__doc__,       /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Decomp_methods,      /*tp_methods*/
    BZ2Decomp_members,      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Decomp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    _PyObject_Del,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
2133 
2134 
2135 /* ===================================================================== */
2136 /* Module functions. */
2137 
2138 PyDoc_STRVAR(bz2_compress__doc__,
2139 "compress(data [, compresslevel=9]) -> string\n\
2140 \n\
2141 Compress data in one shot. If you want to compress data sequentially,\n\
2142 use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2143 given, must be a number between 1 and 9.\n\
2144 ");
2145 
2146 static PyObject *
bz2_compress(PyObject * self,PyObject * args,PyObject * kwargs)2147 bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2148 {
2149     int compresslevel=9;
2150     int action;
2151     Py_buffer pdata;
2152     size_t input_left;
2153     size_t output_size = 0;
2154     PyObject *ret = NULL;
2155     bz_stream _bzs;
2156     bz_stream *bzs = &_bzs;
2157     int bzerror;
2158     static char *kwlist[] = {"data", "compresslevel", 0};
2159 
2160     if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2161                                      kwlist, &pdata,
2162                                      &compresslevel))
2163         return NULL;
2164 
2165     if (compresslevel < 1 || compresslevel > 9) {
2166         PyErr_SetString(PyExc_ValueError,
2167                         "compresslevel must be between 1 and 9");
2168         PyBuffer_Release(&pdata);
2169         return NULL;
2170     }
2171 
2172     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2173     if (!ret) {
2174         PyBuffer_Release(&pdata);
2175         return NULL;
2176     }
2177 
2178     memset(bzs, 0, sizeof(bz_stream));
2179 
2180     bzs->next_in = pdata.buf;
2181     bzs->avail_in = MIN(pdata.len, UINT_MAX);
2182     input_left = pdata.len - bzs->avail_in;
2183 
2184     bzs->next_out = BUF(ret);
2185     bzs->avail_out = PyString_GET_SIZE(ret);
2186 
2187     bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2188     if (bzerror != BZ_OK) {
2189         Util_CatchBZ2Error(bzerror);
2190         PyBuffer_Release(&pdata);
2191         Py_DECREF(ret);
2192         return NULL;
2193     }
2194 
2195     action = input_left > 0 ? BZ_RUN : BZ_FINISH;
2196 
2197     for (;;) {
2198         char *saved_next_out;
2199 
2200         Py_BEGIN_ALLOW_THREADS
2201         saved_next_out = bzs->next_out;
2202         bzerror = BZ2_bzCompress(bzs, action);
2203         output_size += bzs->next_out - saved_next_out;
2204         Py_END_ALLOW_THREADS
2205 
2206         if (bzerror == BZ_STREAM_END) {
2207             break;
2208         } else if (bzerror != BZ_RUN_OK && bzerror != BZ_FINISH_OK) {
2209             BZ2_bzCompressEnd(bzs);
2210             Util_CatchBZ2Error(bzerror);
2211             PyBuffer_Release(&pdata);
2212             Py_DECREF(ret);
2213             return NULL;
2214         }
2215         if (action == BZ_RUN && bzs->avail_in == 0) {
2216             if (input_left == 0) {
2217                 action = BZ_FINISH;
2218             } else {
2219                 bzs->avail_in = MIN(input_left, UINT_MAX);
2220                 input_left -= bzs->avail_in;
2221             }
2222         }
2223         if (bzs->avail_out == 0) {
2224             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2225             if (buffer_left == 0) {
2226                 if (Util_GrowBuffer(&ret) < 0) {
2227                     BZ2_bzCompressEnd(bzs);
2228                     PyBuffer_Release(&pdata);
2229                     return NULL;
2230                 }
2231                 bzs->next_out = BUF(ret) + output_size;
2232                 buffer_left = PyString_GET_SIZE(ret) - output_size;
2233             }
2234             bzs->avail_out = MIN(buffer_left, UINT_MAX);
2235         }
2236     }
2237 
2238     if (output_size != PyString_GET_SIZE(ret))
2239         _PyString_Resize(&ret, output_size);  /* Sets ret to NULL on failure. */
2240 
2241     BZ2_bzCompressEnd(bzs);
2242     PyBuffer_Release(&pdata);
2243     return ret;
2244 }
2245 
2246 PyDoc_STRVAR(bz2_decompress__doc__,
2247 "decompress(data) -> decompressed data\n\
2248 \n\
2249 Decompress data in one shot. If you want to decompress data sequentially,\n\
2250 use an instance of BZ2Decompressor instead.\n\
2251 ");
2252 
2253 static PyObject *
bz2_decompress(PyObject * self,PyObject * args)2254 bz2_decompress(PyObject *self, PyObject *args)
2255 {
2256     Py_buffer pdata;
2257     size_t input_left;
2258     size_t output_size = 0;
2259     PyObject *ret;
2260     bz_stream _bzs;
2261     bz_stream *bzs = &_bzs;
2262     int bzerror;
2263 
2264     if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2265         return NULL;
2266 
2267     if (pdata.len == 0) {
2268         PyBuffer_Release(&pdata);
2269         return PyString_FromString("");
2270     }
2271 
2272     ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2273     if (!ret) {
2274         PyBuffer_Release(&pdata);
2275         return NULL;
2276     }
2277 
2278     memset(bzs, 0, sizeof(bz_stream));
2279 
2280     bzs->next_in = pdata.buf;
2281     bzs->avail_in = MIN(pdata.len, UINT_MAX);
2282     input_left = pdata.len - bzs->avail_in;
2283 
2284     bzs->next_out = BUF(ret);
2285     bzs->avail_out = PyString_GET_SIZE(ret);
2286 
2287     bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2288     if (bzerror != BZ_OK) {
2289         Util_CatchBZ2Error(bzerror);
2290         Py_DECREF(ret);
2291         PyBuffer_Release(&pdata);
2292         return NULL;
2293     }
2294 
2295     for (;;) {
2296         char *saved_next_out;
2297 
2298         Py_BEGIN_ALLOW_THREADS
2299         saved_next_out = bzs->next_out;
2300         bzerror = BZ2_bzDecompress(bzs);
2301         output_size += bzs->next_out - saved_next_out;
2302         Py_END_ALLOW_THREADS
2303 
2304         if (bzerror == BZ_STREAM_END) {
2305             break;
2306         } else if (bzerror != BZ_OK) {
2307             BZ2_bzDecompressEnd(bzs);
2308             Util_CatchBZ2Error(bzerror);
2309             PyBuffer_Release(&pdata);
2310             Py_DECREF(ret);
2311             return NULL;
2312         }
2313         if (bzs->avail_in == 0) {
2314             if (input_left == 0) {
2315                 BZ2_bzDecompressEnd(bzs);
2316                 PyErr_SetString(PyExc_ValueError,
2317                                 "couldn't find end of stream");
2318                 PyBuffer_Release(&pdata);
2319                 Py_DECREF(ret);
2320                 return NULL;
2321             }
2322             bzs->avail_in = MIN(input_left, UINT_MAX);
2323             input_left -= bzs->avail_in;
2324         }
2325         if (bzs->avail_out == 0) {
2326             size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2327             if (buffer_left == 0) {
2328                 if (Util_GrowBuffer(&ret) < 0) {
2329                     BZ2_bzDecompressEnd(bzs);
2330                     PyBuffer_Release(&pdata);
2331                     return NULL;
2332                 }
2333                 bzs->next_out = BUF(ret) + output_size;
2334                 buffer_left = PyString_GET_SIZE(ret) - output_size;
2335             }
2336             bzs->avail_out = MIN(buffer_left, UINT_MAX);
2337         }
2338     }
2339 
2340     if (output_size != PyString_GET_SIZE(ret))
2341         _PyString_Resize(&ret, output_size);  /* Sets ret to NULL on failure. */
2342 
2343     BZ2_bzDecompressEnd(bzs);
2344     PyBuffer_Release(&pdata);
2345     return ret;
2346 }
2347 
/* Module-level function table handed to Py_InitModule3() in initbz2():
   compress() accepts positional and keyword arguments, decompress()
   accepts positional arguments only. */
static PyMethodDef bz2_methods[] = {
    {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
        bz2_compress__doc__},
    {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
        bz2_decompress__doc__},
    {NULL,              NULL}           /* sentinel */
};
2355 
2356 /* ===================================================================== */
2357 /* Initialization function. */
2358 
2359 PyDoc_STRVAR(bz2__doc__,
2360 "The python bz2 module provides a comprehensive interface for\n\
2361 the bz2 compression library. It implements a complete file\n\
2362 interface, one shot (de)compression functions, and types for\n\
2363 sequential (de)compression.\n\
2364 ");
2365 
2366 PyMODINIT_FUNC
initbz2(void)2367 initbz2(void)
2368 {
2369     PyObject *m;
2370 
2371     if (PyType_Ready(&BZ2File_Type) < 0)
2372         return;
2373     if (PyType_Ready(&BZ2Comp_Type) < 0)
2374         return;
2375     if (PyType_Ready(&BZ2Decomp_Type) < 0)
2376         return;
2377 
2378     m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2379     if (m == NULL)
2380         return;
2381 
2382     PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2383 
2384     Py_INCREF(&BZ2File_Type);
2385     PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2386 
2387     Py_INCREF(&BZ2Comp_Type);
2388     PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2389 
2390     Py_INCREF(&BZ2Decomp_Type);
2391     PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2392 }
2393