1 /* 2 * File: BZ2StreamScanner.h 3 * Author: Yavor Nikolov 4 * 5 * Created on March 6, 2010, 10:07 PM 6 */ 7 8 #ifndef _BZ2STREAMSCANNER_H 9 #define _BZ2STREAMSCANNER_H 10 11 #include "pbzip2.h" 12 #include <vector> 13 #include <string> 14 15 using namespace std; 16 17 namespace pbzip2 18 { 19 20 class BZ2StreamScanner 21 { 22 public: 23 typedef unsigned char CharType; 24 25 static const size_t DEFAULT_IN_BUFF_CAPACITY = 1024 * 1024; // 1M 26 static const size_t DEFAULT_OUT_BUFF_LIMIT = 1024 * 1024; 27 28 enum BZ2SScannerErrorFlag 29 { 30 ERR_MEM_ALLOC_INBUFF = 1, 31 ERR_MEM_ALLOC_OUTBUFF = 1 << 1, 32 ERR_IO_READ = 1 << 2, 33 ERR_IO_INSUFFICIENT_BUFF_CAPACITY = 1 << 3, 34 ERR_INVALID_STATE = 1 << 4, 35 ERR_INVALID_FILE_FORMAT = 1 << 5 36 }; 37 38 BZ2StreamScanner( int hInFile, size_t inBuffCapacity = DEFAULT_IN_BUFF_CAPACITY ); 39 int init( int hInFile, size_t inBuffCapacity = DEFAULT_IN_BUFF_CAPACITY ); 40 41 virtual ~BZ2StreamScanner(); 42 43 outBuff * getNextStream(); 44 getInBuffSize()45 size_t getInBuffSize() const { return ( _inBuffEnd - _inBuff ); } getInBuffCapacity()46 size_t getInBuffCapacity() const { return _inBuffCapacity; } getHeader()47 const basic_string<CharType> & getHeader() const { return _bz2Header; } getHeaderSize()48 size_t getHeaderSize() const { return _bz2Header.size(); } getErrState()49 int getErrState() const { return _errState; } failed()50 bool failed() { return ( _errState != 0 ); } 51 52 /** true if header has been found since last initialization */ isBz2HeaderFound()53 bool isBz2HeaderFound() const { return _bz2HeaderFound; } 54 55 /** status of last/current search only */ getSearchStatus()56 bool getSearchStatus() const { return _searchStatus; } 57 58 // end of file eof()59 bool eof() const { return _eof; } 60 61 /** true if out buffer is full enough to produce output block */ isOutBuffFullEnough()62 bool isOutBuffFullEnough() const { return _outBuff.bufSize >= getOutBuffCapacityLimit(); } 63 64 /** 65 * dispose memory resources 66 */ 67 virtual void dispose(); 68 69 #ifdef PBZIP_DEBUG 70 void printCurrentState(); 71 #endif 72 73 private: 74 /* disable copy c-tor */ BZ2StreamScanner(const BZ2StreamScanner & orig)75 BZ2StreamScanner( const BZ2StreamScanner& orig ) {} 76 77 void initOutBuff( char * buf = NULL, size_t bufSize = 0, size_t bufCapacity = 0 ); 78 int appendOutBuffData( CharType * end ); appendOutBuffData()79 int appendOutBuffData() { return appendOutBuffData( getInBuffSearchPtr() ); } 80 int appendOutBuffDataUpToLimit(); 81 int ensureOutBuffCapacity( size_t newSize ); 82 int readData(); 83 getInBuffEnd()84 CharType * getInBuffEnd() { return _inBuffEnd; } getInBuffBegin()85 CharType * getInBuffBegin() { return _inBuff; } getInBuffCurrent()86 CharType * getInBuffCurrent() { return _inBuffCurrent; } getInBuffSearchPtr()87 CharType * getInBuffSearchPtr() { return _inBuffSearchPtr; } getOutBuffEnd()88 char * getOutBuffEnd() { return _outBuff.buf + _outBuff.bufSize; } getUnsearchedCount()89 size_t getUnsearchedCount() const { return _inBuffEnd - _inBuffSearchPtr; } 90 91 /** 92 * Search next bz2 header. Read more data from file if needed. 93 * 94 * @return pointer to header is returned if found; 95 * getInBuffEnd() - if not found; NULL - on error. 96 */ 97 CharType * searchNextHeader(); 98 99 /** 100 * Search next bz2 header just in currently available input buffer. 101 * (Doesn't read more data from file). 102 * 103 * @return pointer to header or getInBuffEnd() if such is not found. 104 */ 105 CharType * searchNextHeaderInBuff(); 106 107 /** 108 * Prepare for next read from file into input buffer. 109 * Consumes remaining input data buffer and moves header tail to beginning. 110 * 111 */ 112 int rewindInBuff(); 113 114 /** 115 * Locate BZh header prefix in buffer. In case of first search - just check 116 * the beginning of buffer and signal error if it doesn't match to headers. 117 * 118 * @return pointer to BZh header prefix if located. getInBuffEnd() if not. 119 * failure() and getErrState() will indicate error if such occurred. 120 */ 121 CharType * locateHeaderPrefixInBuff(); 122 getOutBuffCapacityLimit()123 size_t getOutBuffCapacityLimit() const { return _outBuffCapacityLimit; } 124 125 int _hInFile; // input file descriptor 126 bool _eof; 127 128 basic_string<CharType> _bz2Header; 129 basic_string<CharType> _bz2HeaderZero; 130 bool _bz2HeaderFound; 131 bool _searchStatus; 132 133 CharType * _inBuff; 134 CharType * _inBuffEnd; // end of data read from file 135 CharType * _inBuffCurrent; 136 CharType * _inBuffSearchPtr; 137 138 size_t _inBuffCapacity; // allocated memory capacity for in buffer 139 140 outBuff _outBuff; 141 size_t _outBuffCapacity; 142 size_t _outBuffCapacityHint; // keep max used capacity 143 size_t _outBuffCapacityLimit; 144 145 unsigned int _errState; // 0 - ok; otherwise error 146 int _outSequenceNumber; // output block sequence number in bz2 stream (>0 if segmented) 147 int _streamNumber; 148 }; 149 150 } 151 152 #endif /* _BZ2STREAMSCANNER_H */ 153 154