1 #include "gcdbz.h"
2 
GCdbz(FILE * azf,bool uc,int zrsize)3 GCdbz::GCdbz(FILE* azf, bool uc, int zrsize) {
4  uncompress=uc;
5  zrecsize=-1;
6  zpos=0;
7  defline_cap=1024;
8  begin_defline();
9  GMALLOC(defline, defline_cap);
10  zf=azf;
11  // FULL_FLUSH method instead of finish:
12  if (uncompress)
13      decomp_start(zrsize);
14    else
15      compress_start();
16 }
17 
~GCdbz()18 GCdbz::~GCdbz() {
19  //if (zf!=NULL && zf!=stdout && zf!=stdin) fclose(zf);
20  // FULL_FLUSH method instead of finish
21   if (uncompress) decomp_end();
22    else
23      if (!zclosed) compress_end();
24  GFREE(defline);
25 }
26 
27 
28 
extend_defline(int ch)29 void GCdbz::extend_defline(int ch) {
30  if (defline_len+1 >= defline_cap) {
31    defline_cap+=(defline_cap>>2);
32    GREALLOC(defline, defline_cap);
33    }
34  defline[defline_len]= ch;
35  defline_len++;
36  }
37 
38 
// Placeholder record text: compress_start() deflates it as the first record
// of a newly started compressed stream.
#define DUMMY_ZREC \
  ">AA1234567890 DNA protein\n" \
  "ACGTTGCTAGCT\n" \
  "NRMTPYYHEIEP\n" \
  "RTASNTSPTPNS\n" \
  "IKSAHPAEPPKR\n"
44 
compress_start()45 void GCdbz::compress_start() {
46  //initialize zstream compression
47  zstream.zalloc = (alloc_func)0; //no alloc function to use
48  zstream.zfree = (free_func)0;   //no free function to use
49  zstream.opaque = (voidpf)0;     //no private object to pass to zalloc/zfree
50 
51  int err=deflateInit(&zstream, Z_DEFAULT_COMPRESSION);
52  if (err!=Z_OK)
53      GError("GCdbz error: deflateInit failed!(err=%d)\n",err);
54  zclosed=false;
55  //write a dummy record as the first record,
56  //so we can use random access (FULL_FLUSH style) later
57  char ztag[5];strcpy(ztag, "CDBZ");
58  uint32 zsize=0;
59  zstream.next_in = (Bytef*)sbuf;
60  strcpy(sbuf, DUMMY_ZREC);
61  zstream.avail_in=strlen(sbuf);
62  zstream.next_out = (Bytef*)lbuf;
63  zstream.avail_out = GCDBZ_LBUF_LEN;
64  uLong t_out=zstream.total_out;
65  err = deflate(&zstream, Z_FULL_FLUSH);
66  zsize=zstream.total_out-t_out;
67  if ((err !=Z_OK && err!=Z_STREAM_END) || zsize<=0)
68        GError("GCdbz error: deflate 1st record failed! (err=%d)\n", err);
69  //now write the header and the dummy record
70      //in case this was not done before:
71  gcvt_uint=(endian_test())? &uint32_sun : &uint32_x86;
72  uint32 zfv = gcvt_uint(&zsize);
73  if (fwrite(ztag, 1, 4, zf)<4 ||
74        fwrite(&zfv,1,sizeof(uint32), zf) < sizeof(uint32) ||
75          fwrite(lbuf, 1, zsize, zf) < zsize)
76        GError("Error writing 1st deflated record!\n");
77  zpos+=4+sizeof(uint32)+zsize;
78  }
79 
compress_end()80 void GCdbz::compress_end() {
81  zstream.next_out = (Bytef*)lbuf;
82  zstream.avail_out = GCDBZ_LBUF_LEN;
83  zstream.avail_in = 0;
84  uLong t_out=zstream.total_out;
85  int err = deflate(&zstream, Z_FINISH);
86  if (err != Z_STREAM_END) {
87    GError("GCdbz error: deflate/Z_FINISH() failed! (err=%d) \n", err);
88    }
89  uLong toWrite=zstream.total_out-t_out;
90  if (toWrite>0) {
91    if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
92         GError("Error writing FINISH deflate chunk!\n");
93    //GError("GCdbz error: out data after Z_FINISH (%d bytes)\n",
94    //    zstream.total_out-t_out);
95    }
96  err=deflateEnd(&zstream);
97  if (err!=Z_OK)
98    GError("GCdbz error: deflateEnd() failed! (err=%d) \n", err);
99  zclosed=true;
100 }
101 
compress(GReadBuf * readbuf,char * delim)102 char* GCdbz::compress(GReadBuf *readbuf, char* delim) {
103   //compress everything coming from the input stream inf
104   //until \n is encountered followed by delim
105   //returns this->defline or NULL if error encountered
106 
107   //-- WARNING: this subrutine assumes that inf file position
108   // is at the beginning of the record, right AFTER the delim
109   // (exactly as left after a previous call)
110  if (zf==NULL || uncompress)
111     GError("GCdbz Error: cannot use compress() method !\n");
112  unsigned int total_out=0;
113  int c=0;
114  bool in_rec=true;
115  int delimlen=strlen(delim);
116  zrecsize=0;
117  if ((c=readbuf->peekCmp(delim, delimlen))!=0) {
118     if (c<-1) return NULL; //end of file reached
119     GError("GCdbZ::compress error: delimiter '%s' expected at record start!\n",
120           delim);
121     }
122  bool bol=false; //beginning of line flag
123  int deflate_flag=0;
124  begin_defline();
125  int rec_pos=0;
126  int err=0;
127  while (in_rec) { // main read loop
128      int bytes_read=0;
129      while ((c=readbuf->getch())>=0) {
130        sbuf[bytes_read++]=c;
131        if (c=='\n' || c=='\r') { //beginning of line
132           bol = true;
133           if (in_defline) end_defline();
134           //look_ahead for record delimiter:
135           if (readbuf->peekCmp(delim, delimlen)==0) {
136               in_rec=false;
137               break;
138               }
139           }
140         else bol = false;
141        if (rec_pos>delimlen-1 && in_defline)
142             extend_defline(c);
143        rec_pos++;
144        if (bytes_read == GCDBZ_SBUF_LEN) break;
145        }//while not EOF or space in buffer
146      /*if (bytes_read==0)
147            return NULL;*/
148      if (c==EOF) {
149         in_rec=false;
150         if (in_defline) end_defline();
151         }
152      zstream.next_in = (Bytef*)sbuf;
153      zstream.avail_in = bytes_read;
154      //deflate_flag = in_rec ? 0 : Z_FINISH;
155      deflate_flag = in_rec ? 0 : Z_FULL_FLUSH;
156      do { //compression loop
157         zstream.next_out = (Bytef*)lbuf;
158         zstream.avail_out = GCDBZ_LBUF_LEN;
159         uLong t_out=zstream.total_out;
160         err = deflate(&zstream, deflate_flag);
161         if (err !=Z_OK && err!=Z_STREAM_END)
162              GError("GCdbz error: deflate failed! (err=%d)\n", err);
163         uLong toWrite=zstream.total_out-t_out;
164         if (toWrite>0) {
165              if (fwrite(lbuf, 1, toWrite, zf)<toWrite)
166                 GError("Error writing deflate chunk!\n");
167              total_out+=toWrite;
168              zrecsize+=toWrite;
169              zpos+=toWrite;
170              }
171        } while (err!=Z_STREAM_END && zstream.avail_out==0);//compression loop
172    } //read loop
173   //if (deflate_flag!=Z_FINISH)
174   if (deflate_flag!=Z_FULL_FLUSH)
175      GError("Deflate flag not set to FINISH!\n");
176   return defline;
177 }
178 
179 
decomp_start(int zrsize)180 void GCdbz::decomp_start(int zrsize) {
181  zstream.zalloc = (alloc_func)0;
182  zstream.zfree = (free_func)0;
183  zstream.opaque = (voidpf)0;
184  zstream.next_in  = (Bytef*)sbuf;
185  zstream.avail_in = 0;
186  zstream.next_out = (Bytef*)lbuf;
187  int err = inflateInit(&zstream);
188  if (err!=Z_OK)
189      GMessage("Error at inflateInit()\n");
190  //-- now read and discard the first record, so we can use random access later
191  // (needed by zlib)
192  int bytes_read=fread(sbuf, 1, zrsize, zf);
193  if (bytes_read<zrsize)
194      GError("Error reading 1st record from zrec file\n");
195  zstream.next_in = (Bytef*)sbuf;
196  zstream.avail_in = bytes_read;
197 //decompress first chunk
198  zstream.next_out = (Bytef*)lbuf;
199  zstream.avail_out = GCDBZ_LBUF_LEN;
200  err = inflate(&zstream, Z_SYNC_FLUSH);
201  if (err !=Z_OK && err!=Z_STREAM_END)
202      GError("GCdbz error: 1st record inflate failed! (err=%d)\n",err);
203 }
204 
decomp_end()205 void GCdbz::decomp_end() {
206   int err = inflateEnd(&zstream);
207   if (err!=Z_OK)
208      GError("Error at inflateEnd() (err=%d)\n", err);
209 
210 }
211 
212 
213 //record decompress
214 //returns: the number of bytes decompressed
decompress(FILE * outf,int csize,int zfofs)215 int GCdbz::decompress(FILE* outf, int csize, int zfofs) {
216  if (zfofs>=0) {
217     if (fseek(zf, zfofs, 0))
218       GError("GCdbz::decompress: error fseek() to %d\n", zfofs);
219     }
220   else
221      if (feof(zf)) return 0;
222  bool in_rec=true;
223  int err=0;
224  int total_read=0;
225  int total_written=0;
226  while (in_rec) { // main read loop
227      int to_read=0;
228      int bytes_read=0;
229      if (csize<=0) { //read one byte at a time
230         to_read=1;
231         int c;
232         if ((c =fgetc(zf))!=EOF) {
233            bytes_read = 1;
234            sbuf[0]=c;
235            }
236           else {
237             //bytes_read=0;
238             return 0; //eof
239             }
240         total_read+=bytes_read;
241         }
242       else {
243         to_read = csize-total_read>GCDBZ_SBUF_LEN ?
244                                  GCDBZ_SBUF_LEN : csize-total_read;
245        // check for csize vs bytes_read match:
246         if (to_read==0) return 0;
247         bytes_read=fread(sbuf, 1, to_read, zf);
248         if (bytes_read!=to_read)
249             GError("Error reading from zrec file\n");
250         total_read+=bytes_read;
251         in_rec=(total_read<csize);
252         }
253      if (bytes_read==0) {
254         //GMessage("bytes_read = 0\n");
255         return 0;
256         }
257      if (in_rec && bytes_read<to_read) in_rec=false;
258      zstream.next_in = (Bytef*)sbuf;
259      zstream.avail_in = bytes_read;
260 
261      do { //decompression loop
262         zstream.next_out = (Bytef*)lbuf;
263         zstream.avail_out = GCDBZ_LBUF_LEN;
264         uLong t_out=zstream.total_out;
265         err = inflate(&zstream, Z_SYNC_FLUSH);
266         uLong toWrite=zstream.total_out-t_out;
267         if (toWrite>0) {
268              if (fwrite(lbuf, 1, toWrite, outf)<toWrite) {
269                GError("Error writing inflated chunk!\n");
270                }
271              total_written+=toWrite;
272              }
273         if (err==Z_STREAM_END) {
274               in_rec=false;
275               if (total_written==0) {
276                 GMessage("Z_STREAM_END found but total_written=0!\n");
277                 }
278               break;
279               }
280          else if (err !=Z_OK)
281                 GError("GCdbz error: inflate failed! (err=%d)\n",err);
282         } while (zstream.avail_in!=0); //decompression loop
283    } //read loop
284  /*if (err!=Z_STREAM_END) {
285    GError("decompress: Z_STREAM_END not found!\n");
286    }*/
287   return total_written;
288 }
289 
290