1 /* $Id: ct_nlmzip_streamprocs.cpp 624729 2021-02-03 18:52:17Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information (NCBI)
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government do not place any restriction on its use or reproduction.
13 * We would, however, appreciate having the NCBI and the author cited in
14 * any work or product based on this material
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannot warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * ===========================================================================
25 *
26 * Author: Michael Kimelman
27 *
28 * (Asn) Stream processing utilities - compressor/cacher and AsnIoPtr to fci merger.
29 *
30 * Modifications:
31 * --------------------------------------------------------------------------
32 * $Log: streamprocs.c,v $
33 * Revision 1.8 2002/08/14 15:51:52 kimelman
34 * asserts added
35 *
36 * Revision 1.7 2001/05/09 23:40:26 kimelman
37 * reader effeciency improved
38 *
39 * Revision 1.6 2001/05/09 21:32:45 kimelman
40 * bugfix: check for null 'close' method before running
41 *
42 * Revision 1.5 2001/05/09 00:57:42 kimelman
43 * cosmetics
44 *
45 * Revision 1.4 2001/03/01 21:20:51 kimelman
46 * make it less noisy
47 *
48 * Revision 1.3 1998/06/25 19:24:29 kimelman
49 * changed coef. of cache grow
50 *
51 * Revision 1.2 1998/05/15 19:05:19 kimelman
52 * all old 'gzip' names changed to be suitable for library names.
53 * prefix Nlmzip_ is now used for all of this local stuff.
54 * interface headers changed their names, moving from artdb/ur to nlmzip
55 *
56 * Revision 1.1 1998/05/14 20:21:17 kimelman
57 * ct_init --> Nlmzip_ct_init
58 * added stream& AsnIo processing functions
59 * makefile changed
60 *
61 *
62 *
63 * ==========================================================================
64 */
65
66
67 #include <ncbi_pch.hpp>
68 #include <ctools/ctransition/ncbimem.hpp>
69 #include <ctools/ctransition/ncbierr.hpp>
70 #include <assert.h>
71 #include "ct_nlmzip_i.h"
72
73 // Ignore warnings for ncbi included code
74 #ifdef __GNUC__ // if gcc or g++
75 # pragma GCC diagnostic push
76 # pragma GCC diagnostic ignored "-Wunused-function"
77 #endif //__GNUC__
78
79
80 BEGIN_CTRANSITION_SCOPE
81
82
83
84 /*
85 * File common interface
86 */
87
88 fci_t LIBCALL
fci_open(Pointer data,Int4 (* proc_buf)(Pointer ptr,CharPtr buf,Int4 count),Int4 (* pclose)(Pointer ptr,int commit))89 fci_open( Pointer data,
90 Int4 (*proc_buf)(Pointer ptr, CharPtr buf, Int4 count),
91 Int4 (*pclose)(Pointer ptr, int commit)
92 )
93 {
94 fci_t obj = (fci_t)Nlm_MemNew(sizeof(*obj));
95
96 obj->data = data;
97 obj->proc_buf = proc_buf;
98 obj->close = pclose;
99 return obj;
100 }
101
102
103 /*
104 * AsnIoPtr asnio2fci.open(compressor.open(cacher.open(100,dbio.open(db))))
105
106 * fci_t asnio2fci ;
107 * fci_t compressor;
108 * fci_t cacher;
109 * fci_t dbio;
110
111 */
112
113 /*
114 * CACHER
115 */
116
117 typedef struct {
118 fci_t src;
119 int read;
120 char *buf;
121 int len;
122 int start;
123 int size;
124 int cache_size;
125 int eos;
126 } cacher_t;
127
128 static Int4 LIBCALLBACK
cacher_read(Pointer ptr,CharPtr buf,Int4 count)129 cacher_read(Pointer ptr, CharPtr buf, Int4 count)
130 {
131 cacher_t *db = (cacher_t*)ptr;
132 Int4 bytes = 0;
133
134 while (count > bytes)
135 {
136 assert(bytes>=0);
137 assert(bytes<=count);
138 if ( db->len == db->start ) /* if cache is empty */
139 {
140 Int4 len = 0;
141 // int direct_read = 0;
142 if (db->eos)
143 break; /* end of stream EXIT */
144 if ( count-bytes > db->cache_size / 2)
145 { /* read directly to caller's buffer */
146 len = db->src->proc_buf(db->src->data, buf+bytes, count-bytes);
147 /* negative 'len<0' answer means request for larger buffer size */
148 if ( len == 0 )
149 db->eos = 1;
150 if ( len > 0 )
151 bytes += len ;
152 }
153 if (count>bytes)
154 { /* cache input stream */
155 if ( db->cache_size < count )
156 db->cache_size = count;
157 if ( db->cache_size < - len )
158 db->cache_size = - len;
159 if ( db->cache_size > db->size )
160 db->cache_size = db->size;
161 db->start = 0;
162 len = db->src->proc_buf(db->src->data, db->buf,db->cache_size);
163 if (len < 0 && db->cache_size < - len )
164 {
165 /* negative 'len<0' answer means request for larger buffer size */
166 db->cache_size = - len;
167 if ( db->cache_size > db->size )
168 { /* try to adjust cache size - that case seems to be request
169 for larger buffer from underlying decompressor */
170 CharPtr newb = (CharPtr)Nlm_MemNew( - len );
171 if ( ! newb )
172 {
173 ErrPostEx(SEV_ERROR,0,0,"memory is exhausted - can't allocate %d bytes",-len);
174 return len;
175 }
176 Nlm_MemFree(db->buf);
177 db->buf = newb;
178 db->size = - len;
179 }
180 len = db->src->proc_buf(db->src->data, db->buf,db->cache_size);
181 }
182 db->cache_size *= 2;
183 if (len < 0 )
184 return len;
185 if (len == 0 )
186 db->eos = 1 ;
187 db->len = len;
188 }
189 }
190 if ( db->len - db->start > 0 )
191 {
192 int sz = db->len - db->start;
193 if ( bytes + sz > count )
194 sz = count - bytes ;
195 memcpy(buf+bytes, db->buf + db->start, sz );
196 db->start +=sz;
197 bytes += sz;
198 }
199 }
200 assert(bytes>=0);
201 assert(bytes<=count);
202 return bytes;
203 }
204
205 static Int4 LIBCALLBACK
cacher_write(Pointer ptr,CharPtr buf,Int4 count)206 cacher_write(Pointer ptr, CharPtr buf, Int4 count)
207 {
208 cacher_t *db = (cacher_t*)ptr;
209 Int4 bytes = 0;
210 // int flush_it = 0;
211
212 if(count<=0)
213 return 0;
214 /* cache size adjustments */
215 if ( db->cache_size < count )
216 {
217 db->cache_size = count;
218 if ( db->cache_size > db->size )
219 db->cache_size = db->size;
220 }
221
222 while (count > bytes)
223 {
224 int len = 0;
225 if ( db->len == db->cache_size || /* if cache is full */
226 count-bytes > db->size / 2 ) /* or new data is too large for this cache */
227 { /* flush cache */
228 if (db->len > 0)
229 len = db->src->proc_buf(db->src->data, db->buf,db->len);
230 if (len != db->len)
231 {
232 ErrPostEx(SEV_ERROR,0,0,"Failure to write data from cache (%d of %d written)",len,db->len);
233 return -1;
234 }
235 db->cache_size *=2;
236 if ( db->cache_size < 2 * count )
237 db->cache_size = 2 * count;
238 if ( db->cache_size > db->size )
239 db->cache_size = db->size;
240 db->start = db->len = 0;
241 }
242 if ( count - bytes > db->cache_size && db->len) /* if there are a lot of data */
243 { /*remains and cache is empty -- do uncached write */
244 len = db->src->proc_buf(db->src->data, buf+bytes,count-bytes);
245 if (len != count-bytes)
246 return -1;
247 bytes += len;
248 assert (bytes == count);
249 }
250 else
251 { /* cached write */
252 len = db->cache_size - db->len;
253 assert( len > 0 );
254 if ( count-bytes < len)
255 len = count - bytes;
256 memcpy(db->buf + db->len, buf+bytes, len);
257 db->len += len ;
258 bytes += len;
259 }
260 }
261 return bytes;
262 }
263
264 static Int4 LIBCALLBACK
cacher_close(Pointer ptr,int commit)265 cacher_close(Pointer ptr, int commit)
266 {
267 cacher_t *db = (cacher_t*)ptr;
268 Int4 rc;
269
270 if (!db->read)
271 {
272 Int4 len, len1 = db->len-db->start;
273 if (commit>0)
274 {
275 len = db->src->proc_buf(db->src->data, db->buf+db->start,len1);
276 commit = (len == len1) ;
277 }
278 }
279 rc=commit;
280 if(db->src->close)
281 rc = db->src->close(db->src->data,commit);
282 if (commit>=0)
283 {
284 Nlm_MemFree(db->src);
285 Nlm_MemFree(db->buf);
286 Nlm_MemFree(db);
287 }
288 else
289 {
290 db->len = db->start = db->cache_size = db->eos = 0 ;
291 }
292 return rc ;
293 }
294
295 fci_t LIBCALL
cacher_open(fci_t stream,int max_cache_size,int read)296 cacher_open(fci_t stream, int max_cache_size,int read)
297 {
298 cacher_t *data = (cacher_t*)Nlm_MemNew(sizeof(*data));
299
300 data->read = read ;
301 data->src = stream ;
302 data->size = max_cache_size;
303 data->cache_size = max_cache_size/10;
304 if (data->cache_size < 2048 && max_cache_size > 2048)
305 data->cache_size=2048;
306
307 while ((data->buf = (char*)Nlm_MemNew(data->size)) == NULL)
308 {
309 data->size /= 2;
310 if (data->size <= 1024)
311 {
312 Nlm_MemFree(data);
313 ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: memory exhausted '%d' ",
314 __FILE__,__LINE__,data->size*2);
315 return NULL;
316 }
317 }
318 return fci_open(data,(read?cacher_read:cacher_write),cacher_close);
319 }
320
321 /*
322 * COMPRESSOR
323 */
324
325 typedef struct {
326 fci_t src;
327 int mode; /* 0 - uninitialized. 1 - compressed ; -1 - uncompressed */
328 unsigned char *dbuf;
329 Int4 bsize;
330 Int4 compr_size;
331 Int4 decomp_size;
332 } compressor_t;
333
334 static void
compressor_header(compressor_t * db,UcharPtr header,int read)335 compressor_header(compressor_t *db,UcharPtr header,int read)
336 {
337 UcharPtr dbuf;
338 Uint4 val;
339 int bytes;
340
341 dbuf = (UcharPtr) header;
342 if(read)
343 { /* header --> db */
344 #if 0
345 fprintf(stderr,"scanned buffer");
346 for(bytes=0; bytes<8; bytes++)
347 fprintf(stderr,"'%x',",header[bytes]);
348 fprintf(stderr,"\n");
349 #endif
350 for(val=0, bytes=0; bytes<4; bytes++,dbuf++)
351 val = (val<<8) + *dbuf ;
352 db->compr_size = val;
353 for(val=0 ; bytes<8; bytes++,dbuf++)
354 val = (val<<8) + *dbuf ;
355 db->decomp_size = val;
356 #if 0
357 fprintf (stderr,"decompr(%x-%d)-->%x-%d\n",db->compr_size,db->compr_size,db->decomp_size,db->decomp_size);
358 if (read == 1)
359 {/* QA */
360 Uchar buf[8];
361 compressor_header(db,buf,0);
362 assert(memcmp(buf,header,8)==0);
363 }
364 #endif
365 }
366 else
367 { /* write compressed block header */
368 /* db --> header */
369 val = db->compr_size;
370 for(bytes=0; bytes<4; bytes++,dbuf++)
371 *dbuf = (val >> (3-bytes)*8) & 0xff ;
372 val = db->decomp_size;
373 for( ; bytes<8; bytes++,dbuf++)
374 *dbuf = (val >> (7-bytes)*8) & 0xff ;
375 #if 0
376 fprintf (stderr,"compr(%x)-->%x ",db->decomp_size,db->compr_size);
377 fprintf(stderr,"written buffer");
378 for(bytes=0; bytes<8; bytes++)
379 fprintf(stderr,"'%x',",header[bytes]);
380 fprintf(stderr,"\n");
381
382 {/* QA */
383 Int4 dc = db->decomp_size, cm = db->compr_size;
384 compressor_header(db,header,2);
385 assert(cm == db->compr_size);
386 assert(dc == db->decomp_size);
387 }
388 #endif
389 }
390 }
391
392 static Int4 LIBCALLBACK
compressor_read(Pointer ptr,CharPtr obuf,Int4 count)393 compressor_read(Pointer ptr, CharPtr obuf, Int4 count)
394 {
395 compressor_t *db = (compressor_t*)ptr ;
396 unsigned char lens[8];
397 Int4 bytes = 0 ;
398
399 switch(db->mode)
400 {
401 case 0:
402 assert(count>=4);
403 bytes = db->src->proc_buf(db->src->data, (CharPtr)obuf,4);
404 if (bytes!=4)
405 return -1;
406 if (strcmp(obuf,"ZIP")==0)
407 {
408 db->mode=1; /* compresseed mode */
409 break;
410 }
411 db->mode=-1; /*uncompresseed mode */
412 obuf+=4;
413 count -=4;
414 case -1:
415 {
416 int rc;
417 rc = db->src->proc_buf(db->src->data, (CharPtr)obuf,count);
418 if (rc < 0)
419 return rc;
420 return bytes+ rc;
421 }
422 case 1:
423 default:
424 break;
425 }
426 assert(db->mode == 1);
427 if ( db->compr_size == 0 )
428 {
429 bytes = db->src->proc_buf(db->src->data, (CharPtr)lens,8);
430 if (bytes<=0)
431 return bytes;
432 assert ( bytes == 8 );
433 compressor_header(db,lens,1);
434 }
435 if ( db->decomp_size > count )
436 {
437 #if 0
438 ErrPostEx(SEV_INFO, 0,0,"\n%s:%d: small compressor output buffer('%d' - required %d) ",
439 __FILE__,__LINE__,count,db->decomp_size);
440 #endif
441 return - db->decomp_size ; /* unsufficient space problem */
442 }
443 if ( db->compr_size > db->bsize)
444 {
445 unsigned char *nb = (unsigned char*)Nlm_MemNew(db->compr_size);
446 if (!nb)
447 {
448 ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: memory exhausted (required %d) ",
449 __FILE__,__LINE__,db->compr_size);
450 return -db->compr_size;
451 }
452 Nlm_MemFree(db->dbuf);
453 db->dbuf = nb;
454 db->bsize = db->compr_size;
455 }
456 bytes = db->src->proc_buf(db->src->data, (CharPtr)db->dbuf,db->compr_size);
457 if ( bytes < db->compr_size )
458 {
459 ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: broken data in input stream compressed(%d) != returned(%d)",
460 __FILE__,__LINE__,db->compr_size,bytes);
461 return -1;
462 }
463 assert (bytes == db->compr_size);
464 if (Nlmzip_Uncompress (db->dbuf, db->compr_size,obuf,count,&bytes) != NLMZIP_OKAY )
465 {
466 ErrPostEx(SEV_ERROR, 0,0,"can't uncompress data");
467 return -1;
468 }
469 assert(bytes==db->decomp_size);
470 db->decomp_size=db->compr_size=0; /* clean buffer reading lock */
471 return bytes;
472 }
473
474 static Int4 LIBCALLBACK
compressor_write(Pointer ptr,CharPtr buf,Int4 count)475 compressor_write(Pointer ptr, CharPtr buf, Int4 count)
476 {
477 compressor_t *db = (compressor_t*)ptr ;
478 Int4 bytes = 0 ;
479
480 if (count<=0)
481 return 0;
482
483 switch (db->mode)
484 {
485 case 0 :
486 bytes = db->src->proc_buf(db->src->data,(char*)"ZIP",4);
487 if (bytes!=4)
488 return -1;
489 db->mode=1; /* compresseed mode */
490 break ;
491 case -1 : /* uncompresseed mode */
492 return db->src->proc_buf(db->src->data, (CharPtr)buf,count);
493 case 1 :
494 default :
495 break;
496 }
497
498 while (Nlmzip_Compress (buf, count,db->dbuf+8,db->bsize-8,&bytes) !=NLMZIP_OKAY)
499 {
500 unsigned char *nb = (unsigned char*)Nlm_MemNew(2*db->bsize);
501 if (!nb)
502 {
503 ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: memory exhausted (required %d) ",
504 __FILE__,__LINE__,db->compr_size);
505 return -db->compr_size;
506 }
507 Nlm_MemFree(db->dbuf);
508 db->dbuf = nb;
509 db->bsize *=2;
510 }
511
512 db->decomp_size = count;
513 db->compr_size = bytes;
514
515 compressor_header(db,db->dbuf,0);
516 bytes = db->src->proc_buf(db->src->data, (CharPtr)db->dbuf,db->compr_size+8);
517 if ( bytes != db->compr_size+8)
518 {
519 ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: broken data in output stream",
520 __FILE__,__LINE__);
521 return -1;
522 }
523 return count;
524 }
525
526 static Int4 LIBCALLBACK
compressor_close(Pointer ptr,int commit)527 compressor_close(Pointer ptr, int commit)
528 {
529 compressor_t *db = (compressor_t*)ptr;
530 Int4 rc = commit;
531
532 if(db->src->close)
533 rc = db->src->close(db->src->data,commit);
534 if (commit>=0)
535 {
536 if (db->src)
537 Nlm_MemFree(db->src);
538 if (db->dbuf)
539 Nlm_MemFree(db->dbuf);
540 Nlm_MemFree(db);
541 }
542 else
543 {
544 db->mode = 0;
545 db->decomp_size=db->compr_size=0; /* clean buffer reading lock */
546 }
547 return rc ;
548 }
549
550 fci_t LIBCALL
compressor_open(fci_t stream,int max_buffer_size,int read)551 compressor_open(fci_t stream, int max_buffer_size, int read)
552 {
553 compressor_t *data = (compressor_t*)Nlm_MemNew(sizeof(*data));
554
555 if (max_buffer_size<1024)
556 max_buffer_size = 1024;
557 data->src = stream ;
558 data->mode = 0;
559 data->dbuf = (unsigned char*)Nlm_MemNew(max_buffer_size);
560 if(data->dbuf)
561 data->bsize = max_buffer_size;
562 return cacher_open( /* add one more cache which will read data */
563 fci_open(data,(read?compressor_read:compressor_write),compressor_close),
564 max_buffer_size,read);
565 }
566
567
#if 0
/*
 * ASNIO2FCI
 *
 * Glue between AsnIo objects and fci streams.  This whole section is
 * excluded from compilation by the surrounding '#if 0'.
 */

/*
 * I/O callback handed to AsnIoNew: forwards the buffer to the 'proc_buf'
 * callback of the fci stream passed as 'ptr'.
 * NOTE(review): the assert presumably exists because the result is returned
 * as Int2 - confirm.
 */
static Int2 LIBCALLBACK
asnio2fci_proc(Pointer ptr, CharPtr buf, Uint2 count)
{
    fci_t f = (fci_t)ptr;

    assert(count <= 0x7fff );
    return f->proc_buf(f->data, buf,count);
}

/*
 * Closes (commit >= 0) or resets (commit < 0) the AsnIo object, then runs
 * the close callback of the attached stream, if it has one.  The stream
 * object itself is released only when commit >= 0.
 * Returns the result of the stream's close callback, or 'commit' when the
 * stream has none.
 */
Int4 LIBCALL
asnio2fci_close(AsnIoPtr aip,Int4 commit)
{
    /* fetch the stream before the AsnIo object is closed */
    fci_t stream = aip ->iostruct;
    Int4 rc = commit;

    if(commit>=0)
        AsnIoClose (aip);
    else
        AsnIoReset (aip);
    if(stream->close)
        rc = stream->close(stream->data,commit);
    if (commit>=0)
        MemFree (stream);
    return rc;
}

/*
 * Creates an AsnIo object on top of 'stream': ASNIO_BIN_IN when 'read' is
 * non-zero, ASNIO_BIN_OUT otherwise.  asnio2fci_proc is passed in a
 * different argument position for each direction.
 */
AsnIoPtr LIBCALL
asnio2fci_open(int read, fci_t stream)
{
    if (read)
        return AsnIoNew(ASNIO_BIN_IN, NULL, stream, asnio2fci_proc, NULL);
    else
        return AsnIoNew(ASNIO_BIN_OUT, NULL, stream, NULL, asnio2fci_proc);
}
#endif
608
609
610 END_CTRANSITION_SCOPE
611
612
613 // Re-enable warnings
614 #ifdef __GNUC__ // if gcc or g++
615 # pragma GCC diagnostic pop
616 #endif //__GNUC__
617
618