1 /*  $Id: ct_nlmzip_streamprocs.cpp 624729 2021-02-03 18:52:17Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *            National Center for Biotechnology Information (NCBI)
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government do not place any restriction on its use or reproduction.
13  *  We would, however, appreciate having the NCBI and the author cited in
14  *  any work or product based on this material
15  *
16  *  Although all reasonable efforts have been taken to ensure the accuracy
17  *  and reliability of the software and data, the NLM and the U.S.
18  *  Government do not and cannot warrant the performance or results that
19  *  may be obtained by using this software or data. The NLM and the U.S.
20  *  Government disclaim all warranties, express or implied, including
21  *  warranties of performance, merchantability or fitness for any particular
22  *  purpose.
23  *
24  * ===========================================================================
25  *
26  * Author:  Michael Kimelman
27  *
28  * (Asn) Stream processing utilities - compressor/cacher and AsnIoPtr to fci merger.
29  *
30  * Modifications:
31  * --------------------------------------------------------------------------
32  * $Log: streamprocs.c,v $
33  * Revision 1.8  2002/08/14 15:51:52  kimelman
34  * asserts added
35  *
36  * Revision 1.7  2001/05/09 23:40:26  kimelman
37  * reader effeciency improved
38  *
39  * Revision 1.6  2001/05/09 21:32:45  kimelman
40  * bugfix: check for null 'close' method before running
41  *
42  * Revision 1.5  2001/05/09 00:57:42  kimelman
43  * cosmetics
44  *
45  * Revision 1.4  2001/03/01 21:20:51  kimelman
46  * make it less noisy
47  *
48  * Revision 1.3  1998/06/25 19:24:29  kimelman
49  * changed coef. of cache grow
50  *
51  * Revision 1.2  1998/05/15 19:05:19  kimelman
52  * all old 'gzip' names changed to be suitable for library names.
53  * prefix Nlmzip_ is now used for all of this local stuff.
54  * interface headers changed their names, moving from artdb/ur to nlmzip
55  *
56  * Revision 1.1  1998/05/14 20:21:17  kimelman
57  * ct_init --> Nlmzip_ct_init
58  * added stream& AsnIo processing functions
59  * makefile changed
60  *
61  *
62  *
63  * ==========================================================================
64  */
65 
66 
67 #include <ncbi_pch.hpp>
68 #include <ctools/ctransition/ncbimem.hpp>
69 #include <ctools/ctransition/ncbierr.hpp>
70 #include <assert.h>
71 #include "ct_nlmzip_i.h"
72 
73 // Ignore warnings for ncbi included code
74 #ifdef __GNUC__ // if gcc or g++
75 #  pragma GCC diagnostic push
76 #  pragma GCC diagnostic ignored "-Wunused-function"
77 #endif //__GNUC__
78 
79 
80 BEGIN_CTRANSITION_SCOPE
81 
82 
83 
84 /*
85  * File common interface
86  */
87 
88 fci_t LIBCALL
fci_open(Pointer data,Int4 (* proc_buf)(Pointer ptr,CharPtr buf,Int4 count),Int4 (* pclose)(Pointer ptr,int commit))89 fci_open(  Pointer data,
90            Int4 (*proc_buf)(Pointer ptr, CharPtr buf, Int4 count),
91            Int4 (*pclose)(Pointer ptr, int commit)
92            )
93 {
94   fci_t obj = (fci_t)Nlm_MemNew(sizeof(*obj));
95 
96   obj->data      = data;
97   obj->proc_buf  = proc_buf;
98   obj->close     = pclose;
99   return obj;
100 }
101 
102 
103 /*
104  *  AsnIoPtr  asnio2fci.open(compressor.open(cacher.open(100,dbio.open(db))))
105 
106  * fci_t  asnio2fci ;
107  * fci_t  compressor;
108  * fci_t  cacher;
109  * fci_t  dbio;
110 
111  */
112 
113 /*
114  *   CACHER
115  */
116 
117 typedef struct {
118   fci_t src;
119   int   read;
120   char *buf;
121   int   len;
122   int   start;
123   int   size;
124   int   cache_size;
125   int   eos;
126 } cacher_t;
127 
128 static Int4 LIBCALLBACK
cacher_read(Pointer ptr,CharPtr buf,Int4 count)129 cacher_read(Pointer ptr, CharPtr buf, Int4 count)
130 {
131   cacher_t *db = (cacher_t*)ptr;
132   Int4      bytes = 0;
133 
134   while (count > bytes)
135     {
136       assert(bytes>=0);
137       assert(bytes<=count);
138       if ( db->len == db->start ) /* if cache is empty */
139         {
140           Int4 len = 0;
141 //          int direct_read = 0;
142           if (db->eos)
143             break;      /* end of stream EXIT */
144           if ( count-bytes > db->cache_size / 2)
145             { /* read directly to caller's buffer */
146               len = db->src->proc_buf(db->src->data, buf+bytes, count-bytes);
147               /* negative 'len<0' answer means request for larger buffer size */
148               if ( len == 0 )
149                 db->eos = 1;
150               if ( len > 0 )
151                 bytes += len ;
152             }
153           if (count>bytes)
154             { /* cache input stream */
155               if ( db->cache_size < count )
156                 db->cache_size = count;
157               if ( db->cache_size < - len )
158                 db->cache_size = - len;
159               if ( db->cache_size > db->size )
160                 db->cache_size = db->size;
161               db->start = 0;
162               len = db->src->proc_buf(db->src->data, db->buf,db->cache_size);
163               if (len < 0 && db->cache_size < - len )
164                 {
165                   /* negative 'len<0' answer means request for larger buffer size */
166                   db->cache_size = - len;
167                   if ( db->cache_size >  db->size )
168                     { /* try to adjust cache size - that case seems to be request
169                          for larger buffer from underlying decompressor */
170                       CharPtr newb = (CharPtr)Nlm_MemNew( - len );
171                       if ( ! newb )
172                         {
173                           ErrPostEx(SEV_ERROR,0,0,"memory is exhausted - can't allocate %d bytes",-len);
174                           return len;
175                         }
176                       Nlm_MemFree(db->buf);
177                       db->buf = newb;
178                       db->size = - len;
179                     }
180                   len = db->src->proc_buf(db->src->data, db->buf,db->cache_size);
181                 }
182               db->cache_size *= 2;
183               if (len < 0 )
184                 return len;
185               if (len == 0 )
186                 db->eos = 1 ;
187               db->len = len;
188             }
189         }
190       if ( db->len - db->start > 0 )
191         {
192           int sz = db->len - db->start;
193           if ( bytes + sz  > count )
194             sz = count - bytes ;
195           memcpy(buf+bytes, db->buf + db->start, sz );
196           db->start +=sz;
197           bytes += sz;
198         }
199     }
200   assert(bytes>=0);
201   assert(bytes<=count);
202   return bytes;
203 }
204 
205 static Int4 LIBCALLBACK
cacher_write(Pointer ptr,CharPtr buf,Int4 count)206 cacher_write(Pointer ptr, CharPtr buf, Int4 count)
207 {
208   cacher_t *db = (cacher_t*)ptr;
209   Int4      bytes = 0;
210 //  int       flush_it = 0;
211 
212   if(count<=0)
213     return 0;
214   /* cache size adjustments */
215   if ( db->cache_size < count )
216     {
217       db->cache_size = count;
218       if ( db->cache_size > db->size )
219         db->cache_size = db->size;
220     }
221 
222   while (count > bytes)
223     {
224       int len = 0;
225       if ( db->len == db->cache_size ||  /* if cache is full */
226            count-bytes > db->size / 2 )  /* or new data is too large for this cache */
227         { /* flush cache */
228           if (db->len > 0)
229             len = db->src->proc_buf(db->src->data, db->buf,db->len);
230           if (len != db->len)
231             {
232               ErrPostEx(SEV_ERROR,0,0,"Failure to write data from cache (%d of %d written)",len,db->len);
233               return -1;
234             }
235           db->cache_size *=2;
236           if ( db->cache_size < 2 * count )
237             db->cache_size = 2 * count;
238           if ( db->cache_size > db->size )
239             db->cache_size = db->size;
240           db->start = db->len = 0;
241         }
242       if ( count - bytes > db->cache_size && db->len) /* if there are a lot of data */
243         { /*remains and cache is empty  -- do uncached write                        */
244           len = db->src->proc_buf(db->src->data, buf+bytes,count-bytes);
245           if (len != count-bytes)
246             return -1;
247           bytes += len;
248           assert (bytes == count);
249         }
250       else
251         { /* cached write */
252           len = db->cache_size - db->len;
253           assert( len > 0 );
254           if ( count-bytes < len)
255             len = count - bytes;
256           memcpy(db->buf + db->len, buf+bytes, len);
257           db->len += len ;
258           bytes += len;
259         }
260     }
261   return bytes;
262 }
263 
264 static Int4 LIBCALLBACK
cacher_close(Pointer ptr,int commit)265 cacher_close(Pointer ptr, int commit)
266 {
267   cacher_t *db = (cacher_t*)ptr;
268   Int4 rc;
269 
270   if (!db->read)
271     {
272       Int4 len, len1 = db->len-db->start;
273       if (commit>0)
274         {
275           len = db->src->proc_buf(db->src->data, db->buf+db->start,len1);
276           commit = (len == len1) ;
277         }
278     }
279   rc=commit;
280   if(db->src->close)
281     rc = db->src->close(db->src->data,commit);
282   if (commit>=0)
283     {
284       Nlm_MemFree(db->src);
285       Nlm_MemFree(db->buf);
286       Nlm_MemFree(db);
287     }
288   else
289     {
290       db->len = db->start = db->cache_size = db->eos = 0 ;
291     }
292   return rc ;
293 }
294 
295 fci_t LIBCALL
cacher_open(fci_t stream,int max_cache_size,int read)296 cacher_open(fci_t stream, int max_cache_size,int read)
297 {
298   cacher_t *data = (cacher_t*)Nlm_MemNew(sizeof(*data));
299 
300   data->read = read ;
301   data->src  = stream ;
302   data->size = max_cache_size;
303   data->cache_size = max_cache_size/10;
304   if (data->cache_size < 2048 && max_cache_size > 2048)
305     data->cache_size=2048;
306 
307   while ((data->buf = (char*)Nlm_MemNew(data->size)) == NULL)
308     {
309       data->size /= 2;
310       if (data->size <= 1024)
311         {
312           Nlm_MemFree(data);
313           ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: memory exhausted '%d' ",
314                     __FILE__,__LINE__,data->size*2);
315           return NULL;
316         }
317     }
318   return fci_open(data,(read?cacher_read:cacher_write),cacher_close);
319 }
320 
321 /*
322  *   COMPRESSOR
323  */
324 
325 typedef struct {
326   fci_t          src;
327   int            mode; /* 0 - uninitialized. 1 - compressed ; -1 - uncompressed */
328   unsigned char *dbuf;
329   Int4           bsize;
330   Int4           compr_size;
331   Int4           decomp_size;
332 } compressor_t;
333 
334 static void
compressor_header(compressor_t * db,UcharPtr header,int read)335 compressor_header(compressor_t  *db,UcharPtr header,int read)
336 {
337   UcharPtr dbuf;
338   Uint4 val;
339   int bytes;
340 
341   dbuf = (UcharPtr) header;
342   if(read)
343     { /* header --> db */
344 #if 0
345       fprintf(stderr,"scanned buffer");
346       for(bytes=0; bytes<8; bytes++)
347         fprintf(stderr,"'%x',",header[bytes]);
348       fprintf(stderr,"\n");
349 #endif
350       for(val=0, bytes=0; bytes<4; bytes++,dbuf++)
351         val = (val<<8) + *dbuf ;
352       db->compr_size = val;
353       for(val=0         ; bytes<8; bytes++,dbuf++)
354         val = (val<<8) + *dbuf ;
355       db->decomp_size = val;
356 #if 0
357       fprintf (stderr,"decompr(%x-%d)-->%x-%d\n",db->compr_size,db->compr_size,db->decomp_size,db->decomp_size);
358       if (read == 1)
359         {/* QA */
360           Uchar buf[8];
361           compressor_header(db,buf,0);
362           assert(memcmp(buf,header,8)==0);
363         }
364 #endif
365     }
366   else
367     { /* write compressed block header */
368       /* db --> header */
369       val = db->compr_size;
370       for(bytes=0; bytes<4; bytes++,dbuf++)
371         *dbuf = (val >> (3-bytes)*8) & 0xff ;
372       val = db->decomp_size;
373       for(      ; bytes<8; bytes++,dbuf++)
374         *dbuf = (val >> (7-bytes)*8) & 0xff ;
375 #if 0
376       fprintf (stderr,"compr(%x)-->%x ",db->decomp_size,db->compr_size);
377       fprintf(stderr,"written buffer");
378       for(bytes=0; bytes<8; bytes++)
379         fprintf(stderr,"'%x',",header[bytes]);
380       fprintf(stderr,"\n");
381 
382       {/* QA */
383         Int4 dc = db->decomp_size, cm = db->compr_size;
384         compressor_header(db,header,2);
385         assert(cm == db->compr_size);
386         assert(dc == db->decomp_size);
387       }
388 #endif
389     }
390 }
391 
392 static Int4 LIBCALLBACK
compressor_read(Pointer ptr,CharPtr obuf,Int4 count)393 compressor_read(Pointer ptr, CharPtr obuf, Int4 count)
394 {
395   compressor_t  *db          = (compressor_t*)ptr   ;
396   unsigned char  lens[8];
397   Int4           bytes       = 0 ;
398 
399   switch(db->mode)
400     {
401     case 0:
402       assert(count>=4);
403       bytes = db->src->proc_buf(db->src->data, (CharPtr)obuf,4);
404       if (bytes!=4)
405         return -1;
406       if (strcmp(obuf,"ZIP")==0)
407         {
408           db->mode=1; /* compresseed mode */
409           break;
410         }
411       db->mode=-1; /*uncompresseed mode */
412       obuf+=4;
413       count -=4;
414     case -1:
415       {
416         int rc;
417         rc = db->src->proc_buf(db->src->data, (CharPtr)obuf,count);
418         if (rc < 0)
419           return rc;
420         return bytes+ rc;
421       }
422     case 1:
423     default:
424       break;
425     }
426   assert(db->mode == 1);
427   if ( db->compr_size == 0 )
428     {
429       bytes = db->src->proc_buf(db->src->data, (CharPtr)lens,8);
430       if (bytes<=0)
431         return bytes;
432       assert ( bytes == 8 );
433       compressor_header(db,lens,1);
434     }
435   if ( db->decomp_size > count )
436     {
437 #if 0
438       ErrPostEx(SEV_INFO, 0,0,"\n%s:%d: small compressor output buffer('%d' - required %d) ",
439                 __FILE__,__LINE__,count,db->decomp_size);
440 #endif
441       return - db->decomp_size ;  /* unsufficient space problem */
442     }
443   if ( db->compr_size > db->bsize)
444     {
445       unsigned char *nb = (unsigned char*)Nlm_MemNew(db->compr_size);
446       if (!nb)
447         {
448           ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: memory exhausted (required %d) ",
449                     __FILE__,__LINE__,db->compr_size);
450           return -db->compr_size;
451         }
452       Nlm_MemFree(db->dbuf);
453       db->dbuf = nb;
454       db->bsize = db->compr_size;
455     }
456   bytes = db->src->proc_buf(db->src->data, (CharPtr)db->dbuf,db->compr_size);
457   if ( bytes < db->compr_size )
458     {
459       ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: broken data in input stream compressed(%d) != returned(%d)",
460                 __FILE__,__LINE__,db->compr_size,bytes);
461       return -1;
462     }
463   assert (bytes == db->compr_size);
464   if (Nlmzip_Uncompress (db->dbuf, db->compr_size,obuf,count,&bytes) != NLMZIP_OKAY )
465     {
466       ErrPostEx(SEV_ERROR, 0,0,"can't uncompress data");
467       return -1;
468     }
469   assert(bytes==db->decomp_size);
470   db->decomp_size=db->compr_size=0; /* clean buffer reading lock */
471   return bytes;
472 }
473 
474 static Int4 LIBCALLBACK
compressor_write(Pointer ptr,CharPtr buf,Int4 count)475 compressor_write(Pointer ptr, CharPtr buf, Int4 count)
476 {
477   compressor_t  *db          = (compressor_t*)ptr   ;
478   Int4           bytes       = 0 ;
479 
480   if (count<=0)
481     return 0;
482 
483   switch (db->mode)
484     {
485     case 0  :
486       bytes = db->src->proc_buf(db->src->data,(char*)"ZIP",4);
487       if (bytes!=4)
488         return -1;
489       db->mode=1; /* compresseed mode */
490       break ;
491     case -1 :      /* uncompresseed mode */
492       return db->src->proc_buf(db->src->data, (CharPtr)buf,count);
493     case 1  :
494     default :
495       break;
496     }
497 
498   while (Nlmzip_Compress (buf, count,db->dbuf+8,db->bsize-8,&bytes) !=NLMZIP_OKAY)
499     {
500       unsigned char *nb = (unsigned char*)Nlm_MemNew(2*db->bsize);
501       if (!nb)
502         {
503           ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: memory exhausted (required %d) ",
504                     __FILE__,__LINE__,db->compr_size);
505           return -db->compr_size;
506         }
507       Nlm_MemFree(db->dbuf);
508       db->dbuf = nb;
509       db->bsize *=2;
510     }
511 
512   db->decomp_size = count;
513   db->compr_size  = bytes;
514 
515   compressor_header(db,db->dbuf,0);
516   bytes = db->src->proc_buf(db->src->data, (CharPtr)db->dbuf,db->compr_size+8);
517   if ( bytes != db->compr_size+8)
518     {
519       ErrPostEx(SEV_ERROR, 0,0,"\n%s:%d: broken data in output stream",
520                 __FILE__,__LINE__);
521       return -1;
522     }
523   return count;
524 }
525 
526 static Int4 LIBCALLBACK
compressor_close(Pointer ptr,int commit)527 compressor_close(Pointer ptr, int commit)
528 {
529   compressor_t *db = (compressor_t*)ptr;
530   Int4 rc = commit;
531 
532   if(db->src->close)
533     rc = db->src->close(db->src->data,commit);
534   if (commit>=0)
535     {
536       if (db->src)
537           Nlm_MemFree(db->src);
538       if (db->dbuf)
539           Nlm_MemFree(db->dbuf);
540       Nlm_MemFree(db);
541     }
542   else
543     {
544       db->mode = 0;
545       db->decomp_size=db->compr_size=0; /* clean buffer reading lock */
546     }
547   return rc ;
548 }
549 
550 fci_t LIBCALL
compressor_open(fci_t stream,int max_buffer_size,int read)551 compressor_open(fci_t stream, int max_buffer_size, int read)
552 {
553   compressor_t *data = (compressor_t*)Nlm_MemNew(sizeof(*data));
554 
555   if (max_buffer_size<1024)
556     max_buffer_size = 1024;
557   data->src  = stream ;
558   data->mode = 0;
559   data->dbuf = (unsigned char*)Nlm_MemNew(max_buffer_size);
560   if(data->dbuf)
561     data->bsize = max_buffer_size;
562   return cacher_open( /* add one more cache which will read data */
563                      fci_open(data,(read?compressor_read:compressor_write),compressor_close),
564                      max_buffer_size,read);
565 }
566 
567 
#if 0
/*
 *   ASNIO2FCI  (disabled: kept for reference only, not compiled)
 */

/* AsnIo buffer callback: forwards the request to the fci stream in 'ptr'. */
static Int2 LIBCALLBACK
asnio2fci_proc(Pointer ptr, CharPtr buf, Uint2 count)
{
  fci_t target = (fci_t)ptr;

  assert(count <= 0x7fff );
  return target->proc_buf(target->data, buf, count);
}

/*
 * Close (commit >= 0) or reset (commit < 0) the AsnIo object and pass
 * 'commit' on to the close callback of the attached fci stream.
 */
Int4 LIBCALL
asnio2fci_close(AsnIoPtr aip,Int4 commit)
{
  /* fetch the stream before the AsnIo object is closed */
  fci_t stream = aip ->iostruct;
  Int4  result = commit;

  if (commit < 0)
    AsnIoReset (aip);
  else
    AsnIoClose (aip);

  if (stream->close)
    result = stream->close(stream->data, commit);

  if (commit >= 0)
    MemFree (stream);
  return result;
}

/* Create a binary AsnIo object that reads from / writes to 'stream'. */
AsnIoPtr LIBCALL
asnio2fci_open(int read, fci_t stream)
{
  if (!read)
    return AsnIoNew(ASNIO_BIN_OUT, NULL, stream, NULL, asnio2fci_proc);
  return AsnIoNew(ASNIO_BIN_IN, NULL, stream, asnio2fci_proc, NULL);
}
#endif
608 
609 
610 END_CTRANSITION_SCOPE
611 
612 
613 // Re-enable warnings
614 #ifdef __GNUC__ // if gcc or g++
615 #  pragma GCC diagnostic pop
616 #endif //__GNUC__
617 
618