1 //
// WordDBCompress.cc
3 //
4 // WordDBCompress: Implements specific compression scheme for
5 //                 Berkeley DB pages containing WordReferences objects.
6 //
7 // Part of the ht://Dig package   <http://www.htdig.org/>
8 // Copyright (c) 1999-2004 The ht://Dig Group
9 // For copyright details, see the file COPYING in your distribution
10 // or the GNU Library General Public License (LGPL) version 2 or later
11 // <http://www.gnu.org/copyleft/lgpl.html>
12 //
13 // $Id: WordDBCompress.cc,v 1.7 2004/05/28 13:15:26 lha Exp $
14 //
15 
16 #ifdef HAVE_CONFIG_H
17 #include "htconfig.h"
18 #endif /* HAVE_CONFIG_H */
19 
20 #include <ctype.h>
21 
22 #include "WordDBPage.h"
23 #include "WordDBCompress.h"
24 #include "WordBitCompress.h"
25 
/*
 *   WordDBCompress: C callbacks, invoked by Berkeley DB.
 *      Each one simply forwards to the corresponding WordDBCompress method
 *      of the object handed back through user_data.
 */
30 extern "C"
31 {
32 
WordDBCompress_compress_c(const u_int8_t * inbuff,int inbuff_length,u_int8_t ** outbuffp,int * outbuff_lengthp,void * user_data)33 static int WordDBCompress_compress_c(const u_int8_t* inbuff, int inbuff_length, u_int8_t** outbuffp, int* outbuff_lengthp, void *user_data)
34 {
35     if(!user_data) {
36       fprintf(stderr, "WordDBCompress_compress_c:: user_data is NULL");
37       return NOTOK;
38     }
39     return ((WordDBCompress *)user_data)->Compress((unsigned char*)inbuff, inbuff_length, (unsigned char**)outbuffp, outbuff_lengthp);
40 }
41 
WordDBCompress_uncompress_c(const u_int8_t * inbuff,int inbuff_length,u_int8_t * outbuff,int outbuff_length,void * user_data)42 static int WordDBCompress_uncompress_c(const u_int8_t* inbuff, int inbuff_length, u_int8_t* outbuff, int outbuff_length, void *user_data)
43 {
44     if(!user_data) {
45       fprintf(stderr, "WordDBCompress_uncompress_c:: user_data is NULL");
46       return NOTOK;
47     }
48     return ((WordDBCompress *)user_data)->Uncompress((unsigned char *)inbuff, inbuff_length, (unsigned char*)outbuff, outbuff_length);
49 }
50 
51 }
52 
53 // ***********************************************
54 // *********** WordDBCompress  *******************
55 // ***********************************************
56 
WordDBCompress()57 WordDBCompress::WordDBCompress()
58 {
59 
60   cmprInfo = 0;
61 
62   //
63   // DEBUGING / BENCHMARKING
64   //
65   debug = 0;
66 
67   //zlib WordDB Compression
68   use_zlib = 0;
69   zlib_level = 0;
70 
71 }
72 
73 
WordDBCompress(int zlib,int level)74 WordDBCompress::WordDBCompress(int zlib, int level)
75 {
76 
77   cmprInfo = 0;
78 
79   //
80   // DEBUGING / BENCHMARKING
81   //
82   debug = 0;
83 
84   //zlib WordDB Compression
85   use_zlib = zlib;
86   zlib_level = level;
87 }
88 
89 
CmprInfo()90 DB_CMPR_INFO* WordDBCompress::CmprInfo()
91 {
92 
93   DB_CMPR_INFO *cmpr_info = new DB_CMPR_INFO;
94 
95   cmpr_info->user_data = (void *)this;
96   cmpr_info->compress = WordDBCompress_compress_c;
97   cmpr_info->uncompress = WordDBCompress_uncompress_c;
98   cmpr_info->coefficient = 3;	// reduce page size by factor of 1<<3 = 8
99   cmpr_info->max_npages = 9;
100 
101   if(use_zlib == 1)
102       cmpr_info->zlib_flags = zlib_level;
103   else
104       cmpr_info->zlib_flags = 0;
105 
106   cmprInfo = cmpr_info;
107 
108   return cmpr_info;
109 }
110 
111 int
Compress(const u_int8_t * inbuff,int inbuff_length,u_int8_t ** outbuffp,int * outbuff_lengthp)112 WordDBCompress::Compress(const  u_int8_t *inbuff, int inbuff_length, u_int8_t **outbuffp, int *outbuff_lengthp)
113 {
114   WordDBPage pg(inbuff, inbuff_length);
115 
116   if(debug > 2) {
117     printf("###########################  WordDBCompress::Compress:  #################################################\n");
118     pg.show();
119     printf("~~~~~~~~~~~~~\n");
120   }
121 
122   if(debug) TestCompress(inbuff, inbuff_length);
123 
124   Compressor *res = pg.Compress(0, cmprInfo);
125 
126   (*outbuffp) = res->get_data();
127   (*outbuff_lengthp) = res->buffsize();
128 
129   if(debug > 2) {
130     res->show();
131     printf("\n%%%%%%%% Final COMPRESSED size:%4d   %f\n",res->size(),res->size()/8.0);
132     printf("***************************   #################################################\n");
133   }
134 
135   delete res;
136   if(debug > 2) printf("WordDBCompress::Compress: final output size:%6d (inputsize:%6d)\n", (*outbuff_lengthp), inbuff_length);
137 
138   pg.unset_page();
139 
140   return(0);
141 }
142 
143 int
Uncompress(const u_int8_t * inbuff,int inbuff_length,u_int8_t * outbuff,int outbuff_length)144 WordDBCompress::Uncompress(const u_int8_t *inbuff, int inbuff_length, u_int8_t *outbuff,int outbuff_length)
145 {
146   if(debug > 2) printf("WordDBCompress::Uncompress::  %5d -> %5d\n", inbuff_length, outbuff_length);
147 
148   WordDBPage pg(outbuff_length);
149 
150   if(debug > 2) printf("------------------------  WordDBCompress::Uncompress: --------------------------------\n");
151 
152   Compressor in(inbuff_length);
153   in.set_data(inbuff,inbuff_length*8);
154   in.rewind();
155 
156   pg.Uncompress(&in,0);
157 
158   memcpy((void *)outbuff, (void *)pg.pg, outbuff_length);
159 
160   if(debug > 2) printf("------------------------  WordDBCompress::Uncompress: END\n");
161 
162     // DEBUGING / BENCHMARKING
163 
164   pg.delete_page();
165   return(0);
166 }
167 
168 int
TestCompress(const u_int8_t * pagebuff,int pagebuffsize)169 WordDBCompress::TestCompress(const  u_int8_t* pagebuff, int pagebuffsize)
170 {
171     WordDBPage pg(pagebuff,pagebuffsize);
172     pg.TestCompress(debug);
173     pg.unset_page();
174     return 0;
175 }
176