1 //
2 // WordDBCompress.cc
3 //
4 // Part of the ht://Dig package   <http://www.htdig.org/>
5 // Copyright (c) 1999 The ht://Dig Group
6 // For copyright details, see the file COPYING in your distribution
7 // or the GNU Public License version 2 or later
8 // <http://www.gnu.org/copyleft/gpl.html>
9 //
10 // $Id: WordDBCompress.cc,v 1.15 2000/10/25 12:32:19 loic Exp $
11 //
12 /*
13 
14  BTREE original page layout in Berkeley DB
15 
16  *	+-----------------------------------+
17  *	|    lsn    |   pgno    | prev pgno |
18  *	+-----------------------------------+
19  *	| next pgno |  entries  | hf offset |
20  *	+-----------------------------------+
21  *	|   level   |   type    |   index   |
22  *	+-----------------------------------+
23  *	|   index   | free -->              |
24  *	+-----------+-----------------------+
25  *	|   	 F R E E A R E A            |
26  *	+-----------------------------------+
27  *	|              <-- free |   item    |
28  *	+-----------------------------------+
29  *	|   item    |   item    |   item    |
30  *	+-----------------------------------+
31  */
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif /* HAVE_CONFIG_H */
35 
36 #include <stdlib.h>
37 #include <errno.h>
38 
39 extern "C" {
40 #include "db_int.h"
41 #include "db_page.h"
42 }
43 
44 #include "lib.h"
45 #include "WordDBCompress.h"
46 #include "WordBitCompress.h"
47 #include "WordKeyInfo.h"
48 #include "WordKey.h"
49 #include "WordRecord.h"
50 #include "WordDB.h"
51 #include "HtMaxMin.h"
52 
53 /*
54  *   WordDBCompress: C-callbacks, actually called by Berkeley-DB
55  *      they just call their WordDBCompress equivalents (by using user_data)
56  */
57 extern "C"
58 {
59 
WordDBCompress_compress_c(DB_ENV *,const u_int8_t * inbuff,int inbuff_length,u_int8_t ** outbuffp,int * outbuff_lengthp,void * user_data)60 int WordDBCompress_compress_c(DB_ENV*, const u_int8_t* inbuff, int inbuff_length, u_int8_t** outbuffp, int* outbuff_lengthp, void *user_data)
61 {
62     if(!user_data) {
63       fprintf(stderr, "WordDBCompress_compress_c:: user_data is NULL");
64       return NOTOK;
65     }
66     return ((WordDBCompress *)user_data)->Compress((unsigned char*)inbuff, inbuff_length, (unsigned char**)outbuffp, outbuff_lengthp);
67 }
68 
WordDBCompress_uncompress_c(DB_ENV *,const u_int8_t * inbuff,int inbuff_length,u_int8_t * outbuff,int outbuff_length,void * user_data)69 int WordDBCompress_uncompress_c(DB_ENV*, const u_int8_t* inbuff, int inbuff_length, u_int8_t* outbuff, int outbuff_length, void *user_data)
70 {
71     if(!user_data) {
72       fprintf(stderr, "WordDBCompress_uncompress_c:: user_data is NULL");
73       return NOTOK;
74     }
75     return ((WordDBCompress *)user_data)->Uncompress((unsigned char *)inbuff, inbuff_length, (unsigned char*)outbuff, outbuff_length);
76 }
77 
78 }
79 
80 //
81 // Symbolic names for the data in WordDBEncoded.values[..] arrays
82 //
83 //
84 // Each entry in a leaf/internal page has a flag that shows how
85 // the entry is coded.
86 //
87 #define WORD_CMPR_VAL_FLAGS	0
88 //
89 // Btree: Numerical fields of WordKey
90 //
91 #define WORD_CMPR_VAL_FIELDS	1
92 //
93 // Btree/Internal: BINTERNAL.pgno
94 //
95 #define WORD_CMPR_VAL_PGNO	(WORD_CMPR_VAL_FIELDS + WORD_KEY_MAX_NFIELDS)
96 //
97 // Btree/Leaf: Length of record string
98 //
99 #define WORD_CMPR_VAL_RLENGTH	WORD_CMPR_VAL_PGNO
100 //
101 // Btree/Internal: BINTERNAL.nrecs
102 //
103 #define WORD_CMPR_VAL_NRECS	(WORD_CMPR_VAL_PGNO + 1)
104 //
105 // Btree/Leaf: Value of integer if record contains a single integer value
106 //
107 #define WORD_CMPR_VAL_RVALUE    WORD_CMPR_VAL_NRECS
108 //
109 // Btree: length of WordKey.GetWord() prefix shared with previous entry
110 //
111 #define WORD_CMPR_VAL_PREFIX	(WORD_CMPR_VAL_PGNO + 2)
112 //
113 // Btree: length of WordKey.GetWord() suffix to add to previous key
114 // shared prefix.
115 //
116 #define WORD_CMPR_VAL_SUFFIX	(WORD_CMPR_VAL_PGNO + 3)
117 //
118 // Symbolic name for the last index
119 //
120 #define WORD_CMPR_VAL_LAST	WORD_CMPR_VAL_SUFFIX
121 
122 #define WORD_CMPR_VAL_ARRAY_SIZE (WORD_CMPR_VAL_LAST + 1)
123 //
124 // Number of bits needed to encode the maximum array size value
125 //
126 #define WORD_CMPR_VAL_ARRAY_SIZE_BITS 8
127 
128 //
129 // Number of bits used in flags
130 //
131 #define WORD_CMPR_VAL_FLAGS_BITS (WORD_CMPR_VAL_LAST + 1)
132 //
133 // Values for the WORD_CMPR_VAL_FLAGS bit field
134 //
135 #define WORD_CMPR_VAL_FLAGS_FIELD(n)	(1 << (WORD_CMPR_VAL_FIELDS + (n)))
136 #define WORD_CMPR_VAL_FLAGS_PGNO	(1 << (WORD_CMPR_VAL_PGNO))
137 #define WORD_CMPR_VAL_FLAGS_RLENGTH	WORD_CMPR_VAL_FLAGS_PGNO
138 #define WORD_CMPR_VAL_FLAGS_NRECS	(1 << (WORD_CMPR_VAL_NRECS))
139 #define WORD_CMPR_VAL_FLAGS_RVALUE	WORD_CMPR_VAL_FLAGS_NRECS
140 #define WORD_CMPR_VAL_FLAGS_PREFIX	(1 << (WORD_CMPR_VAL_PREFIX))
141 #define WORD_CMPR_VAL_FLAGS_SUFFIX	(1 << (WORD_CMPR_VAL_SUFFIX))
142 #define WORD_CMPR_VAL_FLAGS_STRING	(1 << (WORD_CMPR_VAL_LAST + 1))
143 #define WORD_CMPR_VAL_FLAGS_EMPTY	(1 << (WORD_CMPR_VAL_LAST + 2))
144 #define WORD_CMPR_VAL_FLAGS_RECORD_EQ	(1 << (WORD_CMPR_VAL_LAST + 3))
145 #define WORD_CMPR_VAL_FLAGS_RECORD_NO	(1 << (WORD_CMPR_VAL_LAST + 4))
146 #define WORD_CMPR_VAL_FLAGS_RECORD_STR	(1 << (WORD_CMPR_VAL_LAST + 5))
147 
148 //
149 // Storage for values extracted from a PAGE structure
150 //
151 class WordDBEncoded {
152 public:
WordDBEncoded()153   inline WordDBEncoded() {
154     Init();
155   }
~WordDBEncoded()156   inline ~WordDBEncoded() {
157     free(strings);
158     for(int i = 0; i < WORD_CMPR_VAL_ARRAY_SIZE; i++)
159       free(values[i]);
160   }
161 
Init()162   inline void Init() {
163     strings_size = 32;
164     strings = (unsigned char*)malloc(strings_size);
165 
166     for(int i = 0; i < WORD_CMPR_VAL_ARRAY_SIZE; i++) {
167       values_size[i] = 32;
168       values[i] = (unsigned int*)malloc(values_size[i] * sizeof(unsigned int));
169     }
170 
171     Clear();
172   }
173 
Clear()174   inline void Clear() {
175     strings_length = 0;
176     strings_idx = 0;
177 
178     for(int i = 0; i < WORD_CMPR_VAL_ARRAY_SIZE; i++) {
179       values_length[i] = 0;
180       values_idx[i] = 0;
181     }
182   }
183 
AllocateStrings(unsigned int size)184   inline void AllocateStrings(unsigned int size) {
185     strings_size = size;
186     strings = (unsigned char*)realloc(strings, strings_size);
187   }
188 
CheckStrings(int index)189   inline void CheckStrings(int index) {
190     while(index >= strings_size)
191       AllocateStrings(strings_size * 2);
192   }
193 
AllocateValues(int what,unsigned int size)194   inline void AllocateValues(int what, unsigned int size) {
195     values_size[what] = size;
196     values[what] = (unsigned int*)realloc(values[what], values_size[what] * sizeof(unsigned int));
197   }
198 
CheckValues(int what,int index)199   inline void CheckValues(int what, int index) {
200     while(index >= values_size[what])
201       AllocateValues(what, values_size[what] * 2);
202   }
203 
PushValue(int what,unsigned int value)204   inline void PushValue(int what, unsigned int value) {
205     CheckValues(what, values_length[what]);
206     values[what][values_length[what]] = value;
207     values_length[what]++;
208   }
209 
PushString(const unsigned char * string,int length)210   inline void PushString(const unsigned char* string, int length) {
211     CheckStrings(strings_length + length);
212     memcpy(strings + strings_length, string, length);
213     strings_length += length;
214   }
215 
ShiftValue(int what)216   inline unsigned int ShiftValue(int what) {
217     if(values_idx[what] >= values_length[what]) {
218       fprintf(stderr, "WordDBEncoded::ShiftValue: what = %d, (idx = %d) >= (length = %d)\n", what, values_idx[what], values_length[what]);
219       abort();
220     }
221     return values[what][values_idx[what]++];
222   }
223 
ShiftString(int length)224   inline unsigned char* ShiftString(int length) {
225     if(strings_idx + length > strings_length) {
226       fprintf(stderr, "WordDBEncoded::ShiftString: (idx + length = %d) >= (length = %d)\n", strings_idx + length, strings_length);
227       abort();
228     }
229     unsigned char* string = strings + strings_idx;
230     strings_idx += length;
231     return string;
232   }
233 
234   void Put(WordBitCompress& stream);
235   void Get(WordBitCompress& stream);
236 
237   unsigned int *values[WORD_CMPR_VAL_ARRAY_SIZE];
238   int values_length[WORD_CMPR_VAL_ARRAY_SIZE];
239   int values_idx[WORD_CMPR_VAL_ARRAY_SIZE];
240   int values_size[WORD_CMPR_VAL_ARRAY_SIZE];
241 
242   unsigned char *strings;
243   int strings_length;
244   int strings_idx;
245   int strings_size;
246 };
247 
Put(WordBitCompress & stream)248 void WordDBEncoded::Put(WordBitCompress& stream)
249 {
250   unsigned int count = 0;
251   for(int i = 0; i < WORD_CMPR_VAL_ARRAY_SIZE; i++) {
252     if(values_length[i] > 0) count++;
253   }
254   stream.WordBitStream::PutUint(count, WORD_CMPR_VAL_ARRAY_SIZE_BITS);
255 
256   for(int i = 0; i < WORD_CMPR_VAL_ARRAY_SIZE; i++) {
257     if(values_length[i] > 0) {
258       stream.WordBitStream::PutUint(i, WORD_CMPR_VAL_ARRAY_SIZE_BITS);
259       stream.PutUints(values[i], values_length[i]);
260     }
261   }
262 
263   stream.PutUchars(strings, strings_length);
264 }
265 
Get(WordBitCompress & stream)266 void WordDBEncoded::Get(WordBitCompress& stream)
267 {
268   Clear();
269 
270   unsigned int count = 0;
271   count = stream.WordBitStream::GetUint(WORD_CMPR_VAL_ARRAY_SIZE_BITS);
272 
273   for(unsigned int i = 0; i < count; i++) {
274     unsigned int index = stream.WordBitStream::GetUint(WORD_CMPR_VAL_ARRAY_SIZE_BITS);
275     values_length[index] = stream.GetUints(&values[index], &values_size[index]);
276   }
277 
278   strings_length = stream.GetUchars(&strings, &strings_size);
279 }
280 
281 //
282 // ******************** WordDBCompress implementation **************
283 //
284 
285 #define WORD_CMPR_OVERHEAD	((int)sizeof(unsigned char))
286 
WordDBCompress(WordContext * ncontext)287 WordDBCompress::WordDBCompress(WordContext* ncontext)
288 {
289   cmprInfo = 0;
290   context = ncontext;
291   encoded = new WordDBEncoded();
292 
293   const Configuration& config = context->GetConfiguration();
294   debug = config.Boolean("wordlist_compress_debug", 0);
295   verbose = config.Value("wordlist_compress_verbose", 0);
296 }
297 
~WordDBCompress()298 WordDBCompress::~WordDBCompress()
299 {
300   delete encoded;
301 }
302 
303 int
Compress(const unsigned char * inbuff,int inbuff_length,unsigned char ** outbuffp,int * outbuff_lengthp)304 WordDBCompress::Compress(const  unsigned char *inbuff, int inbuff_length, unsigned char **outbuffp, int *outbuff_lengthp)
305 {
306   //
307   // Maximum output length can be WORD_CMPR_OVERHEAD more than input
308   //
309   int outbuff_length = inbuff_length + WORD_CMPR_OVERHEAD;
310   unsigned char* outbuff = (unsigned char*)malloc(sizeof(unsigned char) * outbuff_length);
311 
312   *outbuffp = 0;
313   *outbuff_lengthp = outbuff_length;
314 
315   if(outbuff == 0)
316     return ENOMEM;
317 
318   int ret = 0;
319   PAGE* pp = (PAGE*)inbuff;
320 
321   *outbuff = TYPE_TAGS(pp);
322 
323   switch(TYPE_TAGS(pp)) {
324   case P_IBTREE|WORD_DB_INDEX:
325   case P_LBTREE|WORD_DB_INDEX:
326     ret = CompressBtree(inbuff, inbuff_length, outbuff, &outbuff_length);
327     break;
328   default:
329     memcpy((char*)(outbuff + WORD_CMPR_OVERHEAD), (char*)inbuff, inbuff_length);
330     break;
331   }
332 
333   if(ret == 0) {
334     *outbuffp = outbuff;
335     *outbuff_lengthp = outbuff_length;
336   } else {
337     free(outbuff);
338   }
339 
340   return ret;
341 }
342 
343 int
Uncompress(const unsigned char * inbuff,int inbuff_length,unsigned char * outbuff,int outbuff_length)344 WordDBCompress::Uncompress(const unsigned char *inbuff, int inbuff_length, unsigned char *outbuff, int outbuff_length)
345 {
346   int ret = 0;
347 
348   switch(*inbuff) {
349   case P_IBTREE|WORD_DB_INDEX:
350   case P_LBTREE|WORD_DB_INDEX:
351     ret = UncompressBtree(inbuff, inbuff_length, outbuff, outbuff_length);
352     break;
353   default:
354     memcpy((char*)outbuff, (char*)(inbuff + WORD_CMPR_OVERHEAD), outbuff_length);
355     break;
356   }
357 
358   return ret;
359 }
360 
361 int
CompressBtree(const unsigned char * inbuff,int inbuff_length,unsigned char * outbuff,int * outbuff_lengthp)362 WordDBCompress::CompressBtree(const  unsigned char *inbuff, int inbuff_length, unsigned char *outbuff, int *outbuff_lengthp)
363 {
364   int ret = 0;
365   PAGE *pp = (PAGE *)inbuff;
366 
367   if(verbose) fprintf(stderr, "WordDBCompress::CompressBtree: page %d\n", PGNO(pp));
368 
369   switch(TYPE(pp)) {
370   case P_IBTREE:
371     ret = CompressIBtree(inbuff, inbuff_length, outbuff, outbuff_lengthp);
372     break;
373   case P_LBTREE:
374     ret = CompressLBtree(inbuff, inbuff_length, outbuff, outbuff_lengthp);
375     break;
376   }
377 
378   return ret;
379 }
380 
381 //
382 // Return the length of the chars in b that are not common with a
383 // a = abcdef
384 // b = abcghi
385 //        ---
386 // return 3
387 //
suffixlength(const String & a,const String & b)388 static inline unsigned int suffixlength(const String& a, const String& b)
389 {
390   const unsigned char* ap = (const unsigned char*)a.get();
391   const unsigned char* bp = (const unsigned char*)b.get();
392   int length = HtMIN(a.length(), b.length());
393 
394   int i;
395   for(i = 0; i < length && ap[i] == bp[i]; i++)
396     ;
397 
398   return b.length() - i;
399 }
400 
401 int
CompressIBtree(const unsigned char * inbuff,int inbuff_length,unsigned char * outbuff,int * outbuff_lengthp)402 WordDBCompress::CompressIBtree(const  unsigned char *inbuff, int inbuff_length, unsigned char *outbuff, int *outbuff_lengthp)
403 {
404   PAGE *pp = (PAGE *)inbuff;
405 
406   if(verbose > 5) DumpPage(inbuff);
407 
408   WordBitCompress stream(inbuff_length);
409 
410   stream.PutUint(pp->lsn.file, sizeof(pp->lsn.file) * 8);
411   stream.PutUint(pp->lsn.offset, sizeof(pp->lsn.file) * 8);
412   stream.PutUint(PGNO(pp), sizeof(PGNO(pp)) * 8);
413   stream.PutUint(NUM_ENT(pp), sizeof(NUM_ENT(pp)) * 8);
414   stream.PutUint(LEVEL(pp), sizeof(LEVEL(pp)) * 8);
415 
416   if(NUM_ENT(pp) > 0) {
417     int i;
418     WordKey key(context);
419     WordKey previous_key(context);
420     BINTERNAL* previous_e = 0;
421 
422     encoded->Clear();
423 
424     for (i = 0; i < NUM_ENT(pp); i++) {
425       BINTERNAL* e = GET_BINTERNAL(pp, i);
426       if(debug) {
427 	if(e->type != B_KEYDATA)
428 	  fprintf(stderr, "WordDBCompress::EncodeIBtree: unexpected type 0x%02x\n", e->type);
429       }
430       unsigned int changes = 0;
431 
432       if(e->len <= 0) {
433 	changes = WORD_CMPR_VAL_FLAGS_EMPTY;
434 	//
435 	// Structural data
436 	//
437 	encoded->PushValue(WORD_CMPR_VAL_PGNO, e->pgno);
438 	encoded->PushValue(WORD_CMPR_VAL_NRECS, e->nrecs);
439       } else {
440 
441 	key.Unpack((const char*)e->data, (int)e->len);
442 
443 	if(previous_e != 0) {
444 	  int is_first_change = 1;
445 #if 0
446 	  //
447 	  // String
448 	  //
449 	  const String& word = key.GetWord();
450 	  const String& previous_word = previous_key.GetWord();
451 	  if(word != previous_word) {
452 	    unsigned int suffix_length = suffixlength(previous_word, word);
453 	    unsigned int prefix_length = word.length() - suffix_length;
454 	    encoded->PushValue(WORD_CMPR_VAL_PREFIX, prefix_length);
455 	    encoded->PushValue(WORD_CMPR_VAL_SUFFIX, suffix_length);
456 	    const char* suffix = word.get() + prefix_length;
457 	    encoded->PushString((const unsigned char*)suffix, suffix_length);
458 	    changes |= WORD_CMPR_VAL_FLAGS_STRING;
459 	    is_first_change = 0;
460 	  }
461 #endif
462 	  //
463 	  // Fields
464 	  //
465 	  int nfields = key.NFields();
466 	  for(int i = 0; i < nfields; i++) {
467 	    unsigned int value = key.Get(i);
468 	    unsigned int previous_value = previous_key.Get(i);
469 	    if(value != previous_value) {
470 	      if(is_first_change) {
471 		value -= previous_value;
472 		is_first_change = 0;
473 	      }
474 	      encoded->PushValue(i + WORD_CMPR_VAL_FIELDS, value);
475 	      changes |= WORD_CMPR_VAL_FLAGS_FIELD(i);
476 	    }
477 	  }
478 	  //
479 	  // Structural data
480 	  //
481 	  if(e->pgno != previous_e->pgno) {
482 	    encoded->PushValue(WORD_CMPR_VAL_PGNO, e->pgno);
483 	    changes |= WORD_CMPR_VAL_FLAGS_PGNO;
484 	  }
485 	  if(e->nrecs != previous_e->nrecs) {
486 	    encoded->PushValue(WORD_CMPR_VAL_NRECS, e->nrecs);
487 	    changes |= WORD_CMPR_VAL_FLAGS_NRECS;
488 	  }
489 	} else {
490 #if 0
491 	  //
492 	  // String
493 	  //
494 	  const String& word = key.GetWord();
495 	  encoded->PushValue(WORD_CMPR_VAL_SUFFIX, word.length());
496 	  encoded->PushString((const unsigned char*)word.get(), word.length());
497 #endif
498 	  //
499 	  // Fields
500 	  //
501 	  int nfields = key.NFields();
502 	  for(int i = 0; i < nfields; i++) {
503 	    encoded->PushValue(i + WORD_CMPR_VAL_FIELDS, key.Get(i));
504 	  }
505 	  //
506 	  // Structural data
507 	  //
508 	  encoded->PushValue(WORD_CMPR_VAL_PGNO, e->pgno);
509 	  encoded->PushValue(WORD_CMPR_VAL_NRECS, e->nrecs);
510 	}
511 
512 	previous_e = e;
513 	previous_key = key;
514       }
515 
516       encoded->PushValue(WORD_CMPR_VAL_FLAGS, changes);
517     }
518 
519     encoded->Put(stream);
520   }
521 
522   int outbuff_length = *outbuff_lengthp;
523   if(stream.BuffLength() > (outbuff_length - WORD_CMPR_OVERHEAD)) {
524     fprintf(stderr, "WordDBCompress::CompressIBtree: compressed length = %d > available length = %d\n", stream.BuffLength(), outbuff_length - WORD_CMPR_OVERHEAD);
525     abort();
526   }
527   memcpy(outbuff + WORD_CMPR_OVERHEAD, stream.Buff(), stream.BuffLength());
528   outbuff_length = WORD_CMPR_OVERHEAD + stream.BuffLength();
529 
530   if(debug) {
531     unsigned char* tmp = (unsigned char*)malloc(inbuff_length);
532     memset((char*)tmp, '\0', inbuff_length);
533     UncompressIBtree(outbuff, outbuff_length, tmp, inbuff_length);
534     if(DiffPage(inbuff, tmp)) {
535       fprintf(stderr, "WordDBCompress::CompressIBtree: failed\n");
536       DumpPage(inbuff);
537       DumpPage(tmp);
538       exit(1);
539     }
540     free(tmp);
541   }
542 
543   *outbuff_lengthp = outbuff_length;
544 
545   return 0;
546 }
547 
548 int
CompressLBtree(const unsigned char * inbuff,int inbuff_length,unsigned char * outbuff,int * outbuff_lengthp)549 WordDBCompress::CompressLBtree(const  unsigned char *inbuff, int inbuff_length, unsigned char *outbuff, int *outbuff_lengthp)
550 {
551   PAGE *pp = (PAGE *)inbuff;
552 
553   if(verbose > 5) DumpPage(inbuff);
554 
555   WordBitCompress stream(inbuff_length);
556 
557   stream.PutUint(pp->lsn.file, sizeof(pp->lsn.file) * 8);
558   stream.PutUint(pp->lsn.offset, sizeof(pp->lsn.file) * 8);
559   stream.PutUint(PGNO(pp), sizeof(PGNO(pp)) * 8);
560   stream.PutUint(PREV_PGNO(pp), sizeof(PREV_PGNO(pp)) * 8);
561   stream.PutUint(NEXT_PGNO(pp), sizeof(NEXT_PGNO(pp)) * 8);
562   stream.PutUint(NUM_ENT(pp), sizeof(NUM_ENT(pp)) * 8);
563   stream.PutUint(LEVEL(pp), sizeof(LEVEL(pp)) * 8);
564 
565   if(NUM_ENT(pp) > 0) {
566     int i;
567     WordKey key(context);
568     WordKey previous_key(context);
569     WordRecord record(context);
570     WordRecord previous_record(context);
571     BKEYDATA* previous_key_e = 0;
572     BKEYDATA* previous_record_e = 0;
573 
574     encoded->Clear();
575 
576     for (i = 0; i < NUM_ENT(pp); i += P_INDX) {
577       unsigned int changes = 0;
578 
579       {
580 	BKEYDATA* key_e = GET_BKEYDATA(pp, i);
581 	if(debug) {
582 	  if(key_e->type != B_KEYDATA)
583 	    fprintf(stderr, "WordDBCompress::EncodeLBtree: unexpected type 0x%02x\n", key_e->type);
584 	  if(key_e->len <= 0)
585 	    fprintf(stderr, "WordDBCompress::EncodeLBtree: unexpected length <= 0\n");
586 	}
587 
588 	key.Unpack((const char*)key_e->data, (int)key_e->len);
589 
590 	if(previous_key_e != 0) {
591 	  int is_first_change = 1;
592 #if 0
593 	  //
594 	  // String
595 	  //
596 	  const String& word = key.GetWord();
597 	  const String& previous_word = previous_key.GetWord();
598 	  if(word != previous_word) {
599 	    unsigned int suffix_length = suffixlength(previous_word, word);
600 	    unsigned int prefix_length = word.length() - suffix_length;
601 	    encoded->PushValue(WORD_CMPR_VAL_PREFIX, prefix_length);
602 	    encoded->PushValue(WORD_CMPR_VAL_SUFFIX, suffix_length);
603 	    const char* suffix = word.get() + prefix_length;
604 	    encoded->PushString((const unsigned char*)suffix, suffix_length);
605 	    changes |= WORD_CMPR_VAL_FLAGS_STRING;
606 	    is_first_change = 0;
607 	  }
608 #endif
609 	  //
610 	  // Fields
611 	  //
612 	  int nfields = key.NFields();
613 	  for(int i = 0; i < nfields; i++) {
614 	    unsigned int value = key.Get(i);
615 	    unsigned int previous_value = previous_key.Get(i);
616 	    if(value != previous_value) {
617 	      if(is_first_change) {
618 		value -= previous_value;
619 		is_first_change = 0;
620 	      }
621 	      encoded->PushValue(WORD_CMPR_VAL_FIELDS + i, value);
622 	      changes |= WORD_CMPR_VAL_FLAGS_FIELD(i);
623 	    }
624 	  }
625 	} else {
626 #if 0
627 	  //
628 	  // String
629 	  //
630 	  const String& word = key.GetWord();
631 	  encoded->PushValue(WORD_CMPR_VAL_SUFFIX, word.length());
632 	  encoded->PushString((const unsigned char*)word.get(), word.length());
633 #endif
634 	  //
635 	  // Fields
636 	  //
637 	  int nfields = key.NFields();
638 	  for(int i = 0; i < nfields; i++) {
639 	    encoded->PushValue(i + WORD_CMPR_VAL_FIELDS, key.Get(i));
640 	  }
641 	}
642 
643 	previous_key_e = key_e;
644 	previous_key = key;
645       }
646       {
647 	BKEYDATA* record_e = GET_BKEYDATA(pp, i + 1);
648 	if(debug) {
649 	  if(record_e->type != B_KEYDATA)
650 	    fprintf(stderr, "WordDBCompress::EncodeLBtree: unexpected type 0x%02x\n", record_e->type);
651 	}
652 
653 	if(record_e->len <= 0) {
654 	  changes |= WORD_CMPR_VAL_FLAGS_RECORD_NO;
655 	} else {
656 	  record.Unpack((const char*)record_e->data, (int)record_e->len);
657 
658 	  switch(record.type) {
659 	  case WORD_RECORD_DATA:
660 	    if(previous_record_e &&
661 	       record.info.data == previous_record.info.data) {
662 	      changes |= WORD_CMPR_VAL_FLAGS_RECORD_EQ;
663 	    } else {
664 	      encoded->PushValue(WORD_CMPR_VAL_RVALUE, record.info.data);
665 	    }
666 	    break;
667 	  case WORD_RECORD_STR:
668 	    changes |= WORD_CMPR_VAL_FLAGS_RECORD_STR;
669 	    if(previous_record_e &&
670 	       record.info.str == previous_record.info.str) {
671 	      changes |= WORD_CMPR_VAL_FLAGS_RECORD_EQ;
672 	    } else {
673 	      const String& string = record.info.str;
674 	      encoded->PushValue(WORD_CMPR_VAL_RLENGTH, string.length());
675 	      encoded->PushString((const unsigned char*)string.get(), string.length());
676 	    }
677 	    break;
678 	  default:
679 	    fprintf(stderr, "WordDBCompress::EncodeLBtree: unexpected record.type = %d\n", record.type);
680 	    break;
681 	  }
682 
683 	  previous_record = record;
684 	  previous_record_e = record_e;
685 	}
686 
687       }
688       encoded->PushValue(WORD_CMPR_VAL_FLAGS, changes);
689     }
690 
691     encoded->Put(stream);
692   }
693 
694   int outbuff_length = *outbuff_lengthp;
695   if(stream.BuffLength() > (outbuff_length - WORD_CMPR_OVERHEAD)) {
696     fprintf(stderr, "WordDBCompress::CompressLBtree: compressed length = %d > available length = %d\n", stream.BuffLength(), outbuff_length - WORD_CMPR_OVERHEAD);
697     abort();
698   }
699   memcpy(outbuff + WORD_CMPR_OVERHEAD, stream.Buff(), stream.BuffLength());
700   outbuff_length = WORD_CMPR_OVERHEAD + stream.BuffLength();
701 
702   if(debug) {
703     unsigned char* tmp = (unsigned char*)malloc(inbuff_length);
704     memset((char*)tmp, '\0', inbuff_length);
705     UncompressLBtree(outbuff, outbuff_length, tmp, inbuff_length);
706     if(DiffPage(inbuff, tmp)) {
707       fprintf(stderr, "WordDBCompress::CompressLBtree: failed\n");
708       DumpPage(inbuff);
709       DumpPage(tmp);
710       exit(1);
711     }
712     free(tmp);
713   }
714 
715   *outbuff_lengthp = outbuff_length;
716 
717   return 0;
718 }
719 
720 int
UncompressBtree(const unsigned char * inbuff,int inbuff_length,unsigned char * outbuff,int outbuff_length)721 WordDBCompress::UncompressBtree(const unsigned char *inbuff, int inbuff_length, unsigned char *outbuff, int outbuff_length)
722 {
723   int ret = 0;
724   switch((*inbuff) & TYPE_MASK) {
725   case P_IBTREE:
726     ret = UncompressIBtree(inbuff, inbuff_length, outbuff, outbuff_length);
727     break;
728   case P_LBTREE:
729     ret = UncompressLBtree(inbuff, inbuff_length, outbuff, outbuff_length);
730     break;
731   }
732   if(verbose) fprintf(stderr, "WordDBCompress::UncompressBtree: page %d\n", PGNO((PAGE*)outbuff));
733   return ret;
734 }
735 
736 /*
737  * cdb___db_pitem --
738  *	Put an item on a page.
739  */
740 static void
cdb___db_pitem(PAGE * pagep,u_int32_t indx,u_int32_t nbytes,DBT * hdr,DBT * data)741 cdb___db_pitem(PAGE *pagep, u_int32_t indx, u_int32_t nbytes, DBT *hdr, DBT *data)
742 {
743   u_int8_t *p;
744 
745   HOFFSET(pagep) -= nbytes;
746   pagep->inp[indx] = HOFFSET(pagep);
747   ++NUM_ENT(pagep);
748 
749   p = P_ENTRY(pagep, indx);
750   memcpy(p, hdr->data, hdr->size);
751   if (data != NULL)
752     memcpy(p + hdr->size, data->data, data->size);
753 }
754 
755 int
UncompressIBtree(const unsigned char * inbuff,int inbuff_length,unsigned char * outbuff,int outbuff_length)756 WordDBCompress::UncompressIBtree(const unsigned char *inbuff, int inbuff_length, unsigned char *outbuff, int outbuff_length)
757 {
758   memset((char*)outbuff, '\0', outbuff_length);
759 
760   WordBitCompress stream(inbuff_length);
761   stream.BuffSet(inbuff + WORD_CMPR_OVERHEAD, inbuff_length - WORD_CMPR_OVERHEAD);
762 
763   PAGE *pp = (PAGE *)outbuff;
764 
765   TYPE_TAGS(pp) = *inbuff;
766   HOFFSET(pp) = outbuff_length;
767   short entry_count = 0;
768   pp->lsn.file = stream.GetUint(sizeof(pp->lsn.file) * 8);
769   pp->lsn.offset = stream.GetUint(sizeof(pp->lsn.offset) * 8);
770   PGNO(pp) = stream.GetUint(sizeof(PGNO(pp)) * 8);
771   entry_count = stream.GetUint(sizeof(NUM_ENT(pp)) * 8);
772   LEVEL(pp) = stream.GetUint(sizeof(LEVEL(pp)) * 8);
773   NEXT_PGNO(pp) = PREV_PGNO(pp) = 0;
774 
775   if(entry_count > 0) {
776     int i;
777     String packed;
778     BINTERNAL previous_bi;
779     BINTERNAL bi;
780     DBT hdr;
781     hdr.data = &bi;
782     hdr.size = SSZA(BINTERNAL, data);
783     DBT data;
784     WordKey key(context);
785     WordKey previous_key(context);
786     String word;
787 
788     encoded->Get(stream);
789 
790     for(i = 0; i < entry_count; i++) {
791       unsigned int changes = encoded->ShiftValue(WORD_CMPR_VAL_FLAGS);
792 
793       memset((char*)&bi, '\0', sizeof(BINTERNAL));
794 
795       if(changes & WORD_CMPR_VAL_FLAGS_EMPTY) {
796 	packed.trunc();
797 	//
798 	// Structural data
799 	//
800 	bi.pgno = encoded->ShiftValue(WORD_CMPR_VAL_PGNO);
801 	bi.nrecs = encoded->ShiftValue(WORD_CMPR_VAL_NRECS);
802       } else {
803 	key.Clear();
804 
805 	if(!previous_key.Empty()) {
806 	  int is_first_change = 1;
807 #if 0
808 	  //
809 	  // String
810 	  //
811 	  if(changes & WORD_CMPR_VAL_FLAGS_STRING) {
812 	    unsigned int suffix_length = encoded->ShiftValue(WORD_CMPR_VAL_SUFFIX);
813 	    unsigned int prefix_length = encoded->ShiftValue(WORD_CMPR_VAL_PREFIX);
814 	    unsigned char* suffix = encoded->ShiftString(suffix_length);
815 	    word.set(previous_key.GetWord().get(), prefix_length);
816 	    word.append((const char*)suffix, suffix_length);
817 	    key.SetWord(word);
818 	    is_first_change = 0;
819 	  } else {
820 	    key.SetWord(previous_key.GetWord());
821 	  }
822 #endif
823 	  //
824 	  // Fields
825 	  //
826 	  int nfields = key.NFields();
827 	  for(int j = 0; j < nfields; j++) {
828 	    if(changes & WORD_CMPR_VAL_FLAGS_FIELD(j)) {
829 	      unsigned int value = encoded->ShiftValue(j + WORD_CMPR_VAL_FIELDS);
830 	      if(is_first_change) {
831 		value += previous_key.Get(j);
832 		is_first_change = 0;
833 	      }
834 	      key.Set(j, value);
835 	    } else {
836 	      key.Set(j, previous_key.Get(j));
837 	    }
838 	  }
839 	  //
840 	  // Structural data
841 	  //
842 	  if(changes & WORD_CMPR_VAL_FLAGS_PGNO) {
843 	    bi.pgno = encoded->ShiftValue(WORD_CMPR_VAL_PGNO);
844 	  } else {
845 	    bi.pgno = previous_bi.pgno;
846 	  }
847 	  if(changes & WORD_CMPR_VAL_FLAGS_NRECS) {
848 	    bi.nrecs = encoded->ShiftValue(WORD_CMPR_VAL_NRECS);
849 	  } else {
850 	    bi.nrecs = previous_bi.nrecs;
851 	  }
852 	} else {
853 #if 0
854 	  //
855 	  // String
856 	  //
857 	  unsigned int string_length = encoded->ShiftValue(WORD_CMPR_VAL_SUFFIX);
858 	  unsigned char* string = encoded->ShiftString(string_length);
859 	  key.SetWord((const char*)string, string_length);
860 #endif
861 	  //
862 	  // Fields
863 	  //
864 	  int nfields = key.NFields();
865 	  for(int i = 0; i < nfields; i++) {
866 	    key.Set(i, encoded->ShiftValue(i + WORD_CMPR_VAL_FIELDS));
867 	  }
868 	  //
869 	  // Structural data
870 	  //
871 	  bi.pgno = encoded->ShiftValue(WORD_CMPR_VAL_PGNO);
872 	  bi.nrecs = encoded->ShiftValue(WORD_CMPR_VAL_NRECS);
873 	}
874 
875 	key.Pack(packed);
876       }
877       data.data = packed.get();
878       data.size = packed.length();
879       bi.len = packed.length();
880       bi.type = B_KEYDATA;
881 
882       //
883       // Insert entry in page
884       //
885       cdb___db_pitem(pp, i, BINTERNAL_SIZE(bi.len), &hdr, &data);
886 
887       //
888       // Current becomes previous
889       //
890       previous_bi = bi;
891       previous_key = key;
892     }
893   }
894 
895   if(debug) {
896     if(entry_count != NUM_ENT(pp))
897       fprintf(stderr, "WordDBCompress::UncompressIBtree: pgno %d: NUM_ENT(pp) = %d is different than entry_count = %d\n", PGNO(pp), NUM_ENT(pp), entry_count);
898   }
899   return 0;
900 }
901 
902 int
UncompressLBtree(const unsigned char * inbuff,int inbuff_length,unsigned char * outbuff,int outbuff_length)903 WordDBCompress::UncompressLBtree(const unsigned char *inbuff, int inbuff_length, unsigned char *outbuff, int outbuff_length)
904 {
905   memset((char*)outbuff, '\0', outbuff_length);
906 
907   WordBitCompress stream(inbuff_length);
908   stream.BuffSet(inbuff + WORD_CMPR_OVERHEAD, inbuff_length - WORD_CMPR_OVERHEAD);
909 
910   PAGE *pp = (PAGE *)outbuff;
911 
912   TYPE_TAGS(pp) = *inbuff;
913   HOFFSET(pp) = outbuff_length;
914   short entry_count = 0;
915   pp->lsn.file = stream.GetUint(sizeof(pp->lsn.file) * 8);
916   pp->lsn.offset = stream.GetUint(sizeof(pp->lsn.offset) * 8);
917   PGNO(pp) = stream.GetUint(sizeof(PGNO(pp)) * 8);
918   PREV_PGNO(pp) = stream.GetUint(sizeof(PREV_PGNO(pp)) * 8);
919   NEXT_PGNO(pp) = stream.GetUint(sizeof(NEXT_PGNO(pp)) * 8);
920   entry_count = stream.GetUint(sizeof(NUM_ENT(pp)) * 8);
921   LEVEL(pp) = stream.GetUint(sizeof(LEVEL(pp)) * 8);
922 
923   if(entry_count > 0) {
924     int i;
925     String packed;
926     BKEYDATA bk;
927     DBT hdr;
928     hdr.data = &bk;
929     hdr.size = SSZA(BKEYDATA, data);
930     DBT data;
931     WordKey key(context);
932     WordKey previous_key(context);
933     WordRecord record(context);
934     WordRecord previous_record(context);
935     int previous_record_p = 0;
936     String word;
937 
938     encoded->Get(stream);
939 
940     for(i = 0; i < entry_count; i += P_INDX) {
941       unsigned int changes = encoded->ShiftValue(WORD_CMPR_VAL_FLAGS);
942 
943       //
944       // Key
945       //
946       {
947 	memset((char*)&bk, '\0', sizeof(BKEYDATA));
948 
949 	key.Clear();
950 
951 	if(!previous_key.Empty()) {
952 	  int is_first_change = 1;
953 #if 0
954 	  //
955 	  // String
956 	  //
957 	  if(changes & WORD_CMPR_VAL_FLAGS_STRING) {
958 	    unsigned int suffix_length = encoded->ShiftValue(WORD_CMPR_VAL_SUFFIX);
959 	    unsigned int prefix_length = encoded->ShiftValue(WORD_CMPR_VAL_PREFIX);
960 	    unsigned char* suffix = encoded->ShiftString(suffix_length);
961 	    word.set(previous_key.GetWord().get(), prefix_length);
962 	    word.append((const char*)suffix, suffix_length);
963 	    key.SetWord(word);
964 	    is_first_change = 0;
965 	  } else {
966 	    key.SetWord(previous_key.GetWord());
967 	  }
968 #endif
969 	  //
970 	  // Fields
971 	  //
972 	  int nfields = key.NFields();
973 	  for(int j = 0; j < nfields; j++) {
974 	    if(changes & WORD_CMPR_VAL_FLAGS_FIELD(j)) {
975 	      unsigned int value = encoded->ShiftValue(j + WORD_CMPR_VAL_FIELDS);
976 	      if(is_first_change) {
977 		value += previous_key.Get(j);
978 		is_first_change = 0;
979 	      }
980 	      key.Set(j, value);
981 	    } else {
982 	      key.Set(j, previous_key.Get(j));
983 	    }
984 	  }
985 	} else {
986 #if 0
987 	  //
988 	  // String
989 	  //
990 	  unsigned int string_length = encoded->ShiftValue(WORD_CMPR_VAL_SUFFIX);
991 	  unsigned char* string = encoded->ShiftString(string_length);
992 	  key.SetWord((const char*)string, string_length);
993 #endif
994 	  //
995 	  // Fields
996 	  //
997 	  int nfields = key.NFields();
998 	  for(int i = 0; i < nfields; i++) {
999 	    key.Set(i, encoded->ShiftValue(i + WORD_CMPR_VAL_FIELDS));
1000 	  }
1001 	}
1002 
1003 	key.Pack(packed);
1004 
1005 	data.data = packed.get();
1006 	data.size = packed.length();
1007 	bk.len = packed.length();
1008 	bk.type = B_KEYDATA;
1009 
1010 	//
1011 	// Insert entry in page
1012 	//
1013 	cdb___db_pitem(pp, i, BKEYDATA_SIZE(bk.len), &hdr, &data);
1014 
1015 	previous_key = key;
1016       }
1017       //
1018       // Record
1019       //
1020       {
1021 	memset((char*)&bk, '\0', sizeof(BKEYDATA));
1022 
1023 	if(changes & WORD_CMPR_VAL_FLAGS_RECORD_NO) {
1024 	  packed.trunc();
1025 	} else {
1026 	  if(changes & WORD_CMPR_VAL_FLAGS_RECORD_STR) {
1027 	    record.type = WORD_RECORD_STR;
1028 	    if(previous_record_p &&
1029 	       (changes & WORD_CMPR_VAL_FLAGS_RECORD_EQ)) {
1030 	      record.info.str = previous_record.info.str;
1031 	    } else {
1032 	      unsigned int string_length = encoded->ShiftValue(WORD_CMPR_VAL_RLENGTH);
1033 	      unsigned char* string = encoded->ShiftString(string_length);
1034 	      record.info.str.set((const char*)string, string_length);
1035 	    }
1036 	  } else {
1037 	    record.type = WORD_RECORD_DATA;
1038 
1039 	    if(previous_record_p &&
1040 	       (changes & WORD_CMPR_VAL_FLAGS_RECORD_EQ)) {
1041 	      record.info.data = previous_record.info.data;
1042 	    } else {
1043 	      record.info.data = encoded->ShiftValue(WORD_CMPR_VAL_RVALUE);
1044 	    }
1045 	  }
1046 
1047 	  record.Pack(packed);
1048 
1049 	  previous_record = record;
1050 	  previous_record_p = 1;
1051 	}
1052 	data.data = packed.get();
1053 	data.size = packed.length();
1054 	bk.len = packed.length();
1055 	bk.type = B_KEYDATA;
1056 
1057 	//
1058 	// Insert entry in page
1059 	//
1060 	cdb___db_pitem(pp, i + 1, BKEYDATA_SIZE(bk.len), &hdr, &data);
1061       }
1062     }
1063   }
1064 
1065   if(debug) {
1066     if(entry_count != NUM_ENT(pp))
1067       fprintf(stderr, "WordDBCompress::UncompressLBtree: pgno %d: NUM_ENT(pp) = %d is different than entry_count = %d\n", PGNO(pp), NUM_ENT(pp), entry_count);
1068   }
1069 
1070   return 0;
1071 }
1072 
CmprInfo()1073 DB_CMPR_INFO* WordDBCompress::CmprInfo()
1074 {
1075   DB_CMPR_INFO *cmpr_info = new DB_CMPR_INFO;
1076 
1077   cmpr_info->user_data = (void *)this;
1078   cmpr_info->compress = WordDBCompress_compress_c;
1079   cmpr_info->uncompress = WordDBCompress_uncompress_c;
1080   cmpr_info->coefficient = 3;
1081   cmpr_info->max_npages = 9;
1082   cmprInfo = cmpr_info;
1083 
1084   return cmpr_info;
1085 }
1086 
1087 void
DumpPage(const unsigned char * page) const1088 WordDBCompress::DumpPage(const unsigned char* page) const
1089 {
1090   PAGE* pp = (PAGE*)page;
1091 
1092   fprintf(stderr, "--------------------------------------------------\n");
1093   fprintf(stderr, "pgno = %d ", PGNO(pp));
1094   fprintf(stderr, "lsn.file = %d ", pp->lsn.file);
1095   fprintf(stderr, "lsn.offset = %d ", pp->lsn.offset);
1096   fprintf(stderr, "prev_pgno = %d ", PREV_PGNO(pp));
1097   fprintf(stderr, "next_pgno = %d\n", NEXT_PGNO(pp));
1098   fprintf(stderr, "entries = %d ", NUM_ENT(pp));
1099   fprintf(stderr, "hf_offset = %d ", HOFFSET(pp));
1100   fprintf(stderr, "level = %d ", LEVEL(pp));
1101   fprintf(stderr, "type = %d\n", TYPE(pp));
1102   fprintf(stderr, "tags = 0x%02x\n", TAGS(pp));
1103   fprintf(stderr, "freespace = %d bytes from byte %d to byte %d\n", P_FREESPACE(pp), LOFFSET(pp), HOFFSET(pp));
1104 
1105   int i;
1106   for(i = 0; i < NUM_ENT(pp); i++) {
1107     fprintf(stderr, "index = %d, ", pp->inp[i]);
1108     unsigned char* data = 0;
1109     int data_length = 0;
1110     switch(TYPE(pp)) {
1111     case P_IBTREE:
1112       {
1113 	BINTERNAL* e = GET_BINTERNAL(pp, i);
1114 	fprintf(stderr, "len = %d, type = %d, pgno = %d, nrecs = %d\n", e->len, e->type, e->pgno, e->nrecs);
1115 	data = (unsigned char*)e->data;
1116 	data_length = e->len;
1117       }
1118       break;
1119     case P_LBTREE:
1120       {
1121 	BKEYDATA* e = GET_BKEYDATA(pp, i);
1122 	fprintf(stderr, "len = %d, type = %d\n", e->len, e->type);
1123 	data = (unsigned char*)e->data;
1124 	data_length = e->len;
1125       }
1126       break;
1127     }
1128     if(data_length > 0) {
1129       int j;
1130       for(j = 0; j < data_length; j++) {
1131 	fprintf(stderr, "(%d) ", data[j]);
1132       }
1133       fprintf(stderr, "\n");
1134     }
1135   }
1136 }
1137 
1138 int
DiffPage(const unsigned char * first,const unsigned char * second) const1139 WordDBCompress::DiffPage(const unsigned char* first, const unsigned char* second) const
1140 {
1141   PAGE* p1 = (PAGE*)first;
1142   PAGE* p2 = (PAGE*)second;
1143 
1144   if(TAGS(p1) != TAGS(p2)) return 1;
1145   if(TYPE(p1) != TYPE(p2)) return 1;
1146   if(PGNO(p1) != PGNO(p2)) return 1;
1147   if(p1->lsn.file != p2->lsn.file) return 1;
1148   if(p1->lsn.offset != p2->lsn.offset) return 1;
1149   if(TYPE(p1) == P_LBTREE) {
1150     if(PREV_PGNO(p1) != PREV_PGNO(p2)) return 1;
1151     if(NEXT_PGNO(p1) != NEXT_PGNO(p2)) return 1;
1152   }
1153   if(NUM_ENT(p1) != NUM_ENT(p2)) return 1;
1154   if(HOFFSET(p1) != HOFFSET(p2)) return 1;
1155   if(LEVEL(p1) != LEVEL(p2)) return 1;
1156 
1157   int i;
1158   for(i = 0; i < NUM_ENT(p1); i++) {
1159     unsigned char* data1 = 0;
1160     int data1_length = 0;
1161     unsigned char* data2 = 0;
1162     int data2_length = 0;
1163     switch(TYPE(p1)) {
1164     case P_IBTREE:
1165       {
1166 	BINTERNAL* e1 = GET_BINTERNAL(p1, i);
1167 	BINTERNAL* e2 = GET_BINTERNAL(p2, i);
1168 	if(e1->len != e2->len) return 1;
1169 	if(e1->type != e2->type) return 1;
1170 	if(e1->pgno != e2->pgno) return 1;
1171 	if(e1->nrecs != e2->nrecs) return 1;
1172 	data1 = (unsigned char*)e1->data;
1173 	data1_length = e1->len;
1174 	data2 = (unsigned char*)e2->data;
1175 	data2_length = e2->len;
1176       }
1177       break;
1178     case P_LBTREE:
1179       {
1180 	BKEYDATA* e1 = GET_BKEYDATA(p1, i);
1181 	BKEYDATA* e2 = GET_BKEYDATA(p2, i);
1182 	if(e1->len != e2->len) return 1;
1183 	if(e1->type != e2->type) return 1;
1184 	data1 = (unsigned char*)e1->data;
1185 	data1_length = e1->len;
1186 	data2 = (unsigned char*)e2->data;
1187 	data2_length = e2->len;
1188       }
1189       break;
1190     }
1191     if(data1_length > 0) {
1192       int j;
1193       for(j = 0; j < data1_length; j++) {
1194 	if(data1[j] != data2[j]) return 1;
1195       }
1196     }
1197   }
1198   return 0;
1199 }
1200