1 /** @file xapian-check-brass.cc
2  * @brief Check consistency of a brass table.
3  */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010 Olly Betts
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "xapian-check-brass.h"
26 
27 #include "bitstream.h"
28 
29 #include "internaltypes.h"
30 
31 #include "brass_check.h"
32 #include "brass_cursor.h"
33 #include "brass_table.h"
34 #include "brass_types.h"
35 #include "pack.h"
36 #include "valuestats.h"
37 
38 #include <xapian.h>
39 
40 #include "autoptr.h"
41 #include <iostream>
42 
43 using namespace std;
44 
/** Report whether @a key is a user metadata key.
 *
 *  A key qualifies when it is at least two bytes long and begins with the
 *  byte pair '\0', '\xc0'.
 */
static inline bool
is_user_metadata_key(const string & key)
{
    // Too short to hold the two-byte prefix.
    if (key.size() <= 1) return false;
    // The first byte of the prefix must be a zero byte.
    if (key[0] != '\0') return false;
    return key[1] == '\xc0';
}
50 
51 struct VStats : public ValueStats {
52     Xapian::doccount freq_real;
53 
VStatsVStats54     VStats() : ValueStats(), freq_real(0) {}
55 };
56 
57 size_t
check_brass_table(const char * tablename,string filename,brass_revision_number_t * rev_ptr,int opts,vector<Xapian::termcount> & doclens,Xapian::docid db_last_docid)58 check_brass_table(const char * tablename, string filename,
59 		  brass_revision_number_t * rev_ptr, int opts,
60 		  vector<Xapian::termcount> & doclens,
61 		  Xapian::docid db_last_docid)
62 {
63     filename += '.';
64 
65     // Check the btree structure.
66     BrassTableCheck::check(tablename, filename, rev_ptr, opts);
67 
68     // Now check the brass structures inside the btree.
69     BrassTable table(tablename, filename, true);
70     if (rev_ptr) {
71 	table.open(*rev_ptr);
72     } else {
73 	table.open();
74     }
75     AutoPtr<BrassCursor> cursor(table.cursor_get());
76 
77     size_t errors = 0;
78 
79     cursor->find_entry(string());
80     cursor->next(); // Skip the empty entry.
81 
82     if (strcmp(tablename, "postlist") == 0) {
83 	// Now check the structure of each postlist in the table.
84 	map<Xapian::valueno, VStats> valuestats;
85 	string current_term;
86 	Xapian::docid lastdid = 0;
87 	Xapian::termcount termfreq = 0, collfreq = 0;
88 	Xapian::termcount tf = 0, cf = 0;
89 	bool have_metainfo_key = false;
90 
91 	// The first key/tag pair should be the METAINFO - though this may be
92 	// missing if the table only contains user-metadata.
93 	if (!cursor->after_end()) {
94 	    if (cursor->current_key == string("", 1)) {
95 		have_metainfo_key = true;
96 		cursor->read_tag();
97 		// Check format of the METAINFO key.
98 		totlen_t total_doclen;
99 		Xapian::docid last_docid;
100 		Xapian::termcount doclen_lbound;
101 		Xapian::termcount doclen_ubound;
102 		Xapian::termcount wdf_ubound;
103 
104 		const char * data = cursor->current_tag.data();
105 		const char * end = data + cursor->current_tag.size();
106 		if (!unpack_uint(&data, end, &last_docid)) {
107 		    cout << "Tag containing meta information is corrupt (couldn't read last_docid)." << endl;
108 		    ++errors;
109 		} else if (!unpack_uint(&data, end, &doclen_lbound)) {
110 		    cout << "Tag containing meta information is corrupt (couldn't read doclen_lbound)." << endl;
111 		    ++errors;
112 		} else if (!unpack_uint(&data, end, &wdf_ubound)) {
113 		    cout << "Tag containing meta information is corrupt (couldn't read wdf_ubound)." << endl;
114 		    ++errors;
115 		} else if (!unpack_uint(&data, end, &doclen_ubound)) {
116 		    cout << "Tag containing meta information is corrupt (couldn't read doclen_ubound)." << endl;
117 		    ++errors;
118 		} else if (!unpack_uint_last(&data, end, &total_doclen)) {
119 		    cout << "Tag containing meta information is corrupt (couldn't read total_doclen)." << endl;
120 		    ++errors;
121 		} else if (data != end) {
122 		    cout << "Tag containing meta information is corrupt (junk at end)." << endl;
123 		    ++errors;
124 		}
125 		cursor->next();
126 	    }
127 	}
128 
129 	for ( ; !cursor->after_end(); cursor->next()) {
130 	    string & key = cursor->current_key;
131 
132 	    if (is_user_metadata_key(key)) {
133 		// User metadata can be anything, so we can't do any particular
134 		// checks on it other than to check that the tag isn't empty.
135 		cursor->read_tag();
136 		if (cursor->current_tag.empty()) {
137 		    cout << "User metadata item is empty" << endl;
138 		    ++errors;
139 		}
140 		continue;
141 	    }
142 
143 	    if (!have_metainfo_key) {
144 		have_metainfo_key = true;
145 		cout << "METAINFO key missing from postlist table" << endl;
146 		++errors;
147 	    }
148 
149 	    if (key.size() >= 2 && key[0] == '\0' && key[1] == '\xe0') {
150 		// doclen chunk
151 		const char * pos, * end;
152 		Xapian::docid did = 1;
153 		if (key.size() > 2) {
154 		    // Non-initial chunk.
155 		    pos = key.data();
156 		    end = pos + key.size();
157 		    pos += 2;
158 		    if (!unpack_uint_preserving_sort(&pos, end, &did)) {
159 			cout << "Error unpacking docid from doclen key" << endl;
160 			++errors;
161 			continue;
162 		    }
163 		}
164 
165 		cursor->read_tag();
166 		pos = cursor->current_tag.data();
167 		end = pos + cursor->current_tag.size();
168 		if (key.size() == 2) {
169 		    // Initial chunk.
170 		    if (end - pos < 2 || pos[0] || pos[1]) {
171 			cout << "Initial doclen chunk has nonzero dummy fields" << endl;
172 			++errors;
173 			continue;
174 		    }
175 		    pos += 2;
176 		    if (!unpack_uint(&pos, end, &did)) {
177 			cout << "Failed to unpack firstdid for doclen" << endl;
178 			++errors;
179 			continue;
180 		    }
181 		    ++did;
182 		    if (did <= lastdid) {
183 			cout << "First did in this chunk is <= last in "
184 			    "prev chunk" << endl;
185 			++errors;
186 		    }
187 		}
188 
189 		bool is_last_chunk;
190 		if (!unpack_bool(&pos, end, &is_last_chunk)) {
191 		    cout << "Failed to unpack last chunk flag for doclen" << endl;
192 		    ++errors;
193 		    continue;
194 		}
195 		// Read what the final document ID in this chunk is.
196 		if (!unpack_uint(&pos, end, &lastdid)) {
197 		    cout << "Failed to unpack increase to last" << endl;
198 		    ++errors;
199 		    continue;
200 		}
201 		lastdid += did;
202 		bool bad = false;
203 		while (true) {
204 		    Xapian::termcount doclen;
205 		    if (!unpack_uint(&pos, end, &doclen)) {
206 			cout << "Failed to unpack doclen" << endl;
207 			++errors;
208 			bad = true;
209 			break;
210 		    }
211 
212 		    if (did > db_last_docid) {
213 			cout << "document id " << did << " in doclen stream "
214 			     << "is larger than get_last_docid() "
215 			     << db_last_docid << endl;
216 			++errors;
217 		    }
218 
219 		    if (!doclens.empty()) {
220 			// In brass, a document without terms doesn't get a
221 			// termlist entry.
222 			Xapian::termcount termlist_doclen = 0;
223 			if (did < doclens.size())
224 			    termlist_doclen = doclens[did];
225 
226 			if (doclen != termlist_doclen) {
227 			    cout << "document id " << did << ": length "
228 				 << doclen << " doesn't match "
229 				 << termlist_doclen << " in the termlist table"
230 				 << endl;
231 			    ++errors;
232 			}
233 		    }
234 
235 		    if (pos == end) break;
236 
237 		    Xapian::docid inc;
238 		    if (!unpack_uint(&pos, end, &inc)) {
239 			cout << "Failed to unpack docid increase" << endl;
240 			++errors;
241 			bad = true;
242 			break;
243 		    }
244 		    ++inc;
245 		    did += inc;
246 		    if (did > lastdid) {
247 			cout << "docid " << did << " > last docid " << lastdid
248 			     << endl;
249 			++errors;
250 		    }
251 		}
252 		if (bad) {
253 		    continue;
254 		}
255 		if (is_last_chunk) {
256 		    if (did != lastdid) {
257 			cout << "lastdid " << lastdid << " != last did " << did
258 			     << endl;
259 			++errors;
260 		    }
261 		}
262 
263 		continue;
264 	    }
265 
266 	    if (key.size() >= 2 && key[0] == '\0' && key[1] == '\xd0') {
267 		// Value stats.
268 		const char * p = key.data();
269 		const char * end = p + key.length();
270 		p += 2;
271 		Xapian::valueno slot;
272 		if (!unpack_uint_last(&p, end, &slot)) {
273 		    cout << "Bad valuestats key (no slot)" << endl;
274 		    ++errors;
275 		    continue;
276 		}
277 
278 		cursor->read_tag();
279 		p = cursor->current_tag.data();
280 		end = p + cursor->current_tag.size();
281 
282 		VStats & v = valuestats[slot];
283 		if (!unpack_uint(&p, end, &v.freq)) {
284 		    if (*p == 0) {
285 			cout << "Incomplete stats item in value table" << endl;
286 		    } else {
287 			cout << "Frequency statistic in value table is too large" << endl;
288 		    }
289 		    ++errors;
290 		    continue;
291 		}
292 		if (!unpack_string(&p, end, v.lower_bound)) {
293 		    if (*p == 0) {
294 			cout << "Incomplete stats item in value table" << endl;
295 		    } else {
296 			cout << "Lower bound statistic in value table is too large" << endl;
297 		    }
298 		    ++errors;
299 		    continue;
300 		}
301 		size_t len = end - p;
302 		if (len == 0) {
303 		    v.upper_bound = v.lower_bound;
304 		} else {
305 		    v.upper_bound.assign(p, len);
306 		}
307 
308 		continue;
309 	    }
310 
311 	    if (key.size() >= 2 && key[0] == '\0' && key[1] == '\xd8') {
312 		// Value stream chunk.
313 		const char * p = key.data();
314 		const char * end = p + key.length();
315 		p += 2;
316 		Xapian::valueno slot;
317 		if (!unpack_uint(&p, end, &slot)) {
318 		    cout << "Bad value chunk key (no slot)" << endl;
319 		    ++errors;
320 		    continue;
321 		}
322 		Xapian::docid did;
323 		if (!unpack_uint_preserving_sort(&p, end, &did)) {
324 		    cout << "Bad value chunk key (no docid)" << endl;
325 		    ++errors;
326 		    continue;
327 		}
328 		if (p != end) {
329 		    cout << "Bad value chunk key (trailing junk)" << endl;
330 		    ++errors;
331 		    continue;
332 		}
333 
334 		VStats & v = valuestats[slot];
335 
336 		cursor->read_tag();
337 		p = cursor->current_tag.data();
338 		end = p + cursor->current_tag.size();
339 
340 		while (true) {
341 		    string value;
342 		    if (!unpack_string(&p, end, value)) {
343 			cout << "Failed to unpack value from chunk" << endl;
344 			++errors;
345 			break;
346 		    }
347 
348 		    ++v.freq_real;
349 
350 		    // FIXME: Cross-check that docid did has value slot (and
351 		    // vice versa - that there's a value here if the slot entry
352 		    // says so).
353 
354 		    // FIXME: Check if the bounds are tight?  Or is that better
355 		    // as a separate tool which can also update the bounds?
356 		    if (value < v.lower_bound) {
357 			cout << "Value slot " << slot << " has value below "
358 				"lower bound: '" << value << "' < '"
359 			     << v.lower_bound << "'" << endl;
360 			++errors;
361 		    } else if (value > v.upper_bound) {
362 			cout << "Value slot " << slot << " has value above "
363 				"upper bound: '" << value << "' > '"
364 			     << v.upper_bound << "'" << endl;
365 			++errors;
366 		    }
367 
368 		    if (p == end) break;
369 		    Xapian::docid delta;
370 		    if (!unpack_uint(&p, end, &delta)) {
371 			cout << "Failed to unpack docid delta from chunk" << endl;
372 			++errors;
373 			break;
374 		    }
375 		    Xapian::docid new_did = did + delta + 1;
376 		    if (new_did <= did) {
377 			cout << "docid overflowed in value chunk" << endl;
378 			++errors;
379 			break;
380 		    }
381 		    did = new_did;
382 
383 		    if (did > db_last_docid) {
384 			cout << "document id " << did << " in value chunk "
385 			     << "is larger than get_last_docid() "
386 			     << db_last_docid << endl;
387 			++errors;
388 		    }
389 		}
390 		continue;
391 	    }
392 
393 	    const char * pos, * end;
394 
395 	    // Get term from key.
396 	    pos = key.data();
397 	    end = pos + key.size();
398 
399 	    string term;
400 	    Xapian::docid did;
401 	    if (!unpack_string_preserving_sort(&pos, end, term)) {
402 		cout << "Error unpacking termname from key" << endl;
403 		++errors;
404 		continue;
405 	    }
406 	    if (!current_term.empty() && term != current_term) {
407 		// The term changed unexpectedly.
408 		if (pos == end) {
409 		    cout << "No last chunk for term `" << current_term
410 			 << "'" << endl;
411 		    current_term.resize(0);
412 		} else {
413 		    cout << "Mismatch in follow-on chunk in posting "
414 			"list for term `" << current_term << "' (got `"
415 			<< term << "')" << endl;
416 		    current_term = term;
417 		    tf = cf = 0;
418 		    lastdid = 0;
419 		}
420 		++errors;
421 	    }
422 	    if (pos == end) {
423 		// First chunk.
424 		if (term == current_term) {
425 		    // This probably isn't possible.
426 		    cout << "First posting list chunk for term `"
427 			 << term << "' follows previous chunk for the same "
428 			 "term" << endl;
429 		    ++errors;
430 		}
431 		current_term = term;
432 		tf = cf = 0;
433 
434 		// Unpack extra header from first chunk.
435 		cursor->read_tag();
436 		pos = cursor->current_tag.data();
437 		end = pos + cursor->current_tag.size();
438 		if (!unpack_uint(&pos, end, &termfreq)) {
439 		    cout << "Failed to unpack termfreq for term `" << term
440 			 << "'" << endl;
441 		    ++errors;
442 		    continue;
443 		}
444 		if (!unpack_uint(&pos, end, &collfreq)) {
445 		    cout << "Failed to unpack collfreq for term `" << term
446 			 << "'" << endl;
447 		    ++errors;
448 		    continue;
449 		}
450 		if (!unpack_uint(&pos, end, &did)) {
451 		    cout << "Failed to unpack firstdid for term `" << term
452 			 << "'" << endl;
453 		    ++errors;
454 		    continue;
455 		}
456 		++did;
457 	    } else {
458 		// Continuation chunk.
459 		if (current_term.empty()) {
460 		    cout << "First chunk for term `" << current_term << "' "
461 			 "is a continuation chunk" << endl;
462 		    ++errors;
463 		    current_term = term;
464 		}
465 		AssertEq(current_term, term);
466 		if (!unpack_uint_preserving_sort(&pos, end, &did)) {
467 		    cout << "Failed to unpack did from key" << endl;
468 		    ++errors;
469 		    continue;
470 		}
471 		if (did <= lastdid) {
472 		    cout << "First did in this chunk is <= last in "
473 			"prev chunk" << endl;
474 		    ++errors;
475 		}
476 		cursor->read_tag();
477 		pos = cursor->current_tag.data();
478 		end = pos + cursor->current_tag.size();
479 	    }
480 
481 	    bool is_last_chunk;
482 	    if (!unpack_bool(&pos, end, &is_last_chunk)) {
483 		cout << "Failed to unpack last chunk flag" << endl;
484 		++errors;
485 		continue;
486 	    }
487 	    // Read what the final document ID in this chunk is.
488 	    if (!unpack_uint(&pos, end, &lastdid)) {
489 		cout << "Failed to unpack increase to last" << endl;
490 		++errors;
491 		continue;
492 	    }
493 	    lastdid += did;
494 	    bool bad = false;
495 	    while (true) {
496 		Xapian::termcount wdf;
497 		if (!unpack_uint(&pos, end, &wdf)) {
498 		    cout << "Failed to unpack wdf" << endl;
499 		    ++errors;
500 		    bad = true;
501 		    break;
502 		}
503 		++tf;
504 		cf += wdf;
505 
506 		if (pos == end) break;
507 
508 		Xapian::docid inc;
509 		if (!unpack_uint(&pos, end, &inc)) {
510 		    cout << "Failed to unpack docid increase" << endl;
511 		    ++errors;
512 		    bad = true;
513 		    break;
514 		}
515 		++inc;
516 		did += inc;
517 		if (did > lastdid) {
518 		    cout << "docid " << did << " > last docid " << lastdid
519 			 << endl;
520 		    ++errors;
521 		}
522 	    }
523 	    if (bad) {
524 		continue;
525 	    }
526 	    if (is_last_chunk) {
527 		if (tf != termfreq) {
528 		    cout << "termfreq " << termfreq << " != # of entries "
529 			 << tf << endl;
530 		    ++errors;
531 		}
532 		if (cf != collfreq) {
533 		    cout << "collfreq " << collfreq << " != sum wdf " << cf
534 			 << endl;
535 		    ++errors;
536 		}
537 		if (did != lastdid) {
538 		    cout << "lastdid " << lastdid << " != last did " << did
539 			 << endl;
540 		    ++errors;
541 		}
542 		current_term.resize(0);
543 	    }
544 	}
545 	if (!current_term.empty()) {
546 	    cout << "Last term `" << current_term << "' has no last chunk"
547 		 << endl;
548 	    ++errors;
549 	}
550 
551 	map<Xapian::valueno, VStats>::const_iterator i;
552 	for (i = valuestats.begin(); i != valuestats.end(); ++i) {
553 	    if (i->second.freq != i->second.freq_real) {
554 		cout << "Value stats frequency for slot " << i->first << " is "
555 		     << i->second.freq << " but recounting gives "
556 		     << i->second.freq_real << endl;
557 		++errors;
558 	    }
559 	}
560     } else if (strcmp(tablename, "record") == 0) {
561 	// Now check the contents of the record table.  Any data is valid as
562 	// the tag so we don't check the tags.
563 	for ( ; !cursor->after_end(); cursor->next()) {
564 	    string & key = cursor->current_key;
565 
566 	    // Get docid from key.
567 	    const char * pos = key.data();
568 	    const char * end = pos + key.size();
569 
570 	    Xapian::docid did;
571 	    if (!unpack_uint_preserving_sort(&pos, end, &did)) {
572 		cout << "Error unpacking docid from key" << endl;
573 		++errors;
574 	    } else if (pos != end) {
575 		cout << "Extra junk in key" << endl;
576 		++errors;
577 	    }
578 	}
579     } else if (strcmp(tablename, "termlist") == 0) {
580 	// Now check the contents of the termlist table.
581 	for ( ; !cursor->after_end(); cursor->next()) {
582 	    string & key = cursor->current_key;
583 
584 	    // Get docid from key.
585 	    const char * pos = key.data();
586 	    const char * end = pos + key.size();
587 
588 	    Xapian::docid did;
589 	    if (!unpack_uint_preserving_sort(&pos, end, &did)) {
590 		cout << "Error unpacking docid from key" << endl;
591 		++errors;
592 		continue;
593 	    }
594 
595 	    if (end - pos == 1 && *pos == '\0') {
596 		// Value slots used entry.
597 		cursor->read_tag();
598 
599 		pos = cursor->current_tag.data();
600 		end = pos + cursor->current_tag.size();
601 
602 		if (pos == end) {
603 		    cout << "Empty value slots used tag" << endl;
604 		    ++errors;
605 		    continue;
606 		}
607 
608 		Xapian::valueno prev_slot;
609 		if (!unpack_uint(&pos, end, &prev_slot)) {
610 		    cout << "Value slot encoding corrupt" << endl;
611 		    ++errors;
612 		    continue;
613 		}
614 
615 		while (pos != end) {
616 		    Xapian::valueno slot;
617 		    if (!unpack_uint(&pos, end, &slot)) {
618 			cout << "Value slot encoding corrupt" << endl;
619 			++errors;
620 			break;
621 		    }
622 		    slot += prev_slot + 1;
623 		    if (slot <= prev_slot) {
624 			cout << "Value slot number overflowed (" << prev_slot << " -> " << slot << ")" << endl;
625 			++errors;
626 		    }
627 		    prev_slot = slot;
628 		}
629 		continue;
630 	    }
631 
632 	    if (pos != end) {
633 		cout << "Extra junk in key" << endl;
634 		++errors;
635 		continue;
636 	    }
637 
638 	    cursor->read_tag();
639 
640 	    pos = cursor->current_tag.data();
641 	    end = pos + cursor->current_tag.size();
642 
643 	    if (pos == end) {
644 		// Empty termlist.
645 		continue;
646 	    }
647 
648 	    Xapian::termcount doclen, termlist_size;
649 
650 	    // Read doclen
651 	    if (!unpack_uint(&pos, end, &doclen)) {
652 		if (pos != 0) {
653 		    cout << "doclen out of range" << endl;
654 		} else {
655 		    cout << "Unexpected end of data when reading doclen" << endl;
656 		}
657 		++errors;
658 		continue;
659 	    }
660 
661 	    // Read termlist_size
662 	    if (!unpack_uint(&pos, end, &termlist_size)) {
663 		if (pos != 0) {
664 		    cout << "termlist_size out of range" << endl;
665 		} else {
666 		    cout << "Unexpected end of data when reading termlist_size" << endl;
667 		}
668 		++errors;
669 		continue;
670 	    }
671 
672 	    Xapian::termcount actual_doclen = 0, actual_termlist_size = 0;
673 	    string current_tname;
674 
675 	    bool bad = false;
676 	    while (pos != end) {
677 		Xapian::doccount current_wdf = 0;
678 		bool got_wdf = false;
679 		// If there was a previous term, how much to reuse.
680 		if (!current_tname.empty()) {
681 		    string::size_type len = static_cast<unsigned char>(*pos++);
682 		    if (len > current_tname.length()) {
683 			// The wdf was squeezed into the same byte.
684 			current_wdf = len / (current_tname.length() + 1) - 1;
685 			len %= (current_tname.length() + 1);
686 			got_wdf = true;
687 		    }
688 		    current_tname.resize(len);
689 		}
690 		// What to append (note len must be positive, since just truncating
691 		// always takes us backwards in the sort order)
692 		string::size_type len = static_cast<unsigned char>(*pos++);
693 		current_tname.append(pos, len);
694 		pos += len;
695 
696 		if (!got_wdf) {
697 		    // Read wdf
698 		    if (!unpack_uint(&pos, end, &current_wdf)) {
699 			if (pos == 0) {
700 			    cout << "Unexpected end of data when reading termlist current_wdf" << endl;
701 			} else {
702 			    cout << "Size of wdf out of range, in termlist" << endl;
703 			}
704 			++errors;
705 			bad = true;
706 			break;
707 		    }
708 		}
709 
710 		++actual_termlist_size;
711 		actual_doclen += current_wdf;
712 	    }
713 	    if (bad) {
714 		continue;
715 	    }
716 
717 	    if (termlist_size != actual_termlist_size) {
718 		cout << "termlist_size != # of entries in termlist" << endl;
719 		++errors;
720 	    }
721 	    if (doclen != actual_doclen) {
722 		cout << "doclen != sum(wdf)" << endl;
723 		++errors;
724 	    }
725 
726 	    // + 1 so that did is a valid subscript.
727 	    if (doclens.size() <= did) doclens.resize(did + 1);
728 	    doclens[did] = actual_doclen;
729 	}
730     } else if (strcmp(tablename, "position") == 0) {
731 	// Now check the contents of the position table.
732 	for ( ; !cursor->after_end(); cursor->next()) {
733 	    string & key = cursor->current_key;
734 
735 	    // Get docid from key.
736 	    const char * pos = key.data();
737 	    const char * end = pos + key.size();
738 
739 	    Xapian::docid did;
740 	    if (!unpack_uint_preserving_sort(&pos, end, &did)) {
741 		cout << "Error unpacking docid from key" << endl;
742 		++errors;
743 		continue;
744 	    }
745 	    if (pos == end) {
746 		cout << "No termname in key" << endl;
747 		++errors;
748 		continue;
749 	    }
750 
751 	    if (!doclens.empty()) {
752 		// In glass, a document without terms doesn't get a
753 		// termlist entry, so we can't tell the difference
754 		// easily.
755 		if (did >= doclens.size() || doclens[did] == 0) {
756 		    cout << "Position list entry for document " << did
757 			 << " which doesn't exist or has no terms" << endl;
758 		    ++errors;
759 		    continue;
760 		}
761 	    }
762 
763 	    cursor->read_tag();
764 
765 	    const string & data = cursor->current_tag;
766 	    pos = data.data();
767 	    end = pos + data.size();
768 
769 	    Xapian::termpos pos_last;
770 	    if (!unpack_uint(&pos, end, &pos_last)) {
771 		cout << tablename << " table: Position list data corrupt" << endl;
772 		++errors;
773 		continue;
774 	    }
775 	    if (pos == end) {
776 		// Special case for single entry position list.
777 	    } else {
778 		// Skip the header we just read.
779 		BitReader rd(data, pos - data.data());
780 		Xapian::termpos pos_first = rd.decode(pos_last);
781 		Xapian::termpos pos_size = rd.decode(pos_last - pos_first) + 2;
782 		vector<Xapian::termpos> positions;
783 		positions.resize(pos_size);
784 		positions[0] = pos_first;
785 		positions.back() = pos_last;
786 		rd.decode_interpolative(positions, 0, pos_size - 1);
787 		vector<Xapian::termpos>::const_iterator current_pos = positions.begin();
788 		Xapian::termpos lastpos = *current_pos++;
789 		while (current_pos != positions.end()) {
790 		    Xapian::termpos termpos = *current_pos++;
791 		    if (termpos <= lastpos) {
792 			cout << tablename << " table: Positions not strictly monotonically increasing" << endl;
793 			++errors;
794 			break;
795 		    }
796 		    lastpos = termpos;
797 		}
798 	    }
799 	}
800     } else {
801 	cout << tablename << " table: Don't know how to check structure\n" << endl;
802 	return errors;
803     }
804 
805     if (!errors)
806 	cout << tablename << " table structure checked OK\n" << endl;
807     else
808 	cout << tablename << " table errors found: " << errors << "\n" << endl;
809 
810     return errors;
811 }
812