1 /* chert_postlist.cc: Postlists in a chert database
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002,2003,2004,2005,2007,2008,2009,2011,2014,2015 Olly Betts
5  * Copyright 2007,2008,2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "chert_postlist.h"
26 
27 #include "chert_cursor.h"
28 #include "chert_database.h"
29 #include "debuglog.h"
30 #include "noreturn.h"
31 #include "pack.h"
32 #include "str.h"
33 
34 using Xapian::Internal::intrusive_ptr;
35 
36 void
get_freqs(const string & term,Xapian::doccount * termfreq_ptr,Xapian::termcount * collfreq_ptr) const37 ChertPostListTable::get_freqs(const string & term,
38 			      Xapian::doccount * termfreq_ptr,
39 			      Xapian::termcount * collfreq_ptr) const
40 {
41     string key = make_key(term);
42     string tag;
43     if (!get_exact_entry(key, tag)) {
44 	if (termfreq_ptr)
45 	    *termfreq_ptr = 0;
46 	if (collfreq_ptr)
47 	    *collfreq_ptr = 0;
48     } else {
49 	const char * p = tag.data();
50 	ChertPostList::read_number_of_entries(&p, p + tag.size(),
51 					      termfreq_ptr, collfreq_ptr);
52     }
53 }
54 
55 Xapian::termcount
get_doclength(Xapian::docid did,intrusive_ptr<const ChertDatabase> db) const56 ChertPostListTable::get_doclength(Xapian::docid did,
57 				  intrusive_ptr<const ChertDatabase> db) const {
58     if (!doclen_pl.get()) {
59 	// Don't keep a reference back to the database, since this
60 	// would make a reference loop.
61 	doclen_pl.reset(new ChertPostList(db, string(), false));
62     }
63     if (!doclen_pl->jump_to(did))
64 	throw Xapian::DocNotFoundError("Document " + str(did) + " not found");
65     return doclen_pl->get_wdf();
66 }
67 
68 bool
document_exists(Xapian::docid did,intrusive_ptr<const ChertDatabase> db) const69 ChertPostListTable::document_exists(Xapian::docid did,
70 				    intrusive_ptr<const ChertDatabase> db) const
71 {
72     if (!doclen_pl.get()) {
73 	// Don't keep a reference back to the database, since this
74 	// would make a reference loop.
75 	doclen_pl.reset(new ChertPostList(db, string(), false));
76     }
77     return (doclen_pl->jump_to(did));
78 }
79 
80 // How big should chunks in the posting list be?  (They
81 // will grow slightly bigger than this, but not more than a
82 // few bytes extra) - FIXME: tune this value to try to
83 // maximise how well blocks are used.  Or performance.
84 // Or indexing speed.  Or something...
85 const unsigned int CHUNKSIZE = 2000;
86 
87 /** PostlistChunkWriter is a wrapper which acts roughly as an
88  *  output iterator on a postlist chunk, taking care of the
89  *  messy details.  It's intended to be used with deletion and
90  *  replacing of entries, not for adding to the end, when it's
91  *  not really needed.
92  */
93 class Chert::PostlistChunkWriter {
94     public:
95 	PostlistChunkWriter(const string &orig_key_,
96 			    bool is_first_chunk_,
97 			    const string &tname_,
98 			    bool is_last_chunk_);
99 
100 	/// Append an entry to this chunk.
101 	void append(ChertTable * table, Xapian::docid did,
102 		    Xapian::termcount wdf);
103 
104 	/// Append a block of raw entries to this chunk.
raw_append(Xapian::docid first_did_,Xapian::docid current_did_,const string & s)105 	void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
106 			const string & s) {
107 	    Assert(!started);
108 	    first_did = first_did_;
109 	    current_did = current_did_;
110 	    if (!s.empty()) {
111 		chunk.append(s);
112 		started = true;
113 	    }
114 	}
115 
116 	/** Flush the chunk to the buffered table.  Note: this may write it
117 	 *  with a different key to the original one, if for example the first
118 	 *  entry has changed.
119 	 */
120 	void flush(ChertTable *table);
121 
122     private:
123 	string orig_key;
124 	string tname;
125 	bool is_first_chunk;
126 	bool is_last_chunk;
127 	bool started;
128 
129 	Xapian::docid first_did;
130 	Xapian::docid current_did;
131 
132 	string chunk;
133 };
134 
135 using Chert::PostlistChunkWriter;
136 
137 // Static functions
138 
139 /// Report an error when reading the posting list.
140 XAPIAN_NORETURN(static void report_read_error(const char * position));
report_read_error(const char * position)141 static void report_read_error(const char * position)
142 {
143     if (position == 0) {
144 	// data ran out
145 	LOGLINE(DB, "ChertPostList data ran out");
146 	throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
147     }
148     // overflow
149     LOGLINE(DB, "ChertPostList value too large");
150     throw Xapian::RangeError("Value in posting list too large.");
151 }
152 
get_tname_from_key(const char ** src,const char * end,string & tname)153 static inline bool get_tname_from_key(const char **src, const char *end,
154 			       string &tname)
155 {
156     return unpack_string_preserving_sort(src, end, tname);
157 }
158 
159 static inline bool
check_tname_in_key_lite(const char ** keypos,const char * keyend,const string & tname)160 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
161 {
162     string tname_in_key;
163 
164     if (keyend - *keypos >= 2 && (*keypos)[0] == '\0' && (*keypos)[1] == '\xe0') {
165 	*keypos += 2;
166     } else {
167 	// Read the termname.
168 	if (!get_tname_from_key(keypos, keyend, tname_in_key))
169 	    report_read_error(*keypos);
170     }
171 
172     // This should only fail if the postlist doesn't exist at all.
173     return tname_in_key == tname;
174 }
175 
176 static inline bool
check_tname_in_key(const char ** keypos,const char * keyend,const string & tname)177 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
178 {
179     if (*keypos == keyend) return false;
180 
181     return check_tname_in_key_lite(keypos, keyend, tname);
182 }
183 
184 /// Read the start of the first chunk in the posting list.
185 static Xapian::docid
read_start_of_first_chunk(const char ** posptr,const char * end,Xapian::doccount * number_of_entries_ptr,Xapian::termcount * collection_freq_ptr)186 read_start_of_first_chunk(const char ** posptr,
187 			  const char * end,
188 			  Xapian::doccount * number_of_entries_ptr,
189 			  Xapian::termcount * collection_freq_ptr)
190 {
191     LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk", (const void *)posptr | (const void *)end | (void *)number_of_entries_ptr | (void *)collection_freq_ptr);
192 
193     ChertPostList::read_number_of_entries(posptr, end,
194 			   number_of_entries_ptr, collection_freq_ptr);
195     if (number_of_entries_ptr)
196 	LOGVALUE(DB, *number_of_entries_ptr);
197     if (collection_freq_ptr)
198 	LOGVALUE(DB, *collection_freq_ptr);
199 
200     Xapian::docid did;
201     // Read the docid of the first entry in the posting list.
202     if (!unpack_uint(posptr, end, &did))
203 	report_read_error(*posptr);
204     ++did;
205     LOGVALUE(DB, did);
206     RETURN(did);
207 }
208 
209 static inline void
read_did_increase(const char ** posptr,const char * end,Xapian::docid * did_ptr)210 read_did_increase(const char ** posptr, const char * end,
211 		  Xapian::docid * did_ptr)
212 {
213     Xapian::docid did_increase;
214     if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
215     *did_ptr += did_increase + 1;
216 }
217 
218 /// Read the wdf for an entry.
219 static inline void
read_wdf(const char ** posptr,const char * end,Xapian::termcount * wdf_ptr)220 read_wdf(const char ** posptr, const char * end, Xapian::termcount * wdf_ptr)
221 {
222     if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
223 }
224 
225 /// Read the start of a chunk.
226 static Xapian::docid
read_start_of_chunk(const char ** posptr,const char * end,Xapian::docid first_did_in_chunk,bool * is_last_chunk_ptr)227 read_start_of_chunk(const char ** posptr,
228 		    const char * end,
229 		    Xapian::docid first_did_in_chunk,
230 		    bool * is_last_chunk_ptr)
231 {
232     LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
233     Assert(is_last_chunk_ptr);
234 
235     // Read whether this is the last chunk
236     if (!unpack_bool(posptr, end, is_last_chunk_ptr))
237 	report_read_error(*posptr);
238     LOGVALUE(DB, *is_last_chunk_ptr);
239 
240     // Read what the final document ID in this chunk is.
241     Xapian::docid increase_to_last;
242     if (!unpack_uint(posptr, end, &increase_to_last))
243 	report_read_error(*posptr);
244     Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
245     LOGVALUE(DB, last_did_in_chunk);
246     RETURN(last_did_in_chunk);
247 }
248 
249 /** PostlistChunkReader is essentially an iterator wrapper
250  *  around a postlist chunk.  It simply iterates through the
251  *  entries in a postlist.
252  */
253 class Chert::PostlistChunkReader {
254     string data;
255 
256     const char *pos;
257     const char *end;
258 
259     bool at_end;
260 
261     Xapian::docid did;
262     Xapian::termcount wdf;
263 
264   public:
265     /** Initialise the postlist chunk reader.
266      *
267      *  @param first_did  First document id in this chunk.
268      *  @param data       The tag string with the header removed.
269      */
PostlistChunkReader(Xapian::docid first_did,const string & data_)270     PostlistChunkReader(Xapian::docid first_did, const string & data_)
271 	: data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
272     {
273 	if (!at_end) read_wdf(&pos, end, &wdf);
274     }
275 
get_docid() const276     Xapian::docid get_docid() const {
277 	return did;
278     }
get_wdf() const279     Xapian::termcount get_wdf() const {
280 	return wdf;
281     }
282 
is_at_end() const283     bool is_at_end() const {
284 	return at_end;
285     }
286 
287     /** Advance to the next entry.  Set at_end if we run off the end.
288      */
289     void next();
290 };
291 
292 using Chert::PostlistChunkReader;
293 
294 void
next()295 PostlistChunkReader::next()
296 {
297     if (pos == end) {
298 	at_end = true;
299     } else {
300 	read_did_increase(&pos, end, &did);
301 	read_wdf(&pos, end, &wdf);
302     }
303 }
304 
PostlistChunkWriter(const string & orig_key_,bool is_first_chunk_,const string & tname_,bool is_last_chunk_)305 PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
306 					 bool is_first_chunk_,
307 					 const string &tname_,
308 					 bool is_last_chunk_)
309 	: orig_key(orig_key_),
310 	  tname(tname_), is_first_chunk(is_first_chunk_),
311 	  is_last_chunk(is_last_chunk_),
312 	  started(false)
313 {
314     LOGCALL_CTOR(DB, "PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
315 }
316 
317 void
append(ChertTable * table,Xapian::docid did,Xapian::termcount wdf)318 PostlistChunkWriter::append(ChertTable * table, Xapian::docid did,
319 			    Xapian::termcount wdf)
320 {
321     if (!started) {
322 	started = true;
323 	first_did = did;
324     } else {
325 	Assert(did > current_did);
326 	// Start a new chunk if this one has grown to the threshold.
327 	if (chunk.size() >= CHUNKSIZE) {
328 	    bool save_is_last_chunk = is_last_chunk;
329 	    is_last_chunk = false;
330 	    flush(table);
331 	    is_last_chunk = save_is_last_chunk;
332 	    is_first_chunk = false;
333 	    first_did = did;
334 	    chunk.resize(0);
335 	    orig_key = ChertPostListTable::make_key(tname, first_did);
336 	} else {
337 	    pack_uint(chunk, did - current_did - 1);
338 	}
339     }
340     current_did = did;
341     pack_uint(chunk, wdf);
342 }
343 
344 /** Make the data to go at the start of the very first chunk.
345  */
346 static inline string
make_start_of_first_chunk(Xapian::doccount entries,Xapian::termcount collectionfreq,Xapian::docid new_did)347 make_start_of_first_chunk(Xapian::doccount entries,
348 			  Xapian::termcount collectionfreq,
349 			  Xapian::docid new_did)
350 {
351     string chunk;
352     pack_uint(chunk, entries);
353     pack_uint(chunk, collectionfreq);
354     pack_uint(chunk, new_did - 1);
355     return chunk;
356 }
357 
358 /** Make the data to go at the start of a standard chunk.
359  */
360 static inline string
make_start_of_chunk(bool new_is_last_chunk,Xapian::docid new_first_did,Xapian::docid new_final_did)361 make_start_of_chunk(bool new_is_last_chunk,
362 		    Xapian::docid new_first_did,
363 		    Xapian::docid new_final_did)
364 {
365     Assert(new_final_did >= new_first_did);
366     string chunk;
367     pack_bool(chunk, new_is_last_chunk);
368     pack_uint(chunk, new_final_did - new_first_did);
369     return chunk;
370 }
371 
372 static void
write_start_of_chunk(string & chunk,unsigned int start_of_chunk_header,unsigned int end_of_chunk_header,bool is_last_chunk,Xapian::docid first_did_in_chunk,Xapian::docid last_did_in_chunk)373 write_start_of_chunk(string & chunk,
374 		     unsigned int start_of_chunk_header,
375 		     unsigned int end_of_chunk_header,
376 		     bool is_last_chunk,
377 		     Xapian::docid first_did_in_chunk,
378 		     Xapian::docid last_did_in_chunk)
379 {
380     Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
381 
382     chunk.replace(start_of_chunk_header,
383 		  end_of_chunk_header - start_of_chunk_header,
384 		  make_start_of_chunk(is_last_chunk, first_did_in_chunk,
385 				      last_did_in_chunk));
386 }
387 
388 void
flush(ChertTable * table)389 PostlistChunkWriter::flush(ChertTable *table)
390 {
391     LOGCALL_VOID(DB, "PostlistChunkWriter::flush", table);
392 
393     /* This is one of the more messy parts involved with updating posting
394      * list chunks.
395      *
396      * Depending on circumstances, we may have to delete an entire chunk
397      * or file it under a different key, as well as possibly modifying both
398      * the previous and next chunk of the postlist.
399      */
400 
401     if (!started) {
402 	/* This chunk is now empty so disappears entirely.
403 	 *
404 	 * If this was the last chunk, then the previous chunk
405 	 * must have its "is_last_chunk" flag updated.
406 	 *
407 	 * If this was the first chunk, then the next chunk must
408 	 * be transformed into the first chunk.  Messy!
409 	 */
410 	LOGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
411 	Assert(!orig_key.empty());
412 	if (is_first_chunk) {
413 	    LOGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
414 	    if (is_last_chunk) {
415 		/* This is the first and the last chunk, ie the only
416 		 * chunk, so just delete the tag.
417 		 */
418 		table->del(orig_key);
419 		return;
420 	    }
421 
422 	    /* This is the messiest case.  The first chunk is to
423 	     * be removed, and there is at least one chunk after
424 	     * it.  Need to rewrite the next chunk as the first
425 	     * chunk.
426 	     */
427 	    AutoPtr<ChertCursor> cursor(table->cursor_get());
428 
429 	    if (!cursor->find_entry(orig_key)) {
430 		throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
431 	    }
432 
433 	    // FIXME: Currently the doclen list has a special first chunk too,
434 	    // which reduces special casing here.  The downside is a slightly
435 	    // larger than necessary first chunk and needless fiddling if the
436 	    // first chunk is deleted.  But really we should look at
437 	    // redesigning the whole postlist format with an eye to making it
438 	    // easier to update!
439 
440 	    // Extract existing counts from the first chunk so we can reinsert
441 	    // them into the block we're renaming.
442 	    Xapian::doccount num_ent;
443 	    Xapian::termcount coll_freq;
444 	    {
445 		cursor->read_tag();
446 		const char *tagpos = cursor->current_tag.data();
447 		const char *tagend = tagpos + cursor->current_tag.size();
448 
449 		(void)read_start_of_first_chunk(&tagpos, tagend,
450 						&num_ent, &coll_freq);
451 	    }
452 
453 	    // Seek to the next chunk.
454 	    cursor->next();
455 	    if (cursor->after_end()) {
456 		throw Xapian::DatabaseCorruptError("Expected another key but found none");
457 	    }
458 	    const char *kpos = cursor->current_key.data();
459 	    const char *kend = kpos + cursor->current_key.size();
460 	    if (!check_tname_in_key(&kpos, kend, tname)) {
461 		throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
462 	    }
463 
464 	    // Read the new first docid
465 	    Xapian::docid new_first_did;
466 	    if (!C_unpack_uint_preserving_sort(&kpos, kend, &new_first_did)) {
467 		report_read_error(kpos);
468 	    }
469 
470 	    cursor->read_tag();
471 	    const char *tagpos = cursor->current_tag.data();
472 	    const char *tagend = tagpos + cursor->current_tag.size();
473 
474 	    // Read the chunk header
475 	    bool new_is_last_chunk;
476 	    Xapian::docid new_last_did_in_chunk =
477 		read_start_of_chunk(&tagpos, tagend, new_first_did,
478 				    &new_is_last_chunk);
479 
480 	    string chunk_data(tagpos, tagend);
481 
482 	    // First remove the renamed tag
483 	    table->del(cursor->current_key);
484 
485 	    // And now write it as the first chunk
486 	    string tag;
487 	    tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
488 	    tag += make_start_of_chunk(new_is_last_chunk,
489 					      new_first_did,
490 					      new_last_did_in_chunk);
491 	    tag += chunk_data;
492 	    table->add(orig_key, tag);
493 	    return;
494 	}
495 
496 	LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
497 	/* This isn't the first chunk.  Check whether we're the last chunk. */
498 
499 	// Delete this chunk
500 	table->del(orig_key);
501 
502 	if (is_last_chunk) {
503 	    LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
504 	    // Update the previous chunk's is_last_chunk flag.
505 	    AutoPtr<ChertCursor> cursor(table->cursor_get());
506 
507 	    /* Should not find the key we just deleted, but should
508 	     * find the previous chunk. */
509 	    if (cursor->find_entry(orig_key)) {
510 		throw Xapian::DatabaseCorruptError("Chert key not deleted as we expected");
511 	    }
512 	    // Make sure this is a chunk with the right term attached.
513 	    const char * keypos = cursor->current_key.data();
514 	    const char * keyend = keypos + cursor->current_key.size();
515 	    if (!check_tname_in_key(&keypos, keyend, tname)) {
516 		throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
517 	    }
518 
519 	    bool is_prev_first_chunk = (keypos == keyend);
520 
521 	    // Now update the last_chunk
522 	    cursor->read_tag();
523 	    string tag = cursor->current_tag;
524 
525 	    const char *tagpos = tag.data();
526 	    const char *tagend = tagpos + tag.size();
527 
528 	    // Skip first chunk header
529 	    Xapian::docid first_did_in_chunk;
530 	    if (is_prev_first_chunk) {
531 		first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
532 							       0, 0);
533 	    } else {
534 		if (!C_unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk))
535 		    report_read_error(keypos);
536 	    }
537 	    bool wrong_is_last_chunk;
538 	    string::size_type start_of_chunk_header = tagpos - tag.data();
539 	    Xapian::docid last_did_in_chunk =
540 		read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
541 				    &wrong_is_last_chunk);
542 	    string::size_type end_of_chunk_header = tagpos - tag.data();
543 
544 	    // write new is_last flag
545 	    write_start_of_chunk(tag,
546 				 start_of_chunk_header,
547 				 end_of_chunk_header,
548 				 true, // is_last_chunk
549 				 first_did_in_chunk,
550 				 last_did_in_chunk);
551 	    table->add(cursor->current_key, tag);
552 	}
553     } else {
554 	LOGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
555 	/* The chunk still has some items in it.  Two major subcases:
556 	 * a) This is the first chunk.
557 	 * b) This isn't the first chunk.
558 	 *
559 	 * The subcases just affect the chunk header.
560 	 */
561 	string tag;
562 
563 	/* First write the header, which depends on whether this is the
564 	 * first chunk.
565 	 */
566 	if (is_first_chunk) {
567 	    /* The first chunk.  This is the relatively easy case,
568 	     * and we just have to write this one back to disk.
569 	     */
570 	    LOGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
571 	    string key = ChertPostListTable::make_key(tname);
572 	    bool ok = table->get_exact_entry(key, tag);
573 	    (void)ok;
574 	    Assert(ok);
575 	    Assert(!tag.empty());
576 
577 	    Xapian::doccount num_ent;
578 	    Xapian::termcount coll_freq;
579 	    {
580 		const char * tagpos = tag.data();
581 		const char * tagend = tagpos + tag.size();
582 		(void)read_start_of_first_chunk(&tagpos, tagend,
583 						&num_ent, &coll_freq);
584 	    }
585 
586 	    tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
587 
588 	    tag += make_start_of_chunk(is_last_chunk, first_did, current_did);
589 	    tag += chunk;
590 	    table->add(key, tag);
591 	    return;
592 	}
593 
594 	LOGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
595 	/* Not the first chunk.
596 	 *
597 	 * This has the easy sub-sub-case:
598 	 *   The first entry in the chunk hasn't changed
599 	 * ...and the hard sub-sub-case:
600 	 *   The first entry in the chunk has changed.  This is
601 	 *   harder because the key for the chunk changes, so
602 	 *   we've got to do a switch.
603 	 */
604 
605 	// First find out the initial docid
606 	const char *keypos = orig_key.data();
607 	const char *keyend = keypos + orig_key.size();
608 	if (!check_tname_in_key(&keypos, keyend, tname)) {
609 	    throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
610 	}
611 	Xapian::docid initial_did;
612 	if (!C_unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
613 	    report_read_error(keypos);
614 	}
615 	string new_key;
616 	if (initial_did != first_did) {
617 	    /* The fiddlier case:
618 	     * Create a new tag with the correct key, and replace
619 	     * the old one.
620 	     */
621 	    new_key = ChertPostListTable::make_key(tname, first_did);
622 	    table->del(orig_key);
623 	} else {
624 	    new_key = orig_key;
625 	}
626 
627 	// ...and write the start of this chunk.
628 	tag = make_start_of_chunk(is_last_chunk, first_did, current_did);
629 
630 	tag += chunk;
631 	table->add(new_key, tag);
632     }
633 }
634 
635 /** Read the number of entries in the posting list.
636  *  This must only be called when *posptr is pointing to the start of
637  *  the first chunk of the posting list.
638  */
read_number_of_entries(const char ** posptr,const char * end,Xapian::doccount * number_of_entries_ptr,Xapian::termcount * collection_freq_ptr)639 void ChertPostList::read_number_of_entries(const char ** posptr,
640 				   const char * end,
641 				   Xapian::doccount * number_of_entries_ptr,
642 				   Xapian::termcount * collection_freq_ptr)
643 {
644     if (!unpack_uint(posptr, end, number_of_entries_ptr))
645 	report_read_error(*posptr);
646     if (!unpack_uint(posptr, end, collection_freq_ptr))
647 	report_read_error(*posptr);
648 }
649 
650 /** The format of a postlist is:
651  *
652  *  Split into chunks.  Key for first chunk is the termname (encoded as
653  *  length : name).  Key for subsequent chunks is the same, followed by the
654  *  document ID of the first document in the chunk (encoded as length of
655  *  representation in first byte, and then docid).
656  *
657  *  A chunk (except for the first chunk) contains:
658  *
659  *  1)  bool - true if this is the last chunk.
660  *  2)  difference between final docid in chunk and first docid.
661  *  3)  wdf for the first item.
662  *  4)  increment in docid to next item, followed by wdf for the item.
663  *  5)  (4) repeatedly.
664  *
665  *  The first chunk begins with the number of entries, the collection
666  *  frequency, then the docid of the first document, then has the header of a
667  *  standard chunk.
668  */
ChertPostList(intrusive_ptr<const ChertDatabase> this_db_,const string & term_,bool keep_reference)669 ChertPostList::ChertPostList(intrusive_ptr<const ChertDatabase> this_db_,
670 			     const string & term_,
671 			     bool keep_reference)
672 	: LeafPostList(term_),
673 	  this_db(keep_reference ? this_db_ : NULL),
674 	  have_started(false),
675 	  is_at_end(false),
676 	  cursor(this_db_->postlist_table.cursor_get())
677 {
678     LOGCALL_CTOR(DB, "ChertPostList", this_db_.get() | term_ | keep_reference);
679     string key = ChertPostListTable::make_key(term);
680     int found = cursor->find_entry(key);
681     if (!found) {
682 	LOGLINE(DB, "postlist for term not found");
683 	number_of_entries = 0;
684 	is_at_end = true;
685 	pos = 0;
686 	end = 0;
687 	first_did_in_chunk = 0;
688 	last_did_in_chunk = 0;
689 	return;
690     }
691     cursor->read_tag();
692     pos = cursor->current_tag.data();
693     end = pos + cursor->current_tag.size();
694 
695     did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
696     first_did_in_chunk = did;
697     last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
698 					    &is_last_chunk);
699     read_wdf(&pos, end, &wdf);
700     LOGLINE(DB, "Initial docid " << did);
701 }
702 
~ChertPostList()703 ChertPostList::~ChertPostList()
704 {
705     LOGCALL_DTOR(DB, "ChertPostList");
706 }
707 
708 Xapian::termcount
get_doclength() const709 ChertPostList::get_doclength() const
710 {
711     LOGCALL(DB, Xapian::termcount, "ChertPostList::get_doclength", NO_ARGS);
712     Assert(have_started);
713     Assert(!is_at_end);
714     Assert(this_db.get());
715     RETURN(this_db->get_doclength(did));
716 }
717 
718 Xapian::termcount
get_unique_terms() const719 ChertPostList::get_unique_terms() const
720 {
721     LOGCALL(DB, Xapian::termcount, "ChertPostList::get_unique_terms", NO_ARGS);
722     Assert(have_started);
723     Assert(!is_at_end);
724     Assert(this_db.get());
725     RETURN(this_db->get_unique_terms(did));
726 }
727 
728 bool
next_in_chunk()729 ChertPostList::next_in_chunk()
730 {
731     LOGCALL(DB, bool, "ChertPostList::next_in_chunk", NO_ARGS);
732     if (pos == end) RETURN(false);
733 
734     read_did_increase(&pos, end, &did);
735     read_wdf(&pos, end, &wdf);
736 
737     // Either not at last doc in chunk, or pos == end, but not both.
738     Assert(did <= last_did_in_chunk);
739     Assert(did < last_did_in_chunk || pos == end);
740     Assert(pos != end || did == last_did_in_chunk);
741 
742     RETURN(true);
743 }
744 
745 void
next_chunk()746 ChertPostList::next_chunk()
747 {
748     LOGCALL_VOID(DB, "ChertPostList::next_chunk", NO_ARGS);
749     if (is_last_chunk) {
750 	is_at_end = true;
751 	return;
752     }
753 
754     cursor->next();
755     if (cursor->after_end()) {
756 	is_at_end = true;
757 	throw Xapian::DatabaseCorruptError("Unexpected end of posting list for '" +
758 				     term + "'");
759     }
760     const char * keypos = cursor->current_key.data();
761     const char * keyend = keypos + cursor->current_key.size();
762     // Check we're still in same postlist
763     if (!check_tname_in_key_lite(&keypos, keyend, term)) {
764 	is_at_end = true;
765 	throw Xapian::DatabaseCorruptError("Unexpected end of posting list for '" +
766 				     term + "'");
767     }
768 
769     Xapian::docid newdid;
770     if (!C_unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
771 	report_read_error(keypos);
772     }
773     if (newdid <= did) {
774 	throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
775 		str(newdid) +
776 		") is not greater than final document ID in previous chunk (" +
777 		str(did) + ")");
778     }
779     did = newdid;
780 
781     cursor->read_tag();
782     pos = cursor->current_tag.data();
783     end = pos + cursor->current_tag.size();
784 
785     first_did_in_chunk = did;
786     last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
787 					    &is_last_chunk);
788     read_wdf(&pos, end, &wdf);
789 }
790 
791 PositionList *
read_position_list()792 ChertPostList::read_position_list()
793 {
794     LOGCALL(DB, PositionList *, "ChertPostList::read_position_list", NO_ARGS);
795     Assert(this_db.get());
796     positionlist.read_data(&this_db->position_table, did, term);
797     RETURN(&positionlist);
798 }
799 
800 PositionList *
open_position_list() const801 ChertPostList::open_position_list() const
802 {
803     LOGCALL(DB, PositionList *, "ChertPostList::open_position_list", NO_ARGS);
804     Assert(this_db.get());
805     RETURN(new ChertPositionList(&this_db->position_table, did, term));
806 }
807 
808 PostList *
next(double w_min)809 ChertPostList::next(double w_min)
810 {
811     LOGCALL(DB, PostList *, "ChertPostList::next", w_min);
812     (void)w_min; // no warning
813 
814     if (!have_started) {
815 	have_started = true;
816     } else {
817 	if (!next_in_chunk()) next_chunk();
818     }
819 
820     if (is_at_end) {
821 	LOGLINE(DB, "Moved to end");
822     } else {
823 	LOGLINE(DB, "Moved to docid " << did << ", wdf = " << wdf);
824     }
825 
826     RETURN(NULL);
827 }
828 
829 bool
current_chunk_contains(Xapian::docid desired_did)830 ChertPostList::current_chunk_contains(Xapian::docid desired_did)
831 {
832     LOGCALL(DB, bool, "ChertPostList::current_chunk_contains", desired_did);
833     if (desired_did >= first_did_in_chunk &&
834 	desired_did <= last_did_in_chunk) {
835 	RETURN(true);
836     }
837     RETURN(false);
838 }
839 
840 void
move_to_chunk_containing(Xapian::docid desired_did)841 ChertPostList::move_to_chunk_containing(Xapian::docid desired_did)
842 {
843     LOGCALL_VOID(DB, "ChertPostList::move_to_chunk_containing", desired_did);
844     (void)cursor->find_entry(ChertPostListTable::make_key(term, desired_did));
845     Assert(!cursor->after_end());
846 
847     const char * keypos = cursor->current_key.data();
848     const char * keyend = keypos + cursor->current_key.size();
849     // Check we're still in same postlist
850     if (!check_tname_in_key_lite(&keypos, keyend, term)) {
851 	// This should only happen if the postlist doesn't exist at all.
852 	is_at_end = true;
853 	is_last_chunk = true;
854 	return;
855     }
856     is_at_end = false;
857 
858     cursor->read_tag();
859     pos = cursor->current_tag.data();
860     end = pos + cursor->current_tag.size();
861 
862     if (keypos == keyend) {
863 	// In first chunk
864 #ifdef XAPIAN_ASSERTIONS
865 	Xapian::doccount old_number_of_entries = number_of_entries;
866 	did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
867 	Assert(old_number_of_entries == number_of_entries);
868 #else
869 	did = read_start_of_first_chunk(&pos, end, NULL, NULL);
870 #endif
871     } else {
872 	// In normal chunk
873 	if (!C_unpack_uint_preserving_sort(&keypos, keyend, &did)) {
874 	    report_read_error(keypos);
875 	}
876     }
877 
878     first_did_in_chunk = did;
879     last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
880 					    &is_last_chunk);
881     read_wdf(&pos, end, &wdf);
882 
883     // Possible, since desired_did might be after end of this chunk and before
884     // the next.
885     if (desired_did > last_did_in_chunk) next_chunk();
886 }
887 
888 bool
move_forward_in_chunk_to_at_least(Xapian::docid desired_did)889 ChertPostList::move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
890 {
891     LOGCALL(DB, bool, "ChertPostList::move_forward_in_chunk_to_at_least", desired_did);
892     if (did >= desired_did)
893 	RETURN(true);
894 
895     if (desired_did <= last_did_in_chunk) {
896 	while (pos != end) {
897 	    read_did_increase(&pos, end, &did);
898 	    if (did >= desired_did) {
899 		read_wdf(&pos, end, &wdf);
900 		RETURN(true);
901 	    }
902 	    // It's faster to just skip over the wdf than to decode it.
903 	    read_wdf(&pos, end, NULL);
904 	}
905 
906 	// If we hit the end of the chunk then last_did_in_chunk must be wrong.
907 	Assert(false);
908     }
909 
910     pos = end;
911     RETURN(false);
912 }
913 
914 PostList *
skip_to(Xapian::docid desired_did,double w_min)915 ChertPostList::skip_to(Xapian::docid desired_did, double w_min)
916 {
917     LOGCALL(DB, PostList *, "ChertPostList::skip_to", desired_did | w_min);
918     (void)w_min; // no warning
919     // We've started now - if we hadn't already, we're already positioned
920     // at start so there's no need to actually do anything.
921     have_started = true;
922 
923     // Don't skip back, and don't need to do anything if already there.
924     if (is_at_end || desired_did <= did) RETURN(NULL);
925 
926     // Move to correct chunk
927     if (!current_chunk_contains(desired_did)) {
928 	move_to_chunk_containing(desired_did);
929 	// Might be at_end now, so we need to check before trying to move
930 	// forward in chunk.
931 	if (is_at_end) RETURN(NULL);
932     }
933 
934     // Move to correct position in chunk
935     bool have_document = move_forward_in_chunk_to_at_least(desired_did);
936     (void)have_document;
937     Assert(have_document);
938 
939     if (is_at_end) {
940 	LOGLINE(DB, "Skipped to end");
941     } else {
942 	LOGLINE(DB, "Skipped to docid " << did << ", wdf = " << wdf);
943     }
944 
945     RETURN(NULL);
946 }
947 
948 // Used for doclens.
949 bool
jump_to(Xapian::docid desired_did)950 ChertPostList::jump_to(Xapian::docid desired_did)
951 {
952     LOGCALL(DB, bool, "ChertPostList::jump_to", desired_did);
953     // We've started now - if we hadn't already, we're already positioned
954     // at start so there's no need to actually do anything.
955     have_started = true;
956 
957     // If the list is empty, give up right away.
958     if (pos == 0) RETURN(false);
959 
960     // Move to correct chunk, or reload the current chunk to go backwards in it
961     // (FIXME: perhaps handle the latter case more elegantly, though it won't
962     // happen during sequential access which is most common).
963     if (is_at_end || !current_chunk_contains(desired_did) || desired_did < did) {
964 	// Clear is_at_end flag since we can rewind.
965 	is_at_end = false;
966 
967 	move_to_chunk_containing(desired_did);
968 	// Might be at_end now, so we need to check before trying to move
969 	// forward in chunk.
970 	if (is_at_end) RETURN(false);
971     }
972 
973     // Move to correct position in chunk.
974     if (!move_forward_in_chunk_to_at_least(desired_did)) RETURN(false);
975     RETURN(desired_did == did);
976 }
977 
978 string
get_description() const979 ChertPostList::get_description() const
980 {
981     return term + ":" + str(number_of_entries);
982 }
983 
984 // Returns the last did to allow in this chunk.
985 Xapian::docid
get_chunk(const string & tname,Xapian::docid did,bool adding,PostlistChunkReader ** from,PostlistChunkWriter ** to)986 ChertPostListTable::get_chunk(const string &tname,
987 	  Xapian::docid did, bool adding,
988 	  PostlistChunkReader ** from, PostlistChunkWriter **to)
989 {
990     LOGCALL(DB, Xapian::docid, "ChertPostListTable::get_chunk", tname | did | adding | from | to);
991     // Get chunk containing entry
992     string key = make_key(tname, did);
993 
994     // Find the right chunk
995     AutoPtr<ChertCursor> cursor(cursor_get());
996 
997     (void)cursor->find_entry(key);
998     Assert(!cursor->after_end());
999 
1000     const char * keypos = cursor->current_key.data();
1001     const char * keyend = keypos + cursor->current_key.size();
1002 
1003     if (!check_tname_in_key(&keypos, keyend, tname)) {
1004 	// Postlist for this termname doesn't exist.
1005 	if (!adding)
1006 	    throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
1007 
1008 	*from = NULL;
1009 	*to = new PostlistChunkWriter(string(), true, tname, true);
1010 	RETURN(Xapian::docid(-1));
1011     }
1012 
1013     // See if we're appending - if so we can shortcut by just copying
1014     // the data part of the chunk wholesale.
1015     bool is_first_chunk = (keypos == keyend);
1016     LOGVALUE(DB, is_first_chunk);
1017 
1018     cursor->read_tag();
1019     const char * pos = cursor->current_tag.data();
1020     const char * end = pos + cursor->current_tag.size();
1021     Xapian::docid first_did_in_chunk;
1022     if (is_first_chunk) {
1023 	first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
1024     } else {
1025 	if (!C_unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk)) {
1026 	    report_read_error(keypos);
1027 	}
1028     }
1029 
1030     bool is_last_chunk;
1031     Xapian::docid last_did_in_chunk;
1032     last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
1033     *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
1034 				  is_last_chunk);
1035     if (did > last_did_in_chunk) {
1036 	// This is the shortcut.  Not very pretty, but I'll leave refactoring
1037 	// until I've a clearer picture of everything which needs to be done.
1038 	// (FIXME)
1039 	*from = NULL;
1040 	(*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
1041 			  string(pos, end));
1042     } else {
1043 	*from = new PostlistChunkReader(first_did_in_chunk, string(pos, end));
1044     }
1045     if (is_last_chunk) RETURN(Xapian::docid(-1));
1046 
1047     // Find first did of next tag.
1048     cursor->next();
1049     if (cursor->after_end()) {
1050 	throw Xapian::DatabaseCorruptError("Expected another key but found none");
1051     }
1052     const char *kpos = cursor->current_key.data();
1053     const char *kend = kpos + cursor->current_key.size();
1054     if (!check_tname_in_key(&kpos, kend, tname)) {
1055 	throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
1056     }
1057 
1058     // Read the new first docid
1059     Xapian::docid first_did_of_next_chunk;
1060     if (!C_unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
1061 	report_read_error(kpos);
1062     }
1063     RETURN(first_did_of_next_chunk - 1);
1064 }
1065 
1066 void
merge_changes(const map<string,map<Xapian::docid,pair<char,Xapian::termcount>>> & mod_plists,const map<Xapian::docid,Xapian::termcount> & doclens,const map<string,pair<Xapian::termcount_diff,Xapian::termcount_diff>> & freq_deltas)1067 ChertPostListTable::merge_changes(
1068     const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
1069     const map<Xapian::docid, Xapian::termcount> & doclens,
1070     const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
1071 {
1072     LOGCALL_VOID(DB, "ChertPostListTable::merge_changes", mod_plists | doclens | freq_deltas);
1073 
1074     // The cursor in the doclen_pl will no longer be valid, so reset it.
1075     doclen_pl.reset(0);
1076 
1077     LOGVALUE(DB, doclens.size());
1078     if (!doclens.empty()) {
1079 	// Ensure there's a first chunk.
1080 	string current_key = make_key(string());
1081 	if (!key_exists(current_key)) {
1082 	    LOGLINE(DB, "Adding dummy first chunk");
1083 	    string newtag = make_start_of_first_chunk(0, 0, 0);
1084 	    newtag += make_start_of_chunk(true, 0, 0);
1085 	    add(current_key, newtag);
1086 	}
1087 
1088 	map<Xapian::docid, Xapian::termcount>::const_iterator j;
1089 	j = doclens.begin();
1090 	Assert(j != doclens.end()); // This case is caught above.
1091 
1092 	Xapian::docid max_did;
1093 	PostlistChunkReader *from;
1094 	PostlistChunkWriter *to;
1095 	max_did = get_chunk(string(), j->first, true, &from, &to);
1096 	LOGVALUE(DB, max_did);
1097 	for ( ; j != doclens.end(); ++j) {
1098 	    Xapian::docid did = j->first;
1099 
1100 next_doclen_chunk:
1101 	    LOGLINE(DB, "Updating doclens, did=" << did);
1102 	    if (from) while (!from->is_at_end()) {
1103 		Xapian::docid copy_did = from->get_docid();
1104 		if (copy_did >= did) {
1105 		    if (copy_did == did) from->next();
1106 		    break;
1107 		}
1108 		to->append(this, copy_did, from->get_wdf());
1109 		from->next();
1110 	    }
1111 	    if ((!from || from->is_at_end()) && did > max_did) {
1112 		delete from;
1113 		to->flush(this);
1114 		delete to;
1115 		max_did = get_chunk(string(), did, false, &from, &to);
1116 		goto next_doclen_chunk;
1117 	    }
1118 
1119 	    Xapian::termcount new_doclen = j->second;
1120 	    if (new_doclen != static_cast<Xapian::termcount>(-1)) {
1121 		to->append(this, did, new_doclen);
1122 	    }
1123 	}
1124 
1125 	if (from) {
1126 	    while (!from->is_at_end()) {
1127 		to->append(this, from->get_docid(), from->get_wdf());
1128 		from->next();
1129 	    }
1130 	    delete from;
1131 	}
1132 	to->flush(this);
1133 	delete to;
1134     }
1135 
1136     map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
1137     for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
1138 	if (i->second.empty()) continue;
1139 	string tname = i->first;
1140 	{
1141 	    // Rewrite the first chunk of this posting list with the updated
1142 	    // termfreq and collfreq.
1143 	    map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
1144 	    Assert(deltas != freq_deltas.end());
1145 
1146 	    string current_key = make_key(tname);
1147 	    string tag;
1148 	    (void)get_exact_entry(current_key, tag);
1149 
1150 	    // Read start of first chunk to get termfreq and collfreq.
1151 	    const char *pos = tag.data();
1152 	    const char *end = pos + tag.size();
1153 	    Xapian::doccount termfreq;
1154 	    Xapian::termcount collfreq;
1155 	    Xapian::docid firstdid, lastdid;
1156 	    bool islast;
1157 	    if (pos == end) {
1158 		termfreq = 0;
1159 		collfreq = 0;
1160 		firstdid = 0;
1161 		lastdid = 0;
1162 		islast = true;
1163 	    } else {
1164 		firstdid = read_start_of_first_chunk(&pos, end,
1165 						     &termfreq, &collfreq);
1166 		// Handle the generic start of chunk header.
1167 		lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
1168 	    }
1169 
1170 	    termfreq += deltas->second.first;
1171 	    if (termfreq == 0) {
1172 		// All postings deleted!  So we can shortcut by zapping the
1173 		// posting list.
1174 		if (islast) {
1175 		    // Only one entry for this posting list.
1176 		    del(current_key);
1177 		    continue;
1178 		}
1179 		MutableChertCursor cursor(this);
1180 		bool found = cursor.find_entry(current_key);
1181 		Assert(found);
1182 		if (!found) continue; // Reduce damage!
1183 		while (cursor.del()) {
1184 		    const char *kpos = cursor.current_key.data();
1185 		    const char *kend = kpos + cursor.current_key.size();
1186 		    if (!check_tname_in_key_lite(&kpos, kend, tname)) break;
1187 		}
1188 		continue;
1189 	    }
1190 	    collfreq += deltas->second.second;
1191 
1192 	    // Rewrite start of first chunk to update termfreq and collfreq.
1193 	    string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
1194 	    newhdr += make_start_of_chunk(islast, firstdid, lastdid);
1195 	    if (pos == end) {
1196 		add(current_key, newhdr);
1197 	    } else {
1198 		Assert((size_t)(pos - tag.data()) <= tag.size());
1199 		tag.replace(0, pos - tag.data(), newhdr);
1200 		add(current_key, tag);
1201 	    }
1202 	}
1203 	map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
1204 	j = i->second.begin();
1205 	Assert(j != i->second.end()); // This case is caught above.
1206 
1207 	Xapian::docid max_did;
1208 	PostlistChunkReader *from;
1209 	PostlistChunkWriter *to;
1210 	max_did = get_chunk(tname, j->first, j->second.first == 'A',
1211 			    &from, &to);
1212 	for ( ; j != i->second.end(); ++j) {
1213 	    Xapian::docid did = j->first;
1214 
1215 next_chunk:
1216 	    LOGLINE(DB, "Updating tname=" << tname << ", did=" << did);
1217 	    if (from) while (!from->is_at_end()) {
1218 		Xapian::docid copy_did = from->get_docid();
1219 		if (copy_did >= did) {
1220 		    if (copy_did == did) {
1221 			Assert(j->second.first != 'A');
1222 			from->next();
1223 		    }
1224 		    break;
1225 		}
1226 		to->append(this, copy_did, from->get_wdf());
1227 		from->next();
1228 	    }
1229 	    if ((!from || from->is_at_end()) && did > max_did) {
1230 		delete from;
1231 		to->flush(this);
1232 		delete to;
1233 		max_did = get_chunk(tname, did, false, &from, &to);
1234 		goto next_chunk;
1235 	    }
1236 
1237 	    if (j->second.first != 'D') {
1238 		Xapian::termcount new_wdf = j->second.second;
1239 		to->append(this, did, new_wdf);
1240 	    }
1241 	}
1242 
1243 	if (from) {
1244 	    while (!from->is_at_end()) {
1245 		to->append(this, from->get_docid(), from->get_wdf());
1246 		from->next();
1247 	    }
1248 	    delete from;
1249 	}
1250 	to->flush(this);
1251 	delete to;
1252     }
1253 }
1254 
1255 void
get_used_docid_range(Xapian::docid & first,Xapian::docid & last) const1256 ChertPostListTable::get_used_docid_range(Xapian::docid & first,
1257 					 Xapian::docid & last) const
1258 {
1259     LOGCALL(DB, Xapian::docid, "ChertPostList::get_used_docid_range", "&first, &used");
1260     AutoPtr<ChertCursor> cur(cursor_get());
1261     if (!cur->find_entry(pack_chert_postlist_key(string()))) {
1262 	// Empty database.
1263 	first = last = 0;
1264 	return;
1265     }
1266 
1267     cur->read_tag();
1268     const char * p = cur->current_tag.data();
1269     const char * e = p + cur->current_tag.size();
1270 
1271     first = read_start_of_first_chunk(&p, e, NULL, NULL);
1272 
1273     (void)cur->find_entry(pack_glass_postlist_key(string(), CHERT_MAX_DOCID));
1274     Assert(!cur->after_end());
1275 
1276     const char * keypos = cur->current_key.data();
1277     const char * keyend = keypos + cur->current_key.size();
1278     // Check we're still in same postlist
1279     if (!check_tname_in_key_lite(&keypos, keyend, string())) {
1280 	// Shouldn't happen - we already handled the empty database case above.
1281 	Assert(false);
1282 	first = last = 0;
1283 	return;
1284     }
1285 
1286     cur->read_tag();
1287     p = cur->current_tag.data();
1288     e = p + cur->current_tag.size();
1289 
1290     Xapian::docid start_of_last_chunk;
1291     if (keypos == keyend) {
1292 	start_of_last_chunk = first;
1293 	first = read_start_of_first_chunk(&p, e, NULL, NULL);
1294     } else {
1295 	// In normal chunk
1296 	if (!C_unpack_uint_preserving_sort(&keypos, keyend,
1297 					 &start_of_last_chunk)) {
1298 	    report_read_error(keypos);
1299 	}
1300     }
1301 
1302     bool dummy;
1303     last = read_start_of_chunk(&p, e, start_of_last_chunk, &dummy);
1304 }
1305