1 /* chert_postlist.cc: Postlists in a chert database
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2002,2003,2004,2005,2007,2008,2009,2011 Olly Betts
5  * Copyright 2007,2008,2009 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20  * USA
21  */
22 
23 #include <config.h>
24 
25 #include "chert_postlist.h"
26 
27 #include "chert_cursor.h"
28 #include "chert_database.h"
29 #include "debuglog.h"
30 #include "noreturn.h"
31 #include "pack.h"
32 #include "str.h"
33 
34 Xapian::doccount
get_termfreq(const string & term) const35 ChertPostListTable::get_termfreq(const string & term) const
36 {
37     string key = make_key(term);
38     string tag;
39     if (!get_exact_entry(key, tag)) return 0;
40 
41     Xapian::doccount termfreq;
42     const char * p = tag.data();
43     ChertPostList::read_number_of_entries(&p, p + tag.size(), &termfreq, NULL);
44     return termfreq;
45 }
46 
47 Xapian::termcount
get_collection_freq(const string & term) const48 ChertPostListTable::get_collection_freq(const string & term) const
49 {
50     string key = make_key(term);
51     string tag;
52     if (!get_exact_entry(key, tag)) return 0;
53 
54     Xapian::termcount collfreq;
55     const char * p = tag.data();
56     ChertPostList::read_number_of_entries(&p, p + tag.size(), NULL, &collfreq);
57     return collfreq;
58 }
59 
60 Xapian::termcount
get_doclength(Xapian::docid did,Xapian::Internal::RefCntPtr<const ChertDatabase> db) const61 ChertPostListTable::get_doclength(Xapian::docid did,
62 				  Xapian::Internal::RefCntPtr<const ChertDatabase> db) const {
63     if (!doclen_pl.get()) {
64 	// Don't keep a reference back to the database, since this
65 	// would make a reference loop.
66 	doclen_pl.reset(new ChertPostList(db, string(), false));
67     }
68     if (!doclen_pl->jump_to(did))
69 	throw Xapian::DocNotFoundError("Document " + str(did) + " not found");
70     return doclen_pl->get_wdf();
71 }
72 
73 bool
document_exists(Xapian::docid did,Xapian::Internal::RefCntPtr<const ChertDatabase> db) const74 ChertPostListTable::document_exists(Xapian::docid did,
75 				    Xapian::Internal::RefCntPtr<const ChertDatabase> db) const
76 {
77     if (!doclen_pl.get()) {
78 	// Don't keep a reference back to the database, since this
79 	// would make a reference loop.
80 	doclen_pl.reset(new ChertPostList(db, string(), false));
81     }
82     return (doclen_pl->jump_to(did));
83 }
84 
85 // How big should chunks in the posting list be?  (They
86 // will grow slightly bigger than this, but not more than a
87 // few bytes extra) - FIXME: tune this value to try to
88 // maximise how well blocks are used.  Or performance.
89 // Or indexing speed.  Or something...
90 const unsigned int CHUNKSIZE = 2000;
91 
92 /** PostlistChunkWriter is a wrapper which acts roughly as an
93  *  output iterator on a postlist chunk, taking care of the
94  *  messy details.  It's intended to be used with deletion and
95  *  replacing of entries, not for adding to the end, when it's
96  *  not really needed.
97  */
98 class Chert::PostlistChunkWriter {
99     public:
100 	PostlistChunkWriter(const string &orig_key_,
101 			    bool is_first_chunk_,
102 			    const string &tname_,
103 			    bool is_last_chunk_);
104 
105 	/// Append an entry to this chunk.
106 	void append(ChertTable * table, Xapian::docid did,
107 		    Xapian::termcount wdf);
108 
109 	/// Append a block of raw entries to this chunk.
raw_append(Xapian::docid first_did_,Xapian::docid current_did_,const string & s)110 	void raw_append(Xapian::docid first_did_, Xapian::docid current_did_,
111 			const string & s) {
112 	    Assert(!started);
113 	    first_did = first_did_;
114 	    current_did = current_did_;
115 	    if (!s.empty()) {
116 		chunk.append(s);
117 		started = true;
118 	    }
119 	}
120 
121 	/** Flush the chunk to the buffered table.  Note: this may write it
122 	 *  with a different key to the original one, if for example the first
123 	 *  entry has changed.
124 	 */
125 	void flush(ChertTable *table);
126 
127     private:
128 	string orig_key;
129 	string tname;
130 	bool is_first_chunk;
131 	bool is_last_chunk;
132 	bool started;
133 
134 	Xapian::docid first_did;
135 	Xapian::docid current_did;
136 
137 	string chunk;
138 };
139 
140 using Chert::PostlistChunkWriter;
141 
142 // Static functions
143 
144 /// Report an error when reading the posting list.
145 XAPIAN_NORETURN(static void report_read_error(const char * position));
report_read_error(const char * position)146 static void report_read_error(const char * position)
147 {
148     if (position == 0) {
149 	// data ran out
150 	LOGLINE(DB, "ChertPostList data ran out");
151 	throw Xapian::DatabaseCorruptError("Data ran out unexpectedly when reading posting list.");
152     }
153     // overflow
154     LOGLINE(DB, "ChertPostList value too large");
155     throw Xapian::RangeError("Value in posting list too large.");
156 }
157 
get_tname_from_key(const char ** src,const char * end,string & tname)158 static inline bool get_tname_from_key(const char **src, const char *end,
159 			       string &tname)
160 {
161     return unpack_string_preserving_sort(src, end, tname);
162 }
163 
164 static inline bool
check_tname_in_key_lite(const char ** keypos,const char * keyend,const string & tname)165 check_tname_in_key_lite(const char **keypos, const char *keyend, const string &tname)
166 {
167     string tname_in_key;
168 
169     if (keyend - *keypos >= 2 && (*keypos)[0] == '\0' && (*keypos)[1] == '\xe0') {
170 	*keypos += 2;
171     } else {
172 	// Read the termname.
173 	if (!get_tname_from_key(keypos, keyend, tname_in_key))
174 	    report_read_error(*keypos);
175     }
176 
177     // This should only fail if the postlist doesn't exist at all.
178     return tname_in_key == tname;
179 }
180 
181 static inline bool
check_tname_in_key(const char ** keypos,const char * keyend,const string & tname)182 check_tname_in_key(const char **keypos, const char *keyend, const string &tname)
183 {
184     if (*keypos == keyend) return false;
185 
186     return check_tname_in_key_lite(keypos, keyend, tname);
187 }
188 
189 /// Read the start of the first chunk in the posting list.
190 static Xapian::docid
read_start_of_first_chunk(const char ** posptr,const char * end,Xapian::doccount * number_of_entries_ptr,Xapian::termcount * collection_freq_ptr)191 read_start_of_first_chunk(const char ** posptr,
192 			  const char * end,
193 			  Xapian::doccount * number_of_entries_ptr,
194 			  Xapian::termcount * collection_freq_ptr)
195 {
196     LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_first_chunk", (const void *)posptr | (const void *)end | (void *)number_of_entries_ptr | (void *)collection_freq_ptr);
197 
198     ChertPostList::read_number_of_entries(posptr, end,
199 			   number_of_entries_ptr, collection_freq_ptr);
200     if (number_of_entries_ptr)
201 	LOGVALUE(DB, *number_of_entries_ptr);
202     if (collection_freq_ptr)
203 	LOGVALUE(DB, *collection_freq_ptr);
204 
205     Xapian::docid did;
206     // Read the docid of the first entry in the posting list.
207     if (!unpack_uint(posptr, end, &did))
208 	report_read_error(*posptr);
209     ++did;
210     LOGVALUE(DB, did);
211     RETURN(did);
212 }
213 
214 static inline void
read_did_increase(const char ** posptr,const char * end,Xapian::docid * did_ptr)215 read_did_increase(const char ** posptr, const char * end,
216 		  Xapian::docid * did_ptr)
217 {
218     Xapian::docid did_increase;
219     if (!unpack_uint(posptr, end, &did_increase)) report_read_error(*posptr);
220     *did_ptr += did_increase + 1;
221 }
222 
223 /// Read the wdf for an entry.
224 static inline void
read_wdf(const char ** posptr,const char * end,Xapian::termcount * wdf_ptr)225 read_wdf(const char ** posptr, const char * end, Xapian::termcount * wdf_ptr)
226 {
227     if (!unpack_uint(posptr, end, wdf_ptr)) report_read_error(*posptr);
228 }
229 
230 /// Read the start of a chunk.
231 static Xapian::docid
read_start_of_chunk(const char ** posptr,const char * end,Xapian::docid first_did_in_chunk,bool * is_last_chunk_ptr)232 read_start_of_chunk(const char ** posptr,
233 		    const char * end,
234 		    Xapian::docid first_did_in_chunk,
235 		    bool * is_last_chunk_ptr)
236 {
237     LOGCALL_STATIC(DB, Xapian::docid, "read_start_of_chunk", reinterpret_cast<const void*>(posptr) | reinterpret_cast<const void*>(end) | first_did_in_chunk | reinterpret_cast<const void*>(is_last_chunk_ptr));
238     Assert(is_last_chunk_ptr);
239 
240     // Read whether this is the last chunk
241     if (!unpack_bool(posptr, end, is_last_chunk_ptr))
242 	report_read_error(*posptr);
243     LOGVALUE(DB, *is_last_chunk_ptr);
244 
245     // Read what the final document ID in this chunk is.
246     Xapian::docid increase_to_last;
247     if (!unpack_uint(posptr, end, &increase_to_last))
248 	report_read_error(*posptr);
249     Xapian::docid last_did_in_chunk = first_did_in_chunk + increase_to_last;
250     LOGVALUE(DB, last_did_in_chunk);
251     RETURN(last_did_in_chunk);
252 }
253 
254 /** PostlistChunkReader is essentially an iterator wrapper
255  *  around a postlist chunk.  It simply iterates through the
256  *  entries in a postlist.
257  */
258 class Chert::PostlistChunkReader {
259     string data;
260 
261     const char *pos;
262     const char *end;
263 
264     bool at_end;
265 
266     Xapian::docid did;
267     Xapian::termcount wdf;
268 
269   public:
270     /** Initialise the postlist chunk reader.
271      *
272      *  @param first_did  First document id in this chunk.
273      *  @param data       The tag string with the header removed.
274      */
PostlistChunkReader(Xapian::docid first_did,const string & data_)275     PostlistChunkReader(Xapian::docid first_did, const string & data_)
276 	: data(data_), pos(data.data()), end(pos + data.length()), at_end(data.empty()), did(first_did)
277     {
278 	if (!at_end) read_wdf(&pos, end, &wdf);
279     }
280 
get_docid() const281     Xapian::docid get_docid() const {
282 	return did;
283     }
get_wdf() const284     Xapian::termcount get_wdf() const {
285 	return wdf;
286     }
287 
is_at_end() const288     bool is_at_end() const {
289 	return at_end;
290     }
291 
292     /** Advance to the next entry.  Set at_end if we run off the end.
293      */
294     void next();
295 };
296 
297 using Chert::PostlistChunkReader;
298 
299 void
next()300 PostlistChunkReader::next()
301 {
302     if (pos == end) {
303 	at_end = true;
304     } else {
305 	read_did_increase(&pos, end, &did);
306 	read_wdf(&pos, end, &wdf);
307     }
308 }
309 
PostlistChunkWriter(const string & orig_key_,bool is_first_chunk_,const string & tname_,bool is_last_chunk_)310 PostlistChunkWriter::PostlistChunkWriter(const string &orig_key_,
311 					 bool is_first_chunk_,
312 					 const string &tname_,
313 					 bool is_last_chunk_)
314 	: orig_key(orig_key_),
315 	  tname(tname_), is_first_chunk(is_first_chunk_),
316 	  is_last_chunk(is_last_chunk_),
317 	  started(false)
318 {
319     LOGCALL_VOID(DB, "PostlistChunkWriter::PostlistChunkWriter", orig_key_ | is_first_chunk_ | tname_ | is_last_chunk_);
320 }
321 
322 void
append(ChertTable * table,Xapian::docid did,Xapian::termcount wdf)323 PostlistChunkWriter::append(ChertTable * table, Xapian::docid did,
324 			    Xapian::termcount wdf)
325 {
326     if (!started) {
327 	started = true;
328 	first_did = did;
329     } else {
330 	Assert(did > current_did);
331 	// Start a new chunk if this one has grown to the threshold.
332 	if (chunk.size() >= CHUNKSIZE) {
333 	    bool save_is_last_chunk = is_last_chunk;
334 	    is_last_chunk = false;
335 	    flush(table);
336 	    is_last_chunk = save_is_last_chunk;
337 	    is_first_chunk = false;
338 	    first_did = did;
339 	    chunk.resize(0);
340 	    orig_key = ChertPostListTable::make_key(tname, first_did);
341 	} else {
342 	    pack_uint(chunk, did - current_did - 1);
343 	}
344     }
345     current_did = did;
346     pack_uint(chunk, wdf);
347 }
348 
349 /** Make the data to go at the start of the very first chunk.
350  */
351 static inline string
make_start_of_first_chunk(Xapian::doccount entries,Xapian::termcount collectionfreq,Xapian::docid new_did)352 make_start_of_first_chunk(Xapian::doccount entries,
353 			  Xapian::termcount collectionfreq,
354 			  Xapian::docid new_did)
355 {
356     string chunk;
357     pack_uint(chunk, entries);
358     pack_uint(chunk, collectionfreq);
359     pack_uint(chunk, new_did - 1);
360     return chunk;
361 }
362 
363 /** Make the data to go at the start of a standard chunk.
364  */
365 static inline string
make_start_of_chunk(bool new_is_last_chunk,Xapian::docid new_first_did,Xapian::docid new_final_did)366 make_start_of_chunk(bool new_is_last_chunk,
367 		    Xapian::docid new_first_did,
368 		    Xapian::docid new_final_did)
369 {
370     Assert(new_final_did >= new_first_did);
371     string chunk;
372     pack_bool(chunk, new_is_last_chunk);
373     pack_uint(chunk, new_final_did - new_first_did);
374     return chunk;
375 }
376 
377 static void
write_start_of_chunk(string & chunk,unsigned int start_of_chunk_header,unsigned int end_of_chunk_header,bool is_last_chunk,Xapian::docid first_did_in_chunk,Xapian::docid last_did_in_chunk)378 write_start_of_chunk(string & chunk,
379 		     unsigned int start_of_chunk_header,
380 		     unsigned int end_of_chunk_header,
381 		     bool is_last_chunk,
382 		     Xapian::docid first_did_in_chunk,
383 		     Xapian::docid last_did_in_chunk)
384 {
385     Assert((size_t)(end_of_chunk_header - start_of_chunk_header) <= chunk.size());
386 
387     chunk.replace(start_of_chunk_header,
388 		  end_of_chunk_header - start_of_chunk_header,
389 		  make_start_of_chunk(is_last_chunk, first_did_in_chunk,
390 				      last_did_in_chunk));
391 }
392 
393 void
flush(ChertTable * table)394 PostlistChunkWriter::flush(ChertTable *table)
395 {
396     LOGCALL_VOID(DB, "PostlistChunkWriter::flush", table);
397 
398     /* This is one of the more messy parts involved with updating posting
399      * list chunks.
400      *
401      * Depending on circumstances, we may have to delete an entire chunk
402      * or file it under a different key, as well as possibly modifying both
403      * the previous and next chunk of the postlist.
404      */
405 
406     if (!started) {
407 	/* This chunk is now empty so disappears entirely.
408 	 *
409 	 * If this was the last chunk, then the previous chunk
410 	 * must have its "is_last_chunk" flag updated.
411 	 *
412 	 * If this was the first chunk, then the next chunk must
413 	 * be transformed into the first chunk.  Messy!
414 	 */
415 	LOGLINE(DB, "PostlistChunkWriter::flush(): deleting chunk");
416 	Assert(!orig_key.empty());
417 	if (is_first_chunk) {
418 	    LOGLINE(DB, "PostlistChunkWriter::flush(): deleting first chunk");
419 	    if (is_last_chunk) {
420 		/* This is the first and the last chunk, ie the only
421 		 * chunk, so just delete the tag.
422 		 */
423 		table->del(orig_key);
424 		return;
425 	    }
426 
427 	    /* This is the messiest case.  The first chunk is to
428 	     * be removed, and there is at least one chunk after
429 	     * it.  Need to rewrite the next chunk as the first
430 	     * chunk.
431 	     */
432 	    AutoPtr<ChertCursor> cursor(table->cursor_get());
433 
434 	    if (!cursor->find_entry(orig_key)) {
435 		throw Xapian::DatabaseCorruptError("The key we're working on has disappeared");
436 	    }
437 
438 	    // FIXME: Currently the doclen list has a special first chunk too,
439 	    // which reduces special casing here.  The downside is a slightly
440 	    // larger than necessary first chunk and needless fiddling if the
441 	    // first chunk is deleted.  But really we should look at
442 	    // redesigning the whole postlist format with an eye to making it
443 	    // easier to update!
444 
445 	    // Extract existing counts from the first chunk so we can reinsert
446 	    // them into the block we're renaming.
447 	    Xapian::doccount num_ent;
448 	    Xapian::termcount coll_freq;
449 	    {
450 		cursor->read_tag();
451 		const char *tagpos = cursor->current_tag.data();
452 		const char *tagend = tagpos + cursor->current_tag.size();
453 
454 		(void)read_start_of_first_chunk(&tagpos, tagend,
455 						&num_ent, &coll_freq);
456 	    }
457 
458 	    // Seek to the next chunk.
459 	    cursor->next();
460 	    if (cursor->after_end()) {
461 		throw Xapian::DatabaseCorruptError("Expected another key but found none");
462 	    }
463 	    const char *kpos = cursor->current_key.data();
464 	    const char *kend = kpos + cursor->current_key.size();
465 	    if (!check_tname_in_key(&kpos, kend, tname)) {
466 		throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
467 	    }
468 
469 	    // Read the new first docid
470 	    Xapian::docid new_first_did;
471 	    if (!unpack_uint_preserving_sort(&kpos, kend, &new_first_did)) {
472 		report_read_error(kpos);
473 	    }
474 
475 	    cursor->read_tag();
476 	    const char *tagpos = cursor->current_tag.data();
477 	    const char *tagend = tagpos + cursor->current_tag.size();
478 
479 	    // Read the chunk header
480 	    bool new_is_last_chunk;
481 	    Xapian::docid new_last_did_in_chunk =
482 		read_start_of_chunk(&tagpos, tagend, new_first_did,
483 				    &new_is_last_chunk);
484 
485 	    string chunk_data(tagpos, tagend);
486 
487 	    // First remove the renamed tag
488 	    table->del(cursor->current_key);
489 
490 	    // And now write it as the first chunk
491 	    string tag;
492 	    tag = make_start_of_first_chunk(num_ent, coll_freq, new_first_did);
493 	    tag += make_start_of_chunk(new_is_last_chunk,
494 					      new_first_did,
495 					      new_last_did_in_chunk);
496 	    tag += chunk_data;
497 	    table->add(orig_key, tag);
498 	    return;
499 	}
500 
501 	LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary chunk");
502 	/* This isn't the first chunk.  Check whether we're the last chunk. */
503 
504 	// Delete this chunk
505 	table->del(orig_key);
506 
507 	if (is_last_chunk) {
508 	    LOGLINE(DB, "PostlistChunkWriter::flush(): deleting secondary last chunk");
509 	    // Update the previous chunk's is_last_chunk flag.
510 	    AutoPtr<ChertCursor> cursor(table->cursor_get());
511 
512 	    /* Should not find the key we just deleted, but should
513 	     * find the previous chunk. */
514 	    if (cursor->find_entry(orig_key)) {
515 		throw Xapian::DatabaseCorruptError("Chert key not deleted as we expected");
516 	    }
517 	    // Make sure this is a chunk with the right term attached.
518 	    const char * keypos = cursor->current_key.data();
519 	    const char * keyend = keypos + cursor->current_key.size();
520 	    if (!check_tname_in_key(&keypos, keyend, tname)) {
521 		throw Xapian::DatabaseCorruptError("Couldn't find chunk before delete chunk");
522 	    }
523 
524 	    bool is_prev_first_chunk = (keypos == keyend);
525 
526 	    // Now update the last_chunk
527 	    cursor->read_tag();
528 	    string tag = cursor->current_tag;
529 
530 	    const char *tagpos = tag.data();
531 	    const char *tagend = tagpos + tag.size();
532 
533 	    // Skip first chunk header
534 	    Xapian::docid first_did_in_chunk;
535 	    if (is_prev_first_chunk) {
536 		first_did_in_chunk = read_start_of_first_chunk(&tagpos, tagend,
537 							       0, 0);
538 	    } else {
539 		if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk))
540 		    report_read_error(keypos);
541 	    }
542 	    bool wrong_is_last_chunk;
543 	    string::size_type start_of_chunk_header = tagpos - tag.data();
544 	    Xapian::docid last_did_in_chunk =
545 		read_start_of_chunk(&tagpos, tagend, first_did_in_chunk,
546 				    &wrong_is_last_chunk);
547 	    string::size_type end_of_chunk_header = tagpos - tag.data();
548 
549 	    // write new is_last flag
550 	    write_start_of_chunk(tag,
551 				 start_of_chunk_header,
552 				 end_of_chunk_header,
553 				 true, // is_last_chunk
554 				 first_did_in_chunk,
555 				 last_did_in_chunk);
556 	    table->add(cursor->current_key, tag);
557 	}
558     } else {
559 	LOGLINE(DB, "PostlistChunkWriter::flush(): updating chunk which still has items in it");
560 	/* The chunk still has some items in it.  Two major subcases:
561 	 * a) This is the first chunk.
562 	 * b) This isn't the first chunk.
563 	 *
564 	 * The subcases just affect the chunk header.
565 	 */
566 	string tag;
567 
568 	/* First write the header, which depends on whether this is the
569 	 * first chunk.
570 	 */
571 	if (is_first_chunk) {
572 	    /* The first chunk.  This is the relatively easy case,
573 	     * and we just have to write this one back to disk.
574 	     */
575 	    LOGLINE(DB, "PostlistChunkWriter::flush(): rewriting the first chunk, which still has items in it");
576 	    string key = ChertPostListTable::make_key(tname);
577 	    bool ok = table->get_exact_entry(key, tag);
578 	    (void)ok;
579 	    Assert(ok);
580 	    Assert(!tag.empty());
581 
582 	    Xapian::doccount num_ent;
583 	    Xapian::termcount coll_freq;
584 	    {
585 		const char * tagpos = tag.data();
586 		const char * tagend = tagpos + tag.size();
587 		(void)read_start_of_first_chunk(&tagpos, tagend,
588 						&num_ent, &coll_freq);
589 	    }
590 
591 	    tag = make_start_of_first_chunk(num_ent, coll_freq, first_did);
592 
593 	    tag += make_start_of_chunk(is_last_chunk, first_did, current_did);
594 	    tag += chunk;
595 	    table->add(key, tag);
596 	    return;
597 	}
598 
599 	LOGLINE(DB, "PostlistChunkWriter::flush(): updating secondary chunk which still has items in it");
600 	/* Not the first chunk.
601 	 *
602 	 * This has the easy sub-sub-case:
603 	 *   The first entry in the chunk hasn't changed
604 	 * ...and the hard sub-sub-case:
605 	 *   The first entry in the chunk has changed.  This is
606 	 *   harder because the key for the chunk changes, so
607 	 *   we've got to do a switch.
608 	 */
609 
610 	// First find out the initial docid
611 	const char *keypos = orig_key.data();
612 	const char *keyend = keypos + orig_key.size();
613 	if (!check_tname_in_key(&keypos, keyend, tname)) {
614 	    throw Xapian::DatabaseCorruptError("Have invalid key writing to postlist");
615 	}
616 	Xapian::docid initial_did;
617 	if (!unpack_uint_preserving_sort(&keypos, keyend, &initial_did)) {
618 	    report_read_error(keypos);
619 	}
620 	string new_key;
621 	if (initial_did != first_did) {
622 	    /* The fiddlier case:
623 	     * Create a new tag with the correct key, and replace
624 	     * the old one.
625 	     */
626 	    new_key = ChertPostListTable::make_key(tname, first_did);
627 	    table->del(orig_key);
628 	} else {
629 	    new_key = orig_key;
630 	}
631 
632 	// ...and write the start of this chunk.
633 	tag = make_start_of_chunk(is_last_chunk, first_did, current_did);
634 
635 	tag += chunk;
636 	table->add(new_key, tag);
637     }
638 }
639 
640 /** Read the number of entries in the posting list.
641  *  This must only be called when *posptr is pointing to the start of
642  *  the first chunk of the posting list.
643  */
read_number_of_entries(const char ** posptr,const char * end,Xapian::doccount * number_of_entries_ptr,Xapian::termcount * collection_freq_ptr)644 void ChertPostList::read_number_of_entries(const char ** posptr,
645 				   const char * end,
646 				   Xapian::doccount * number_of_entries_ptr,
647 				   Xapian::termcount * collection_freq_ptr)
648 {
649     if (!unpack_uint(posptr, end, number_of_entries_ptr))
650 	report_read_error(*posptr);
651     if (!unpack_uint(posptr, end, collection_freq_ptr))
652 	report_read_error(*posptr);
653 }
654 
655 /** The format of a postlist is:
656  *
657  *  Split into chunks.  Key for first chunk is the termname (encoded as
658  *  length : name).  Key for subsequent chunks is the same, followed by the
659  *  document ID of the first document in the chunk (encoded as length of
660  *  representation in first byte, and then docid).
661  *
662  *  A chunk (except for the first chunk) contains:
663  *
664  *  1)  bool - true if this is the last chunk.
665  *  2)  difference between final docid in chunk and first docid.
666  *  3)  wdf for the first item.
667  *  4)  increment in docid to next item, followed by wdf for the item.
668  *  5)  (4) repeatedly.
669  *
670  *  The first chunk begins with the number of entries, the collection
671  *  frequency, then the docid of the first document, then has the header of a
672  *  standard chunk.
673  */
ChertPostList(Xapian::Internal::RefCntPtr<const ChertDatabase> this_db_,const string & term_,bool keep_reference)674 ChertPostList::ChertPostList(Xapian::Internal::RefCntPtr<const ChertDatabase> this_db_,
675 			     const string & term_,
676 			     bool keep_reference)
677 	: LeafPostList(term_),
678 	  this_db(keep_reference ? this_db_ : NULL),
679 	  have_started(false),
680 	  is_at_end(false),
681 	  cursor(this_db_->postlist_table.cursor_get())
682 {
683     LOGCALL_VOID(DB, "ChertPostList::ChertPostList", this_db_.get() | term_ | keep_reference);
684     string key = ChertPostListTable::make_key(term);
685     int found = cursor->find_entry(key);
686     if (!found) {
687 	LOGLINE(DB, "postlist for term not found");
688 	number_of_entries = 0;
689 	is_at_end = true;
690 	pos = 0;
691 	end = 0;
692 	first_did_in_chunk = 0;
693 	last_did_in_chunk = 0;
694 	return;
695     }
696     cursor->read_tag();
697     pos = cursor->current_tag.data();
698     end = pos + cursor->current_tag.size();
699 
700     did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
701     first_did_in_chunk = did;
702     last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
703 					    &is_last_chunk);
704     read_wdf(&pos, end, &wdf);
705     LOGLINE(DB, "Initial docid " << did);
706 }
707 
~ChertPostList()708 ChertPostList::~ChertPostList()
709 {
710     LOGCALL_VOID(DB, "ChertPostList::~ChertPostList", NO_ARGS);
711 }
712 
713 Xapian::termcount
get_doclength() const714 ChertPostList::get_doclength() const
715 {
716     LOGCALL(DB, Xapian::termcount, "ChertPostList::get_doclength", NO_ARGS);
717     Assert(have_started);
718     Assert(this_db.get());
719     RETURN(this_db->get_doclength(did));
720 }
721 
722 bool
next_in_chunk()723 ChertPostList::next_in_chunk()
724 {
725     LOGCALL(DB, bool, "ChertPostList::next_in_chunk", NO_ARGS);
726     if (pos == end) RETURN(false);
727 
728     read_did_increase(&pos, end, &did);
729     read_wdf(&pos, end, &wdf);
730 
731     // Either not at last doc in chunk, or pos == end, but not both.
732     Assert(did <= last_did_in_chunk);
733     Assert(did < last_did_in_chunk || pos == end);
734     Assert(pos != end || did == last_did_in_chunk);
735 
736     RETURN(true);
737 }
738 
739 void
next_chunk()740 ChertPostList::next_chunk()
741 {
742     LOGCALL_VOID(DB, "ChertPostList::next_chunk", NO_ARGS);
743     if (is_last_chunk) {
744 	is_at_end = true;
745 	return;
746     }
747 
748     cursor->next();
749     if (cursor->after_end()) {
750 	is_at_end = true;
751 	throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
752 				     term + "'");
753     }
754     const char * keypos = cursor->current_key.data();
755     const char * keyend = keypos + cursor->current_key.size();
756     // Check we're still in same postlist
757     if (!check_tname_in_key_lite(&keypos, keyend, term)) {
758 	is_at_end = true;
759 	throw Xapian::DatabaseCorruptError("Unexpected end of posting list for `" +
760 				     term + "'");
761     }
762 
763     Xapian::docid newdid;
764     if (!unpack_uint_preserving_sort(&keypos, keyend, &newdid)) {
765 	report_read_error(keypos);
766     }
767     if (newdid <= did) {
768 	throw Xapian::DatabaseCorruptError("Document ID in new chunk of postlist (" +
769 		str(newdid) +
770 		") is not greater than final document ID in previous chunk (" +
771 		str(did) + ")");
772     }
773     did = newdid;
774 
775     cursor->read_tag();
776     pos = cursor->current_tag.data();
777     end = pos + cursor->current_tag.size();
778 
779     first_did_in_chunk = did;
780     last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
781 					    &is_last_chunk);
782     read_wdf(&pos, end, &wdf);
783 }
784 
785 PositionList *
read_position_list()786 ChertPostList::read_position_list()
787 {
788     LOGCALL(DB, PositionList *, "ChertPostList::read_position_list", NO_ARGS);
789     Assert(this_db.get());
790     positionlist.read_data(&this_db->position_table, did, term);
791     RETURN(&positionlist);
792 }
793 
794 PositionList *
open_position_list() const795 ChertPostList::open_position_list() const
796 {
797     LOGCALL(DB, PositionList *, "ChertPostList::open_position_list", NO_ARGS);
798     Assert(this_db.get());
799     RETURN(new ChertPositionList(&this_db->position_table, did, term));
800 }
801 
802 PostList *
next(Xapian::weight w_min)803 ChertPostList::next(Xapian::weight w_min)
804 {
805     LOGCALL(DB, PostList *, "ChertPostList::next", w_min);
806     (void)w_min; // no warning
807 
808     if (!have_started) {
809 	have_started = true;
810     } else {
811 	if (!next_in_chunk()) next_chunk();
812     }
813 
814     if (is_at_end) {
815 	LOGLINE(DB, "Moved to end");
816     } else {
817 	LOGLINE(DB, "Moved to docid " << did << ", wdf = " << wdf);
818     }
819 
820     RETURN(NULL);
821 }
822 
823 bool
current_chunk_contains(Xapian::docid desired_did)824 ChertPostList::current_chunk_contains(Xapian::docid desired_did)
825 {
826     LOGCALL(DB, bool, "ChertPostList::current_chunk_contains", desired_did);
827     if (desired_did >= first_did_in_chunk &&
828 	desired_did <= last_did_in_chunk) {
829 	RETURN(true);
830     }
831     RETURN(false);
832 }
833 
834 void
move_to_chunk_containing(Xapian::docid desired_did)835 ChertPostList::move_to_chunk_containing(Xapian::docid desired_did)
836 {
837     LOGCALL_VOID(DB, "ChertPostList::move_to_chunk_containing", desired_did);
838     (void)cursor->find_entry(ChertPostListTable::make_key(term, desired_did));
839     Assert(!cursor->after_end());
840 
841     const char * keypos = cursor->current_key.data();
842     const char * keyend = keypos + cursor->current_key.size();
843     // Check we're still in same postlist
844     if (!check_tname_in_key_lite(&keypos, keyend, term)) {
845 	// This should only happen if the postlist doesn't exist at all.
846 	is_at_end = true;
847 	is_last_chunk = true;
848 	return;
849     }
850     is_at_end = false;
851 
852     cursor->read_tag();
853     pos = cursor->current_tag.data();
854     end = pos + cursor->current_tag.size();
855 
856     if (keypos == keyend) {
857 	// In first chunk
858 #ifdef XAPIAN_ASSERTIONS
859 	Xapian::doccount old_number_of_entries = number_of_entries;
860 	did = read_start_of_first_chunk(&pos, end, &number_of_entries, NULL);
861 	Assert(old_number_of_entries == number_of_entries);
862 #else
863 	did = read_start_of_first_chunk(&pos, end, NULL, NULL);
864 #endif
865     } else {
866 	// In normal chunk
867 	if (!unpack_uint_preserving_sort(&keypos, keyend, &did)) {
868 	    report_read_error(keypos);
869 	}
870     }
871 
872     first_did_in_chunk = did;
873     last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk,
874 					    &is_last_chunk);
875     read_wdf(&pos, end, &wdf);
876 
877     // Possible, since desired_did might be after end of this chunk and before
878     // the next.
879     if (desired_did > last_did_in_chunk) next_chunk();
880 }
881 
882 bool
move_forward_in_chunk_to_at_least(Xapian::docid desired_did)883 ChertPostList::move_forward_in_chunk_to_at_least(Xapian::docid desired_did)
884 {
885     LOGCALL(DB, bool, "ChertPostList::move_forward_in_chunk_to_at_least", desired_did);
886     if (did >= desired_did)
887 	RETURN(true);
888 
889     if (desired_did <= last_did_in_chunk) {
890 	while (pos != end) {
891 	    read_did_increase(&pos, end, &did);
892 	    if (did >= desired_did) {
893 		read_wdf(&pos, end, &wdf);
894 		RETURN(true);
895 	    }
896 	    // It's faster to just skip over the wdf than to decode it.
897 	    read_wdf(&pos, end, NULL);
898 	}
899 
900 	// If we hit the end of the chunk then last_did_in_chunk must be wrong.
901 	Assert(false);
902     }
903 
904     pos = end;
905     RETURN(false);
906 }
907 
908 PostList *
skip_to(Xapian::docid desired_did,Xapian::weight w_min)909 ChertPostList::skip_to(Xapian::docid desired_did, Xapian::weight w_min)
910 {
911     LOGCALL(DB, PostList *, "ChertPostList::skip_to", desired_did | w_min);
912     (void)w_min; // no warning
913     // We've started now - if we hadn't already, we're already positioned
914     // at start so there's no need to actually do anything.
915     have_started = true;
916 
917     // Don't skip back, and don't need to do anything if already there.
918     if (is_at_end || desired_did <= did) RETURN(NULL);
919 
920     // Move to correct chunk
921     if (!current_chunk_contains(desired_did)) {
922 	move_to_chunk_containing(desired_did);
923 	// Might be at_end now, so we need to check before trying to move
924 	// forward in chunk.
925 	if (is_at_end) RETURN(NULL);
926     }
927 
928     // Move to correct position in chunk
929     bool have_document = move_forward_in_chunk_to_at_least(desired_did);
930     (void)have_document;
931     Assert(have_document);
932 
933     if (is_at_end) {
934 	LOGLINE(DB, "Skipped to end");
935     } else {
936 	LOGLINE(DB, "Skipped to docid " << did << ", wdf = " << wdf);
937     }
938 
939     RETURN(NULL);
940 }
941 
942 // Used for doclens.
943 bool
jump_to(Xapian::docid desired_did)944 ChertPostList::jump_to(Xapian::docid desired_did)
945 {
946     LOGCALL(DB, bool, "ChertPostList::jump_to", desired_did);
947     // We've started now - if we hadn't already, we're already positioned
948     // at start so there's no need to actually do anything.
949     have_started = true;
950 
951     // If the list is empty, give up right away.
952     if (pos == 0) RETURN(false);
953 
954     // Move to correct chunk, or reload the current chunk to go backwards in it
955     // (FIXME: perhaps handle the latter case more elegantly, though it won't
956     // happen during sequential access which is most common).
957     if (is_at_end || !current_chunk_contains(desired_did) || desired_did < did) {
958 	// Clear is_at_end flag since we can rewind.
959 	is_at_end = false;
960 
961 	move_to_chunk_containing(desired_did);
962 	// Might be at_end now, so we need to check before trying to move
963 	// forward in chunk.
964 	if (is_at_end) RETURN(false);
965     }
966 
967     // Move to correct position in chunk.
968     if (!move_forward_in_chunk_to_at_least(desired_did)) RETURN(false);
969     RETURN(desired_did == did);
970 }
971 
972 string
get_description() const973 ChertPostList::get_description() const
974 {
975     return term + ":" + str(number_of_entries);
976 }
977 
978 // Returns the last did to allow in this chunk.
979 Xapian::docid
get_chunk(const string & tname,Xapian::docid did,bool adding,PostlistChunkReader ** from,PostlistChunkWriter ** to)980 ChertPostListTable::get_chunk(const string &tname,
981 	  Xapian::docid did, bool adding,
982 	  PostlistChunkReader ** from, PostlistChunkWriter **to)
983 {
984     LOGCALL(DB, Xapian::docid, "ChertPostListTable::get_chunk", tname | did | adding | from | to);
985     // Get chunk containing entry
986     string key = make_key(tname, did);
987 
988     // Find the right chunk
989     AutoPtr<ChertCursor> cursor(cursor_get());
990 
991     (void)cursor->find_entry(key);
992     Assert(!cursor->after_end());
993 
994     const char * keypos = cursor->current_key.data();
995     const char * keyend = keypos + cursor->current_key.size();
996 
997     if (!check_tname_in_key(&keypos, keyend, tname)) {
998 	// Postlist for this termname doesn't exist.
999 	if (!adding)
1000 	    throw Xapian::DatabaseCorruptError("Attempted to delete or modify an entry in a non-existent posting list for " + tname);
1001 
1002 	*from = NULL;
1003 	*to = new PostlistChunkWriter(string(), true, tname, true);
1004 	RETURN(Xapian::docid(-1));
1005     }
1006 
1007     // See if we're appending - if so we can shortcut by just copying
1008     // the data part of the chunk wholesale.
1009     bool is_first_chunk = (keypos == keyend);
1010     LOGVALUE(DB, is_first_chunk);
1011 
1012     cursor->read_tag();
1013     const char * pos = cursor->current_tag.data();
1014     const char * end = pos + cursor->current_tag.size();
1015     Xapian::docid first_did_in_chunk;
1016     if (is_first_chunk) {
1017 	first_did_in_chunk = read_start_of_first_chunk(&pos, end, NULL, NULL);
1018     } else {
1019 	if (!unpack_uint_preserving_sort(&keypos, keyend, &first_did_in_chunk)) {
1020 	    report_read_error(keypos);
1021 	}
1022     }
1023 
1024     bool is_last_chunk;
1025     Xapian::docid last_did_in_chunk;
1026     last_did_in_chunk = read_start_of_chunk(&pos, end, first_did_in_chunk, &is_last_chunk);
1027     *to = new PostlistChunkWriter(cursor->current_key, is_first_chunk, tname,
1028 				  is_last_chunk);
1029     if (did > last_did_in_chunk) {
1030 	// This is the shortcut.  Not very pretty, but I'll leave refactoring
1031 	// until I've a clearer picture of everything which needs to be done.
1032 	// (FIXME)
1033 	*from = NULL;
1034 	(*to)->raw_append(first_did_in_chunk, last_did_in_chunk,
1035 			  string(pos, end));
1036     } else {
1037 	*from = new PostlistChunkReader(first_did_in_chunk, string(pos, end));
1038     }
1039     if (is_last_chunk) RETURN(Xapian::docid(-1));
1040 
1041     // Find first did of next tag.
1042     cursor->next();
1043     if (cursor->after_end()) {
1044 	throw Xapian::DatabaseCorruptError("Expected another key but found none");
1045     }
1046     const char *kpos = cursor->current_key.data();
1047     const char *kend = kpos + cursor->current_key.size();
1048     if (!check_tname_in_key(&kpos, kend, tname)) {
1049 	throw Xapian::DatabaseCorruptError("Expected another key with the same term name but found a different one");
1050     }
1051 
1052     // Read the new first docid
1053     Xapian::docid first_did_of_next_chunk;
1054     if (!unpack_uint_preserving_sort(&kpos, kend, &first_did_of_next_chunk)) {
1055 	report_read_error(kpos);
1056     }
1057     RETURN(first_did_of_next_chunk - 1);
1058 }
1059 
1060 void
merge_changes(const map<string,map<Xapian::docid,pair<char,Xapian::termcount>>> & mod_plists,const map<Xapian::docid,Xapian::termcount> & doclens,const map<string,pair<Xapian::termcount_diff,Xapian::termcount_diff>> & freq_deltas)1061 ChertPostListTable::merge_changes(
1062     const map<string, map<Xapian::docid, pair<char, Xapian::termcount> > > & mod_plists,
1063     const map<Xapian::docid, Xapian::termcount> & doclens,
1064     const map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> > & freq_deltas)
1065 {
1066     LOGCALL_VOID(DB, "ChertPostListTable::merge_changes", mod_plists | doclens | freq_deltas);
1067 
1068     // The cursor in the doclen_pl will no longer be valid, so reset it.
1069     doclen_pl.reset(0);
1070 
1071     LOGVALUE(DB, doclens.size());
1072     if (!doclens.empty()) {
1073 	// Ensure there's a first chunk.
1074 	string current_key = make_key(string());
1075 	if (!key_exists(current_key)) {
1076 	    LOGLINE(DB, "Adding dummy first chunk");
1077 	    string newtag = make_start_of_first_chunk(0, 0, 0);
1078 	    newtag += make_start_of_chunk(true, 0, 0);
1079 	    add(current_key, newtag);
1080 	}
1081 
1082 	map<Xapian::docid, Xapian::termcount>::const_iterator j;
1083 	j = doclens.begin();
1084 	Assert(j != doclens.end()); // This case is caught above.
1085 
1086 	Xapian::docid max_did;
1087 	PostlistChunkReader *from;
1088 	PostlistChunkWriter *to;
1089 	max_did = get_chunk(string(), j->first, true, &from, &to);
1090 	LOGVALUE(DB, max_did);
1091 	for ( ; j != doclens.end(); ++j) {
1092 	    Xapian::docid did = j->first;
1093 
1094 next_doclen_chunk:
1095 	    LOGLINE(DB, "Updating doclens, did=" << did);
1096 	    if (from) while (!from->is_at_end()) {
1097 		Xapian::docid copy_did = from->get_docid();
1098 		if (copy_did >= did) {
1099 		    if (copy_did == did) from->next();
1100 		    break;
1101 		}
1102 		to->append(this, copy_did, from->get_wdf());
1103 		from->next();
1104 	    }
1105 	    if ((!from || from->is_at_end()) && did > max_did) {
1106 		delete from;
1107 		to->flush(this);
1108 		delete to;
1109 		max_did = get_chunk(string(), did, false, &from, &to);
1110 		goto next_doclen_chunk;
1111 	    }
1112 
1113 	    Xapian::termcount new_doclen = j->second;
1114 	    if (new_doclen != static_cast<Xapian::termcount>(-1)) {
1115 		to->append(this, did, new_doclen);
1116 	    }
1117 	}
1118 
1119 	if (from) {
1120 	    while (!from->is_at_end()) {
1121 		to->append(this, from->get_docid(), from->get_wdf());
1122 		from->next();
1123 	    }
1124 	    delete from;
1125 	}
1126 	to->flush(this);
1127 	delete to;
1128     }
1129 
1130     map<string, map<Xapian::docid, pair<char, Xapian::termcount> > >::const_iterator i;
1131     for (i = mod_plists.begin(); i != mod_plists.end(); ++i) {
1132 	if (i->second.empty()) continue;
1133 	string tname = i->first;
1134 	{
1135 	    // Rewrite the first chunk of this posting list with the updated
1136 	    // termfreq and collfreq.
1137 	    map<string, pair<Xapian::termcount_diff, Xapian::termcount_diff> >::const_iterator deltas = freq_deltas.find(tname);
1138 	    Assert(deltas != freq_deltas.end());
1139 
1140 	    string current_key = make_key(tname);
1141 	    string tag;
1142 	    (void)get_exact_entry(current_key, tag);
1143 
1144 	    // Read start of first chunk to get termfreq and collfreq.
1145 	    const char *pos = tag.data();
1146 	    const char *end = pos + tag.size();
1147 	    Xapian::doccount termfreq;
1148 	    Xapian::termcount collfreq;
1149 	    Xapian::docid firstdid, lastdid;
1150 	    bool islast;
1151 	    if (pos == end) {
1152 		termfreq = 0;
1153 		collfreq = 0;
1154 		firstdid = 0;
1155 		lastdid = 0;
1156 		islast = true;
1157 	    } else {
1158 		firstdid = read_start_of_first_chunk(&pos, end,
1159 						     &termfreq, &collfreq);
1160 		// Handle the generic start of chunk header.
1161 		lastdid = read_start_of_chunk(&pos, end, firstdid, &islast);
1162 	    }
1163 
1164 	    termfreq += deltas->second.first;
1165 	    if (termfreq == 0) {
1166 		// All postings deleted!  So we can shortcut by zapping the
1167 		// posting list.
1168 		if (islast) {
1169 		    // Only one entry for this posting list.
1170 		    del(current_key);
1171 		    continue;
1172 		}
1173 		MutableChertCursor cursor(this);
1174 		bool found = cursor.find_entry(current_key);
1175 		Assert(found);
1176 		if (!found) continue; // Reduce damage!
1177 		while (cursor.del()) {
1178 		    const char *kpos = cursor.current_key.data();
1179 		    const char *kend = kpos + cursor.current_key.size();
1180 		    if (!check_tname_in_key_lite(&kpos, kend, tname)) break;
1181 		}
1182 		continue;
1183 	    }
1184 	    collfreq += deltas->second.second;
1185 
1186 	    // Rewrite start of first chunk to update termfreq and collfreq.
1187 	    string newhdr = make_start_of_first_chunk(termfreq, collfreq, firstdid);
1188 	    newhdr += make_start_of_chunk(islast, firstdid, lastdid);
1189 	    if (pos == end) {
1190 		add(current_key, newhdr);
1191 	    } else {
1192 		Assert((size_t)(pos - tag.data()) <= tag.size());
1193 		tag.replace(0, pos - tag.data(), newhdr);
1194 		add(current_key, tag);
1195 	    }
1196 	}
1197 	map<Xapian::docid, pair<char, Xapian::termcount> >::const_iterator j;
1198 	j = i->second.begin();
1199 	Assert(j != i->second.end()); // This case is caught above.
1200 
1201 	Xapian::docid max_did;
1202 	PostlistChunkReader *from;
1203 	PostlistChunkWriter *to;
1204 	max_did = get_chunk(tname, j->first, j->second.first == 'A',
1205 			    &from, &to);
1206 	for ( ; j != i->second.end(); ++j) {
1207 	    Xapian::docid did = j->first;
1208 
1209 next_chunk:
1210 	    LOGLINE(DB, "Updating tname=" << tname << ", did=" << did);
1211 	    if (from) while (!from->is_at_end()) {
1212 		Xapian::docid copy_did = from->get_docid();
1213 		if (copy_did >= did) {
1214 		    if (copy_did == did) {
1215 			Assert(j->second.first != 'A');
1216 			from->next();
1217 		    }
1218 		    break;
1219 		}
1220 		to->append(this, copy_did, from->get_wdf());
1221 		from->next();
1222 	    }
1223 	    if ((!from || from->is_at_end()) && did > max_did) {
1224 		delete from;
1225 		to->flush(this);
1226 		delete to;
1227 		max_did = get_chunk(tname, did, false, &from, &to);
1228 		goto next_chunk;
1229 	    }
1230 
1231 	    if (j->second.first != 'D') {
1232 		Xapian::termcount new_wdf = j->second.second;
1233 		to->append(this, did, new_wdf);
1234 	    }
1235 	}
1236 
1237 	if (from) {
1238 	    while (!from->is_at_end()) {
1239 		to->append(this, from->get_docid(), from->get_wdf());
1240 		from->next();
1241 	    }
1242 	    delete from;
1243 	}
1244 	to->flush(this);
1245 	delete to;
1246     }
1247 }
1248