1 /* omdatabase.cc: External interface for running queries
2  *
3  * Copyright 1999,2000,2001 BrightStation PLC
4  * Copyright 2001,2002 Ananova Ltd
5  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2016 Olly Betts
6  * Copyright 2006,2008 Lemur Consulting Ltd
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU General Public License as
10  * published by the Free Software Foundation; either version 2 of the
11  * License, or (at your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
21  * USA
22  */
23 
24 #include <config.h>
25 
26 #include "autoptr.h"
27 
28 #include <xapian/error.h>
29 #include <xapian/positioniterator.h>
30 #include <xapian/postingiterator.h>
31 #include <xapian/termiterator.h>
32 #include <xapian/unicode.h>
33 
34 #include "omassert.h"
35 #include "debuglog.h"
36 #include "../backends/multi/multi_postlist.h"
37 #include "../backends/multi/multi_termlist.h"
38 #include "alltermslist.h"
39 #include "multialltermslist.h"
40 #include "multivaluelist.h"
41 #include "database.h"
42 #include "editdistance.h"
43 #include "ortermlist.h"
44 #include "internaltypes.h"
45 #include "noreturn.h"
46 
47 #include <algorithm>
48 #include <cstdlib> // For abs().
49 #include <cstring>
50 #include <vector>
51 
52 using namespace std;
53 
54 XAPIAN_NORETURN(static void docid_zero_invalid());
docid_zero_invalid()55 static void docid_zero_invalid()
56 {
57     throw Xapian::InvalidArgumentError("Document ID 0 is invalid");
58 }
59 
60 XAPIAN_NORETURN(static void no_subdatabases());
no_subdatabases()61 static void no_subdatabases()
62 {
63     throw Xapian::DocNotFoundError("No subdatabases");
64 }
65 
66 namespace Xapian {
67 
Database()68 Database::Database()
69 {
70     LOGCALL_CTOR(API, "Database", NO_ARGS);
71 }
72 
Database(Database::Internal * internal_)73 Database::Database(Database::Internal *internal_)
74 {
75     LOGCALL_CTOR(API, "Database", internal_);
76     Xapian::Internal::RefCntPtr<Database::Internal> newi(internal_);
77     internal.push_back(newi);
78 }
79 
// Copy constructor: the new object gets a copy of other's list of
// reference counted sub-database pointers.
Database::Database(const Database &other)
{
    LOGCALL_CTOR(API, "Database", other);
    internal = other.internal;
}
85 
86 void
operator =(const Database & other)87 Database::operator=(const Database &other)
88 {
89     LOGCALL_VOID(API, "Database::operator=", other);
90     internal = other.internal;
91 }
92 
~Database()93 Database::~Database()
94 {
95     LOGCALL_DTOR(API, "Database");
96 }
97 
98 void
reopen()99 Database::reopen()
100 {
101     LOGCALL_VOID(API, "Database::reopen", NO_ARGS);
102     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::iterator i;
103     for (i = internal.begin(); i != internal.end(); ++i) {
104 	(*i)->reopen();
105     }
106 }
107 
108 void
close()109 Database::close()
110 {
111     LOGCALL_VOID(API, "Database::close", NO_ARGS);
112     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::iterator i;
113     for (i = internal.begin(); i != internal.end(); ++i) {
114 	(*i)->close();
115     }
116 }
117 
118 void
add_database(const Database & database)119 Database::add_database(const Database & database)
120 {
121     LOGCALL_VOID(API, "Database::add_database", database);
122     if (this == &database) {
123 	LOGLINE(API, "Database added to itself");
124 	throw Xapian::InvalidArgumentError("Can't add a Database to itself");
125     }
126     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
127     for (i = database.internal.begin(); i != database.internal.end(); ++i) {
128 	internal.push_back(*i);
129     }
130 }
131 
132 PostingIterator
postlist_begin(const string & tname) const133 Database::postlist_begin(const string &tname) const
134 {
135     LOGCALL(API, PostingIterator, "Database::postlist_begin", tname);
136 
137     // Don't bother checking that the term exists first.  If it does, we
138     // just end up doing more work, and if it doesn't, we save very little
139     // work.
140 
141     // Handle the common case of a single database specially.
142     if (internal.size() == 1)
143 	RETURN(PostingIterator(internal[0]->open_post_list(tname)));
144 
145     if (rare(internal.size() == 0))
146 	RETURN(PostingIterator());
147 
148     vector<LeafPostList *> pls;
149     try {
150 	vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
151 	for (i = internal.begin(); i != internal.end(); ++i) {
152 	    pls.push_back((*i)->open_post_list(tname));
153 	    pls.back()->next();
154 	}
155 	Assert(pls.begin() != pls.end());
156     } catch (...) {
157 	vector<LeafPostList *>::iterator i;
158 	for (i = pls.begin(); i != pls.end(); ++i) {
159 	    delete *i;
160 	    *i = 0;
161 	}
162 	throw;
163     }
164 
165     RETURN(PostingIterator(new MultiPostList(pls, *this)));
166 }
167 
168 TermIterator
termlist_begin(Xapian::docid did) const169 Database::termlist_begin(Xapian::docid did) const
170 {
171     LOGCALL(API, TermIterator, "Database::termlist_begin", did);
172     if (did == 0)
173 	docid_zero_invalid();
174 
175     unsigned int multiplier = internal.size();
176     if (rare(multiplier == 0))
177 	no_subdatabases();
178     TermList *tl;
179     if (multiplier == 1) {
180 	// There's no need for the MultiTermList wrapper in the common case
181 	// where we're only dealing with a single database.
182 	tl = internal[0]->open_term_list(did);
183     } else {
184 	Assert(multiplier != 0);
185 	Xapian::doccount n = (did - 1) % multiplier; // which actual database
186 	Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
187 
188 	tl = new MultiTermList(internal[n]->open_term_list(m), *this, n);
189     }
190     RETURN(TermIterator(tl));
191 }
192 
193 TermIterator
allterms_begin() const194 Database::allterms_begin() const
195 {
196     return allterms_begin(string());
197 }
198 
199 TermIterator
allterms_begin(const std::string & prefix) const200 Database::allterms_begin(const std::string & prefix) const
201 {
202     LOGCALL(API, TermIterator, "Database::allterms_begin", NO_ARGS);
203     TermList * tl;
204     if (rare(internal.size() == 0)) {
205 	tl = NULL;
206     } else if (internal.size() == 1) {
207 	tl = internal[0]->open_allterms(prefix);
208     } else {
209 	tl = new MultiAllTermsList(internal, prefix);
210     }
211     RETURN(TermIterator(tl));
212 }
213 
214 bool
has_positions() const215 Database::has_positions() const
216 {
217     LOGCALL(API, bool, "Database::has_positions", NO_ARGS);
218     // If any sub-database has positions, the combined database does.
219     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
220     for (i = internal.begin(); i != internal.end(); ++i) {
221 	if ((*i)->has_positions()) RETURN(true);
222     }
223     RETURN(false);
224 }
225 
226 PositionIterator
positionlist_begin(Xapian::docid did,const string & tname) const227 Database::positionlist_begin(Xapian::docid did, const string &tname) const
228 {
229     LOGCALL(API, PositionIterator, "Database::positionlist_begin", did | tname);
230     if (tname.empty())
231 	throw InvalidArgumentError("Zero length terms are invalid");
232     if (did == 0)
233 	docid_zero_invalid();
234 
235     unsigned int multiplier = internal.size();
236     if (rare(multiplier == 0))
237 	no_subdatabases();
238     Xapian::doccount n = (did - 1) % multiplier; // which actual database
239     Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
240     RETURN(PositionIterator(internal[n]->open_position_list(m, tname)));
241 }
242 
243 Xapian::doccount
get_doccount() const244 Database::get_doccount() const
245 {
246     LOGCALL(API, Xapian::doccount, "Database::get_doccount", NO_ARGS);
247     Xapian::doccount docs = 0;
248     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
249     for (i = internal.begin(); i != internal.end(); ++i) {
250 	docs += (*i)->get_doccount();
251     }
252     RETURN(docs);
253 }
254 
255 Xapian::docid
get_lastdocid() const256 Database::get_lastdocid() const
257 {
258     LOGCALL(API, Xapian::docid, "Database::get_lastdocid", NO_ARGS);
259     Xapian::docid did = 0;
260 
261     unsigned int multiplier = internal.size();
262     for (Xapian::doccount i = 0; i < multiplier; ++i) {
263 	Xapian::docid did_i = internal[i]->get_lastdocid();
264 	if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
265     }
266     RETURN(did);
267 }
268 
269 Xapian::doclength
get_avlength() const270 Database::get_avlength() const
271 {
272     LOGCALL(API, Xapian::doclength, "Database::get_avlength", NO_ARGS);
273     Xapian::doccount docs = 0;
274     totlen_t totlen = 0;
275 
276     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
277     for (i = internal.begin(); i != internal.end(); ++i) {
278 	docs += (*i)->get_doccount();
279 	totlen += (*i)->get_total_length();
280     }
281     LOGLINE(UNKNOWN, "get_avlength() = " << totlen << " / " << docs <<
282 	    " (from " << internal.size() << " dbs)");
283 
284     if (docs == 0) RETURN(0.0);
285     RETURN(totlen / double(docs));
286 }
287 
288 Xapian::doccount
get_termfreq(const string & tname) const289 Database::get_termfreq(const string & tname) const
290 {
291     LOGCALL(API, Xapian::doccount, "Database::get_termfreq", tname);
292     if (tname.empty()) RETURN(get_doccount());
293 
294     Xapian::doccount tf = 0;
295     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
296     for (i = internal.begin(); i != internal.end(); ++i) {
297 	tf += (*i)->get_termfreq(tname);
298     }
299     RETURN(tf);
300 }
301 
302 Xapian::termcount
get_collection_freq(const string & tname) const303 Database::get_collection_freq(const string & tname) const
304 {
305     LOGCALL(API, Xapian::termcount, "Database::get_collection_freq", tname);
306     if (tname.empty()) RETURN(get_doccount());
307 
308     Xapian::termcount cf = 0;
309     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
310     for (i = internal.begin(); i != internal.end(); ++i) {
311 	cf += (*i)->get_collection_freq(tname);
312     }
313     RETURN(cf);
314 }
315 
316 Xapian::doccount
get_value_freq(Xapian::valueno slot) const317 Database::get_value_freq(Xapian::valueno slot) const
318 {
319     LOGCALL(API, Xapian::doccount, "Database::get_value_freq", slot);
320 
321     Xapian::doccount vf = 0;
322     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
323     for (i = internal.begin(); i != internal.end(); ++i) {
324 	vf += (*i)->get_value_freq(slot);
325     }
326     RETURN(vf);
327 }
328 
329 string
get_value_lower_bound(Xapian::valueno slot) const330 Database::get_value_lower_bound(Xapian::valueno slot) const
331 {
332     LOGCALL(API, string, "Database::get_value_lower_bound", slot);
333 
334     if (rare(internal.empty())) RETURN(string());
335 
336     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
337     i = internal.begin();
338     string full_lb = (*i)->get_value_lower_bound(slot);
339     while (++i != internal.end()) {
340 	string lb = (*i)->get_value_lower_bound(slot);
341 	if (lb < full_lb) full_lb = lb;
342     }
343     RETURN(full_lb);
344 }
345 
346 std::string
get_value_upper_bound(Xapian::valueno slot) const347 Database::get_value_upper_bound(Xapian::valueno slot) const
348 {
349     LOGCALL(API, std::string, "Database::get_value_upper_bound", slot);
350 
351     std::string full_ub;
352     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
353     for (i = internal.begin(); i != internal.end(); ++i) {
354 	std::string ub = (*i)->get_value_upper_bound(slot);
355 	if (ub > full_ub)
356 	    full_ub = ub;
357     }
358     RETURN(full_ub);
359 }
360 
361 Xapian::termcount
get_doclength_lower_bound() const362 Database::get_doclength_lower_bound() const
363 {
364     LOGCALL(API, Xapian::termcount, "Database::get_doclength_lower_bound", NO_ARGS);
365 
366     if (rare(internal.empty())) RETURN(0);
367 
368     Xapian::termcount full_lb = 0;
369     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
370     for (i = internal.begin(); i != internal.end(); ++i) {
371 	// Skip sub-databases which are empty or only contain documents with
372 	// doclen==0.
373 	if ((*i)->get_total_length() != 0) {
374 	    Xapian::termcount lb = (*i)->get_doclength_lower_bound();
375 	    if (full_lb == 0 || lb < full_lb) full_lb = lb;
376 	}
377     }
378     RETURN(full_lb);
379 }
380 
381 Xapian::termcount
get_doclength_upper_bound() const382 Database::get_doclength_upper_bound() const
383 {
384     LOGCALL(API, Xapian::termcount, "Database::get_doclength_upper_bound", NO_ARGS);
385 
386     Xapian::termcount full_ub = 0;
387     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
388     for (i = internal.begin(); i != internal.end(); ++i) {
389 	Xapian::termcount ub = (*i)->get_doclength_upper_bound();
390 	if (ub > full_ub) full_ub = ub;
391     }
392     RETURN(full_ub);
393 }
394 
395 Xapian::termcount
get_wdf_upper_bound(const string & term) const396 Database::get_wdf_upper_bound(const string & term) const
397 {
398     LOGCALL(API, Xapian::termcount, "Database::get_wdf_upper_bound", term);
399     if (term.empty()) RETURN(0);
400 
401     Xapian::termcount full_ub = 0;
402     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
403     for (i = internal.begin(); i != internal.end(); ++i) {
404 	Xapian::termcount ub = (*i)->get_wdf_upper_bound(term);
405 	if (ub > full_ub) full_ub = ub;
406     }
407     RETURN(full_ub);
408 }
409 
410 ValueIterator
valuestream_begin(Xapian::valueno slot) const411 Database::valuestream_begin(Xapian::valueno slot) const
412 {
413     LOGCALL(API, ValueIterator, "Database::valuestream_begin", slot);
414     if (internal.size() == 0)
415        	RETURN(ValueIterator());
416     if (internal.size() != 1)
417 	RETURN(ValueIterator(new MultiValueList(internal, slot)));
418     RETURN(ValueIterator(internal[0]->open_value_list(slot)));
419 }
420 
421 Xapian::termcount
get_doclength(Xapian::docid did) const422 Database::get_doclength(Xapian::docid did) const
423 {
424     LOGCALL(API, Xapian::termcount, "Database::get_doclength", did);
425     if (did == 0)
426 	docid_zero_invalid();
427 
428     unsigned int multiplier = internal.size();
429     if (rare(multiplier == 0))
430 	no_subdatabases();
431     Xapian::doccount n = (did - 1) % multiplier; // which actual database
432     Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
433     RETURN(internal[n]->get_doclength(m));
434 }
435 
436 Document
get_document(Xapian::docid did) const437 Database::get_document(Xapian::docid did) const
438 {
439     LOGCALL(API, Document, "Database::get_document", did);
440     if (did == 0)
441 	docid_zero_invalid();
442 
443     unsigned int multiplier = internal.size();
444     if (rare(multiplier == 0))
445 	no_subdatabases();
446     Xapian::doccount n = (did - 1) % multiplier; // which actual database
447     Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
448 
449     // Open non-lazily so we throw DocNotFoundError if the doc doesn't exist.
450     RETURN(Document(internal[n]->open_document(m, false)));
451 }
452 
453 Document::Internal *
get_document_lazily(Xapian::docid did) const454 Database::get_document_lazily(Xapian::docid did) const
455 {
456     LOGCALL(DB, Document::Internal *, "Database::get_document_lazily", did);
457     if (did == 0)
458 	docid_zero_invalid();
459 
460     unsigned int multiplier = internal.size();
461     Assert(multiplier != 0);
462     Xapian::doccount n = (did - 1) % multiplier; // which actual database
463     Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
464 
465     RETURN(internal[n]->open_document(m, true));
466 }
467 
468 bool
term_exists(const string & tname) const469 Database::term_exists(const string & tname) const
470 {
471     LOGCALL(API, bool, "Database::term_exists", tname);
472     if (tname.empty()) {
473 	RETURN(get_doccount() != 0);
474     }
475     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
476     for (i = internal.begin(); i != internal.end(); ++i) {
477 	if ((*i)->term_exists(tname)) RETURN(true);
478     }
479     RETURN(false);
480 }
481 
482 void
keep_alive()483 Database::keep_alive()
484 {
485     LOGCALL_VOID(API, "Database::keep_alive", NO_ARGS);
486     vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
487     for (i = internal.begin(); i != internal.end(); ++i) {
488 	(*i)->keep_alive();
489     }
490 }
491 
492 string
get_description() const493 Database::get_description() const
494 {
495     /// \todo display contents of the database
496     return "Database()";
497 }
498 
499 // We sum the character frequency histogram absolute differences to compute a
500 // lower bound on the edit distance.  Rather than counting each Unicode code
501 // point uniquely, we use an array with VEC_SIZE elements and tally code points
502 // modulo VEC_SIZE which can only reduce the bound we calculate.
503 //
// There will be a trade-off between how good the bound is and how large an
// array is used (a larger array takes more time to clear and sum over).  The
506 // value 64 is somewhat arbitrary - it works as well as 128 for the testsuite
507 // but that may not reflect real world performance.  FIXME: profile and tune.
508 
#define VEC_SIZE 64

// Compute a lower bound on the edit distance between two sequences of code
// points, a and b, from their character frequency histograms.
//
// Code points are tallied modulo VEC_SIZE: each element of a increments
// its bucket and each element of b decrements it, so what's left in a
// bucket is the difference in counts between the two sequences.
static int
freq_edit_lower_bound(const std::vector<unsigned> & a,
		      const std::vector<unsigned> & b)
{
    int histogram[VEC_SIZE] = { 0 };

    for (size_t k = 0; k != a.size(); ++k) {
	++histogram[a[k] % VEC_SIZE];
    }
    for (size_t k = 0; k != b.size(); ++k) {
	--histogram[b[k] % VEC_SIZE];
    }

    unsigned int total = 0;
    for (size_t bucket = 0; bucket != VEC_SIZE; ++bucket) {
	total += std::abs(histogram[bucket]);
    }

    // Each insertion or deletion adds at most 1 to total.  Each transposition
    // doesn't change it at all.  But each substitution can change it by 2 so
    // we need to divide it by 2.  Rounding up is OK, since the odd change must
    // be due to an actual edit.
    return (total + 1) / 2;
}
533 
534 // Word must have a trigram score at least this close to the best score seen
535 // so far.
536 #define TRIGRAM_SCORE_THRESHOLD 2
537 
538 string
get_spelling_suggestion(const string & word,unsigned max_edit_distance) const539 Database::get_spelling_suggestion(const string &word,
540 				  unsigned max_edit_distance) const
541 {
542     LOGCALL(API, string, "Database::get_spelling_suggestion", word | max_edit_distance);
543     if (word.size() <= 1) return string();
544     AutoPtr<TermList> merger;
545     for (size_t i = 0; i < internal.size(); ++i) {
546 	TermList * tl = internal[i]->open_spelling_termlist(word);
547 	LOGLINE(SPELLING, "Sub db " << i << " tl = " << (void*)tl);
548 	if (tl) {
549 	    if (merger.get()) {
550 		merger.reset(new OrTermList(merger.release(), tl));
551 	    } else {
552 		merger.reset(tl);
553 	    }
554 	}
555     }
556     if (!merger.get()) RETURN(string());
557 
558     // Convert word to UTF-32.
559 #if ! defined __SUNPRO_CC || __SUNPRO_CC - 0 >= 0x580
560     // Extra brackets needed to avoid this being misparsed as a function
561     // prototype.
562     vector<unsigned> utf32_word((Utf8Iterator(word)), Utf8Iterator());
563 #else
564     // Older versions of Sun's C++ compiler need this workaround, but 5.8
565     // doesn't.  Unsure of the exact version it was fixed in.
566     vector<unsigned> utf32_word;
567     for (Utf8Iterator sunpro_it(word); sunpro_it != Utf8Iterator(); ++sunpro_it) {
568 	utf32_word.push_back(*sunpro_it);
569     }
570 #endif
571 
572     vector<unsigned> utf32_term;
573 
574     Xapian::termcount best = 1;
575     string result;
576     int edist_best = max_edit_distance;
577     Xapian::doccount freq_best = 0;
578     Xapian::doccount freq_exact = 0;
579     while (true) {
580 	TermList *ret = merger->next();
581 	if (ret) merger.reset(ret);
582 
583 	if (merger->at_end()) break;
584 
585 	string term = merger->get_termname();
586 	Xapian::termcount score = merger->get_wdf();
587 
588 	LOGLINE(SPELLING, "Term \"" << term << "\" ngram score " << score);
589 	if (score + TRIGRAM_SCORE_THRESHOLD >= best) {
590 	    if (score > best) best = score;
591 
592 	    // There's no point considering a word where the difference
593 	    // in length is greater than the smallest number of edits we've
594 	    // found so far.
595 
596 	    // First check the length of the encoded UTF-8 version of term.
597 	    // Each UTF-32 character is 1-4 bytes in UTF-8.
598 	    if (abs(long(term.size()) - long(word.size())) > edist_best * 4) {
599 		LOGLINE(SPELLING, "Lengths much too different");
600 		continue;
601 	    }
602 
603 	    // Now convert to UTF-32, and compare the true lengths more
604 	    // strictly.
605 	    utf32_term.assign(Utf8Iterator(term), Utf8Iterator());
606 
607 	    if (abs(long(utf32_term.size()) - long(utf32_word.size()))
608 		    > edist_best) {
609 		LOGLINE(SPELLING, "Lengths too different");
610 		continue;
611 	    }
612 
613 	    if (freq_edit_lower_bound(utf32_term, utf32_word) > edist_best) {
614 		LOGLINE(SPELLING, "Rejected by character frequency test");
615 		continue;
616 	    }
617 
618 	    int edist = edit_distance_unsigned(&utf32_term[0],
619 					       int(utf32_term.size()),
620 					       &utf32_word[0],
621 					       int(utf32_word.size()),
622 					       edist_best);
623 	    LOGLINE(SPELLING, "Edit distance " << edist);
624 
625 	    if (edist <= edist_best) {
626 		Xapian::doccount freq = 0;
627 		for (size_t j = 0; j < internal.size(); ++j)
628 		    freq += internal[j]->get_spelling_frequency(term);
629 
630 		LOGLINE(SPELLING, "Freq " << freq << " best " << freq_best);
631 		// Even if we have an exact match, there may be a much more
632 		// frequent potential correction which will still be
633 		// interesting.
634 		if (edist == 0) {
635 		    freq_exact = freq;
636 		    continue;
637 		}
638 
639 		if (edist < edist_best || freq > freq_best) {
640 		    LOGLINE(SPELLING, "Best so far: \"" << term <<
641 				      "\" edist " << edist << " freq " << freq);
642 		    result = term;
643 		    edist_best = edist;
644 		    freq_best = freq;
645 		}
646 	    }
647 	}
648     }
649     if (freq_best < freq_exact)
650 	RETURN(string());
651     RETURN(result);
652 }
653 
654 TermIterator
spellings_begin() const655 Database::spellings_begin() const
656 {
657     LOGCALL(API, TermIterator, "Database::spellings_begin", NO_ARGS);
658     AutoPtr<TermList> merger;
659     for (size_t i = 0; i < internal.size(); ++i) {
660 	TermList * tl = internal[i]->open_spelling_wordlist();
661 	if (tl) {
662 	    if (merger.get()) {
663 		merger.reset(new FreqAdderOrTermList(merger.release(), tl));
664 	    } else {
665 		merger.reset(tl);
666 	    }
667 	}
668     }
669     RETURN(TermIterator(merger.release()));
670 }
671 
672 TermIterator
synonyms_begin(const std::string & term) const673 Database::synonyms_begin(const std::string &term) const
674 {
675     LOGCALL(API, TermIterator, "Database::synonyms_begin", term);
676     AutoPtr<TermList> merger;
677     for (size_t i = 0; i < internal.size(); ++i) {
678 	TermList * tl = internal[i]->open_synonym_termlist(term);
679 	if (tl) {
680 	    if (merger.get()) {
681 		merger.reset(new OrTermList(merger.release(), tl));
682 	    } else {
683 		merger.reset(tl);
684 	    }
685 	}
686     }
687     RETURN(TermIterator(merger.release()));
688 }
689 
690 TermIterator
synonym_keys_begin(const std::string & prefix) const691 Database::synonym_keys_begin(const std::string &prefix) const
692 {
693     LOGCALL(API, TermIterator, "Database::synonyms_keys_begin", prefix);
694     AutoPtr<TermList> merger;
695     for (size_t i = 0; i < internal.size(); ++i) {
696 	TermList * tl = internal[i]->open_synonym_keylist(prefix);
697 	if (tl) {
698 	    if (merger.get()) {
699 		merger.reset(new OrTermList(merger.release(), tl));
700 	    } else {
701 		merger.reset(tl);
702 	    }
703 	}
704     }
705     RETURN(TermIterator(merger.release()));
706 }
707 
708 string
get_metadata(const string & key) const709 Database::get_metadata(const string & key) const
710 {
711     LOGCALL(API, string, "Database::get_metadata", key);
712     if (key.empty())
713 	throw InvalidArgumentError("Empty metadata keys are invalid");
714     if (internal.empty()) RETURN(std::string());
715     RETURN(internal[0]->get_metadata(key));
716 }
717 
718 Xapian::TermIterator
metadata_keys_begin(const std::string & prefix) const719 Database::metadata_keys_begin(const std::string &prefix) const
720 {
721     LOGCALL(API, Xapian::TermIterator, "Database::metadata_keys_begin", NO_ARGS);
722     if (internal.empty()) RETURN(TermIterator());
723     RETURN(TermIterator(internal[0]->open_metadata_keylist(prefix)));
724 }
725 
726 std::string
get_uuid() const727 Database::get_uuid() const
728 {
729     LOGCALL(API, std::string, "Database::get_uuid", NO_ARGS);
730     string uuid;
731     for (size_t i = 0; i < internal.size(); ++i) {
732 	string sub_uuid = internal[i]->get_uuid();
733 	// If any of the sub-databases have no uuid, we can't make a uuid for
734 	// the combined database.
735 	if (sub_uuid.empty())
736 	    RETURN(sub_uuid);
737 	if (!uuid.empty()) uuid += ':';
738 	uuid += sub_uuid;
739     }
740     RETURN(uuid);
741 }
742 
743 ///////////////////////////////////////////////////////////////////////////
744 
WritableDatabase()745 WritableDatabase::WritableDatabase() : Database()
746 {
747     LOGCALL_CTOR(API, "WritableDatabase", NO_ARGS);
748 }
749 
WritableDatabase(Database::Internal * internal_)750 WritableDatabase::WritableDatabase(Database::Internal *internal_)
751 	: Database(internal_)
752 {
753     LOGCALL_CTOR(API, "WritableDatabase", internal_);
754 }
755 
// Copy constructor: copying is delegated to the Database base class.
WritableDatabase::WritableDatabase(const WritableDatabase &other)
	: Database(other)
{
    LOGCALL_CTOR(API, "WritableDatabase", other);
}
761 
762 void
operator =(const WritableDatabase & other)763 WritableDatabase::operator=(const WritableDatabase &other)
764 {
765     LOGCALL_VOID(API, "WritableDatabase::operator=", other);
766     Database::operator=(other);
767 }
768 
~WritableDatabase()769 WritableDatabase::~WritableDatabase()
770 {
771     LOGCALL_DTOR(API, "WritableDatabase");
772 }
773 
774 XAPIAN_NORETURN(static void only_one_subdatabase_allowed());
only_one_subdatabase_allowed()775 static void only_one_subdatabase_allowed()
776 {
777     throw Xapian::InvalidOperationError("WritableDatabase needs exactly one subdatabase");
778 }
779 
780 void
commit()781 WritableDatabase::commit()
782 {
783     LOGCALL_VOID(API, "WritableDatabase::commit", NO_ARGS);
784     if (internal.size() != 1) only_one_subdatabase_allowed();
785     internal[0]->commit();
786 }
787 
788 void
begin_transaction(bool flushed)789 WritableDatabase::begin_transaction(bool flushed)
790 {
791     LOGCALL_VOID(API, "WritableDatabase::begin_transaction", NO_ARGS);
792     if (internal.size() != 1) only_one_subdatabase_allowed();
793     internal[0]->begin_transaction(flushed);
794 }
795 
796 void
commit_transaction()797 WritableDatabase::commit_transaction()
798 {
799     LOGCALL_VOID(API, "WritableDatabase::commit_transaction", NO_ARGS);
800     if (internal.size() != 1) only_one_subdatabase_allowed();
801     internal[0]->commit_transaction();
802 }
803 
804 void
cancel_transaction()805 WritableDatabase::cancel_transaction()
806 {
807     LOGCALL_VOID(API, "WritableDatabase::cancel_transaction", NO_ARGS);
808     if (internal.size() != 1) only_one_subdatabase_allowed();
809     internal[0]->cancel_transaction();
810 }
811 
812 
813 Xapian::docid
add_document(const Document & document)814 WritableDatabase::add_document(const Document & document)
815 {
816     LOGCALL(API, Xapian::docid, "WritableDatabase::add_document", document);
817     if (internal.size() != 1) only_one_subdatabase_allowed();
818     RETURN(internal[0]->add_document(document));
819 }
820 
821 void
delete_document(Xapian::docid did)822 WritableDatabase::delete_document(Xapian::docid did)
823 {
824     LOGCALL_VOID(API, "WritableDatabase::delete_document", did);
825     if (internal.size() != 1) only_one_subdatabase_allowed();
826     if (did == 0)
827 	docid_zero_invalid();
828     internal[0]->delete_document(did);
829 }
830 
831 void
delete_document(const std::string & unique_term)832 WritableDatabase::delete_document(const std::string & unique_term)
833 {
834     LOGCALL_VOID(API, "WritableDatabase::delete_document", unique_term);
835     if (internal.size() != 1) only_one_subdatabase_allowed();
836     if (unique_term.empty())
837 	throw InvalidArgumentError("Empty termnames are invalid");
838     internal[0]->delete_document(unique_term);
839 }
840 
841 void
replace_document(Xapian::docid did,const Document & document)842 WritableDatabase::replace_document(Xapian::docid did, const Document & document)
843 {
844     LOGCALL_VOID(API, "WritableDatabase::replace_document", did | document);
845     if (internal.size() != 1) only_one_subdatabase_allowed();
846     if (did == 0)
847 	docid_zero_invalid();
848     internal[0]->replace_document(did, document);
849 }
850 
851 Xapian::docid
replace_document(const std::string & unique_term,const Document & document)852 WritableDatabase::replace_document(const std::string & unique_term,
853 				   const Document & document)
854 {
855     LOGCALL(API, Xapian::docid, "WritableDatabase::replace_document", unique_term | document);
856     if (internal.size() != 1) only_one_subdatabase_allowed();
857     if (unique_term.empty())
858 	throw InvalidArgumentError("Empty termnames are invalid");
859     RETURN(internal[0]->replace_document(unique_term, document));
860 }
861 
862 void
add_spelling(const std::string & word,Xapian::termcount freqinc) const863 WritableDatabase::add_spelling(const std::string & word,
864 			       Xapian::termcount freqinc) const
865 {
866     LOGCALL_VOID(API, "WritableDatabase::add_spelling", word | freqinc);
867     if (internal.size() != 1) only_one_subdatabase_allowed();
868     internal[0]->add_spelling(word, freqinc);
869 }
870 
871 void
remove_spelling(const std::string & word,Xapian::termcount freqdec) const872 WritableDatabase::remove_spelling(const std::string & word,
873 				  Xapian::termcount freqdec) const
874 {
875     LOGCALL_VOID(API, "WritableDatabase::remove_spelling", word | freqdec);
876     if (internal.size() != 1) only_one_subdatabase_allowed();
877     internal[0]->remove_spelling(word, freqdec);
878 }
879 
880 void
add_synonym(const std::string & term,const std::string & synonym) const881 WritableDatabase::add_synonym(const std::string & term,
882 			      const std::string & synonym) const
883 {
884     LOGCALL_VOID(API, "WritableDatabase::add_synonym", term | synonym);
885     if (internal.size() != 1) only_one_subdatabase_allowed();
886     internal[0]->add_synonym(term, synonym);
887 }
888 
889 void
remove_synonym(const std::string & term,const std::string & synonym) const890 WritableDatabase::remove_synonym(const std::string & term,
891 				 const std::string & synonym) const
892 {
893     LOGCALL_VOID(API, "WritableDatabase::remove_synonym", term | synonym);
894     if (internal.size() != 1) only_one_subdatabase_allowed();
895     internal[0]->remove_synonym(term, synonym);
896 }
897 
898 void
clear_synonyms(const std::string & term) const899 WritableDatabase::clear_synonyms(const std::string & term) const
900 {
901     LOGCALL_VOID(API, "WritableDatabase::clear_synonyms", term);
902     if (internal.size() != 1) only_one_subdatabase_allowed();
903     internal[0]->clear_synonyms(term);
904 }
905 
906 void
set_metadata(const string & key,const string & value)907 WritableDatabase::set_metadata(const string & key, const string & value)
908 {
909     LOGCALL_VOID(API, "WritableDatabase::set_metadata", key | value);
910     if (internal.size() != 1) only_one_subdatabase_allowed();
911     if (key.empty())
912 	throw InvalidArgumentError("Empty metadata keys are invalid");
913     internal[0]->set_metadata(key, value);
914 }
915 
916 string
get_description() const917 WritableDatabase::get_description() const
918 {
919     /// \todo display contents of the writable database
920     return "WritableDatabase()";
921 }
922 
923 }
924