1 /* omdatabase.cc: External interface for running queries
2 *
3 * Copyright 1999,2000,2001 BrightStation PLC
4 * Copyright 2001,2002 Ananova Ltd
5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2016 Olly Betts
6 * Copyright 2006,2008 Lemur Consulting Ltd
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 * USA
22 */
23
24 #include <config.h>
25
26 #include "autoptr.h"
27
28 #include <xapian/error.h>
29 #include <xapian/positioniterator.h>
30 #include <xapian/postingiterator.h>
31 #include <xapian/termiterator.h>
32 #include <xapian/unicode.h>
33
34 #include "omassert.h"
35 #include "debuglog.h"
36 #include "../backends/multi/multi_postlist.h"
37 #include "../backends/multi/multi_termlist.h"
38 #include "alltermslist.h"
39 #include "multialltermslist.h"
40 #include "multivaluelist.h"
41 #include "database.h"
42 #include "editdistance.h"
43 #include "ortermlist.h"
44 #include "internaltypes.h"
45 #include "noreturn.h"
46
47 #include <algorithm>
48 #include <cstdlib> // For abs().
49 #include <cstring>
50 #include <vector>
51
52 using namespace std;
53
54 XAPIAN_NORETURN(static void docid_zero_invalid());
docid_zero_invalid()55 static void docid_zero_invalid()
56 {
57 throw Xapian::InvalidArgumentError("Document ID 0 is invalid");
58 }
59
60 XAPIAN_NORETURN(static void no_subdatabases());
no_subdatabases()61 static void no_subdatabases()
62 {
63 throw Xapian::DocNotFoundError("No subdatabases");
64 }
65
66 namespace Xapian {
67
Database()68 Database::Database()
69 {
70 LOGCALL_CTOR(API, "Database", NO_ARGS);
71 }
72
Database(Database::Internal * internal_)73 Database::Database(Database::Internal *internal_)
74 {
75 LOGCALL_CTOR(API, "Database", internal_);
76 Xapian::Internal::RefCntPtr<Database::Internal> newi(internal_);
77 internal.push_back(newi);
78 }
79
/** Copy constructor.
 *
 *  Copies the list of reference counted sub-database pointers from
 *  @a other.
 */
Database::Database(const Database &other)
{
    LOGCALL_CTOR(API, "Database", other);
    // Assigned in the body (rather than an initialiser list) so the
    // constructor call is logged before the copy happens.
    internal = other.internal;
}
85
86 void
operator =(const Database & other)87 Database::operator=(const Database &other)
88 {
89 LOGCALL_VOID(API, "Database::operator=", other);
90 internal = other.internal;
91 }
92
~Database()93 Database::~Database()
94 {
95 LOGCALL_DTOR(API, "Database");
96 }
97
98 void
reopen()99 Database::reopen()
100 {
101 LOGCALL_VOID(API, "Database::reopen", NO_ARGS);
102 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::iterator i;
103 for (i = internal.begin(); i != internal.end(); ++i) {
104 (*i)->reopen();
105 }
106 }
107
108 void
close()109 Database::close()
110 {
111 LOGCALL_VOID(API, "Database::close", NO_ARGS);
112 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::iterator i;
113 for (i = internal.begin(); i != internal.end(); ++i) {
114 (*i)->close();
115 }
116 }
117
118 void
add_database(const Database & database)119 Database::add_database(const Database & database)
120 {
121 LOGCALL_VOID(API, "Database::add_database", database);
122 if (this == &database) {
123 LOGLINE(API, "Database added to itself");
124 throw Xapian::InvalidArgumentError("Can't add a Database to itself");
125 }
126 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
127 for (i = database.internal.begin(); i != database.internal.end(); ++i) {
128 internal.push_back(*i);
129 }
130 }
131
132 PostingIterator
postlist_begin(const string & tname) const133 Database::postlist_begin(const string &tname) const
134 {
135 LOGCALL(API, PostingIterator, "Database::postlist_begin", tname);
136
137 // Don't bother checking that the term exists first. If it does, we
138 // just end up doing more work, and if it doesn't, we save very little
139 // work.
140
141 // Handle the common case of a single database specially.
142 if (internal.size() == 1)
143 RETURN(PostingIterator(internal[0]->open_post_list(tname)));
144
145 if (rare(internal.size() == 0))
146 RETURN(PostingIterator());
147
148 vector<LeafPostList *> pls;
149 try {
150 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
151 for (i = internal.begin(); i != internal.end(); ++i) {
152 pls.push_back((*i)->open_post_list(tname));
153 pls.back()->next();
154 }
155 Assert(pls.begin() != pls.end());
156 } catch (...) {
157 vector<LeafPostList *>::iterator i;
158 for (i = pls.begin(); i != pls.end(); ++i) {
159 delete *i;
160 *i = 0;
161 }
162 throw;
163 }
164
165 RETURN(PostingIterator(new MultiPostList(pls, *this)));
166 }
167
168 TermIterator
termlist_begin(Xapian::docid did) const169 Database::termlist_begin(Xapian::docid did) const
170 {
171 LOGCALL(API, TermIterator, "Database::termlist_begin", did);
172 if (did == 0)
173 docid_zero_invalid();
174
175 unsigned int multiplier = internal.size();
176 if (rare(multiplier == 0))
177 no_subdatabases();
178 TermList *tl;
179 if (multiplier == 1) {
180 // There's no need for the MultiTermList wrapper in the common case
181 // where we're only dealing with a single database.
182 tl = internal[0]->open_term_list(did);
183 } else {
184 Assert(multiplier != 0);
185 Xapian::doccount n = (did - 1) % multiplier; // which actual database
186 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
187
188 tl = new MultiTermList(internal[n]->open_term_list(m), *this, n);
189 }
190 RETURN(TermIterator(tl));
191 }
192
193 TermIterator
allterms_begin() const194 Database::allterms_begin() const
195 {
196 return allterms_begin(string());
197 }
198
199 TermIterator
allterms_begin(const std::string & prefix) const200 Database::allterms_begin(const std::string & prefix) const
201 {
202 LOGCALL(API, TermIterator, "Database::allterms_begin", NO_ARGS);
203 TermList * tl;
204 if (rare(internal.size() == 0)) {
205 tl = NULL;
206 } else if (internal.size() == 1) {
207 tl = internal[0]->open_allterms(prefix);
208 } else {
209 tl = new MultiAllTermsList(internal, prefix);
210 }
211 RETURN(TermIterator(tl));
212 }
213
214 bool
has_positions() const215 Database::has_positions() const
216 {
217 LOGCALL(API, bool, "Database::has_positions", NO_ARGS);
218 // If any sub-database has positions, the combined database does.
219 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
220 for (i = internal.begin(); i != internal.end(); ++i) {
221 if ((*i)->has_positions()) RETURN(true);
222 }
223 RETURN(false);
224 }
225
226 PositionIterator
positionlist_begin(Xapian::docid did,const string & tname) const227 Database::positionlist_begin(Xapian::docid did, const string &tname) const
228 {
229 LOGCALL(API, PositionIterator, "Database::positionlist_begin", did | tname);
230 if (tname.empty())
231 throw InvalidArgumentError("Zero length terms are invalid");
232 if (did == 0)
233 docid_zero_invalid();
234
235 unsigned int multiplier = internal.size();
236 if (rare(multiplier == 0))
237 no_subdatabases();
238 Xapian::doccount n = (did - 1) % multiplier; // which actual database
239 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
240 RETURN(PositionIterator(internal[n]->open_position_list(m, tname)));
241 }
242
243 Xapian::doccount
get_doccount() const244 Database::get_doccount() const
245 {
246 LOGCALL(API, Xapian::doccount, "Database::get_doccount", NO_ARGS);
247 Xapian::doccount docs = 0;
248 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
249 for (i = internal.begin(); i != internal.end(); ++i) {
250 docs += (*i)->get_doccount();
251 }
252 RETURN(docs);
253 }
254
255 Xapian::docid
get_lastdocid() const256 Database::get_lastdocid() const
257 {
258 LOGCALL(API, Xapian::docid, "Database::get_lastdocid", NO_ARGS);
259 Xapian::docid did = 0;
260
261 unsigned int multiplier = internal.size();
262 for (Xapian::doccount i = 0; i < multiplier; ++i) {
263 Xapian::docid did_i = internal[i]->get_lastdocid();
264 if (did_i) did = std::max(did, (did_i - 1) * multiplier + i + 1);
265 }
266 RETURN(did);
267 }
268
269 Xapian::doclength
get_avlength() const270 Database::get_avlength() const
271 {
272 LOGCALL(API, Xapian::doclength, "Database::get_avlength", NO_ARGS);
273 Xapian::doccount docs = 0;
274 totlen_t totlen = 0;
275
276 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
277 for (i = internal.begin(); i != internal.end(); ++i) {
278 docs += (*i)->get_doccount();
279 totlen += (*i)->get_total_length();
280 }
281 LOGLINE(UNKNOWN, "get_avlength() = " << totlen << " / " << docs <<
282 " (from " << internal.size() << " dbs)");
283
284 if (docs == 0) RETURN(0.0);
285 RETURN(totlen / double(docs));
286 }
287
288 Xapian::doccount
get_termfreq(const string & tname) const289 Database::get_termfreq(const string & tname) const
290 {
291 LOGCALL(API, Xapian::doccount, "Database::get_termfreq", tname);
292 if (tname.empty()) RETURN(get_doccount());
293
294 Xapian::doccount tf = 0;
295 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
296 for (i = internal.begin(); i != internal.end(); ++i) {
297 tf += (*i)->get_termfreq(tname);
298 }
299 RETURN(tf);
300 }
301
302 Xapian::termcount
get_collection_freq(const string & tname) const303 Database::get_collection_freq(const string & tname) const
304 {
305 LOGCALL(API, Xapian::termcount, "Database::get_collection_freq", tname);
306 if (tname.empty()) RETURN(get_doccount());
307
308 Xapian::termcount cf = 0;
309 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
310 for (i = internal.begin(); i != internal.end(); ++i) {
311 cf += (*i)->get_collection_freq(tname);
312 }
313 RETURN(cf);
314 }
315
316 Xapian::doccount
get_value_freq(Xapian::valueno slot) const317 Database::get_value_freq(Xapian::valueno slot) const
318 {
319 LOGCALL(API, Xapian::doccount, "Database::get_value_freq", slot);
320
321 Xapian::doccount vf = 0;
322 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
323 for (i = internal.begin(); i != internal.end(); ++i) {
324 vf += (*i)->get_value_freq(slot);
325 }
326 RETURN(vf);
327 }
328
329 string
get_value_lower_bound(Xapian::valueno slot) const330 Database::get_value_lower_bound(Xapian::valueno slot) const
331 {
332 LOGCALL(API, string, "Database::get_value_lower_bound", slot);
333
334 if (rare(internal.empty())) RETURN(string());
335
336 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
337 i = internal.begin();
338 string full_lb = (*i)->get_value_lower_bound(slot);
339 while (++i != internal.end()) {
340 string lb = (*i)->get_value_lower_bound(slot);
341 if (lb < full_lb) full_lb = lb;
342 }
343 RETURN(full_lb);
344 }
345
346 std::string
get_value_upper_bound(Xapian::valueno slot) const347 Database::get_value_upper_bound(Xapian::valueno slot) const
348 {
349 LOGCALL(API, std::string, "Database::get_value_upper_bound", slot);
350
351 std::string full_ub;
352 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
353 for (i = internal.begin(); i != internal.end(); ++i) {
354 std::string ub = (*i)->get_value_upper_bound(slot);
355 if (ub > full_ub)
356 full_ub = ub;
357 }
358 RETURN(full_ub);
359 }
360
361 Xapian::termcount
get_doclength_lower_bound() const362 Database::get_doclength_lower_bound() const
363 {
364 LOGCALL(API, Xapian::termcount, "Database::get_doclength_lower_bound", NO_ARGS);
365
366 if (rare(internal.empty())) RETURN(0);
367
368 Xapian::termcount full_lb = 0;
369 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
370 for (i = internal.begin(); i != internal.end(); ++i) {
371 // Skip sub-databases which are empty or only contain documents with
372 // doclen==0.
373 if ((*i)->get_total_length() != 0) {
374 Xapian::termcount lb = (*i)->get_doclength_lower_bound();
375 if (full_lb == 0 || lb < full_lb) full_lb = lb;
376 }
377 }
378 RETURN(full_lb);
379 }
380
381 Xapian::termcount
get_doclength_upper_bound() const382 Database::get_doclength_upper_bound() const
383 {
384 LOGCALL(API, Xapian::termcount, "Database::get_doclength_upper_bound", NO_ARGS);
385
386 Xapian::termcount full_ub = 0;
387 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
388 for (i = internal.begin(); i != internal.end(); ++i) {
389 Xapian::termcount ub = (*i)->get_doclength_upper_bound();
390 if (ub > full_ub) full_ub = ub;
391 }
392 RETURN(full_ub);
393 }
394
395 Xapian::termcount
get_wdf_upper_bound(const string & term) const396 Database::get_wdf_upper_bound(const string & term) const
397 {
398 LOGCALL(API, Xapian::termcount, "Database::get_wdf_upper_bound", term);
399 if (term.empty()) RETURN(0);
400
401 Xapian::termcount full_ub = 0;
402 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
403 for (i = internal.begin(); i != internal.end(); ++i) {
404 Xapian::termcount ub = (*i)->get_wdf_upper_bound(term);
405 if (ub > full_ub) full_ub = ub;
406 }
407 RETURN(full_ub);
408 }
409
410 ValueIterator
valuestream_begin(Xapian::valueno slot) const411 Database::valuestream_begin(Xapian::valueno slot) const
412 {
413 LOGCALL(API, ValueIterator, "Database::valuestream_begin", slot);
414 if (internal.size() == 0)
415 RETURN(ValueIterator());
416 if (internal.size() != 1)
417 RETURN(ValueIterator(new MultiValueList(internal, slot)));
418 RETURN(ValueIterator(internal[0]->open_value_list(slot)));
419 }
420
421 Xapian::termcount
get_doclength(Xapian::docid did) const422 Database::get_doclength(Xapian::docid did) const
423 {
424 LOGCALL(API, Xapian::termcount, "Database::get_doclength", did);
425 if (did == 0)
426 docid_zero_invalid();
427
428 unsigned int multiplier = internal.size();
429 if (rare(multiplier == 0))
430 no_subdatabases();
431 Xapian::doccount n = (did - 1) % multiplier; // which actual database
432 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
433 RETURN(internal[n]->get_doclength(m));
434 }
435
436 Document
get_document(Xapian::docid did) const437 Database::get_document(Xapian::docid did) const
438 {
439 LOGCALL(API, Document, "Database::get_document", did);
440 if (did == 0)
441 docid_zero_invalid();
442
443 unsigned int multiplier = internal.size();
444 if (rare(multiplier == 0))
445 no_subdatabases();
446 Xapian::doccount n = (did - 1) % multiplier; // which actual database
447 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
448
449 // Open non-lazily so we throw DocNotFoundError if the doc doesn't exist.
450 RETURN(Document(internal[n]->open_document(m, false)));
451 }
452
453 Document::Internal *
get_document_lazily(Xapian::docid did) const454 Database::get_document_lazily(Xapian::docid did) const
455 {
456 LOGCALL(DB, Document::Internal *, "Database::get_document_lazily", did);
457 if (did == 0)
458 docid_zero_invalid();
459
460 unsigned int multiplier = internal.size();
461 Assert(multiplier != 0);
462 Xapian::doccount n = (did - 1) % multiplier; // which actual database
463 Xapian::docid m = (did - 1) / multiplier + 1; // real docid in that database
464
465 RETURN(internal[n]->open_document(m, true));
466 }
467
468 bool
term_exists(const string & tname) const469 Database::term_exists(const string & tname) const
470 {
471 LOGCALL(API, bool, "Database::term_exists", tname);
472 if (tname.empty()) {
473 RETURN(get_doccount() != 0);
474 }
475 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
476 for (i = internal.begin(); i != internal.end(); ++i) {
477 if ((*i)->term_exists(tname)) RETURN(true);
478 }
479 RETURN(false);
480 }
481
482 void
keep_alive()483 Database::keep_alive()
484 {
485 LOGCALL_VOID(API, "Database::keep_alive", NO_ARGS);
486 vector<Xapian::Internal::RefCntPtr<Database::Internal> >::const_iterator i;
487 for (i = internal.begin(); i != internal.end(); ++i) {
488 (*i)->keep_alive();
489 }
490 }
491
492 string
get_description() const493 Database::get_description() const
494 {
495 /// \todo display contents of the database
496 return "Database()";
497 }
498
499 // We sum the character frequency histogram absolute differences to compute a
500 // lower bound on the edit distance. Rather than counting each Unicode code
501 // point uniquely, we use an array with VEC_SIZE elements and tally code points
502 // modulo VEC_SIZE which can only reduce the bound we calculate.
503 //
// There will be a trade-off between how good the bound is and how large an
// array is used (a larger array takes more time to clear and sum over).  The
506 // value 64 is somewhat arbitrary - it works as well as 128 for the testsuite
507 // but that may not reflect real world performance. FIXME: profile and tune.
508
#define VEC_SIZE 64

/** Compute a lower bound on the edit distance between two sequences.
 *
 *  The elements of each sequence are tallied modulo VEC_SIZE and the
 *  absolute differences between the two tallies are summed.
 *
 *  @param a  First sequence of code points.
 *  @param b  Second sequence of code points.
 *
 *  @return  Half the summed difference, rounded up.
 */
static int
freq_edit_lower_bound(const std::vector<unsigned> & a, const std::vector<unsigned> & b)
{
    // Net tally per bucket: incremented for a's elements, decremented for b's.
    int tally[VEC_SIZE] = { 0 };

    for (std::size_t k = 0; k != a.size(); ++k) {
        ++tally[a[k] % VEC_SIZE];
    }
    for (std::size_t k = 0; k != b.size(); ++k) {
        --tally[b[k] % VEC_SIZE];
    }

    unsigned int total = 0;
    for (const int * p = tally; p != tally + VEC_SIZE; ++p) {
        total += std::abs(*p);
    }

    // Each insertion or deletion adds at most 1 to total.  Each transposition
    // doesn't change it at all.  But each substitution can change it by 2 so
    // we need to divide it by 2.  Rounding up is OK, since the odd change must
    // be due to an actual edit.
    return (total + 1) / 2;
}
533
534 // Word must have a trigram score at least this close to the best score seen
535 // so far.
536 #define TRIGRAM_SCORE_THRESHOLD 2
537
538 string
get_spelling_suggestion(const string & word,unsigned max_edit_distance) const539 Database::get_spelling_suggestion(const string &word,
540 unsigned max_edit_distance) const
541 {
542 LOGCALL(API, string, "Database::get_spelling_suggestion", word | max_edit_distance);
543 if (word.size() <= 1) return string();
544 AutoPtr<TermList> merger;
545 for (size_t i = 0; i < internal.size(); ++i) {
546 TermList * tl = internal[i]->open_spelling_termlist(word);
547 LOGLINE(SPELLING, "Sub db " << i << " tl = " << (void*)tl);
548 if (tl) {
549 if (merger.get()) {
550 merger.reset(new OrTermList(merger.release(), tl));
551 } else {
552 merger.reset(tl);
553 }
554 }
555 }
556 if (!merger.get()) RETURN(string());
557
558 // Convert word to UTF-32.
559 #if ! defined __SUNPRO_CC || __SUNPRO_CC - 0 >= 0x580
560 // Extra brackets needed to avoid this being misparsed as a function
561 // prototype.
562 vector<unsigned> utf32_word((Utf8Iterator(word)), Utf8Iterator());
563 #else
564 // Older versions of Sun's C++ compiler need this workaround, but 5.8
565 // doesn't. Unsure of the exact version it was fixed in.
566 vector<unsigned> utf32_word;
567 for (Utf8Iterator sunpro_it(word); sunpro_it != Utf8Iterator(); ++sunpro_it) {
568 utf32_word.push_back(*sunpro_it);
569 }
570 #endif
571
572 vector<unsigned> utf32_term;
573
574 Xapian::termcount best = 1;
575 string result;
576 int edist_best = max_edit_distance;
577 Xapian::doccount freq_best = 0;
578 Xapian::doccount freq_exact = 0;
579 while (true) {
580 TermList *ret = merger->next();
581 if (ret) merger.reset(ret);
582
583 if (merger->at_end()) break;
584
585 string term = merger->get_termname();
586 Xapian::termcount score = merger->get_wdf();
587
588 LOGLINE(SPELLING, "Term \"" << term << "\" ngram score " << score);
589 if (score + TRIGRAM_SCORE_THRESHOLD >= best) {
590 if (score > best) best = score;
591
592 // There's no point considering a word where the difference
593 // in length is greater than the smallest number of edits we've
594 // found so far.
595
596 // First check the length of the encoded UTF-8 version of term.
597 // Each UTF-32 character is 1-4 bytes in UTF-8.
598 if (abs(long(term.size()) - long(word.size())) > edist_best * 4) {
599 LOGLINE(SPELLING, "Lengths much too different");
600 continue;
601 }
602
603 // Now convert to UTF-32, and compare the true lengths more
604 // strictly.
605 utf32_term.assign(Utf8Iterator(term), Utf8Iterator());
606
607 if (abs(long(utf32_term.size()) - long(utf32_word.size()))
608 > edist_best) {
609 LOGLINE(SPELLING, "Lengths too different");
610 continue;
611 }
612
613 if (freq_edit_lower_bound(utf32_term, utf32_word) > edist_best) {
614 LOGLINE(SPELLING, "Rejected by character frequency test");
615 continue;
616 }
617
618 int edist = edit_distance_unsigned(&utf32_term[0],
619 int(utf32_term.size()),
620 &utf32_word[0],
621 int(utf32_word.size()),
622 edist_best);
623 LOGLINE(SPELLING, "Edit distance " << edist);
624
625 if (edist <= edist_best) {
626 Xapian::doccount freq = 0;
627 for (size_t j = 0; j < internal.size(); ++j)
628 freq += internal[j]->get_spelling_frequency(term);
629
630 LOGLINE(SPELLING, "Freq " << freq << " best " << freq_best);
631 // Even if we have an exact match, there may be a much more
632 // frequent potential correction which will still be
633 // interesting.
634 if (edist == 0) {
635 freq_exact = freq;
636 continue;
637 }
638
639 if (edist < edist_best || freq > freq_best) {
640 LOGLINE(SPELLING, "Best so far: \"" << term <<
641 "\" edist " << edist << " freq " << freq);
642 result = term;
643 edist_best = edist;
644 freq_best = freq;
645 }
646 }
647 }
648 }
649 if (freq_best < freq_exact)
650 RETURN(string());
651 RETURN(result);
652 }
653
654 TermIterator
spellings_begin() const655 Database::spellings_begin() const
656 {
657 LOGCALL(API, TermIterator, "Database::spellings_begin", NO_ARGS);
658 AutoPtr<TermList> merger;
659 for (size_t i = 0; i < internal.size(); ++i) {
660 TermList * tl = internal[i]->open_spelling_wordlist();
661 if (tl) {
662 if (merger.get()) {
663 merger.reset(new FreqAdderOrTermList(merger.release(), tl));
664 } else {
665 merger.reset(tl);
666 }
667 }
668 }
669 RETURN(TermIterator(merger.release()));
670 }
671
672 TermIterator
synonyms_begin(const std::string & term) const673 Database::synonyms_begin(const std::string &term) const
674 {
675 LOGCALL(API, TermIterator, "Database::synonyms_begin", term);
676 AutoPtr<TermList> merger;
677 for (size_t i = 0; i < internal.size(); ++i) {
678 TermList * tl = internal[i]->open_synonym_termlist(term);
679 if (tl) {
680 if (merger.get()) {
681 merger.reset(new OrTermList(merger.release(), tl));
682 } else {
683 merger.reset(tl);
684 }
685 }
686 }
687 RETURN(TermIterator(merger.release()));
688 }
689
690 TermIterator
synonym_keys_begin(const std::string & prefix) const691 Database::synonym_keys_begin(const std::string &prefix) const
692 {
693 LOGCALL(API, TermIterator, "Database::synonyms_keys_begin", prefix);
694 AutoPtr<TermList> merger;
695 for (size_t i = 0; i < internal.size(); ++i) {
696 TermList * tl = internal[i]->open_synonym_keylist(prefix);
697 if (tl) {
698 if (merger.get()) {
699 merger.reset(new OrTermList(merger.release(), tl));
700 } else {
701 merger.reset(tl);
702 }
703 }
704 }
705 RETURN(TermIterator(merger.release()));
706 }
707
708 string
get_metadata(const string & key) const709 Database::get_metadata(const string & key) const
710 {
711 LOGCALL(API, string, "Database::get_metadata", key);
712 if (key.empty())
713 throw InvalidArgumentError("Empty metadata keys are invalid");
714 if (internal.empty()) RETURN(std::string());
715 RETURN(internal[0]->get_metadata(key));
716 }
717
718 Xapian::TermIterator
metadata_keys_begin(const std::string & prefix) const719 Database::metadata_keys_begin(const std::string &prefix) const
720 {
721 LOGCALL(API, Xapian::TermIterator, "Database::metadata_keys_begin", NO_ARGS);
722 if (internal.empty()) RETURN(TermIterator());
723 RETURN(TermIterator(internal[0]->open_metadata_keylist(prefix)));
724 }
725
726 std::string
get_uuid() const727 Database::get_uuid() const
728 {
729 LOGCALL(API, std::string, "Database::get_uuid", NO_ARGS);
730 string uuid;
731 for (size_t i = 0; i < internal.size(); ++i) {
732 string sub_uuid = internal[i]->get_uuid();
733 // If any of the sub-databases have no uuid, we can't make a uuid for
734 // the combined database.
735 if (sub_uuid.empty())
736 RETURN(sub_uuid);
737 if (!uuid.empty()) uuid += ':';
738 uuid += sub_uuid;
739 }
740 RETURN(uuid);
741 }
742
743 ///////////////////////////////////////////////////////////////////////////
744
WritableDatabase()745 WritableDatabase::WritableDatabase() : Database()
746 {
747 LOGCALL_CTOR(API, "WritableDatabase", NO_ARGS);
748 }
749
WritableDatabase(Database::Internal * internal_)750 WritableDatabase::WritableDatabase(Database::Internal *internal_)
751 : Database(internal_)
752 {
753 LOGCALL_CTOR(API, "WritableDatabase", internal_);
754 }
755
/// Copy constructor: copying is delegated to the Database copy constructor.
WritableDatabase::WritableDatabase(const WritableDatabase &other)
    : Database(other)
{
    LOGCALL_CTOR(API, "WritableDatabase", other);
}
761
762 void
operator =(const WritableDatabase & other)763 WritableDatabase::operator=(const WritableDatabase &other)
764 {
765 LOGCALL_VOID(API, "WritableDatabase::operator=", other);
766 Database::operator=(other);
767 }
768
~WritableDatabase()769 WritableDatabase::~WritableDatabase()
770 {
771 LOGCALL_DTOR(API, "WritableDatabase");
772 }
773
774 XAPIAN_NORETURN(static void only_one_subdatabase_allowed());
only_one_subdatabase_allowed()775 static void only_one_subdatabase_allowed()
776 {
777 throw Xapian::InvalidOperationError("WritableDatabase needs exactly one subdatabase");
778 }
779
780 void
commit()781 WritableDatabase::commit()
782 {
783 LOGCALL_VOID(API, "WritableDatabase::commit", NO_ARGS);
784 if (internal.size() != 1) only_one_subdatabase_allowed();
785 internal[0]->commit();
786 }
787
788 void
begin_transaction(bool flushed)789 WritableDatabase::begin_transaction(bool flushed)
790 {
791 LOGCALL_VOID(API, "WritableDatabase::begin_transaction", NO_ARGS);
792 if (internal.size() != 1) only_one_subdatabase_allowed();
793 internal[0]->begin_transaction(flushed);
794 }
795
796 void
commit_transaction()797 WritableDatabase::commit_transaction()
798 {
799 LOGCALL_VOID(API, "WritableDatabase::commit_transaction", NO_ARGS);
800 if (internal.size() != 1) only_one_subdatabase_allowed();
801 internal[0]->commit_transaction();
802 }
803
804 void
cancel_transaction()805 WritableDatabase::cancel_transaction()
806 {
807 LOGCALL_VOID(API, "WritableDatabase::cancel_transaction", NO_ARGS);
808 if (internal.size() != 1) only_one_subdatabase_allowed();
809 internal[0]->cancel_transaction();
810 }
811
812
813 Xapian::docid
add_document(const Document & document)814 WritableDatabase::add_document(const Document & document)
815 {
816 LOGCALL(API, Xapian::docid, "WritableDatabase::add_document", document);
817 if (internal.size() != 1) only_one_subdatabase_allowed();
818 RETURN(internal[0]->add_document(document));
819 }
820
821 void
delete_document(Xapian::docid did)822 WritableDatabase::delete_document(Xapian::docid did)
823 {
824 LOGCALL_VOID(API, "WritableDatabase::delete_document", did);
825 if (internal.size() != 1) only_one_subdatabase_allowed();
826 if (did == 0)
827 docid_zero_invalid();
828 internal[0]->delete_document(did);
829 }
830
831 void
delete_document(const std::string & unique_term)832 WritableDatabase::delete_document(const std::string & unique_term)
833 {
834 LOGCALL_VOID(API, "WritableDatabase::delete_document", unique_term);
835 if (internal.size() != 1) only_one_subdatabase_allowed();
836 if (unique_term.empty())
837 throw InvalidArgumentError("Empty termnames are invalid");
838 internal[0]->delete_document(unique_term);
839 }
840
841 void
replace_document(Xapian::docid did,const Document & document)842 WritableDatabase::replace_document(Xapian::docid did, const Document & document)
843 {
844 LOGCALL_VOID(API, "WritableDatabase::replace_document", did | document);
845 if (internal.size() != 1) only_one_subdatabase_allowed();
846 if (did == 0)
847 docid_zero_invalid();
848 internal[0]->replace_document(did, document);
849 }
850
851 Xapian::docid
replace_document(const std::string & unique_term,const Document & document)852 WritableDatabase::replace_document(const std::string & unique_term,
853 const Document & document)
854 {
855 LOGCALL(API, Xapian::docid, "WritableDatabase::replace_document", unique_term | document);
856 if (internal.size() != 1) only_one_subdatabase_allowed();
857 if (unique_term.empty())
858 throw InvalidArgumentError("Empty termnames are invalid");
859 RETURN(internal[0]->replace_document(unique_term, document));
860 }
861
862 void
add_spelling(const std::string & word,Xapian::termcount freqinc) const863 WritableDatabase::add_spelling(const std::string & word,
864 Xapian::termcount freqinc) const
865 {
866 LOGCALL_VOID(API, "WritableDatabase::add_spelling", word | freqinc);
867 if (internal.size() != 1) only_one_subdatabase_allowed();
868 internal[0]->add_spelling(word, freqinc);
869 }
870
871 void
remove_spelling(const std::string & word,Xapian::termcount freqdec) const872 WritableDatabase::remove_spelling(const std::string & word,
873 Xapian::termcount freqdec) const
874 {
875 LOGCALL_VOID(API, "WritableDatabase::remove_spelling", word | freqdec);
876 if (internal.size() != 1) only_one_subdatabase_allowed();
877 internal[0]->remove_spelling(word, freqdec);
878 }
879
880 void
add_synonym(const std::string & term,const std::string & synonym) const881 WritableDatabase::add_synonym(const std::string & term,
882 const std::string & synonym) const
883 {
884 LOGCALL_VOID(API, "WritableDatabase::add_synonym", term | synonym);
885 if (internal.size() != 1) only_one_subdatabase_allowed();
886 internal[0]->add_synonym(term, synonym);
887 }
888
889 void
remove_synonym(const std::string & term,const std::string & synonym) const890 WritableDatabase::remove_synonym(const std::string & term,
891 const std::string & synonym) const
892 {
893 LOGCALL_VOID(API, "WritableDatabase::remove_synonym", term | synonym);
894 if (internal.size() != 1) only_one_subdatabase_allowed();
895 internal[0]->remove_synonym(term, synonym);
896 }
897
898 void
clear_synonyms(const std::string & term) const899 WritableDatabase::clear_synonyms(const std::string & term) const
900 {
901 LOGCALL_VOID(API, "WritableDatabase::clear_synonyms", term);
902 if (internal.size() != 1) only_one_subdatabase_allowed();
903 internal[0]->clear_synonyms(term);
904 }
905
906 void
set_metadata(const string & key,const string & value)907 WritableDatabase::set_metadata(const string & key, const string & value)
908 {
909 LOGCALL_VOID(API, "WritableDatabase::set_metadata", key | value);
910 if (internal.size() != 1) only_one_subdatabase_allowed();
911 if (key.empty())
912 throw InvalidArgumentError("Empty metadata keys are invalid");
913 internal[0]->set_metadata(key, value);
914 }
915
916 string
get_description() const917 WritableDatabase::get_description() const
918 {
919 /// \todo display contents of the writable database
920 return "WritableDatabase()";
921 }
922
923 }
924