1 /** @file database.h 2 * @brief database class declarations 3 */ 4 /* Copyright 1999,2000,2001 BrightStation PLC 5 * Copyright 2002 Ananova Ltd 6 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2013,2014,2015,2016 Olly Betts 7 * Copyright 2006,2008 Lemur Consulting Ltd 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License as 11 * published by the Free Software Foundation; either version 2 of the 12 * License, or (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 22 * USA 23 */ 24 25 #ifndef OM_HGUARD_DATABASE_H 26 #define OM_HGUARD_DATABASE_H 27 28 #include <string> 29 30 #include "internaltypes.h" 31 32 #include "xapian/intrusive_ptr.h" 33 #include <xapian/types.h> 34 #include <xapian/database.h> 35 #include <xapian/document.h> 36 #include <xapian/positioniterator.h> 37 #include <xapian/termiterator.h> 38 #include <xapian/valueiterator.h> 39 40 using namespace std; 41 42 class LeafPostList; 43 class RemoteDatabase; 44 45 typedef Xapian::TermIterator::Internal TermList; 46 typedef Xapian::PositionIterator::Internal PositionList; 47 typedef Xapian::ValueIterator::Internal ValueList; 48 49 namespace Xapian { 50 51 class Query; 52 struct ReplicationInfo; 53 54 /** Base class for databases. 55 */ 56 class Database::Internal : public Xapian::Internal::intrusive_base { 57 private: 58 /// Copies are not allowed. 59 Internal(const Internal &); 60 61 /// Assignment is not allowed. 62 void operator=(const Internal &); 63 64 protected: 65 /// Transaction state. 66 enum { 67 TRANSACTION_UNIMPLEMENTED = -1, // Used by InMemory. 68 TRANSACTION_NONE = 0, 69 TRANSACTION_UNFLUSHED = 1, 70 TRANSACTION_FLUSHED = 2 71 } transaction_state; 72 transaction_active()73 bool transaction_active() const { return int(transaction_state) > 0; } 74 75 /** Create a database - called only by derived classes. */ Internal()76 Internal() : transaction_state(TRANSACTION_NONE) { } 77 78 /** Internal method to perform cleanup when a writable database is 79 * destroyed with uncommitted changes. 80 * 81 * A derived class' destructor should call this method before 82 * destroying the database to ensure that no sessions or 83 * transactions are in progress at destruction time. 84 * 85 * Note that it is not safe to throw exceptions from destructors, 86 * so this method will catch and discard any exceptions. 87 */ 88 void dtor_called(); 89 90 public: 91 /** Destroy the database. 92 * 93 * This method should not be called until all objects using the 94 * database have been cleaned up. 95 * 96 * If any transactions are in progress, they should 97 * be finished by cancel_transaction() or 98 * commit_transaction() - if this is not done, the destructor 99 * will attempt to clean things up by cancelling the transaction, 100 * but any errors produced by these operations will not be reported. 101 */ 102 virtual ~Internal(); 103 104 /** Send a keep-alive signal to a remote database, to stop 105 * it from timing out. 106 */ 107 virtual void keep_alive(); 108 109 virtual void readahead_for_query(const Xapian::Query & query); 110 111 ////////////////////////////////////////////////////////////////// 112 // Database statistics: 113 // ==================== 114 115 /** Return the number of docs in this (sub) database. 116 */ 117 virtual Xapian::doccount get_doccount() const = 0; 118 119 /** Return the last used document id of this (sub) database. 120 */ 121 virtual Xapian::docid get_lastdocid() const = 0; 122 123 /** Return the total length of all documents in this database. */ 124 virtual Xapian::totallength get_total_length() const = 0; 125 126 /** Get the length of a given document. 127 * 128 * Document length, for the purposes of Xapian, is defined to be 129 * the number of instances of terms within a document. Expressed 130 * differently, the sum of the within document frequencies over 131 * all the terms in the document. 132 * 133 * @param did The document id of the document whose length is 134 * being requested. 135 */ 136 virtual Xapian::termcount get_doclength(Xapian::docid did) const = 0; 137 138 /** Get the number of unique term in document. 139 * 140 * @param did The document id of the document whose number of terms is 141 * being requested. 142 */ 143 virtual Xapian::termcount get_unique_terms(Xapian::docid did) const = 0; 144 145 /** Returns frequencies for a term. 146 * 147 * @param term The term to get frequencies for 148 * @param termfreq_ptr Point to return number of docs indexed by @a 149 * term (or NULL not to return) 150 * @param collfreq_ptr Point to return number of occurrences of @a 151 * term in the database (or NULL not to return) 152 */ 153 virtual void get_freqs(const string & term, 154 Xapian::doccount * termfreq_ptr, 155 Xapian::termcount * collfreq_ptr) const = 0; 156 157 /** Return the frequency of a given value slot. 158 * 159 * This is the number of documents which have a (non-empty) value 160 * stored in the slot. 161 * 162 * @param slot The value slot to examine. 163 * 164 * @exception UnimplementedError The frequency of the value isn't 165 * available for this database type. 166 */ 167 virtual Xapian::doccount get_value_freq(Xapian::valueno slot) const = 0; 168 169 /** Get a lower bound on the values stored in the given value slot. 170 * 171 * If the lower bound isn't available for the given database type, 172 * this will return the lowest possible bound - the empty string. 173 * 174 * @param slot The value slot to examine. 175 */ 176 virtual std::string get_value_lower_bound(Xapian::valueno slot) const = 0; 177 178 /** Get an upper bound on the values stored in the given value slot. 179 * 180 * @param slot The value slot to examine. 181 * 182 * @exception UnimplementedError The upper bound of the values isn't 183 * available for this database type. 184 */ 185 virtual std::string get_value_upper_bound(Xapian::valueno slot) const = 0; 186 187 /// Get a lower bound on the length of a document in this DB. 188 virtual Xapian::termcount get_doclength_lower_bound() const; 189 190 /// Get an upper bound on the length of a document in this DB. 191 virtual Xapian::termcount get_doclength_upper_bound() const; 192 193 /// Get an upper bound on the wdf of term @a term. 194 virtual Xapian::termcount get_wdf_upper_bound(const std::string & term) const; 195 196 /** Check whether a given term is in the database. 197 * 198 * @param tname The term whose presence is being checked. 199 */ 200 virtual bool term_exists(const string & tname) const = 0; 201 202 /** Check whether this database contains any positional information. 203 */ 204 virtual bool has_positions() const = 0; 205 206 ////////////////////////////////////////////////////////////////// 207 // Data item access methods: 208 // ========================= 209 210 /** Open a posting list. 211 * 212 * Method defined by subclass to open a posting list. 213 * This is a list of all the documents which contain a given term. 214 * 215 * @param tname The term whose posting list is being requested. 216 * 217 * @return A pointer to the newly created posting list. 218 * If the term doesn't exist, a LeafPostList object 219 * returning no documents is returned, which makes it 220 * easier to implement a search over multiple databases. 221 * This object must be deleted by the caller after 222 * use. 223 */ 224 virtual LeafPostList * open_post_list(const string & tname) const = 0; 225 226 /** Open a value stream. 227 * 228 * This returns the value in a particular slot for each document. 229 * 230 * @param slot The value slot. 231 * 232 * @return Pointer to a new ValueList object which should be 233 * deleted by the caller once it is no longer needed. 234 */ 235 virtual ValueList * open_value_list(Xapian::valueno slot) const; 236 237 /** Open a term list. 238 * 239 * This is a list of all the terms contained by a given document. 240 * 241 * @param did The document id whose term list is being requested. 242 * 243 * @return A pointer to the newly created term list. 244 * This object must be deleted by the caller after 245 * use. 246 */ 247 virtual TermList * open_term_list(Xapian::docid did) const = 0; 248 249 /** Open an allterms list. 250 * 251 * This is a list of all the terms in the database 252 * 253 * @param prefix The prefix to restrict the terms to. 254 * @return A pointer to the newly created allterms list. 255 * This object must be deleted by the caller after 256 * use. 257 */ 258 virtual TermList * open_allterms(const string & prefix) const = 0; 259 260 /** Open a position list for the given term in the given document. 261 * 262 * @param did The document id for which a position list is being 263 * requested. 264 * @param tname The term for which a position list is being 265 * requested. 266 * 267 * @return A pointer to the newly created position list. 268 * This object must be deleted by the caller after 269 * use. 270 */ 271 virtual PositionList * open_position_list(Xapian::docid did, 272 const string & tname) const = 0; 273 274 /** Open a document. 275 * 276 * This is used to access the values and data associated with a 277 * document. See class Xapian::Document::Internal for further details. 278 * 279 * @param did The document id which is being requested. 280 * 281 * @param lazy No need to check that this document actually exists. 282 * Used when we already know that this document exists 283 * (only a hint - the backend may still check). 284 * 285 * @return A pointer to the newly created document object. 286 * This object must be deleted by the caller after 287 * use. 288 */ 289 virtual Xapian::Document::Internal * 290 open_document(Xapian::docid did, bool lazy) const = 0; 291 292 /** Create a termlist tree from trigrams of @a word. 293 * 294 * You can assume word.size() > 1. 295 * 296 * If there are no trigrams, returns NULL. 297 */ 298 virtual TermList * open_spelling_termlist(const string & word) const; 299 300 /** Return a termlist which returns the words which are spelling 301 * correction targets. 302 * 303 * If there are no spelling correction targets, returns NULL. 304 */ 305 virtual TermList * open_spelling_wordlist() const; 306 307 /** Return the number of times @a word was added as a spelling. */ 308 virtual Xapian::doccount get_spelling_frequency(const string & word) const; 309 310 /** Add a word to the spelling dictionary. 311 * 312 * If the word is already present, its frequency is increased. 313 * 314 * @param word The word to add. 315 * @param freqinc How much to increase its frequency by. 316 */ 317 virtual void add_spelling(const string & word, 318 Xapian::termcount freqinc) const; 319 320 /** Remove a word from the spelling dictionary. 321 * 322 * The word's frequency is decreased, and if would become zero or less 323 * then the word is removed completely. 324 * 325 * @param word The word to remove. 326 * @param freqdec How much to decrease its frequency by. 327 */ 328 virtual void remove_spelling(const string & word, 329 Xapian::termcount freqdec) const; 330 331 /** Open a termlist returning synonyms for a term. 332 * 333 * If @a term has no synonyms, returns NULL. 334 */ 335 virtual TermList * open_synonym_termlist(const string & term) const; 336 337 /** Open a termlist returning each term which has synonyms. 338 * 339 * @param prefix If non-empty, only terms with this prefix are 340 * returned. 341 */ 342 virtual TermList * open_synonym_keylist(const string & prefix) const; 343 344 /** Add a synonym for a term. 345 * 346 * If @a synonym is already a synonym for @a term, then no action is 347 * taken. 348 */ 349 virtual void add_synonym(const string & term, const string & synonym) const; 350 351 /** Remove a synonym for a term. 352 * 353 * If @a synonym isn't a synonym for @a term, then no action is taken. 354 */ 355 virtual void remove_synonym(const string & term, const string & synonym) const; 356 357 /** Clear all synonyms for a term. 358 * 359 * If @a term has no synonyms, no action is taken. 360 */ 361 virtual void clear_synonyms(const string & term) const; 362 363 /** Get the metadata associated with a given key. 364 * 365 * See Database::get_metadata() for more information. 366 */ 367 virtual string get_metadata(const string & key) const; 368 369 /** Open a termlist returning each metadata key. 370 * 371 * Only metadata keys which are associated with a non-empty value will 372 * be returned. 373 * 374 * @param prefix If non-empty, only keys with this prefix are 375 * returned. 376 */ 377 virtual TermList * open_metadata_keylist(const std::string &prefix) const; 378 379 /** Set the metadata associated with a given key. 380 * 381 * See WritableDatabase::set_metadata() for more information. 382 */ 383 virtual void set_metadata(const string & key, const string & value); 384 385 /** Reopen the database to the latest available revision. 386 * 387 * Database backends which don't support simultaneous update and 388 * reading probably don't need to do anything here. 389 */ 390 virtual bool reopen(); 391 392 /** Close the database 393 */ 394 virtual void close() = 0; 395 396 ////////////////////////////////////////////////////////////////// 397 // Modifying the database: 398 // ======================= 399 400 /** Commit pending modifications to the database. 401 * 402 * See WritableDatabase::commit() for more information. 403 */ 404 virtual void commit(); 405 406 /** Cancel pending modifications to the database. */ 407 virtual void cancel(); 408 409 /** Begin a transaction. 410 * 411 * See WritableDatabase::begin_transaction() for more information. 412 */ 413 void begin_transaction(bool flushed); 414 415 /** Commit a transaction. 416 * 417 * See WritableDatabase::commit_transaction() for more information. 418 */ 419 void commit_transaction(); 420 421 /** Cancel a transaction. 422 * 423 * See WritableDatabase::cancel_transaction() for more information. 424 */ 425 void cancel_transaction(); 426 427 /** Add a new document to the database. 428 * 429 * See WritableDatabase::add_document() for more information. 430 */ 431 virtual Xapian::docid add_document(const Xapian::Document & document); 432 433 /** Delete a document in the database. 434 * 435 * See WritableDatabase::delete_document() for more information. 436 */ 437 virtual void delete_document(Xapian::docid did); 438 439 /** Delete any documents indexed by a term from the database. 440 * 441 * See WritableDatabase::delete_document() for more information. 442 */ 443 virtual void delete_document(const string & unique_term); 444 445 /** Replace a given document in the database. 446 * 447 * See WritableDatabase::replace_document() for more information. 448 */ 449 virtual void replace_document(Xapian::docid did, 450 const Xapian::Document & document); 451 452 /** Replace any documents matching a term. 453 * 454 * See WritableDatabase::replace_document() for more information. 455 */ 456 virtual Xapian::docid replace_document(const string & unique_term, 457 const Xapian::Document & document); 458 459 /** Request and later collect a document from the database. 460 * Multiple documents can be requested with request_document(), 461 * and then collected with collect_document(). Allows the backend 462 * to optimise (e.g. the remote backend can start requests for all 463 * the documents so they fetch in parallel). 464 * 465 * If a backend doesn't support this, request_document() can be a 466 * no-op and collect_document() the same as open_document(). 467 */ 468 //@{ 469 virtual void request_document(Xapian::docid /*did*/) const; 470 471 virtual Xapian::Document::Internal * collect_document(Xapian::docid did) const; 472 //@} 473 474 /** Write a set of changesets to a file descriptor. 475 * 476 * This call may reopen the database, leaving it pointing to a more 477 * recent version of the database. 478 */ 479 virtual void write_changesets_to_fd(int fd, 480 const std::string & start_revision, 481 bool need_whole_db, 482 Xapian::ReplicationInfo * info); 483 484 /// Get a string describing the current revision of the database. 485 virtual string get_revision_info() const; 486 487 /** Get a UUID for the database. 488 * 489 * The UUID will persist for the lifetime of the database. 490 * 491 * Replicas (eg, made with the replication protocol, or by copying all 492 * the database files) will have the same UUID. However, copies (made 493 * with copydatabase, or xapian-compact) will have different UUIDs. 494 * 495 * If the backend does not support UUIDs the empty string is returned. 496 */ 497 virtual string get_uuid() const; 498 499 /** Notify the database that document is no longer valid. 500 * 501 * This is used to invalidate references to a document kept by a 502 * database for doing lazy updates. If we moved to using a weak_ptr 503 * instead we wouldn't need a special method for this, but it would 504 * involve a fair bit of reorganising of other parts of the code. 505 */ 506 virtual void invalidate_doc_object(Xapian::Document::Internal * obj) const; 507 508 /** Get backend information about this database. 509 * 510 * @param path If non-NULL, and set the pointed to string to the file 511 * path of this database (or if to some string describing 512 * the database in a backend-specified format if "path" 513 * isn't a concept which make sense). 514 * 515 * @return A constant indicating the backend type. 516 */ 517 virtual int get_backend_info(string * path) const = 0; 518 519 /** Find lowest and highest docids actually in use. 520 * 521 * Only used by compaction, so only needs to be implemented by 522 * backends which support compaction. 523 */ 524 virtual void get_used_docid_range(Xapian::docid & first, 525 Xapian::docid & last) const; 526 527 /** Return true if the database is open for writing. 528 * 529 * If this is a WritableDatabase, always returns true. 530 * 531 * For a Database, test if there's a writer holding the lock (or if 532 * we can't test for a lock without taking it on the current platform, 533 * throw Xapian::UnimplementedError). 534 */ 535 virtual bool locked() const; 536 }; 537 538 } 539 540 #endif /* OM_HGUARD_DATABASE_H */ 541