1 /** @file database.h
2  * @brief database class declarations
3  */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5  * Copyright 2002 Ananova Ltd
6  * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2011,2013,2014,2015,2016 Olly Betts
7  * Copyright 2006,2008 Lemur Consulting Ltd
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as
11  * published by the Free Software Foundation; either version 2 of the
12  * License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
22  * USA
23  */
24 
25 #ifndef OM_HGUARD_DATABASE_H
26 #define OM_HGUARD_DATABASE_H
27 
28 #include <string>
29 
30 #include "internaltypes.h"
31 
32 #include "xapian/intrusive_ptr.h"
33 #include <xapian/types.h>
34 #include <xapian/database.h>
35 #include <xapian/document.h>
36 #include <xapian/positioniterator.h>
37 #include <xapian/termiterator.h>
38 #include <xapian/valueiterator.h>
39 
40 using namespace std; // NOTE(review): header-wide using-directive; unqualified `string` below relies on it.
41 // Forward declarations of classes which are not defined in this header.
42 class LeafPostList;
43 class RemoteDatabase;
44 // Short aliases for the nested Internal classes of the Xapian iterator classes.
45 typedef Xapian::TermIterator::Internal TermList;
46 typedef Xapian::PositionIterator::Internal PositionList;
47 typedef Xapian::ValueIterator::Internal ValueList;
48 
49 namespace Xapian {
50 
51 class Query; // Forward declaration - only used by const reference in this header.
52 struct ReplicationInfo; // Forward declaration - only used via pointer in this header.
53 
54 /** Abstract base class for databases: the interface which each backend implements.
55  */
56 class Database::Internal : public Xapian::Internal::intrusive_base {
57     private:
58 	/// Copies are not allowed (private, and not defined in this header).
59 	Internal(const Internal &);
60 
61 	/// Assignment is not allowed (private, and not defined in this header).
62 	void operator=(const Internal &);
63 
64     protected:
65 	/// Transaction state - values greater than zero mean a transaction is active.
66 	enum {
67 	    TRANSACTION_UNIMPLEMENTED = -1, // Used by InMemory.
68 	    TRANSACTION_NONE = 0,
69 	    TRANSACTION_UNFLUSHED = 1,
70 	    TRANSACTION_FLUSHED = 2
71 	} transaction_state;
72 	/// Test if a transaction is in progress (state is UNFLUSHED or FLUSHED).
transaction_active()73 	bool transaction_active() const { return int(transaction_state) > 0; }
74 
75 	/** Create a database with no transaction active - called only by derived classes. */
Internal()76 	Internal() : transaction_state(TRANSACTION_NONE) { }
77 
78 	/** Internal method to perform cleanup when a writable database is
79 	 *  destroyed with uncommitted changes.
80 	 *
81 	 *  A derived class' destructor should call this method before
82 	 *  destroying the database to ensure that no sessions or
83 	 *  transactions are in progress at destruction time.
84 	 *
85 	 *  Note that it is not safe to throw exceptions from destructors,
86 	 *  so this method will catch and discard any exceptions.
87 	 */
88 	void dtor_called();
89 
90     public:
91 	/** Destroy the database.
92 	 *
93 	 *  This method should not be called until all objects using the
94 	 *  database have been cleaned up.
95 	 *
96 	 *  If any transactions are in progress, they should
97 	 *  be finished by cancel_transaction() or
98 	 *  commit_transaction() - if this is not done, the destructor
99 	 *  will attempt to clean things up by cancelling the transaction,
100 	 *  but any errors produced by these operations will not be reported.
101 	 */
102 	virtual ~Internal();
103 
104 	/** Send a keep-alive signal to a remote database, to stop
105 	 *  it from timing out.
106 	 */
107 	virtual void keep_alive();
108 	/// NOTE(review): undocumented - presumably a prefetch hint for @a query; confirm in the implementation.
109 	virtual void readahead_for_query(const Xapian::Query & query);
110 
111 	//////////////////////////////////////////////////////////////////
112 	// Database statistics:
113 	// ====================
114 
115 	/** Return the number of docs in this (sub) database.
116 	 */
117 	virtual Xapian::doccount get_doccount() const = 0;
118 
119 	/** Return the last used document id of this (sub) database.
120 	 */
121 	virtual Xapian::docid get_lastdocid() const = 0;
122 
123 	/** Return the total length of all documents in this database. */
124 	virtual Xapian::totallength get_total_length() const = 0;
125 
126 	/** Get the length of a given document.
127 	 *
128 	 *  Document length, for the purposes of Xapian, is defined to be
129 	 *  the number of instances of terms within a document.  Expressed
130 	 *  differently, the sum of the within document frequencies over
131 	 *  all the terms in the document.
132 	 *
133 	 *  @param did  The document id of the document whose length is
134 	 *              being requested.
135 	 */
136 	virtual Xapian::termcount get_doclength(Xapian::docid did) const = 0;
137 
138 	/** Get the number of unique terms in a document.
139 	 *
140 	 *  @param did  The document id of the document whose number of unique
141 	 *		terms is being requested.
142 	 */
143 	virtual	Xapian::termcount get_unique_terms(Xapian::docid did) const = 0;
144 
145 	/** Returns frequencies for a term.
146 	 *
147 	 *  @param term		The term to get frequencies for
148 	 *  @param termfreq_ptr	Pointer to where to return the number of docs
149 	 *			indexed by @a term (or NULL not to return)
150 	 *  @param collfreq_ptr	Pointer to where to return the number of occurrences
151 	 *			of @a term in the database (or NULL not to return)
152 	 */
153 	virtual void get_freqs(const string & term,
154 			       Xapian::doccount * termfreq_ptr,
155 			       Xapian::termcount * collfreq_ptr) const = 0;
156 
157 	/** Return the frequency of a given value slot.
158 	 *
159 	 *  This is the number of documents which have a (non-empty) value
160 	 *  stored in the slot.
161 	 *
162 	 *  @param slot The value slot to examine.
163 	 *
164 	 *  @exception UnimplementedError The frequency of the value isn't
165 	 *  available for this database type.
166 	 */
167 	virtual Xapian::doccount get_value_freq(Xapian::valueno slot) const = 0;
168 
169 	/** Get a lower bound on the values stored in the given value slot.
170 	 *
171 	 *  If the lower bound isn't available for the given database type,
172 	 *  this will return the lowest possible bound - the empty string.
173 	 *
174 	 *  @param slot The value slot to examine.
175 	 */
176 	virtual std::string get_value_lower_bound(Xapian::valueno slot) const = 0;
177 
178 	/** Get an upper bound on the values stored in the given value slot.
179 	 *
180 	 *  @param slot The value slot to examine.
181 	 *
182 	 *  @exception UnimplementedError The upper bound of the values isn't
183 	 *  available for this database type.
184 	 */
185 	virtual std::string get_value_upper_bound(Xapian::valueno slot) const = 0;
186 
187 	/// Get a lower bound on the length of a document in this DB.
188 	virtual Xapian::termcount get_doclength_lower_bound() const;
189 
190 	/// Get an upper bound on the length of a document in this DB.
191 	virtual Xapian::termcount get_doclength_upper_bound() const;
192 
193 	/// Get an upper bound on the wdf of term @a term.
194 	virtual Xapian::termcount get_wdf_upper_bound(const std::string & term) const;
195 
196 	/** Check whether a given term is in the database.
197 	 *
198 	 *  @param tname  The term whose presence is being checked.
199 	 */
200 	virtual bool term_exists(const string & tname) const = 0;
201 
202 	/** Check whether this database contains any positional information.
203 	 */
204 	virtual bool has_positions() const = 0;
205 
206 	//////////////////////////////////////////////////////////////////
207 	// Data item access methods:
208 	// =========================
209 
210 	/** Open a posting list.
211 	 *
212 	 *  Method defined by subclass to open a posting list.
213 	 *  This is a list of all the documents which contain a given term.
214 	 *
215 	 *  @param tname  The term whose posting list is being requested.
216 	 *
217 	 *  @return       A pointer to the newly created posting list.
218 	 *		  If the term doesn't exist, a LeafPostList object
219 	 *		  returning no documents is returned, which makes it
220 	 *		  easier to implement a search over multiple databases.
221 	 *		  This object must be deleted by the caller after
222 	 *                use.
223 	 */
224 	virtual LeafPostList * open_post_list(const string & tname) const = 0;
225 
226 	/** Open a value stream.
227 	 *
228 	 *  This returns the value in a particular slot for each document.
229 	 *
230 	 *  @param slot	The value slot.
231 	 *
232 	 *  @return	Pointer to a new ValueList object which should be
233 	 *		deleted by the caller once it is no longer needed.
234 	 */
235 	virtual ValueList * open_value_list(Xapian::valueno slot) const;
236 
237 	/** Open a term list.
238 	 *
239 	 *  This is a list of all the terms contained by a given document.
240 	 *
241 	 *  @param did    The document id whose term list is being requested.
242 	 *
243 	 *  @return       A pointer to the newly created term list.
244 	 *                This object must be deleted by the caller after
245 	 *                use.
246 	 */
247 	virtual TermList * open_term_list(Xapian::docid did) const = 0;
248 
249 	/** Open an allterms list.
250 	 *
251 	 *  This is a list of all the terms in the database.
252 	 *
253 	 *  @param prefix The prefix to restrict the terms to.
254 	 *  @return       A pointer to the newly created allterms list.
255 	 *                This object must be deleted by the caller after
256 	 *                use.
257 	 */
258 	virtual TermList * open_allterms(const string & prefix) const = 0;
259 
260 	/** Open a position list for the given term in the given document.
261 	 *
262 	 *  @param did    The document id for which a position list is being
263 	 *                requested.
264 	 *  @param tname  The term for which a position list is being
265 	 *                requested.
266 	 *
267 	 *  @return       A pointer to the newly created position list.
268 	 *                This object must be deleted by the caller after
269 	 *                use.
270 	 */
271 	virtual PositionList * open_position_list(Xapian::docid did,
272 					const string & tname) const = 0;
273 
274 	/** Open a document.
275 	 *
276 	 *  This is used to access the values and data associated with a
277 	 *  document.  See class Xapian::Document::Internal for further details.
278 	 *
279 	 *  @param did    The document id which is being requested.
280 	 *
281 	 *  @param lazy   No need to check that this document actually exists.
282 	 *                Used when we already know that this document exists
283 	 *                (only a hint - the backend may still check).
284 	 *
285 	 *  @return       A pointer to the newly created document object.
286 	 *                This object must be deleted by the caller after
287 	 *                use.
288 	 */
289 	virtual Xapian::Document::Internal *
290 	open_document(Xapian::docid did, bool lazy) const = 0;
291 
292 	/** Create a termlist tree from trigrams of @a word.
293 	 *
294 	 *  You can assume word.size() > 1.
295 	 *
296 	 *  If there are no trigrams, returns NULL.
297 	 */
298 	virtual TermList * open_spelling_termlist(const string & word) const;
299 
300 	/** Return a termlist which returns the words which are spelling
301 	 *  correction targets.
302 	 *
303 	 *  If there are no spelling correction targets, returns NULL.
304 	 */
305 	virtual TermList * open_spelling_wordlist() const;
306 
307 	/** Return the number of times @a word was added as a spelling. */
308 	virtual Xapian::doccount get_spelling_frequency(const string & word) const;
309 
310 	/** Add a word to the spelling dictionary.
311 	 *
312 	 *  If the word is already present, its frequency is increased.
313 	 *
314 	 *  @param word	    The word to add.
315 	 *  @param freqinc  How much to increase its frequency by.
316 	 */
317 	virtual void add_spelling(const string & word,
318 				  Xapian::termcount freqinc) const;
319 
320 	/** Remove a word from the spelling dictionary.
321 	 *
322 	 *  The word's frequency is decreased, and if it would become zero or
323 	 *  less then the word is removed completely.
324 	 *
325 	 *  @param word	    The word to remove.
326 	 *  @param freqdec  How much to decrease its frequency by.
327 	 */
328 	virtual void remove_spelling(const string & word,
329 				     Xapian::termcount freqdec) const;
330 
331 	/** Open a termlist returning synonyms for a term.
332 	 *
333 	 *  If @a term has no synonyms, returns NULL.
334 	 */
335 	virtual TermList * open_synonym_termlist(const string & term) const;
336 
337 	/** Open a termlist returning each term which has synonyms.
338 	 *
339 	 *  @param prefix   If non-empty, only terms with this prefix are
340 	 *		    returned.
341 	 */
342 	virtual TermList * open_synonym_keylist(const string & prefix) const;
343 
344 	/** Add a synonym for a term.
345 	 *
346 	 *  If @a synonym is already a synonym for @a term, then no action is
347 	 *  taken.
348 	 */
349 	virtual void add_synonym(const string & term, const string & synonym) const;
350 
351 	/** Remove a synonym for a term.
352 	 *
353 	 *  If @a synonym isn't a synonym for @a term, then no action is taken.
354 	 */
355 	virtual void remove_synonym(const string & term, const string & synonym) const;
356 
357 	/** Clear all synonyms for a term.
358 	 *
359 	 *  If @a term has no synonyms, no action is taken.
360 	 */
361 	virtual void clear_synonyms(const string & term) const;
362 
363 	/** Get the metadata associated with a given key.
364 	 *
365 	 *  See Database::get_metadata() for more information.
366 	 */
367 	virtual string get_metadata(const string & key) const;
368 
369 	/** Open a termlist returning each metadata key.
370 	 *
371 	 *  Only metadata keys which are associated with a non-empty value will
372 	 *  be returned.
373 	 *
374 	 *  @param prefix   If non-empty, only keys with this prefix are
375 	 *		    returned.
376 	 */
377 	virtual TermList * open_metadata_keylist(const std::string &prefix) const;
378 
379 	/** Set the metadata associated with a given key.
380 	 *
381 	 *  See WritableDatabase::set_metadata() for more information.
382 	 */
383 	virtual void set_metadata(const string & key, const string & value);
384 
385 	/** Reopen the database to the latest available revision.
386 	 *
387 	 *  Database backends which don't support simultaneous update and
388 	 *  reading probably don't need to do anything here.
389 	 */
390 	virtual bool reopen();
391 
392 	/** Close the database.
393 	 */
394 	virtual void close() = 0;
395 
396 	//////////////////////////////////////////////////////////////////
397 	// Modifying the database:
398 	// =======================
399 
400 	/** Commit pending modifications to the database.
401 	 *
402 	 *  See WritableDatabase::commit() for more information.
403 	 */
404 	virtual void commit();
405 
406 	/** Cancel pending modifications to the database. */
407 	virtual void cancel();
408 
409 	/** Begin a transaction.
410 	 *
411 	 *  See WritableDatabase::begin_transaction() for more information.
412 	 */
413 	void begin_transaction(bool flushed);
414 
415 	/** Commit a transaction.
416 	 *
417 	 *  See WritableDatabase::commit_transaction() for more information.
418 	 */
419 	void commit_transaction();
420 
421 	/** Cancel a transaction.
422 	 *
423 	 *  See WritableDatabase::cancel_transaction() for more information.
424 	 */
425 	void cancel_transaction();
426 
427 	/** Add a new document to the database.
428 	 *
429 	 *  See WritableDatabase::add_document() for more information.
430 	 */
431 	virtual Xapian::docid add_document(const Xapian::Document & document);
432 
433 	/** Delete a document in the database.
434 	 *
435 	 *  See WritableDatabase::delete_document() for more information.
436 	 */
437 	virtual void delete_document(Xapian::docid did);
438 
439 	/** Delete any documents indexed by a term from the database.
440 	 *
441 	 *  See WritableDatabase::delete_document() for more information.
442 	 */
443 	virtual void delete_document(const string & unique_term);
444 
445 	/** Replace a given document in the database.
446 	 *
447 	 *  See WritableDatabase::replace_document() for more information.
448 	 */
449 	virtual void replace_document(Xapian::docid did,
450 				      const Xapian::Document & document);
451 
452 	/** Replace any documents matching a term.
453 	 *
454 	 *  See WritableDatabase::replace_document() for more information.
455 	 */
456 	virtual Xapian::docid replace_document(const string & unique_term,
457 					       const Xapian::Document & document);
458 
459 	/** Request and later collect a document from the database.
460 	 *  Multiple documents can be requested with request_document(),
461 	 *  and then collected with collect_document().  Allows the backend
462 	 *  to optimise (e.g. the remote backend can start requests for all
463 	 *  the documents so they fetch in parallel).
464 	 *
465 	 *  If a backend doesn't support this, request_document() can be a
466 	 *  no-op and collect_document() the same as open_document().
467 	 */
468 	//@{
469 	virtual void request_document(Xapian::docid /*did*/) const;
470 
471 	virtual Xapian::Document::Internal * collect_document(Xapian::docid did) const;
472 	//@}
473 
474 	/** Write a set of changesets to a file descriptor.
475 	 *
476 	 *  This call may reopen the database, leaving it pointing to a more
477 	 *  recent version of the database.
478 	 */
479 	virtual void write_changesets_to_fd(int fd,
480 					    const std::string & start_revision,
481 					    bool need_whole_db,
482 					    Xapian::ReplicationInfo * info);
483 
484 	/// Get a string describing the current revision of the database.
485 	virtual string get_revision_info() const;
486 
487 	/** Get a UUID for the database.
488 	 *
489 	 *  The UUID will persist for the lifetime of the database.
490 	 *
491 	 *  Replicas (e.g. made with the replication protocol, or by copying all
492 	 *  the database files) will have the same UUID.  However, copies (made
493 	 *  with copydatabase, or xapian-compact) will have different UUIDs.
494 	 *
495 	 *  If the backend does not support UUIDs the empty string is returned.
496 	 */
497 	virtual string get_uuid() const;
498 
499 	/** Notify the database that a document object is no longer valid.
500 	 *
501 	 *  This is used to invalidate references to a document kept by a
502 	 *  database for doing lazy updates.  If we moved to using a weak_ptr
503 	 *  instead we wouldn't need a special method for this, but it would
504 	 *  involve a fair bit of reorganising of other parts of the code.
505 	 */
506 	virtual void invalidate_doc_object(Xapian::Document::Internal * obj) const;
507 
508 	/** Get backend information about this database.
509 	 *
510 	 *  @param path  If non-NULL, the pointed to string is set to the file
511 	 *		 path of this database (or to some string describing
512 	 *		 the database in a backend-specified format if "path"
513 	 *		 isn't a concept which makes sense).
514 	 *
515 	 *  @return	A constant indicating the backend type.
516 	 */
517 	virtual int get_backend_info(string * path) const = 0;
518 
519 	/** Find lowest and highest docids actually in use.
520 	 *
521 	 *  Only used by compaction, so only needs to be implemented by
522 	 *  backends which support compaction.
523 	 */
524 	virtual void get_used_docid_range(Xapian::docid & first,
525 					  Xapian::docid & last) const;
526 
527 	/** Return true if the database is open for writing.
528 	 *
529 	 *  If this is a WritableDatabase, always returns true.
530 	 *
531 	 *  For a Database, test if there's a writer holding the lock (or if
532 	 *  we can't test for a lock without taking it on the current platform,
533 	 *  throw Xapian::UnimplementedError).
534 	 */
535 	virtual bool locked() const;
536 };
537 
538 }
539 
540 #endif /* OM_HGUARD_DATABASE_H */
541