1 /** @file remote-database.h
 *  @brief RemoteDatabase is the base class for remote database implementations.
3  */
4 /* Copyright (C) 2006,2007,2009,2010 Olly Betts
5  * Copyright (C) 2007,2009,2010 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_REMOTE_DATABASE_H
23 #define XAPIAN_INCLUDED_REMOTE_DATABASE_H
24 
25 #include "database.h"
26 #include "omenquireinternal.h"
27 #include "omqueryinternal.h"
28 #include "remoteconnection.h"
29 #include "valuestats.h"
30 #include "xapian/weight.h"
31 
32 namespace Xapian {
33     class RSet;
34 }
35 
36 class NetworkPostList;
37 
38 /** RemoteDatabase is the baseclass for remote database implementations.
39  *
40  *  A subclass of this class is required which opens a TCP connection or
41  *  pipe to the remote database server.  This subclass works in combination
42  *  with the RemoteSubMatch class during the match process.
43  */
44 class RemoteDatabase : public Xapian::Database::Internal {
45     /// Don't allow assignment.
46     void operator=(const RemoteDatabase &);
47 
48     /// Don't allow copying.
49     RemoteDatabase(const RemoteDatabase &);
50 
51     /// The object which does the I/O.
52     mutable RemoteConnection link;
53 
54     /// The remote document count, given at open.
55     mutable Xapian::doccount doccount;
56 
57     /// The remote last docid, given at open.
58     mutable Xapian::docid lastdocid;
59 
60     /// A lower bound on the smallest document length in this database.
61     mutable Xapian::termcount doclen_lbound;
62 
63     /// An upper bound on the greatest document length in this database.
64     mutable Xapian::termcount doclen_ubound;
65 
66     /// The total length of all documents in this database.
67     mutable totlen_t total_length;
68 
69     /// Has positional information?
70     mutable bool has_positional_info;
71 
72     /// The UUID of the remote database.
73     mutable string uuid;
74 
75     /// The context to return with any error messages
76     string context;
77 
78     mutable bool cached_stats_valid;
79 
80     /** The most recently used value statistics. */
81     mutable ValueStats mru_valstats;
82 
83     /** The value slot for the most recently used value statistics.
84      *
85      *  Set to BAD_VALUENO if no value statistics have yet been looked up.
86      */
87     mutable Xapian::valueno mru_slot;
88 
89     void update_stats(message_type msg_code = MSG_UPDATE) const;
90 
91     void apply_stats_update(const char * p, const char * p_end) const;
92 
93   protected:
94     /** Constructor.  The constructor is protected so that raw instances
95      *  can't be created - a derived class must be instantiated which
96      *  has code in the constructor to open the socket.
97      *
98      *  @param fd	The file descriptor for the connection to the server.
99      *  @param timeout_ The timeout used with the network operations.
100      *			Generally a Xapian::NetworkTimeoutError exception will
101      *			be thrown if the remote end doesn't respond for this
102      *			length of time (in seconds).  A timeout of 0 means that
103      *			operations will never timeout.
104      *  @param context_ The context to return with any error messages.
105      *	@param writable	Is this a WritableDatabase?
106      */
107     RemoteDatabase(int fd, double timeout_, const string & context_,
108 		   bool writable);
109 
110     /// Receive a message from the server.
111     reply_type get_message(string & message, reply_type required_type = REPLY_MAX) const;
112 
113     /// Send a message to the server.
114     void send_message(message_type type, const string & data) const;
115 
116     /// Close the socket
117     void do_close();
118 
119     bool get_posting(Xapian::docid &did, Xapian::weight &w, string &value);
120 
121     /// The timeout value used in network communications, in seconds.
122     double timeout;
123 
124   public:
125     /// Return this pointer as a RemoteDatabase*.
126     RemoteDatabase * as_remotedatabase();
127 
128     /// Send a keep-alive message.
129     void keep_alive();
130 
131     /** Set the query
132      *
133      * @param query			The query.
134      * @param qlen			The query length.
135      * @param collapse_max		Max number of items with the same key
136      *					to leave after collapsing (0 for don't
137      *					collapse).
138      * @param collapse_key		The value number to collapse matches on.
139      * @param order			Sort order for docids.
140      * @param sort_key			The value number to sort on.
141      * @param sort_by			Which order to apply sorts in.
142      * @param sort_value_forward	Sort order for values.
143      * @param percent_cutoff		Percentage cutoff.
144      * @param weight_cutoff		Weight cutoff.
145      * @param wtscheme			Weighting scheme.
146      * @param omrset			The rset.
147      * @param matchspies                The matchspies to use.
148      */
149     void set_query(const Xapian::Query::Internal *query,
150 		   Xapian::termcount qlen,
151 		   Xapian::doccount collapse_max,
152 		   Xapian::valueno collapse_key,
153 		   Xapian::Enquire::docid_order order,
154 		   Xapian::valueno sort_key,
155 		   Xapian::Enquire::Internal::sort_setting sort_by,
156 		   bool sort_value_forward,
157 		   int percent_cutoff, Xapian::weight weight_cutoff,
158 		   const Xapian::Weight *wtscheme,
159 		   const Xapian::RSet &omrset,
160 		   const vector<Xapian::MatchSpy *> & matchspies);
161 
162     /** Get the stats from the remote server.
163      *
164      *  @return	true if we got the remote stats; false if we should try again.
165      */
166     bool get_remote_stats(bool nowait, Xapian::Weight::Internal &out);
167 
168     /// Send the global stats to the remote server.
169     void send_global_stats(Xapian::doccount first,
170 			   Xapian::doccount maxitems,
171 			   Xapian::doccount check_at_least,
172 			   const Xapian::Weight::Internal &stats);
173 
174     /// Get the MSet from the remote server.
175     void get_mset(Xapian::MSet &mset,
176 		  const vector<Xapian::MatchSpy *> & matchspies);
177 
178     /// Get remote metadata key list.
179     TermList * open_metadata_keylist(const std::string & prefix) const;
180 
181     /// Get remote termlist.
182     TermList * open_term_list(Xapian::docid did) const;
183 
184     /// Iterate all terms.
185     TermList * open_allterms(const string & prefix) const;
186 
187     bool has_positions() const;
188 
189     void reopen();
190 
191     void close();
192 
193     LeafPostList * open_post_list(const string & tname) const;
194 
195     Xapian::doccount read_post_list(const string &term, NetworkPostList & pl) const;
196 
197     PositionList * open_position_list(Xapian::docid did,
198 				      const string & tname) const;
199 
200     /// Get a remote document.
201     Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const;
202 
203     /// Get the document count.
204     Xapian::doccount get_doccount() const;
205 
206     /// Get the last used docid.
207     Xapian::docid get_lastdocid() const;
208 
209     totlen_t get_total_length() const;
210 
211     Xapian::termcount get_doclength(Xapian::docid did) const;
212 
213     /// Check if term exists.
214     bool term_exists(const string & tname) const;
215 
216     /// Find frequency of term.
217     Xapian::doccount get_termfreq(const string & tname) const;
218 
219     Xapian::termcount get_collection_freq(const string & tname) const;
220 
221     /// Read the value statistics for a value from a remote database.
222     void read_value_stats(Xapian::valueno slot) const;
223     Xapian::doccount get_value_freq(Xapian::valueno slot) const;
224     std::string get_value_lower_bound(Xapian::valueno slot) const;
225     std::string get_value_upper_bound(Xapian::valueno slot) const;
226 
227     Xapian::termcount get_doclength_lower_bound() const;
228     Xapian::termcount get_doclength_upper_bound() const;
229     Xapian::termcount get_wdf_upper_bound(const string & term) const;
230 
231     void commit();
232 
233     void cancel();
234 
235     Xapian::docid add_document(const Xapian::Document & doc);
236 
237     void delete_document(Xapian::docid did);
238     void delete_document(const std::string & unique_term);
239 
240     void replace_document(Xapian::docid did, const Xapian::Document & doc);
241     Xapian::docid replace_document(const std::string & unique_term,
242 				   const Xapian::Document & document);
243 
244     std::string get_uuid() const;
245 
246     string get_metadata(const string & key) const;
247 
248     void set_metadata(const string & key, const string & value);
249 
250     void add_spelling(const std::string&, Xapian::termcount) const;
251 
252     void remove_spelling(const std::string&,  Xapian::termcount freqdec) const;
253 };
254 
255 #endif // XAPIAN_INCLUDED_REMOTE_DATABASE_H
256