1 /** @file
2  *  @brief RemoteDatabase is the baseclass for remote database implementations.
3  */
4 /* Copyright (C) 2006,2007,2009,2010,2011,2014,2015,2019,2020 Olly Betts
5  * Copyright (C) 2007,2009,2010 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License as
9  * published by the Free Software Foundation; either version 2 of the
10  * License, or (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_REMOTE_DATABASE_H
23 #define XAPIAN_INCLUDED_REMOTE_DATABASE_H
24 
25 #include "backends/backends.h"
26 #include "backends/database.h"
27 #include "api/omenquireinternal.h"
28 #include "api/queryinternal.h"
29 #include "net/remoteconnection.h"
30 #include "backends/valuestats.h"
31 #include "xapian/weight.h"
32 
33 namespace Xapian {
34     class RSet;
35 }
36 
37 class NetworkPostList;
38 
39 /** RemoteDatabase is the baseclass for remote database implementations.
40  *
41  *  A subclass of this class is required which opens a TCP connection or
42  *  pipe to the remote database server.  This subclass works in combination
43  *  with the RemoteSubMatch class during the match process.
44  */
45 class RemoteDatabase : public Xapian::Database::Internal {
46     /// Don't allow assignment.
47     void operator=(const RemoteDatabase &);
48 
49     /// Don't allow copying.
50     RemoteDatabase(const RemoteDatabase &);
51 
52     /// The object which does the I/O.
53     mutable OwnedRemoteConnection link;
54 
55     /// The remote document count, given at open.
56     mutable Xapian::doccount doccount;
57 
58     /// The remote last docid, given at open.
59     mutable Xapian::docid lastdocid;
60 
61     /// A lower bound on the smallest document length in this database.
62     mutable Xapian::termcount doclen_lbound;
63 
64     /// An upper bound on the greatest document length in this database.
65     mutable Xapian::termcount doclen_ubound;
66 
67     /// The total length of all documents in this database.
68     mutable Xapian::totallength total_length;
69 
70     /// Has positional information?
71     mutable bool has_positional_info;
72 
73     /** Are we currently expecting a reply?
74      *
75      *  Our caller might send a message but then an exception (from another
76      *  shard or locally) might cause it not to try to read the reply before
77      *  sending another message.  This flag allows us to detect that situation
78      *  and discard the unwanted reply rather than trying to read it as the
79      *  response to the new message.
80      *
81      *  Unhelpfully the remote protocol in 1.4.x can send REPLY_DOCLENGTH in
82      *  response to MSG_DOCLENGTH (when it's a final reply) or in response to
83      *  MSG_TERMLIST (when further replies are expected).  To allow use to
84      *  distinguish these cases, pending_reply is set to the MSG_* code, or
85      *  -1 if we're not currently expecting a reply.
86      */
87     mutable int pending_reply = -1;
88 
89     /// The UUID of the remote database.
90     mutable string uuid;
91 
92     /// The context to return with any error messages
93     string context;
94 
95     mutable bool cached_stats_valid;
96 
97     /** The most recently used value statistics. */
98     mutable ValueStats mru_valstats;
99 
100     /** The value slot for the most recently used value statistics.
101      *
102      *  Set to BAD_VALUENO if no value statistics have yet been looked up.
103      */
104     mutable Xapian::valueno mru_slot;
105 
106     /** True if there are (or may be) uncommitted changes.
107      *
108      *  Used to optimise away commit()/cancel() calls.  These can be explicit,
109      *  but also can happen implicitly when the WritableDatabase destructor is
110      *  called.
111      */
112     mutable bool uncommitted_changes = false;
113 
114     bool update_stats(message_type msg_code = MSG_UPDATE,
115 		      const std::string & body = std::string()) const;
116 
117   protected:
118     /** Constructor.  The constructor is protected so that raw instances
119      *  can't be created - a derived class must be instantiated which
120      *  has code in the constructor to open the socket.
121      *
122      *  @param fd	The file descriptor for the connection to the server.
123      *  @param timeout_ The timeout used with the network operations.
124      *			Generally a Xapian::NetworkTimeoutError exception will
125      *			be thrown if the remote end doesn't respond for this
126      *			length of time (in seconds).  A timeout of 0 means that
127      *			operations will never timeout.
128      *  @param context_ The context to return with any error messages.
129      *	@param writable	Is this a WritableDatabase?
130      *	@param flags	Xapian::DB_RETRY_LOCK or 0.
131      */
132     RemoteDatabase(int fd, double timeout_, const string & context_,
133 		   bool writable, int flags);
134 
135     /// Receive a message from the server.
136     reply_type get_message(std::string& message,
137 			   reply_type required_type,
138 			   reply_type required_type2) const;
139 
get_message(std::string & message,reply_type required_type)140     void get_message(std::string& message,
141 		     reply_type required_type) const {
142 	(void)get_message(message, required_type, required_type);
143     }
144 
get_message_or_done(std::string & message,reply_type required_type)145     bool get_message_or_done(std::string& message,
146 			     reply_type required_type) const {
147 	return get_message(message, required_type, REPLY_DONE) != REPLY_DONE;
148     }
149 
150     /// Send a message to the server.
151     void send_message(message_type type, const string & data) const;
152 
153     /// Close the socket
154     void do_close();
155 
156     bool get_posting(Xapian::docid &did, double &w, string &value);
157 
158     /// The timeout value used in network communications, in seconds.
159     double timeout;
160 
161   public:
162     /// Send a keep-alive message.
163     void keep_alive();
164 
165     /** Set the query
166      *
167      * @param query			The query.
168      * @param qlen			The query length.
169      * @param collapse_max		Max number of items with the same key
170      *					to leave after collapsing (0 for don't
171      *					collapse).
172      * @param collapse_key		The value number to collapse matches on.
173      * @param order			Sort order for docids.
174      * @param sort_key			The value number to sort on.
175      * @param sort_by			Which order to apply sorts in.
176      * @param sort_value_forward	Sort order for values.
177      * @param time_limit_		Seconds to reduce check_at_least after
178      *					(or <= 0 for no limit).
179      * @param percent_cutoff		Percentage cutoff.
180      * @param weight_cutoff		Weight cutoff.
181      * @param wtscheme			Weighting scheme.
182      * @param omrset			The rset.
183      * @param matchspies                The matchspies to use.
184      */
185     void set_query(const Xapian::Query& query,
186 		   Xapian::termcount qlen,
187 		   Xapian::doccount collapse_max,
188 		   Xapian::valueno collapse_key,
189 		   Xapian::Enquire::docid_order order,
190 		   Xapian::valueno sort_key,
191 		   Xapian::Enquire::Internal::sort_setting sort_by,
192 		   bool sort_value_forward,
193 		   double time_limit,
194 		   int percent_cutoff, double weight_cutoff,
195 		   const Xapian::Weight *wtscheme,
196 		   const Xapian::RSet &omrset,
197 		   const vector<Xapian::Internal::opt_intrusive_ptr<Xapian::MatchSpy>> & matchspies);
198 
199     /** Get the stats from the remote server.
200      *
201      *  @return	true if we got the remote stats; false if we should try again.
202      */
203     bool get_remote_stats(bool nowait, Xapian::Weight::Internal &out);
204 
205     /// Send the global stats to the remote server.
206     void send_global_stats(Xapian::doccount first,
207 			   Xapian::doccount maxitems,
208 			   Xapian::doccount check_at_least,
209 			   const Xapian::Weight::Internal &stats);
210 
211     /// Get the MSet from the remote server.
212     void get_mset(Xapian::MSet &mset,
213 		  const vector<Xapian::Internal::opt_intrusive_ptr<Xapian::MatchSpy>> & matchspies);
214 
215     /// Get remote metadata key list.
216     TermList * open_metadata_keylist(const std::string & prefix) const;
217 
218     /// Get remote termlist.
219     TermList * open_term_list(Xapian::docid did) const;
220 
221     /// Iterate all terms.
222     TermList * open_allterms(const string & prefix) const;
223 
224     bool has_positions() const;
225 
226     bool reopen();
227 
228     void close();
229 
230     LeafPostList * open_post_list(const string & tname) const;
231 
232     Xapian::doccount read_post_list(const string &term, NetworkPostList & pl) const;
233 
234     PositionList * open_position_list(Xapian::docid did,
235 				      const string & tname) const;
236 
237     /// Get a remote document.
238     Xapian::Document::Internal * open_document(Xapian::docid did, bool lazy) const;
239 
240     /// Get the document count.
241     Xapian::doccount get_doccount() const;
242 
243     /// Get the last used docid.
244     Xapian::docid get_lastdocid() const;
245 
246     Xapian::totallength get_total_length() const;
247 
248     Xapian::termcount get_doclength(Xapian::docid did) const;
249     Xapian::termcount get_unique_terms(Xapian::docid did) const;
250 
251     /// Check if term exists.
252     bool term_exists(const string & tname) const;
253 
254     void get_freqs(const string & term,
255 		   Xapian::doccount * termfreq_ptr,
256 		   Xapian::termcount * collfreq_ptr) const;
257 
258     /// Read the value statistics for a value from a remote database.
259     void read_value_stats(Xapian::valueno slot) const;
260     Xapian::doccount get_value_freq(Xapian::valueno slot) const;
261     std::string get_value_lower_bound(Xapian::valueno slot) const;
262     std::string get_value_upper_bound(Xapian::valueno slot) const;
263 
264     Xapian::termcount get_doclength_lower_bound() const;
265     Xapian::termcount get_doclength_upper_bound() const;
266     Xapian::termcount get_wdf_upper_bound(const string & term) const;
267 
268     void commit();
269 
270     void cancel();
271 
272     Xapian::docid add_document(const Xapian::Document & doc);
273 
274     void delete_document(Xapian::docid did);
275     void delete_document(const std::string & unique_term);
276 
277     void replace_document(Xapian::docid did, const Xapian::Document & doc);
278     Xapian::docid replace_document(const std::string & unique_term,
279 				   const Xapian::Document & document);
280 
281     std::string get_uuid() const;
282 
283     string get_metadata(const string & key) const;
284 
285     void set_metadata(const string & key, const string & value);
286 
287     void add_spelling(const std::string&, Xapian::termcount) const;
288 
289     void remove_spelling(const std::string&, Xapian::termcount freqdec) const;
290 
get_backend_info(string * path)291     int get_backend_info(string * path) const {
292 	if (path) *path = context;
293 	return BACKEND_REMOTE;
294     }
295 
296     bool locked() const;
297 };
298 
299 #endif // XAPIAN_INCLUDED_REMOTE_DATABASE_H
300