1 /** @file chert_values.h
2  * @brief ChertValueManager class
3  */
4 /* Copyright (C) 2008 Olly Betts
5  * Copyright (C) 2008 Lemur Consulting Ltd
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
20  */
21 
22 #ifndef XAPIAN_INCLUDED_CHERT_VALUES_H
23 #define XAPIAN_INCLUDED_CHERT_VALUES_H
24 
25 #include "pack.h"
26 #include "valuestats.h"
27 
28 #include "xapian/error.h"
29 #include "xapian/types.h"
30 
31 #include "autoptr.h"
32 #include <map>
33 #include <string>
34 
35 class ChertCursor;
36 
37 /** Generate a key for a value stream chunk. */
38 inline std::string
make_valuechunk_key(Xapian::valueno slot,Xapian::docid did)39 make_valuechunk_key(Xapian::valueno slot, Xapian::docid did)
40 {
41     std::string key("\0\xd8", 2);
42     pack_uint(key, slot);
43     pack_uint_preserving_sort(key, did);
44     return key;
45 }
46 
47 inline Xapian::docid
docid_from_key(Xapian::valueno required_slot,const std::string & key)48 docid_from_key(Xapian::valueno required_slot, const std::string & key)
49 {
50     const char * p = key.data();
51     const char * end = p + key.length();
52     // Fail if not a value chunk key.
53     if (end - p < 2 || *p++ != '\0' || *p++ != '\xd8') return 0;
54     Xapian::valueno slot;
55     if (!unpack_uint(&p, end, &slot))
56        	throw Xapian::DatabaseCorruptError("bad value key");
57     // Fail if for a different slot.
58     if (slot != required_slot) return 0;
59     Xapian::docid did;
60     if (!unpack_uint_preserving_sort(&p, end, &did))
61        	throw Xapian::DatabaseCorruptError("bad value key");
62     return did;
63 }
64 
65 namespace Xapian {
66     class Document;
67 }
68 
69 class ChertPostListTable;
70 class ChertTermListTable;
71 struct ValueStats;
72 
73 class ChertValueManager {
74     /** The value number for the most recently used value statistics.
75      *
76      *  Set to Xapian::BAD_VALUENO if no value statistics are currently
77      *  cached.
78      */
79     mutable Xapian::valueno mru_slot;
80 
81     /** The most recently used value statistics. */
82     mutable ValueStats mru_valstats;
83 
84     ChertPostListTable * postlist_table;
85 
86     ChertTermListTable * termlist_table;
87 
88     std::map<Xapian::docid, std::string> slots;
89 
90     std::map<Xapian::valueno, std::map<Xapian::docid, std::string> > changes;
91 
92     mutable AutoPtr<ChertCursor> cursor;
93 
94     void add_value(Xapian::docid did, Xapian::valueno slot,
95 		   const std::string & val);
96 
97     void remove_value(Xapian::docid did, Xapian::valueno slot);
98 
99     Xapian::docid get_chunk_containing_did(Xapian::valueno slot,
100 					   Xapian::docid did,
101 					   std::string &chunk) const;
102 
103     /** Get the statistics for value slot @a slot. */
104     void get_value_stats(Xapian::valueno slot) const;
105 
106     void get_value_stats(Xapian::valueno slot, ValueStats & stats) const;
107 
108   public:
109     /** Create a new ChertValueManager object. */
ChertValueManager(ChertPostListTable * postlist_table_,ChertTermListTable * termlist_table_)110     ChertValueManager(ChertPostListTable * postlist_table_,
111 		      ChertTermListTable * termlist_table_)
112 	: mru_slot(Xapian::BAD_VALUENO),
113 	  postlist_table(postlist_table_),
114 	  termlist_table(termlist_table_) { }
115 
116     // Merge in batched-up changes.
117     void merge_changes();
118 
119     void add_document(Xapian::docid did, const Xapian::Document &doc,
120 		      std::map<Xapian::valueno, ValueStats> & value_stats);
121 
122     void delete_document(Xapian::docid did,
123 			 std::map<Xapian::valueno, ValueStats> & value_stats);
124 
125     void replace_document(Xapian::docid did, const Xapian::Document &doc,
126 			  std::map<Xapian::valueno, ValueStats> & value_stats);
127 
128     std::string get_value(Xapian::docid did, Xapian::valueno slot) const;
129 
130     void get_all_values(std::map<Xapian::valueno, std::string> & values,
131 			Xapian::docid did) const;
132 
get_value_freq(Xapian::valueno slot)133     Xapian::doccount get_value_freq(Xapian::valueno slot) const {
134 	if (mru_slot != slot) get_value_stats(slot);
135 	return mru_valstats.freq;
136     }
137 
get_value_lower_bound(Xapian::valueno slot)138     std::string get_value_lower_bound(Xapian::valueno slot) const {
139 	if (mru_slot != slot) get_value_stats(slot);
140 	return mru_valstats.lower_bound;
141     }
142 
get_value_upper_bound(Xapian::valueno slot)143     std::string get_value_upper_bound(Xapian::valueno slot) const {
144 	if (mru_slot != slot) get_value_stats(slot);
145 	return mru_valstats.upper_bound;
146     }
147 
148     /** Write the updated statistics to the table.
149      *
150      *  If the @a freq member of the statistics for a particular slot is 0, the
151      *  statistics for that slot will be cleared.
152      *
153      *  @param value_stats The statistics to set.
154      */
155     void set_value_stats(std::map<Xapian::valueno, ValueStats> & value_stats);
156 
reset()157     void reset() {
158 	/// Ignore any old cached valuestats.
159 	mru_slot = Xapian::BAD_VALUENO;
160     }
161 
is_modified()162     bool is_modified() const {
163 	return !changes.empty();
164     }
165 
cancel()166     void cancel() {
167 	// Discard batched-up changes.
168 	slots.clear();
169 	changes.clear();
170     }
171 };
172 
173 class ValueChunkReader {
174     const char *p;
175     const char *end;
176 
177     Xapian::docid did;
178 
179     std::string value;
180 
181   public:
182     /// Create a ValueChunkReader which is already at_end().
ValueChunkReader()183     ValueChunkReader() : p(NULL) { }
184 
ValueChunkReader(const char * p_,size_t len,Xapian::docid did_)185     ValueChunkReader(const char * p_, size_t len, Xapian::docid did_) {
186 	assign(p_, len, did_);
187     }
188 
189     void assign(const char * p_, size_t len, Xapian::docid did_);
190 
at_end()191     bool at_end() const { return p == NULL; }
192 
get_docid()193     Xapian::docid get_docid() const { return did; }
194 
get_value()195     const std::string & get_value() const { return value; }
196 
197     void next();
198 
199     void skip_to(Xapian::docid target);
200 };
201 
202 #endif // XAPIAN_INCLUDED_CHERT_VALUES_H
203