1 /** @file 2 * @brief Inverter class which "inverts the file". 3 */ 4 /* Copyright (C) 2009,2010,2013,2014 Olly Betts 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21 #ifndef XAPIAN_INCLUDED_GLASS_INVERTER_H 22 #define XAPIAN_INCLUDED_GLASS_INVERTER_H 23 24 #include "xapian/types.h" 25 26 #include <map> 27 #include <string> 28 #include <vector> 29 30 #include "omassert.h" 31 #include "str.h" 32 #include "xapian/error.h" 33 34 class GlassPostListTable; 35 class GlassPositionListTable; 36 37 namespace Xapian { 38 class TermIterator; 39 } 40 41 /** Magic wdf value used for a deleted posting. */ 42 const Xapian::termcount DELETED_POSTING = Xapian::termcount(-1); 43 44 /** Class which "inverts the file". */ 45 class Inverter { 46 friend class GlassPostListTable; 47 48 /// Class for storing the changes in frequencies for a term. 49 class PostingChanges { 50 friend class GlassPostListTable; 51 52 /// Change in term frequency, 53 Xapian::termcount_diff tf_delta; 54 55 /// Change in collection frequency. 56 Xapian::termcount_diff cf_delta; 57 58 /// Changes to this term's postlist. 59 std::map<Xapian::docid, Xapian::termcount> pl_changes; 60 61 public: 62 /// Constructor for an added posting. PostingChanges(Xapian::docid did,Xapian::termcount wdf)63 PostingChanges(Xapian::docid did, Xapian::termcount wdf) 64 : tf_delta(1), cf_delta(Xapian::termcount_diff(wdf)) 65 { 66 pl_changes.insert(std::make_pair(did, wdf)); 67 } 68 69 /// Constructor for a removed posting. PostingChanges(Xapian::docid did,Xapian::termcount wdf,bool)70 PostingChanges(Xapian::docid did, Xapian::termcount wdf, bool) 71 : tf_delta(-1), cf_delta(-Xapian::termcount_diff(wdf)) 72 { 73 pl_changes.insert(std::make_pair(did, DELETED_POSTING)); 74 } 75 76 /// Constructor for an updated posting. PostingChanges(Xapian::docid did,Xapian::termcount old_wdf,Xapian::termcount new_wdf)77 PostingChanges(Xapian::docid did, Xapian::termcount old_wdf, 78 Xapian::termcount new_wdf) 79 : tf_delta(0), cf_delta(Xapian::termcount_diff(new_wdf - old_wdf)) 80 { 81 pl_changes.insert(std::make_pair(did, new_wdf)); 82 } 83 84 /// Add a posting. add_posting(Xapian::docid did,Xapian::termcount wdf)85 void add_posting(Xapian::docid did, Xapian::termcount wdf) { 86 ++tf_delta; 87 cf_delta += wdf; 88 // Add did to term's postlist 89 pl_changes[did] = wdf; 90 } 91 92 /// Remove a posting. remove_posting(Xapian::docid did,Xapian::termcount wdf)93 void remove_posting(Xapian::docid did, Xapian::termcount wdf) { 94 --tf_delta; 95 cf_delta -= wdf; 96 // Remove did from term's postlist. 97 pl_changes[did] = DELETED_POSTING; 98 } 99 100 /// Update a posting. update_posting(Xapian::docid did,Xapian::termcount old_wdf,Xapian::termcount new_wdf)101 void update_posting(Xapian::docid did, Xapian::termcount old_wdf, 102 Xapian::termcount new_wdf) { 103 cf_delta += new_wdf - old_wdf; 104 pl_changes[did] = new_wdf; 105 } 106 107 /// Get the term frequency delta. get_tfdelta()108 Xapian::termcount_diff get_tfdelta() const { return tf_delta; } 109 110 /// Get the collection frequency delta. get_cfdelta()111 Xapian::termcount_diff get_cfdelta() const { return cf_delta; } 112 }; 113 114 /// Buffered changes to postlists. 115 std::map<std::string, PostingChanges> postlist_changes; 116 117 /// Buffered changes to positional data. 118 std::map<std::string, std::map<Xapian::docid, std::string>> pos_changes; 119 120 void store_positions(const GlassPositionListTable & position_table, 121 Xapian::docid did, 122 const std::string & tname, 123 const std::vector<Xapian::termpos> & posvec, 124 bool modifying); 125 126 void set_positionlist(Xapian::docid did, 127 const std::string & term, 128 const std::string & s); 129 130 public: 131 /// Buffered changes to document lengths. 132 std::map<Xapian::docid, Xapian::termcount> doclen_changes; 133 134 public: add_posting(Xapian::docid did,const std::string & term,Xapian::doccount wdf)135 void add_posting(Xapian::docid did, const std::string & term, 136 Xapian::doccount wdf) { 137 std::map<std::string, PostingChanges>::iterator i; 138 i = postlist_changes.find(term); 139 if (i == postlist_changes.end()) { 140 postlist_changes.insert( 141 std::make_pair(term, PostingChanges(did, wdf))); 142 } else { 143 i->second.add_posting(did, wdf); 144 } 145 } 146 remove_posting(Xapian::docid did,const std::string & term,Xapian::doccount wdf)147 void remove_posting(Xapian::docid did, const std::string & term, 148 Xapian::doccount wdf) { 149 std::map<std::string, PostingChanges>::iterator i; 150 i = postlist_changes.find(term); 151 if (i == postlist_changes.end()) { 152 postlist_changes.insert( 153 std::make_pair(term, PostingChanges(did, wdf, false))); 154 } else { 155 i->second.remove_posting(did, wdf); 156 } 157 } 158 update_posting(Xapian::docid did,const std::string & term,Xapian::termcount old_wdf,Xapian::termcount new_wdf)159 void update_posting(Xapian::docid did, const std::string & term, 160 Xapian::termcount old_wdf, 161 Xapian::termcount new_wdf) { 162 std::map<std::string, PostingChanges>::iterator i; 163 i = postlist_changes.find(term); 164 if (i == postlist_changes.end()) { 165 postlist_changes.insert( 166 std::make_pair(term, PostingChanges(did, old_wdf, new_wdf))); 167 } else { 168 i->second.update_posting(did, old_wdf, new_wdf); 169 } 170 } 171 172 void set_positionlist(const GlassPositionListTable & position_table, 173 Xapian::docid did, 174 const std::string & tname, 175 const Xapian::TermIterator & term, 176 bool modifying = false); 177 178 void delete_positionlist(Xapian::docid did, 179 const std::string & term); 180 181 bool get_positionlist(Xapian::docid did, 182 const std::string & term, 183 std::string & s) const; 184 185 bool has_positions(const GlassPositionListTable & position_table) const; 186 clear()187 void clear() { 188 doclen_changes.clear(); 189 postlist_changes.clear(); 190 pos_changes.clear(); 191 } 192 set_doclength(Xapian::docid did,Xapian::termcount doclen,bool add)193 void set_doclength(Xapian::docid did, Xapian::termcount doclen, bool add) { 194 if (add) { 195 Assert(doclen_changes.find(did) == doclen_changes.end() || doclen_changes[did] == DELETED_POSTING); 196 } 197 doclen_changes[did] = doclen; 198 } 199 delete_doclength(Xapian::docid did)200 void delete_doclength(Xapian::docid did) { 201 Assert(doclen_changes.find(did) == doclen_changes.end() || doclen_changes[did] != DELETED_POSTING); 202 doclen_changes[did] = DELETED_POSTING; 203 } 204 get_doclength(Xapian::docid did,Xapian::termcount & doclen)205 bool get_doclength(Xapian::docid did, Xapian::termcount & doclen) const { 206 std::map<Xapian::docid, Xapian::termcount>::const_iterator i; 207 i = doclen_changes.find(did); 208 if (i == doclen_changes.end()) 209 return false; 210 if (rare(i->second == DELETED_POSTING)) 211 throw Xapian::DocNotFoundError("Document not found: " + str(did)); 212 doclen = i->second; 213 return true; 214 } 215 216 /// Flush document length changes. 217 void flush_doclengths(GlassPostListTable & table); 218 219 /// Flush postlist changes for @a term. 220 void flush_post_list(GlassPostListTable & table, const std::string & term); 221 222 /// Flush postlist changes for all terms. 223 void flush_all_post_lists(GlassPostListTable & table); 224 225 /// Flush postlist changes for all terms which start with @a pfx. 226 void flush_post_lists(GlassPostListTable & table, const std::string & pfx); 227 228 /// Flush all postlist table changes. 229 void flush(GlassPostListTable & table); 230 231 /// Flush position changes. 232 void flush_pos_lists(GlassPositionListTable & table); 233 get_deltas(const std::string & term,Xapian::termcount_diff & tf_delta,Xapian::termcount_diff & cf_delta)234 bool get_deltas(const std::string & term, 235 Xapian::termcount_diff & tf_delta, 236 Xapian::termcount_diff & cf_delta) const { 237 std::map<std::string, PostingChanges>::const_iterator i; 238 i = postlist_changes.find(term); 239 if (i == postlist_changes.end()) { 240 return false; 241 } 242 tf_delta = i->second.get_tfdelta(); 243 cf_delta = i->second.get_cfdelta(); 244 return true; 245 } 246 }; 247 248 #endif // XAPIAN_INCLUDED_GLASS_INVERTER_H 249