1 /** @file brass_synonym.cc
2  * @brief Synonym data for a brass database.
3  */
4 /* Copyright (C) 2004,2005,2006,2007,2008,2009 Olly Betts
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
19  */
20 
21 #include <config.h>
22 #include "brass_synonym.h"
23 
24 #include "xapian/error.h"
25 
26 #include "brass_cursor.h"
27 #include "debuglog.h"
28 #include "stringutils.h"
29 #include "vectortermlist.h"
30 
31 #include <set>
32 #include <string>
33 #include <vector>
34 
35 using namespace std;
36 
// Each synonym is stored in a tag as a single length byte followed by the
// bytes of the synonym itself.
//
// We XOR the length values with this so that they are more likely to coincide
// with lower case ASCII letters, which are likely to be common (for example,
// lengths 1 to 26 are stored as 'a' to 'z').  This means that zlib should do
// a better job of compressing tag values.
#define MAGIC_XOR_VALUE 96
41 
42 void
merge_changes()43 BrassSynonymTable::merge_changes()
44 {
45     if (last_term.empty()) return;
46 
47     if (last_synonyms.empty()) {
48 	del(last_term);
49     } else {
50 	string tag;
51 
52 	set<string>::const_iterator i;
53 	for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
54 	    const string & synonym = *i;
55 	    tag += byte(synonym.size() ^ MAGIC_XOR_VALUE);
56 	    tag += synonym;
57 	}
58 
59 	add(last_term, tag);
60 	last_synonyms.clear();
61     }
62     last_term.resize(0);
63 }
64 
65 void
add_synonym(const string & term,const string & synonym)66 BrassSynonymTable::add_synonym(const string & term, const string & synonym)
67 {
68     if (last_term != term) {
69 	merge_changes();
70 	last_term = term;
71 
72 	string tag;
73 	if (get_exact_entry(term, tag)) {
74 	    const char * p = tag.data();
75 	    const char * end = p + tag.size();
76 	    while (p != end) {
77 		size_t len;
78 		if (p == end ||
79 		    (len = byte(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
80 		    throw Xapian::DatabaseCorruptError("Bad synonym data");
81 		++p;
82 		last_synonyms.insert(string(p, len));
83 		p += len;
84 	    }
85 	}
86     }
87 
88     last_synonyms.insert(synonym);
89 }
90 
91 void
remove_synonym(const string & term,const string & synonym)92 BrassSynonymTable::remove_synonym(const string & term, const string & synonym)
93 {
94     if (last_term != term) {
95 	merge_changes();
96 	last_term = term;
97 
98 	string tag;
99 	if (get_exact_entry(term, tag)) {
100 	    const char * p = tag.data();
101 	    const char * end = p + tag.size();
102 	    while (p != end) {
103 		size_t len;
104 		if (p == end ||
105 		    (len = byte(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
106 		    throw Xapian::DatabaseCorruptError("Bad synonym data");
107 		++p;
108 		last_synonyms.insert(string(p, len));
109 		p += len;
110 	    }
111 	}
112     }
113 
114     last_synonyms.erase(synonym);
115 }
116 
117 void
clear_synonyms(const string & term)118 BrassSynonymTable::clear_synonyms(const string & term)
119 {
120     // We don't actually ever need to merge_changes() here, but it's quite
121     // likely that someone might clear_synonyms() and then add_synonym() for
122     // the same term.  The alternative we could otherwise optimise for (modify
123     // synonyms for a term, then clear those for another, then modify those for
124     // the first term again) seems much less likely.
125     if (last_term == term) {
126 	last_synonyms.clear();
127     } else {
128 	merge_changes();
129 	last_term = term;
130     }
131 }
132 
133 TermList *
open_termlist(const string & term)134 BrassSynonymTable::open_termlist(const string & term)
135 {
136     vector<string> synonyms;
137 
138     if (last_term == term) {
139 	if (last_synonyms.empty()) return NULL;
140 
141 	synonyms.reserve(last_synonyms.size());
142 	set<string>::const_iterator i;
143 	for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
144 	    synonyms.push_back(*i);
145 	}
146     } else {
147 	string tag;
148 	if (!get_exact_entry(term, tag)) return NULL;
149 
150 	const char * p = tag.data();
151 	const char * end = p + tag.size();
152 	while (p != end) {
153 	    size_t len;
154 	    if (p == end ||
155 		(len = byte(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
156 		throw Xapian::DatabaseCorruptError("Bad synonym data");
157 	    ++p;
158 	    synonyms.push_back(string(p, len));
159 	    p += len;
160 	}
161     }
162 
163     return new VectorTermList(synonyms.begin(), synonyms.end());
164 }
165 
166 ///////////////////////////////////////////////////////////////////////////
167 
/// Destructor: releases the cursor this termlist deletes on destruction.
BrassSynonymTermList::~BrassSynonymTermList()
{
    LOGCALL_DTOR(DB, "BrassSynonymTermList");
    delete cursor;
}
173 
/** Return the term at the current position.
 *
 *  This is the cursor's current key.  Must not be called when at_end() is
 *  true (checked by assertion).
 */
string
BrassSynonymTermList::get_termname() const
{
    LOGCALL(DB, string, "BrassSynonymTermList::get_termname", NO_ARGS);
    Assert(cursor);
    Assert(!cursor->current_key.empty());
    Assert(!at_end());
    RETURN(cursor->current_key);
}
183 
/** Not supported for this termlist.
 *
 *  @exception Xapian::InvalidOperationError  always thrown.
 */
Xapian::doccount
BrassSynonymTermList::get_termfreq() const
{
    throw Xapian::InvalidOperationError("BrassSynonymTermList::get_termfreq() not meaningful");
}
189 
/** Not supported for this termlist.
 *
 *  @exception Xapian::InvalidOperationError  always thrown.
 */
Xapian::termcount
BrassSynonymTermList::get_collection_freq() const
{
    throw Xapian::InvalidOperationError("BrassSynonymTermList::get_collection_freq() not meaningful");
}
195 
196 TermList *
next()197 BrassSynonymTermList::next()
198 {
199     LOGCALL(DB, TermList *, "BrassSynonymTermList::next", NO_ARGS);
200     Assert(!at_end());
201 
202     cursor->next();
203     if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
204 	// We've reached the end of the end of the prefixed terms.
205 	cursor->to_end();
206     }
207 
208     RETURN(NULL);
209 }
210 
211 TermList *
skip_to(const string & tname)212 BrassSynonymTermList::skip_to(const string &tname)
213 {
214     LOGCALL(DB, TermList *, "BrassSynonymTermList::skip_to", tname);
215     Assert(!at_end());
216 
217     if (!cursor->find_entry_ge(tname)) {
218 	// The exact term we asked for isn't there, so check if the next
219 	// term after it also has the right prefix.
220 	if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
221 	    // We've reached the end of the prefixed terms.
222 	    cursor->to_end();
223 	}
224     }
225     RETURN(NULL);
226 }
227 
/// Return true if the cursor has moved past the last entry.
bool
BrassSynonymTermList::at_end() const
{
    LOGCALL(DB, bool, "BrassSynonymTermList::at_end", NO_ARGS);
    RETURN(cursor->after_end());
}
234