1 /** @file brass_synonym.cc
2 * @brief Synonym data for a brass database.
3 */
4 /* Copyright (C) 2004,2005,2006,2007,2008,2009 Olly Betts
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <config.h>
22 #include "brass_synonym.h"
23
24 #include "xapian/error.h"
25
26 #include "brass_cursor.h"
27 #include "debuglog.h"
28 #include "stringutils.h"
29 #include "vectortermlist.h"
30
31 #include <set>
32 #include <string>
33 #include <vector>
34
35 using namespace std;
36
37 // We XOR the length values with this so that they are more likely to coincide
38 // with lower case ASCII letters, which are likely to be common. This means
39 // that zlib should do a better job of compressing tag values.
40 #define MAGIC_XOR_VALUE 96
41
42 void
merge_changes()43 BrassSynonymTable::merge_changes()
44 {
45 if (last_term.empty()) return;
46
47 if (last_synonyms.empty()) {
48 del(last_term);
49 } else {
50 string tag;
51
52 set<string>::const_iterator i;
53 for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
54 const string & synonym = *i;
55 tag += byte(synonym.size() ^ MAGIC_XOR_VALUE);
56 tag += synonym;
57 }
58
59 add(last_term, tag);
60 last_synonyms.clear();
61 }
62 last_term.resize(0);
63 }
64
65 void
add_synonym(const string & term,const string & synonym)66 BrassSynonymTable::add_synonym(const string & term, const string & synonym)
67 {
68 if (last_term != term) {
69 merge_changes();
70 last_term = term;
71
72 string tag;
73 if (get_exact_entry(term, tag)) {
74 const char * p = tag.data();
75 const char * end = p + tag.size();
76 while (p != end) {
77 size_t len;
78 if (p == end ||
79 (len = byte(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
80 throw Xapian::DatabaseCorruptError("Bad synonym data");
81 ++p;
82 last_synonyms.insert(string(p, len));
83 p += len;
84 }
85 }
86 }
87
88 last_synonyms.insert(synonym);
89 }
90
91 void
remove_synonym(const string & term,const string & synonym)92 BrassSynonymTable::remove_synonym(const string & term, const string & synonym)
93 {
94 if (last_term != term) {
95 merge_changes();
96 last_term = term;
97
98 string tag;
99 if (get_exact_entry(term, tag)) {
100 const char * p = tag.data();
101 const char * end = p + tag.size();
102 while (p != end) {
103 size_t len;
104 if (p == end ||
105 (len = byte(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
106 throw Xapian::DatabaseCorruptError("Bad synonym data");
107 ++p;
108 last_synonyms.insert(string(p, len));
109 p += len;
110 }
111 }
112 }
113
114 last_synonyms.erase(synonym);
115 }
116
117 void
clear_synonyms(const string & term)118 BrassSynonymTable::clear_synonyms(const string & term)
119 {
120 // We don't actually ever need to merge_changes() here, but it's quite
121 // likely that someone might clear_synonyms() and then add_synonym() for
122 // the same term. The alternative we could otherwise optimise for (modify
123 // synonyms for a term, then clear those for another, then modify those for
124 // the first term again) seems much less likely.
125 if (last_term == term) {
126 last_synonyms.clear();
127 } else {
128 merge_changes();
129 last_term = term;
130 }
131 }
132
133 TermList *
open_termlist(const string & term)134 BrassSynonymTable::open_termlist(const string & term)
135 {
136 vector<string> synonyms;
137
138 if (last_term == term) {
139 if (last_synonyms.empty()) return NULL;
140
141 synonyms.reserve(last_synonyms.size());
142 set<string>::const_iterator i;
143 for (i = last_synonyms.begin(); i != last_synonyms.end(); ++i) {
144 synonyms.push_back(*i);
145 }
146 } else {
147 string tag;
148 if (!get_exact_entry(term, tag)) return NULL;
149
150 const char * p = tag.data();
151 const char * end = p + tag.size();
152 while (p != end) {
153 size_t len;
154 if (p == end ||
155 (len = byte(*p) ^ MAGIC_XOR_VALUE) >= size_t(end - p))
156 throw Xapian::DatabaseCorruptError("Bad synonym data");
157 ++p;
158 synonyms.push_back(string(p, len));
159 p += len;
160 }
161 }
162
163 return new VectorTermList(synonyms.begin(), synonyms.end());
164 }
165
166 ///////////////////////////////////////////////////////////////////////////
167
~BrassSynonymTermList()168 BrassSynonymTermList::~BrassSynonymTermList()
169 {
170 LOGCALL_DTOR(DB, "BrassSynonymTermList");
171 delete cursor;
172 }
173
174 string
get_termname() const175 BrassSynonymTermList::get_termname() const
176 {
177 LOGCALL(DB, string, "BrassSynonymTermList::get_termname", NO_ARGS);
178 Assert(cursor);
179 Assert(!cursor->current_key.empty());
180 Assert(!at_end());
181 RETURN(cursor->current_key);
182 }
183
184 Xapian::doccount
get_termfreq() const185 BrassSynonymTermList::get_termfreq() const
186 {
187 throw Xapian::InvalidOperationError("BrassSynonymTermList::get_termfreq() not meaningful");
188 }
189
190 Xapian::termcount
get_collection_freq() const191 BrassSynonymTermList::get_collection_freq() const
192 {
193 throw Xapian::InvalidOperationError("BrassSynonymTermList::get_collection_freq() not meaningful");
194 }
195
196 TermList *
next()197 BrassSynonymTermList::next()
198 {
199 LOGCALL(DB, TermList *, "BrassSynonymTermList::next", NO_ARGS);
200 Assert(!at_end());
201
202 cursor->next();
203 if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
204 // We've reached the end of the end of the prefixed terms.
205 cursor->to_end();
206 }
207
208 RETURN(NULL);
209 }
210
211 TermList *
skip_to(const string & tname)212 BrassSynonymTermList::skip_to(const string &tname)
213 {
214 LOGCALL(DB, TermList *, "BrassSynonymTermList::skip_to", tname);
215 Assert(!at_end());
216
217 if (!cursor->find_entry_ge(tname)) {
218 // The exact term we asked for isn't there, so check if the next
219 // term after it also has the right prefix.
220 if (!cursor->after_end() && !startswith(cursor->current_key, prefix)) {
221 // We've reached the end of the prefixed terms.
222 cursor->to_end();
223 }
224 }
225 RETURN(NULL);
226 }
227
228 bool
at_end() const229 BrassSynonymTermList::at_end() const
230 {
231 LOGCALL(DB, bool, "BrassSynonymTermList::at_end", NO_ARGS);
232 RETURN(cursor->after_end());
233 }
234