1 //
2 // Fuzzy.cc
3 //
4 // Fuzzy: This is the base class for all the different types of fuzzy searches.
5 // We only define the interface.
6 //
7 // There are two main uses of classes derived from this class:
8 // 1) Creation of a fuzzy index
9 // 2) Searching for a word using the fuzzy index
10 //
11 // Part of the ht://Dig package <http://www.htdig.org/>
12 // Copyright (c) 1995-2004 The ht://Dig Group
13 // For copyright details, see the file COPYING in your distribution
14 // or the GNU Library General Public License (LGPL) version 2 or later
15 // <http://www.gnu.org/copyleft/lgpl.html>
16 //
17 // $Id: Fuzzy.cc,v 1.20 2004/05/28 13:15:20 lha Exp $
18 //
19
20 #ifdef HAVE_CONFIG_H
21 #include "htconfig.h"
22 #endif /* HAVE_CONFIG_H */
23
24 #include <fcntl.h>
25
26 #include "Fuzzy.h"
27 #include "htfuzzy.h"
28 #include "HtConfiguration.h"
29 #include "List.h"
30 #include "StringList.h"
31 #include "Endings.h"
32 #include "Exact.h"
33 #include "Metaphone.h"
34 #include "Soundex.h"
35 #include "Synonym.h"
36 #include "Substring.h"
37 #include "Prefix.h"
38 #include "Regexp.h"
39 #include "Speling.h"
40 #include "Accents.h"
41
42 //*****************************************************************************
43 // Fuzzy::Fuzzy(const HtConfiguration& config)
44 //
Fuzzy(const HtConfiguration & config_arg)45 Fuzzy::Fuzzy(const HtConfiguration& config_arg) :
46 config(config_arg)
47 {
48 dict = 0;
49 index = 0;
50 }
51
52
53 //*****************************************************************************
54 // Fuzzy::~Fuzzy()
55 //
~Fuzzy()56 Fuzzy::~Fuzzy()
57 {
58 if (index)
59 {
60 index->Close();
61 delete index;
62 index = 0;
63 }
64 delete dict;
65 }
66
67
68 //*****************************************************************************
69 // void Fuzzy::getWords(char *word, List &words)
70 //
71 void
getWords(char * word,List & words)72 Fuzzy::getWords(char *word, List &words)
73 {
74 if (!index)
75 return;
76 if (!word || !*word)
77 return;
78
79 //
80 // Convert the word to a fuzzy key
81 //
82 String fuzzyKey;
83 String data;
84 String stripped = word;
85 HtStripPunctuation(stripped);
86 generateKey(stripped, fuzzyKey);
87 if (debug > 2)
88 cout << "\n\tkey: " << fuzzyKey << endl;
89
90 words.Destroy();
91
92 if (index->Get(fuzzyKey, data) == OK)
93 {
94 //
95 // Found the entry
96 //
97 char *token = strtok(data.get(), " ");
98 while (token)
99 {
100 if (mystrcasecmp(token, word) != 0)
101 {
102 words.Add(new String(token));
103 }
104 token = strtok(0, " ");
105 }
106 }
107 else
108 {
109 //
110 // The key wasn't found.
111 //
112 }
113 }
114
115
116 //*****************************************************************************
// int Fuzzy::openIndex()
118 //
119 int
openIndex()120 Fuzzy::openIndex()
121 {
122 String var = name;
123 var << "_db";
124 const String filename = config[var];
125
126 index = Database::getDatabaseInstance(DB_HASH);
127 if (index->OpenRead(filename) == NOTOK)
128 {
129 delete index;
130 index = 0;
131 return NOTOK;
132 }
133
134 return OK;
135 }
136
137
138 //*****************************************************************************
// int Fuzzy::writeDB()
140 //
141 int
writeDB()142 Fuzzy::writeDB()
143 {
144 String var = name;
145 var << "_db";
146 const String filename = config[var];
147
148 index = Database::getDatabaseInstance(DB_HASH);
149 if (index->OpenReadWrite(filename, 0664) == NOTOK)
150 return NOTOK;
151
152 String *s;
153 char *fuzzyKey;
154
155 int count = 0;
156
157 dict->Start_Get();
158 while ((fuzzyKey = dict->Get_Next()))
159 {
160 s = (String *) dict->Find(fuzzyKey);
161 index->Put(fuzzyKey, *s);
162
163 if (debug > 1)
164 {
165 cout << "htfuzzy: '" << fuzzyKey << "' ==> '" << s->get() << "'\n";
166 }
167 count++;
168 if ((count % 100) == 0 && debug == 1)
169 {
170 cout << "htfuzzy: keys: " << count << '\n';
171 cout.flush();
172 }
173 }
174 if (debug == 1)
175 {
176 cout << "htfuzzy:Total keys: " << count << "\n";
177 }
178 return OK;
179 }
180
181
182 //*****************************************************************************
183 // Fuzzy algorithm factory.
184 //
185 Fuzzy *
getFuzzyByName(char * name,const HtConfiguration & config)186 Fuzzy::getFuzzyByName(char *name, const HtConfiguration& config)
187 {
188 if (mystrcasecmp(name, "exact") == 0)
189 return new Exact(config);
190 else if (mystrcasecmp(name, "soundex") == 0)
191 return new Soundex(config);
192 else if (mystrcasecmp(name, "metaphone") == 0)
193 return new Metaphone(config);
194 else if (mystrcasecmp(name, "accents") == 0)
195 return new Accents(config);
196 else if (mystrcasecmp(name, "endings") == 0)
197 return new Endings(config);
198 else if (mystrcasecmp(name, "synonyms") == 0)
199 return new Synonym(config);
200 else if (mystrcasecmp(name, "substring") == 0)
201 return new Substring(config);
202 else if (mystrcasecmp(name, "prefix") == 0)
203 return new Prefix(config);
204 else if (mystrcasecmp(name, "regex") == 0)
205 return new Regexp(config);
206 else if (mystrcasecmp(name, "speling") == 0)
207 return new Speling(config);
208 else
209 return 0;
210 }
211
212 //*****************************************************************************
213 int
createDB(const HtConfiguration &)214 Fuzzy::createDB(const HtConfiguration &)
215 {
216 return OK;
217 }
218
219 void
generateKey(char *,String &)220 Fuzzy::generateKey(char *, String &)
221 {
222 }
223
224
225 void
addWord(char *)226 Fuzzy::addWord(char *)
227 {
228 }
229
230