1 //
2 // Fuzzy.cc
3 //
4 // Fuzzy: This is the base class for all the different types of fuzzy searches.
5 //        We only define the interface.
6 //
7 // There are two main uses of classes derived from this class:
8 //    1) Creation of a fuzzy index
9 //    2) Searching for a word using the fuzzy index
10 //
11 // Part of the ht://Dig package   <http://www.htdig.org/>
12 // Copyright (c) 1995-2004 The ht://Dig Group
13 // For copyright details, see the file COPYING in your distribution
14 // or the GNU Library General Public License (LGPL) version 2 or later
15 // <http://www.gnu.org/copyleft/lgpl.html>
16 //
17 // $Id: Fuzzy.cc,v 1.20 2004/05/28 13:15:20 lha Exp $
18 //
19 
20 #ifdef HAVE_CONFIG_H
21 #include "htconfig.h"
22 #endif /* HAVE_CONFIG_H */
23 
24 #include <fcntl.h>
25 
26 #include "Fuzzy.h"
27 #include "htfuzzy.h"
28 #include "HtConfiguration.h"
29 #include "List.h"
30 #include "StringList.h"
31 #include "Endings.h"
32 #include "Exact.h"
33 #include "Metaphone.h"
34 #include "Soundex.h"
35 #include "Synonym.h"
36 #include "Substring.h"
37 #include "Prefix.h"
38 #include "Regexp.h"
39 #include "Speling.h"
40 #include "Accents.h"
41 
42 //*****************************************************************************
43 // Fuzzy::Fuzzy(const HtConfiguration& config)
44 //
Fuzzy(const HtConfiguration & config_arg)45 Fuzzy::Fuzzy(const HtConfiguration& config_arg) :
46   config(config_arg)
47 {
48     dict = 0;
49     index = 0;
50 }
51 
52 
53 //*****************************************************************************
54 // Fuzzy::~Fuzzy()
55 //
~Fuzzy()56 Fuzzy::~Fuzzy()
57 {
58     if (index)
59     {
60 	index->Close();
61 	delete index;
62 	index = 0;
63     }
64     delete dict;
65 }
66 
67 
68 //*****************************************************************************
69 // void Fuzzy::getWords(char *word, List &words)
70 //
71 void
getWords(char * word,List & words)72 Fuzzy::getWords(char *word, List &words)
73 {
74     if (!index)
75 	return;
76     if (!word || !*word)
77       return;
78 
79     //
80     // Convert the word to a fuzzy key
81     //
82     String	fuzzyKey;
83     String	data;
84     String	stripped = word;
85     HtStripPunctuation(stripped);
86     generateKey(stripped, fuzzyKey);
87     if (debug > 2)
88       cout << "\n\tkey: " << fuzzyKey << endl;
89 
90     words.Destroy();
91 
92     if (index->Get(fuzzyKey, data) == OK)
93     {
94 	//
95 	// Found the entry
96 	//
97 	char	*token = strtok(data.get(), " ");
98 	while (token)
99 	{
100 	    if (mystrcasecmp(token, word) != 0)
101 	    {
102 		words.Add(new String(token));
103 	    }
104 	    token = strtok(0, " ");
105 	}
106     }
107     else
108     {
109 	//
110 	// The key wasn't found.
111 	//
112     }
113 }
114 
115 
116 //*****************************************************************************
117 // int Fuzzy::openIndex(const HtConfiguration &config)
118 //
119 int
openIndex()120 Fuzzy::openIndex()
121 {
122     String	var = name;
123     var << "_db";
124     const String	filename = config[var];
125 
126     index = Database::getDatabaseInstance(DB_HASH);
127     if (index->OpenRead(filename) == NOTOK)
128       {
129 	delete index;
130 	index = 0;
131         return NOTOK;
132       }
133 
134     return OK;
135 }
136 
137 
138 //*****************************************************************************
139 // int Fuzzy::writeDB(HtConfiguration &config)
140 //
141 int
writeDB()142 Fuzzy::writeDB()
143 {
144     String	var = name;
145     var << "_db";
146     const String	filename = config[var];
147 
148     index = Database::getDatabaseInstance(DB_HASH);
149     if (index->OpenReadWrite(filename, 0664) == NOTOK)
150 	return NOTOK;
151 
152     String	*s;
153     char	*fuzzyKey;
154 
155     int		count = 0;
156 
157     dict->Start_Get();
158     while ((fuzzyKey = dict->Get_Next()))
159     {
160 	s = (String *) dict->Find(fuzzyKey);
161 	index->Put(fuzzyKey, *s);
162 
163 	if (debug > 1)
164 	{
165 	    cout << "htfuzzy: '" << fuzzyKey << "' ==> '" << s->get() << "'\n";
166 	}
167 	count++;
168 	if ((count % 100) == 0 && debug == 1)
169 	{
170 	    cout << "htfuzzy: keys: " << count << '\n';
171 	    cout.flush();
172 	}
173     }
174     if (debug == 1)
175     {
176 	cout << "htfuzzy:Total keys: " << count << "\n";
177     }
178     return OK;
179 }
180 
181 
182 //*****************************************************************************
183 // Fuzzy algorithm factory.
184 //
185 Fuzzy *
getFuzzyByName(char * name,const HtConfiguration & config)186 Fuzzy::getFuzzyByName(char *name, const HtConfiguration& config)
187 {
188     if (mystrcasecmp(name, "exact") == 0)
189 	return new Exact(config);
190     else if (mystrcasecmp(name, "soundex") == 0)
191 	return new Soundex(config);
192     else if (mystrcasecmp(name, "metaphone") == 0)
193 	return new Metaphone(config);
194     else if (mystrcasecmp(name, "accents") == 0)
195 	return new Accents(config);
196     else if (mystrcasecmp(name, "endings") == 0)
197 	return new Endings(config);
198     else if (mystrcasecmp(name, "synonyms") == 0)
199 	return new Synonym(config);
200     else if (mystrcasecmp(name, "substring") == 0)
201 	return new Substring(config);
202     else if (mystrcasecmp(name, "prefix") == 0)
203 	return new Prefix(config);
204     else if (mystrcasecmp(name, "regex") == 0)
205 	return new Regexp(config);
206     else if (mystrcasecmp(name, "speling") == 0)
207 	return new Speling(config);
208     else
209 	return 0;
210 }
211 
212 //*****************************************************************************
213 int
createDB(const HtConfiguration &)214 Fuzzy::createDB(const HtConfiguration &)
215 {
216     return OK;
217 }
218 
219 void
generateKey(char *,String &)220 Fuzzy::generateKey(char *, String &)
221 {
222 }
223 
224 
225 void
addWord(char *)226 Fuzzy::addWord(char *)
227 {
228 }
229 
230