1 //
2 // Prefix.cc
3 //
// Prefix: The prefix fuzzy algorithm. Performs an O(log n) search for words
// matching the *prefix* specified -- thus significantly faster than a full
// substring search.
7 //
8 // Part of the ht://Dig package <http://www.htdig.org/>
9 // Copyright (c) 1995-2004 The ht://Dig Group
10 // For copyright details, see the file COPYING in your distribution
11 // or the GNU Library General Public License (LGPL) version 2 or later
12 // <http://www.gnu.org/copyleft/lgpl.html>
13 //
14 // $Id: Prefix.cc,v 1.17 2004/05/28 13:15:20 lha Exp $
15 //
16
17 #ifdef HAVE_CONFIG_H
18 #include "htconfig.h"
19 #endif /* HAVE_CONFIG_H */
20
21 #include <fcntl.h>
22
23 #include "Prefix.h"
24 #include "htString.h"
25 #include "List.h"
26 #include "StringMatch.h"
27 #include "HtConfiguration.h"
28
29
30 //*****************************************************************************
31 // Prefix::Prefix(const HtConfiguration& config_arg)
32 //
Prefix(const HtConfiguration & config_arg)33 Prefix::Prefix(const HtConfiguration& config_arg) :
34 Fuzzy(config_arg)
35 {
36 name = "prefix";
37 }
38
39
40 //*****************************************************************************
41 // Prefix::~Prefix()
42 //
~Prefix()43 Prefix::~Prefix()
44 {
45 }
46
47
48 //*****************************************************************************
49 //
50 // Prefix search
51 //
52 void
getWords(char * w,List & words)53 Prefix::getWords(char *w, List &words)
54 {
55 if (w == NULL || w[0] == '\0')
56 return;
57
58 String stripped = w;
59 HtStripPunctuation(stripped);
60 w = stripped.get();
61
62 const String prefix_suffix = config["prefix_match_character"];
63 int prefix_suffix_length = prefix_suffix.length();
64 int minimum_prefix_length = config.Value("minimum_prefix_length");
65
66 if (debug)
67 cerr << " word=" << w << " prefix_suffix=" << prefix_suffix
68 << " prefix_suffix_length=" << prefix_suffix_length
69 << " minimum_prefix_length=" << minimum_prefix_length << "\n";
70
71 if ((int)strlen(w) < minimum_prefix_length + prefix_suffix_length)
72 return;
73
74 // A null prefix character means that prefix matching should be
75 // applied to every search word; otherwise return if the word does
76 // not end in the prefix character(s).
77 //
78 if (prefix_suffix_length > 0
79 && strcmp(prefix_suffix, w+strlen(w)-prefix_suffix_length))
80 return;
81
82 HtWordList wordDB(config);
83 if (wordDB.Open(config["word_db"], O_RDONLY) == NOTOK)
84 return;
85
86 int wordCount = 0;
87 int maximumWords = config.Value("max_prefix_matches", 1000);
88 String s;
89 int len = strlen(w) - prefix_suffix_length;
90
91 // Strip the prefix character(s)
92 char w2[8192];
93 strncpy(w2, w, sizeof(w2) - 1);
94 w2[sizeof(w2) - 1] = '\0';
95 w2[strlen(w2) - prefix_suffix_length] = '\0';
96 String w3(w2);
97 w3.lowercase();
98 List *wordList = wordDB.Prefix(w3.get());
99 WordReference *word_ref;
100 String last_word;
101
102 wordList->Start_Get();
103 while (wordCount < maximumWords && (word_ref = (WordReference *) wordList->Get_Next() ))
104 {
105 s = word_ref->Key().GetWord();
106
107 // If we're somehow past the original word, we're done
108 if (mystrncasecmp(s.get(), w, len))
109 break;
110
111 // If this is a duplicate word, ignore it
112 if (last_word.length() != 0 && last_word == s)
113 continue;
114
115 last_word = s;
116 words.Add(new String(s));
117 wordCount++;
118 }
119 if (wordList) {
120 wordList->Destroy();
121 delete wordList;
122 }
123 wordDB.Close();
124 }
125
126
127 //*****************************************************************************
128 int
openIndex()129 Prefix::openIndex()
130 {
131 return 0;
132 }
133
134
135 //*****************************************************************************
136 void
generateKey(char *,String &)137 Prefix::generateKey(char *, String &)
138 {
139 }
140
141
142 //*****************************************************************************
143 void
addWord(char *)144 Prefix::addWord(char *)
145 {
146 }
147
148
149
150
151