1 //
2 // Prefix.cc
3 //
// Prefix: The prefix fuzzy algorithm. Performs an O(log n) search for words
//         matching the *prefix* specified--thus significantly faster than a full
//         substring search.
7 //
8 // Part of the ht://Dig package   <http://www.htdig.org/>
9 // Copyright (c) 1995-2004 The ht://Dig Group
10 // For copyright details, see the file COPYING in your distribution
11 // or the GNU Library General Public License (LGPL) version 2 or later
12 // <http://www.gnu.org/copyleft/lgpl.html>
13 //
14 // $Id: Prefix.cc,v 1.17 2004/05/28 13:15:20 lha Exp $
15 //
16 
17 #ifdef HAVE_CONFIG_H
18 #include "htconfig.h"
19 #endif /* HAVE_CONFIG_H */
20 
21 #include <fcntl.h>
22 
23 #include "Prefix.h"
24 #include "htString.h"
25 #include "List.h"
26 #include "StringMatch.h"
27 #include "HtConfiguration.h"
28 
29 
30 //*****************************************************************************
31 // Prefix::Prefix(const HtConfiguration& config_arg)
32 //
Prefix(const HtConfiguration & config_arg)33 Prefix::Prefix(const HtConfiguration& config_arg) :
34   Fuzzy(config_arg)
35 {
36     name = "prefix";
37 }
38 
39 
40 //*****************************************************************************
41 // Prefix::~Prefix()
42 //
~Prefix()43 Prefix::~Prefix()
44 {
45 }
46 
47 
48 //*****************************************************************************
49 //
50 //  Prefix search
51 //
52 void
getWords(char * w,List & words)53 Prefix::getWords(char *w, List &words)
54 {
55     if (w == NULL || w[0] == '\0')
56 	return;
57 
58     String	stripped = w;
59     HtStripPunctuation(stripped);
60     w = stripped.get();
61 
62     const String	prefix_suffix = config["prefix_match_character"];
63     int 		prefix_suffix_length = prefix_suffix.length();
64     int 		minimum_prefix_length = config.Value("minimum_prefix_length");
65 
66     if (debug)
67          cerr << " word=" << w << " prefix_suffix=" << prefix_suffix
68 		<< " prefix_suffix_length=" << prefix_suffix_length
69 		<< " minimum_prefix_length=" << minimum_prefix_length << "\n";
70 
71     if ((int)strlen(w) < minimum_prefix_length + prefix_suffix_length)
72 	return;
73 
74     //  A null prefix character means that prefix matching should be
75     //  applied to every search word; otherwise return if the word does
76     //	not end in the prefix character(s).
77     //
78     if (prefix_suffix_length > 0
79 	    && strcmp(prefix_suffix, w+strlen(w)-prefix_suffix_length))
80 	return;
81 
82     HtWordList	wordDB(config);
83     if (wordDB.Open(config["word_db"], O_RDONLY) == NOTOK)
84       return;
85 
86     int		wordCount = 0;
87     int		maximumWords = config.Value("max_prefix_matches", 1000);
88     String	s;
89     int		len = strlen(w) - prefix_suffix_length;
90 
91     // Strip the prefix character(s)
92     char w2[8192];
93     strncpy(w2, w, sizeof(w2) - 1);
94     w2[sizeof(w2) - 1] = '\0';
95     w2[strlen(w2) - prefix_suffix_length] = '\0';
96     String w3(w2);
97     w3.lowercase();
98     List		*wordList = wordDB.Prefix(w3.get());
99     WordReference	*word_ref;
100     String		last_word;
101 
102     wordList->Start_Get();
103     while (wordCount < maximumWords && (word_ref = (WordReference *) wordList->Get_Next() ))
104     {
105 	s = word_ref->Key().GetWord();
106 
107 	// If we're somehow past the original word, we're done
108 	if (mystrncasecmp(s.get(), w, len))
109 	    break;
110 
111 	// If this is a duplicate word, ignore it
112 	if (last_word.length() != 0 && last_word == s)
113 	    continue;
114 
115 	last_word = s;
116 	words.Add(new String(s));
117 	wordCount++;
118     }
119     if (wordList) {
120       wordList->Destroy();
121       delete wordList;
122     }
123     wordDB.Close();
124 }
125 
126 
127 //*****************************************************************************
128 int
openIndex()129 Prefix::openIndex()
130 {
131   return 0;
132 }
133 
134 
135 //*****************************************************************************
136 void
generateKey(char *,String &)137 Prefix::generateKey(char *, String &)
138 {
139 }
140 
141 
142 //*****************************************************************************
143 void
addWord(char *)144 Prefix::addWord(char *)
145 {
146 }
147 
148 
149 
150 
151