1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /************************************************************************/
33 /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
34 /************************************************************************/
35
36 #ifndef __EST_THASH_H__
37 #define __EST_THASH_H__
38
39 #include <iostream>
40
41 using namespace std;
42
43 #include "EST_String.h"
44 #include "EST_system.h"
45 #include "EST_bool.h"
46 #include "EST_TIterator.h"
47
48 #include "instantiate/EST_THashI.h"
49 #include "instantiate/EST_TStringHashI.h"
50
51 /**@name Hash Tables
52 *
53 * @author Richard Caley <rjc@cstr.ed.ac.uk>
54 * @version $Id: EST_THash.h,v 1.6 2004/09/29 08:24:17 robert Exp $
55 */
56 //@{
57
58 /** This is just a convenient place to put some useful hash functions.
59 */
60 class EST_HashFunctions {
61 public:
62 /// A generally useful hash function.
63 static unsigned int DefaultHash(const void *data, size_t size, unsigned int n);
64
65 /// A hash function for strings.
66 static unsigned int StringHash(const EST_String &key, unsigned int size);
67 };
68
69 template<class K, class V> class EST_THash;
70
71 /** This class is used in hash tables to hold a key value pair.
72 * Not much to say beyond that.
73 */
74 template<class K, class V>
75 class EST_Hash_Pair {
76 public:
77 /// The key
78 K k;
79 /// The value
80 V v;
81
82 private:
83 /// Pointer used to chain entries into buckets.
84 EST_Hash_Pair<K,V> *next;
85
86 /// The hash table must be a friend so it can see the pointer.
87 friend class EST_THash<K, V>;
88 };
89
90 /** An open hash table. The number of buckets should be set to allow
91 * enough space that there are relatively few entries per bucket on
92 * average.
93 */
94
95 template<class K, class V>
96 class EST_THash : protected EST_HashFunctions {
97
98 private:
99 /// Something to return when there is no entry in the table.
100 static V Dummy_Value;
101 static K Dummy_Key;
102
103 /// Total number of entries.
104 unsigned int p_num_entries;
105
106 /// Number of buckets.
107 unsigned int p_num_buckets;
108
109 /// Pointer to an array of <variable>p_num_buckets</variable>buckets.
110 EST_Hash_Pair<K,V> **p_buckets;
111
112 /// The hash function to use on this table.
113 unsigned int (*p_hash_function)(const K &key, unsigned int size);
114
115 public:
116 /** Create a table with the given number of buckets. Optionally setting
117 * a custom hash function.
118 */
119 EST_THash(int size,
120 unsigned int (*hash_function)(const K &key, unsigned int size)= NULL);
121
122 /// Create a copy
123 EST_THash(const EST_THash<K,V> &from);
124
125 /// Destroy the table.
126 ~EST_THash(void);
127
128 /// Empty the table.
129 void clear(void);
130
131 /// Return the total number of entries in the table.
num_entries(void)132 unsigned int num_entries(void) const
133 { return p_num_entries; };
134
135 /// Does the key have an entry?
136 int present(const K &key) const;
137
138 /** Return the value associated with the key.
139 * <parameter>found</parameter> is set to whether such an entry was found.
140 */
141 V &val(const K &key, int &found) const;
142
143 /// Return the value associated with the key.
val(const K & key)144 V &val(const K &key) const {int x; return val(key, x); }
145
146 const K &key(const V &val, int &found) const;
key(const V & val)147 const K &key(const V &val) const {int x; return key(val, x); }
148
149 /// Copy all entries
150 void copy(const EST_THash<K,V> &from);
151
152 /// Apply <parameter>func</parameter> to each entry in the table.
153 void map(void (*func)(K&, V&));
154
155 /// Add an entry to the table.
156 int add_item(const K &key, const V &value, int no_search = 0);
157
158 /// Remove an entry from the table.
159 int remove_item(const K &rkey, int quiet = 0);
160
161 /// Assignment is a copy operation
162 EST_THash<K,V> &operator = (const EST_THash<K,V> &from);
163
164 /// Print the table to <parameter>stream</parameter> in a human readable format.
165 void dump(ostream &stream, int all=0);
166
167 /**@name Pair Iteration
168 *
169 * This iterator steps through the table returning key-value pairs.
170 */
171 //@{
172 protected:
173 /** A position in the table is given by a bucket number and a
174 * pointer into the bucket.
175 */
176 // struct IPointer{ unsigned int b; EST_Hash_Pair<K, V> *p; };
177 struct IPointer_s{ unsigned int b; EST_Hash_Pair<K, V> *p; };
178
179 typedef struct IPointer_s IPointer;
180
181 /// Shift to point at something.
skip_blank(IPointer & ip)182 void skip_blank(IPointer &ip) const
183 {
184 while (ip.p==NULL && ip.b<p_num_buckets)
185 {ip.b++; ip.p = ip.b<p_num_buckets?p_buckets[ip.b]:0; }
186 }
187
188 /// Go to start of the table.
point_to_first(IPointer & ip)189 void point_to_first(IPointer &ip) const
190 { ip.b=0; ip.p = ip.b<p_num_buckets?p_buckets[ip.b]:0;
191 skip_blank(ip);}
192
193 /// Move pointer forwards, at the end of the bucket, move down.
move_pointer_forwards(IPointer & ip)194 void move_pointer_forwards(IPointer &ip) const
195 {
196 ip.p = ip.p->next;
197 skip_blank(ip);
198 }
199
200 /// We are at the end if the pointer ever becomes NULL
points_to_something(const IPointer & ip)201 bool points_to_something(const IPointer &ip) const { return ip.b<p_num_buckets; }
202
203 /// Return the contents of this entry.
points_at(const IPointer & ip)204 EST_Hash_Pair<K, V> &points_at(const IPointer &ip) { return *(ip.p); }
205
206 /// The iterator must be a friend to access this private interface.
207 friend class EST_TStructIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> >;
208 friend class EST_TRwStructIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> >;
209 friend class EST_TIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> >;
210 friend class EST_TRwIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> >;
211
212 public:
213 /// An entry returned by the iterator is a key value pair.
214 typedef EST_Hash_Pair<K, V> Entry;
215
216 /// Give the iterator a sensible name.
217 typedef EST_TStructIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> > Entries;
218 typedef EST_TRwStructIterator< EST_THash<K, V>, IPointer, EST_Hash_Pair<K, V> > RwEntries;
219 //@}
220
221 /**@name Key Iteration
222 *
223 * This iterator steps through the table returning just keys.
224 */
225 //@{
226 protected:
227 /** A position in the table is given by a bucket number and a
228 * pointer into the bucket.
229 */
230 struct IPointer_k_s { unsigned int b; EST_Hash_Pair<K, V> *p; };
231
232 typedef struct IPointer_k_s IPointer_k;
233
234 /// Shift to point at something.
skip_blank(IPointer_k & ip)235 void skip_blank(IPointer_k &ip) const
236 {
237 while (ip.p==NULL && ip.b<p_num_buckets)
238 {ip.b++; ip.p = ip.b<p_num_buckets?p_buckets[ip.b]:0; }
239 }
240
241 /// Go to start of the table.
point_to_first(IPointer_k & ip)242 void point_to_first(IPointer_k &ip) const
243 { ip.b=0; ip.p = ip.b<p_num_buckets?p_buckets[ip.b]:0;
244 skip_blank(ip);}
245
246 /// Move pointer forwards, at the end of the bucket, move down.
move_pointer_forwards(IPointer_k & ip)247 void move_pointer_forwards(IPointer_k &ip) const
248 {
249 ip.p = ip.p->next;
250 skip_blank(ip);
251 }
252
253 /// We are at the end if the pointer ever becomes NULL
points_to_something(const IPointer_k & ip)254 bool points_to_something(const IPointer_k &ip) const { return ip.b<p_num_buckets; }
255
256 /// Return the key of this entry.
points_at(const IPointer_k & ip)257 K &points_at(const IPointer_k &ip) { return (ip.p)->k; }
258
259 /// The iterator must be a friend to access this private interface.
260 friend class EST_TIterator< EST_THash<K, V>, IPointer_k, K >;
261 friend class EST_TRwIterator< EST_THash<K, V>, IPointer_k, K >;
262
263 public:
264 /// An entry returned by this iterator is just a key.
265 typedef K KeyEntry;
266
267 /// Give the iterator a sensible name.
268 typedef EST_TIterator< EST_THash<K, V>, IPointer_k, K > KeyEntries;
269 typedef EST_TRwIterator< EST_THash<K, V>, IPointer_k, K > KeyRwEntries;
270 //@}
271
272 };
273
274 /** A specialised hash table for when the key is an EST_String.
275 *
276 * This is just a version of <classname>EST_THash</classname> which
277 * has a different default hash function.
278 */
279
280 template<class V>
281 class EST_TStringHash : public EST_THash<EST_String, V> {
282 public:
283
284 /// Create a string hash table of <parameter>size</parameter> buckets.
EST_TStringHash(int size)285 EST_TStringHash(int size) : EST_THash<EST_String, V>(size, EST_HashFunctions::StringHash) {};
286
287 /// An entry returned by the iterator is a key value pair.
288 typedef EST_Hash_Pair<EST_String, V> Entry;
289
290 /* struct IPointer_s{ unsigned int b; Entry *p; };
291 typedef struct IPointer_s IPointer; */
292
293 // Convince GCC that the IPointer we're going to use is a typename
294 typedef typename EST_THash<EST_String, V>::IPointer TN_IPointer;
295
296 /// Give the iterator a sensible name.
297 typedef EST_TStructIterator< EST_THash<EST_String, V>, typename EST_THash<EST_String, V>::IPointer,
298 EST_Hash_Pair<EST_String, V> > Entries;
299
300 typedef EST_TRwStructIterator< EST_THash<EST_String, V>, typename EST_THash<EST_String, V>::IPointer,
301 EST_Hash_Pair<EST_String, V> > RwEntries;
302 //@}
303
304 typedef EST_String KeyEntry;
305
306 /* struct IPointer_k_s { unsigned int b; EST_Hash_Pair<EST_String, V> *p; };
307 typedef struct IPointer_k_s IPointer_k; */
308
309 /// Give the iterator a sensible name.
310
311 // Convince GCC that the IPointer_k we're going to use is a typename
312 typedef typename EST_THash<EST_String, V>::IPointer_k TN_IPointer_k;
313
314 typedef EST_TIterator< EST_THash<EST_String, V>, typename EST_THash<EST_String, V>::IPointer_k,
315 EST_String > KeyEntries;
316 typedef EST_TRwIterator< EST_THash<EST_String, V>, typename EST_THash<EST_String, V>::IPointer_k,
317 EST_String > KeyRwEntries;
318 };
319
320
321 /** The default hash function used by <classname>EST_THash</classname>
322 */
DefaultHashFunction(const void * data,size_t size,unsigned int n)323 inline static unsigned int DefaultHashFunction(const void *data, size_t size, unsigned int n)
324 {
325 unsigned int x=0;
326 const char *p = (const char *)data;
327 for(; size>0 ; p++, size--)
328 x = ((x+*p)*33) % n;
329 return x;
330 }
331
332 //@}
333 #endif
334