1 /*
2 * The contents of this file are subject to the Mozilla Public
3 * License Version 1.1 (the "License"); you may not use this file
4 * except in compliance with the License. You may obtain a copy of
5 * the License at http://www.mozilla.org/MPL/
6 *
7 * Software distributed under the License is distributed on an "AS
8 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
9 * implied. See the License for the specific language governing
10 * rights and limitations under the License.
11 *
12 * The Original Code is the Sablotron XSLT Processor.
13 *
14 * The Initial Developer of the Original Code is Ginger Alliance Ltd.
15 * Portions created by Ginger Alliance are Copyright (C) 2000-2002
16 * Ginger Alliance Ltd. All Rights Reserved.
17 *
18 * Contributor(s):
19 * Bob Jenkins <bob_jenkins@burtleburtle.net>
20 * Tom Moog <tmoog@polhode.com>
21 *
22 * Alternatively, the contents of this file may be used under the
23 * terms of the GNU General Public License Version 2 or later (the
24 * "GPL"), in which case the provisions of the GPL are applicable
25 * instead of those above. If you wish to allow use of your
26 * version of this file only under the terms of the GPL and not to
27 * allow others to use your version of this file under the MPL,
28 * indicate your decision by deleting the provisions above and
29 * replace them with the notice and other provisions required by
30 * the GPL. If you do not delete the provisions above, a recipient
31 * may use your version of this file under either the MPL or the
32 * GPL.
33 */
34
35 /*
36 * based on hash function and code by Bob Jenkins, <bob_jenkins@burtleburtle.net>, 1996.
37 */
38
39 #include "hash.h"
40 #include "proc.h"
41 #include "arena.h"
42
43 // GP: clean (only 1 M() macro)
44
45 // the unsigned long is split into two parts: a masked hash in the lower bits, and
46 // age in the upper;
47 // HASH_CODE_BITS determines the max width of the hash code
48 #define HASH_CODE_BITS 24
49
50 #define hashMask( ID, BITS ) ( (ID) & ( ( 1L << (BITS) ) - 1 ) )
51 #define hashIdCode( ID ) hashMask( (ID), HASH_CODE_BITS )
52 #define hashIdAge( ID ) ( (ID) >> HASH_CODE_BITS )
53 #define hashIdCompose( CODE, AGE ) ( ( ((HashId) AGE) << HASH_CODE_BITS )\
54 | hashMask( (CODE), HASH_CODE_BITS ) )
55
56 oolong hash(const Str& k);
57
58 //
59 //
60 // HashTable
61 //
62 //
63
initialize()64 void HashTable::initialize()
65 {
66 // initialize buckets to NULLs
67 int i, bucketsCount = TWO_TO( logSize );
68 for (i = 0; i < bucketsCount; i++)
69 buckets.append( NULL );
70 visitedBuckets = 0;
71 itemsCount = 0;
72 }
73
HashTable(SabArena * arena_,int logSize_)74 HashTable::HashTable(SabArena *arena_, int logSize_)
75 : buckets( logSize_ ), theArena( arena_ ), logSize( logSize_ ),
76 _theEmptyKey(NULL)
77 {
78 _theEmptyKey = new Str;
79 // initialize();
80 visitedBuckets = 0;
81 itemsCount = -1; // not initialized yet
82 }
83
destroy(Sit S)84 void HashTable::destroy(Sit S)
85 {
86 Log2(S, L2_DISP_HASH, Str(itemsCount), Str(buckets.number()));
87 // only kill the HashItems
88 if (!theArena)
89 {
90 HashItem *p, *old_p;
91 for (int i = 0; i < buckets.number(); i++)
92 for (p = buckets[i]; (old_p = p) != NULL; p = old_p -> next)
93 delete p;
94 }
95 else
96 ; // do nothing as the ListItems were allocated in an arena
97 buckets.deppendall();
98 (*this).HashTable::~HashTable();
99 }
100
~HashTable()101 HashTable::~HashTable()
102 {
103 // destroy();
104 cdelete(_theEmptyKey);
105 }
106
codeToIndex(oolong code) const107 inline oolong HashTable::codeToIndex(oolong code) const
108 {
109 return hashMask(code, logSize);
110 }
111
112 //
113 // for internal use only. Returns true if the desired item was found. Sets p
114 // to this item. If not found then to its predecessor. If that's not found too then
115 // to NULL.
116 //
117
lookupOrPreceding(const Str & key,oolong hashed,HashItem * & p) const118 Bool HashTable::lookupOrPreceding(const Str& key, oolong hashed, HashItem *&p) const
119 {
120 sabassert(itemsCount != -1); // assert hash table initialized
121 for ( p = buckets[hashMask(hashed, logSize)]; p; p = p -> next )
122 {
123 if (p -> key == key)
124 return TRUE;
125 else if (!p -> next)
126 return FALSE;
127 }
128 // empty bucket, p is NULL
129 return FALSE;
130 }
131
insert(const Str & key,HashId & id,const void * data)132 void HashTable::insert(const Str& key, HashId& id, const void *data /* = NULL */)
133 {
134 sabassert(itemsCount != -1); // assert hash table initialized
135 oolong hashed = hash( key );
136 HashItem *p, *q;
137 // try to find our item, or at least its predecessor
138 if (!lookupOrPreceding( key, hashed, p ))
139 {
140 // NOT FOUND. create and insert a new hash item
141 // if buckets grow, need to update p
142 if ( buckets.number() <= itemsCount )
143 p = expandWatching( hashed );
144 ++itemsCount;
145 q = new( theArena )
146 HashItem( key, hashed, data, p ? p -> age + 1 : 0, theArena );
147 // sorry, can't check for the allocation - no situation to report to
148 //M( S, q ); // GP: OK (only fails if nothing to dealloc)
149 if (p)
150 p = p -> next = q;
151 else
152 {
153 p = buckets[codeToIndex(hashed)] = q;
154 ++visitedBuckets;
155 };
156 }
157 id = hashIdCompose( hashed, p -> age );
158 }
159
insert(const Str & key)160 HashId HashTable::insert(const Str& key)
161 {
162 HashId id;
163 insert(key, id, NULL);
164 return id;
165 }
166
lookup(const Str & key,const void ** data) const167 HashId HashTable::lookup(const Str& key, const void **data /* = NULL */) const
168 {
169 sabassert(itemsCount != -1); // assert hash table initialized
170 oolong hashed = hash( key );
171 HashItem *p;
172 if (!lookupOrPreceding( key, hashed, p ))
173 {
174 if (data) *data = NULL;
175 return ITEM_NOT_FOUND;
176 }
177 if (data) *data = p -> stuff;
178 return hashIdCompose( hashed, p -> age );
179 }
180
181 #define chain( NEWPTR, TAIL, ROOT_NDX )\
182 { if (TAIL) TAIL = TAIL -> next = NEWPTR;\
183 else { TAIL = buckets[ROOT_NDX] = NEWPTR; visitedBuckets++; }}
184
185 // this function fixed by Tom Moog
186
expandWatching(oolong hashed)187 HashItem* HashTable::expandWatching( oolong hashed )
188 {
189 sabassert(itemsCount != -1); // assert hash table initialized
190 oolong i, oldBucketCount = buckets.number();
191 for (i = 0; i < oldBucketCount; i++)
192 buckets.append(NULL);
193 oolong distinguish = 1L << logSize;
194
195 HashItem *p,
196 *upperTail,
197 *lowerTail,
198 *retval = NULL;
199
200 visitedBuckets = 0;
201
202 for (i = 0; i < oldBucketCount; i++)
203 {
204 upperTail = lowerTail = NULL;
205 for (p = buckets[i]; p; p = p -> next)
206 {
207 if (p -> code & distinguish)
208 chain(p, upperTail, i + oldBucketCount)
209 else
210 chain(p, lowerTail, i);
211 };
212 if (buckets[i])
213 ++visitedBuckets;
214 if (lowerTail)
215 lowerTail -> next = NULL;
216 else
217 buckets[i] = NULL;
218 if (upperTail)
219 upperTail -> next = NULL;
220 // do the watching
221 if ( codeToIndex(hashed) == i )
222 retval = (hashed & distinguish) ? upperTail : lowerTail;
223 }
224
225 ++logSize; // can't do this before because codeToIndex() depends on it
226 sabassert( logSize <= HASH_CODE_BITS );
227 return retval;
228 }
229
getKey(HashId id) const230 const Str& HashTable::getKey(HashId id) const
231 {
232 sabassert(itemsCount != -1); // assert hash table initialized
233 if (id == UNDEF_PHRASE)
234 {
235 return *_theEmptyKey; /* __PH__ */
236 }
237 int age = hashIdAge(id);
238 HashItem *p;
239 for (p = buckets[codeToIndex(hashIdCode(id))];
240 p && p -> age != age; p = p -> next);
241 // DROPME
242 #ifdef _DEBUG
243 if (!p)
244 {
245 dump();
246 printf("\n HASH FAULT: id=%08lx ", id);
247 }
248 #endif
249 return NZ(p) -> key;
250 }
251
252 #ifdef _DEBUG
dump() const253 void HashTable::dump() const
254 {
255 printf("----- %d elements %d buckets %d visited -----\n",
256 itemsCount, buckets.number(), visitedBuckets);
257 for (int i = 0; i < buckets.number(); i++)
258 {
259 if (buckets[i])
260 {
261 printf("[%04x] ", i);
262 for (HashItem *j = buckets[i]; j; j = j -> next)
263 {
264 printf("%s(%08x) ", (char*)(j -> key), j -> code );
265 };
266 puts("");
267 }
268 }
269 puts("--------------------------------------------------\n\n");
270 }
271 #endif
272
report(Sit S,MsgType type,MsgCode code,const Str & arg1,const Str & arg2)273 void HashTable::report(Sit S, MsgType type, MsgCode code, const Str& arg1, const Str& arg2)
274 {
275 S.message(type, code, arg1, arg2);
276 }
277
278
279
280
281 //
282 // the real internals
283 //
284
285 //
286 // 3-number mixer
287 //
288
289 #define mix(a,b,c) \
290 { \
291 a -= b; a -= c; a ^= (c>>13); \
292 b -= c; b -= a; b ^= (a<<8); \
293 c -= a; c -= b; c ^= (b>>13); \
294 a -= b; a -= c; a ^= (c>>12); \
295 b -= c; b -= a; b ^= (a<<16); \
296 c -= a; c -= b; c ^= (b>>5); \
297 a -= b; a -= c; a ^= (c>>3); \
298 b -= c; b -= a; b ^= (a<<10); \
299 c -= a; c -= b; c ^= (b>>15); \
300 }
301
302 //
303 // the hash function
304 //
305
hash(const Str & key)306 oolong hash(const Str& key)
307 {
308 register oolong a, b, c, len;
309 register const char *k = (const char*) key;
310
311 /* Set up the internal state */
312 len = key.length();
313 a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
314 c = 0; // should have been either the previous hash value or any number :-)
315
316 /*---------------------------------------- handle most of the key */
317 while (len >= 12)
318 {
319 a += (k[0] +((oolong)k[1]<<8) +((oolong)k[2]<<16) +((oolong)k[3]<<24));
320 b += (k[4] +((oolong)k[5]<<8) +((oolong)k[6]<<16) +((oolong)k[7]<<24));
321 c += (k[8] +((oolong)k[9]<<8) +((oolong)k[10]<<16)+((oolong)k[11]<<24));
322 mix(a,b,c);
323 k += 12; len -= 12;
324 }
325
326 /*------------------------------------- handle the last 11 bytes */
327 c += key.length();
328 switch(len) /* all the case statements fall through */
329 {
330 case 11: c+=((oolong)k[10]<<24);
331 case 10: c+=((oolong)k[9]<<16);
332 case 9 : c+=((oolong)k[8]<<8);
333 /* the first byte of c is reserved for the length */
334 case 8 : b+=((oolong)k[7]<<24);
335 case 7 : b+=((oolong)k[6]<<16);
336 case 6 : b+=((oolong)k[5]<<8);
337 case 5 : b+=k[4];
338 case 4 : a+=((oolong)k[3]<<24);
339 case 3 : a+=((oolong)k[2]<<16);
340 case 2 : a+=((oolong)k[1]<<8);
341 case 1 : a+=k[0];
342 /* case 0: nothing left to add */
343 }
344 mix(a,b,c);
345 /*-------------------------------------------- report the result */
346 return c;
347 }
348