1 /*
2  * The contents of this file are subject to the Mozilla Public
3  * License Version 1.1 (the "License"); you may not use this file
4  * except in compliance with the License. You may obtain a copy of
5  * the License at http://www.mozilla.org/MPL/
6  *
7  * Software distributed under the License is distributed on an "AS
8  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
9  * implied. See the License for the specific language governing
10  * rights and limitations under the License.
11  *
12  * The Original Code is the Sablotron XSLT Processor.
13  *
14  * The Initial Developer of the Original Code is Ginger Alliance Ltd.
15  * Portions created by Ginger Alliance are Copyright (C) 2000-2002
16  * Ginger Alliance Ltd. All Rights Reserved.
17  *
18  * Contributor(s):
19  *      Bob Jenkins <bob_jenkins@burtleburtle.net>
20  *      Tom Moog <tmoog@polhode.com>
21  *
22  * Alternatively, the contents of this file may be used under the
23  * terms of the GNU General Public License Version 2 or later (the
24  * "GPL"), in which case the provisions of the GPL are applicable
25  * instead of those above.  If you wish to allow use of your
26  * version of this file only under the terms of the GPL and not to
27  * allow others to use your version of this file under the MPL,
28  * indicate your decision by deleting the provisions above and
29  * replace them with the notice and other provisions required by
30  * the GPL.  If you do not delete the provisions above, a recipient
31  * may use your version of this file under either the MPL or the
32  * GPL.
33  */
34 
35 /*
36  * based on hash function and code by Bob Jenkins, <bob_jenkins@burtleburtle.net>, 1996.
37  */
38 
39 #include "hash.h"
40 #include "proc.h"
41 #include "arena.h"
42 
43 // GP: clean (only 1 M() macro)
44 
45 // the unsigned long is split into two parts: a masked hash in the lower bits, and
46 // age in the upper;
47 // HASH_CODE_BITS determines the max width of the hash code
48 #define HASH_CODE_BITS       24
49 
50 #define hashMask( ID, BITS ) ( (ID) & ( ( 1L << (BITS) ) - 1 ) )
51 #define hashIdCode( ID ) hashMask( (ID), HASH_CODE_BITS )
52 #define hashIdAge( ID ) ( (ID) >> HASH_CODE_BITS )
53 #define hashIdCompose( CODE, AGE ) ( ( ((HashId) AGE) << HASH_CODE_BITS )\
54             | hashMask( (CODE), HASH_CODE_BITS ) )
55 
56 oolong hash(const Str& k);
57 
58 //
59 //
60 //      HashTable
61 //
62 //
63 
initialize()64 void HashTable::initialize()
65 {
66     // initialize buckets to NULLs
67     int i, bucketsCount = TWO_TO( logSize );
68     for (i = 0; i < bucketsCount; i++)
69         buckets.append( NULL );
70     visitedBuckets = 0;
71     itemsCount = 0;
72 }
73 
HashTable(SabArena * arena_,int logSize_)74 HashTable::HashTable(SabArena *arena_, int logSize_)
75 : buckets( logSize_ ), theArena( arena_ ), logSize( logSize_ ),
76   _theEmptyKey(NULL)
77 {
78     _theEmptyKey = new Str;
79     //    initialize();
80     visitedBuckets = 0;
81     itemsCount = -1; // not initialized yet
82 }
83 
destroy(Sit S)84 void HashTable::destroy(Sit S)
85 {
86     Log2(S, L2_DISP_HASH, Str(itemsCount), Str(buckets.number()));
87     // only kill the HashItems
88     if (!theArena)
89     {
90         HashItem *p, *old_p;
91         for (int i = 0; i < buckets.number(); i++)
92             for (p = buckets[i]; (old_p = p) != NULL; p = old_p -> next)
93                 delete p;
94     }
95     else
96         ;   // do nothing as the ListItems were allocated in an arena
97     buckets.deppendall();
98     (*this).HashTable::~HashTable();
99 }
100 
~HashTable()101 HashTable::~HashTable()
102 {
103     // destroy();
104     cdelete(_theEmptyKey);
105 }
106 
codeToIndex(oolong code) const107 inline oolong HashTable::codeToIndex(oolong code) const
108 {
109     return hashMask(code, logSize);
110 }
111 
112 //
113 //  for internal use only. Returns true if the desired item was found. Sets p
114 //  to this item. If not found then to its predecessor. If that's not found too then
115 //  to NULL.
116 //
117 
lookupOrPreceding(const Str & key,oolong hashed,HashItem * & p) const118 Bool HashTable::lookupOrPreceding(const Str& key, oolong hashed, HashItem *&p) const
119 {
120     sabassert(itemsCount != -1);     // assert hash table initialized
121     for ( p = buckets[hashMask(hashed, logSize)]; p; p = p -> next )
122     {
123         if (p -> key == key)
124             return TRUE;
125         else if (!p -> next)
126             return FALSE;
127     }
128     // empty bucket, p is NULL
129     return FALSE;
130 }
131 
insert(const Str & key,HashId & id,const void * data)132 void HashTable::insert(const Str& key, HashId& id, const void *data /* = NULL */)
133 {
134     sabassert(itemsCount != -1);     // assert hash table initialized
135     oolong hashed = hash( key );
136     HashItem *p, *q;
137     // try to find our item, or at least its predecessor
138     if (!lookupOrPreceding( key, hashed, p ))
139     {
140         // NOT FOUND. create and insert a new hash item
141         // if buckets grow, need to update p
142         if ( buckets.number() <= itemsCount )
143             p = expandWatching( hashed );
144         ++itemsCount;
145         q = new( theArena )
146             HashItem( key, hashed, data, p ? p -> age + 1 : 0, theArena );
147 	    // sorry, can't check for the allocation - no situation to report to
148         //M( S, q );  // GP: OK (only fails if nothing to dealloc)
149         if (p)
150             p = p -> next = q;
151         else
152         {
153             p = buckets[codeToIndex(hashed)] = q;
154             ++visitedBuckets;
155         };
156     }
157     id = hashIdCompose( hashed, p -> age );
158 }
159 
insert(const Str & key)160 HashId HashTable::insert(const Str& key)
161 {
162     HashId id;
163     insert(key, id, NULL);
164     return id;
165 }
166 
lookup(const Str & key,const void ** data) const167 HashId HashTable::lookup(const Str& key, const void **data /* = NULL */) const
168 {
169     sabassert(itemsCount != -1);     // assert hash table initialized
170     oolong hashed = hash( key );
171     HashItem *p;
172     if (!lookupOrPreceding( key, hashed, p ))
173     {
174         if (data) *data = NULL;
175         return ITEM_NOT_FOUND;
176     }
177     if (data) *data = p -> stuff;
178     return hashIdCompose( hashed, p -> age );
179 }
180 
181 #define chain( NEWPTR, TAIL, ROOT_NDX )\
182 { if (TAIL) TAIL = TAIL -> next = NEWPTR;\
183 else { TAIL = buckets[ROOT_NDX] = NEWPTR; visitedBuckets++; }}
184 
185 // this function fixed by Tom Moog
186 
expandWatching(oolong hashed)187 HashItem* HashTable::expandWatching( oolong hashed )
188 {
189     sabassert(itemsCount != -1);     // assert hash table initialized
190     oolong i, oldBucketCount = buckets.number();
191     for (i = 0; i < oldBucketCount; i++)
192         buckets.append(NULL);
193     oolong distinguish = 1L << logSize;
194 
195     HashItem *p,
196         *upperTail,
197         *lowerTail,
198         *retval = NULL;
199 
200     visitedBuckets = 0;
201 
202     for (i = 0; i < oldBucketCount; i++)
203     {
204         upperTail = lowerTail = NULL;
205         for (p = buckets[i]; p; p = p -> next)
206         {
207             if (p -> code & distinguish)
208                 chain(p, upperTail, i + oldBucketCount)
209             else
210                 chain(p, lowerTail, i);
211         };
212         if (buckets[i])
213             ++visitedBuckets;
214         if (lowerTail)
215             lowerTail -> next = NULL;
216         else
217             buckets[i] = NULL;
218         if (upperTail)
219             upperTail -> next = NULL;
220         // do the watching
221         if ( codeToIndex(hashed) == i )
222             retval = (hashed & distinguish) ? upperTail : lowerTail;
223     }
224 
225     ++logSize;  // can't do this before because codeToIndex() depends on it
226     sabassert( logSize <= HASH_CODE_BITS );
227     return retval;
228 }
229 
getKey(HashId id) const230 const Str& HashTable::getKey(HashId id) const
231 {
232   sabassert(itemsCount != -1);     // assert hash table initialized
233   if (id == UNDEF_PHRASE)
234     {
235       return *_theEmptyKey; /* __PH__ */
236     }
237   int age = hashIdAge(id);
238   HashItem *p;
239   for (p = buckets[codeToIndex(hashIdCode(id))];
240        p && p -> age != age; p = p -> next);
241   // DROPME
242 #ifdef _DEBUG
243   if (!p)
244     {
245       dump();
246       printf("\n HASH FAULT: id=%08lx ", id);
247     }
248 #endif
249     return NZ(p) -> key;
250 }
251 
252 #ifdef _DEBUG
dump() const253 void HashTable::dump() const
254     {
255         printf("----- %d elements  %d buckets  %d visited -----\n",
256             itemsCount, buckets.number(), visitedBuckets);
257         for (int i = 0; i < buckets.number(); i++)
258         {
259             if (buckets[i])
260             {
261                 printf("[%04x] ", i);
262                 for (HashItem *j = buckets[i]; j; j = j -> next)
263                 {
264                     printf("%s(%08x) ", (char*)(j -> key), j -> code );
265                 };
266                 puts("");
267             }
268         }
269         puts("--------------------------------------------------\n\n");
270     }
271 #endif
272 
report(Sit S,MsgType type,MsgCode code,const Str & arg1,const Str & arg2)273 void HashTable::report(Sit S, MsgType type, MsgCode code, const Str& arg1, const Str& arg2)
274 {
275     S.message(type, code, arg1, arg2);
276 }
277 
278 
279 
280 
281 //
282 //  the real internals
283 //
284 
285 //
286 // 3-number mixer
287 //
288 
289 #define mix(a,b,c) \
290 { \
291   a -= b; a -= c; a ^= (c>>13); \
292   b -= c; b -= a; b ^= (a<<8); \
293   c -= a; c -= b; c ^= (b>>13); \
294   a -= b; a -= c; a ^= (c>>12);  \
295   b -= c; b -= a; b ^= (a<<16); \
296   c -= a; c -= b; c ^= (b>>5); \
297   a -= b; a -= c; a ^= (c>>3);  \
298   b -= c; b -= a; b ^= (a<<10); \
299   c -= a; c -= b; c ^= (b>>15); \
300 }
301 
302 //
303 // the hash function
304 //
305 
hash(const Str & key)306 oolong hash(const Str& key)
307 {
308    register oolong a, b, c, len;
309    register const char *k = (const char*) key;
310 
311    /* Set up the internal state */
312    len = key.length();
313    a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
314    c = 0;   // should have been either the previous hash value or any number :-)
315 
316    /*---------------------------------------- handle most of the key */
317    while (len >= 12)
318    {
319       a += (k[0] +((oolong)k[1]<<8) +((oolong)k[2]<<16) +((oolong)k[3]<<24));
320       b += (k[4] +((oolong)k[5]<<8) +((oolong)k[6]<<16) +((oolong)k[7]<<24));
321       c += (k[8] +((oolong)k[9]<<8) +((oolong)k[10]<<16)+((oolong)k[11]<<24));
322       mix(a,b,c);
323       k += 12; len -= 12;
324    }
325 
326    /*------------------------------------- handle the last 11 bytes */
327    c += key.length();
328    switch(len)              /* all the case statements fall through */
329    {
330    case 11: c+=((oolong)k[10]<<24);
331    case 10: c+=((oolong)k[9]<<16);
332    case 9 : c+=((oolong)k[8]<<8);
333       /* the first byte of c is reserved for the length */
334    case 8 : b+=((oolong)k[7]<<24);
335    case 7 : b+=((oolong)k[6]<<16);
336    case 6 : b+=((oolong)k[5]<<8);
337    case 5 : b+=k[4];
338    case 4 : a+=((oolong)k[3]<<24);
339    case 3 : a+=((oolong)k[2]<<16);
340    case 2 : a+=((oolong)k[1]<<8);
341    case 1 : a+=k[0];
342      /* case 0: nothing left to add */
343    }
344    mix(a,b,c);
345    /*-------------------------------------------- report the result */
346    return c;
347 }
348