1 /*
2 * epos/src/hash.h
3 * (c) geo@cuni.cz
4 *
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License in doc/COPYING for more details.
14 *
15 * The hash tables are no longer independent from Epos, esp.
16 * because they now use the standard text file preprocessor.
17 */
18
19 #ifndef __hash_tables
20 #define __hash_tables
21
22 //#define POWER_OF_TWO // will avoid division in hash::fn()
23 // by forcing the table size to be 2^n.
24 // Not recommended for most processors.
25
26 #define MAX_DIGITS 10 //how many decimal digits the biggest int stored may have
27 // no performance penalty if much bigger
28 #define KEY_NOT_FOUND NULL
29 #define INT_NOT_FOUND -32768 //guaranted to be negative, but that's all
30
31 #define _HASH_DEPTH 64 //allows up to cca ten trillion elements
32
33 #define key_t hash_key_t
34 #define data_t hash_data_t
35
36 template <class key_t, class data_t>
37 struct hsearchtree;
38
39 extern hsearchtree<void, void> ** _hash_stk [_HASH_DEPTH+1];
40 extern int _hash_sp;
41
42
43 class hash;
44
45 template <class key_t, class data_t>
46 class hash_table
47 {
48
49 inline int fn(const key_t *key);
50 int min_items;
51 int max_items;
52 int tree_too_deep;
53 #ifdef POWER_OF_TWO
54 int hash_fn_mask;
55 #endif
56 protected:
57 int maxdep;
58 hsearchtree<key_t, data_t> **ht;
59 int capacity;
60 public:
61 int items; // items inside
62 int perc_optimal;
63 int perc_too_sparse;
64 int perc_too_dense;
65 int dupkey;
66 int dupdata;
67 int longest; //the longest "key" ever inside (meaningless if key_t is not char)
68 hash_table(int size);
69 hash_table(hash_table<key_t, data_t> *); // moderately slow copy constructor
70 ~hash_table();
71 void add(const key_t *key, const data_t *value); //Will strcpy the arguments
72 data_t* remove(const key_t *key); //Its "data" will be returned (just free() it)
73 void forall(void usefn(key_t *key, data_t *value, void *parm), void *parm);
forall(void usefn (key_t * key,data_t * value,int parm),int parm)74 inline void forall(void usefn(key_t *key, data_t *value, int parm), int parm) {
75 forall((void(*)(key_t*, data_t*, void *))usefn, (void *)parm);
76 }
77 data_t* translate(const key_t *key); //Will NOT strdup the result
78 key_t* get_random();
79 void rehash(int new_capacity); //Adjust the capacity. Boring.
80 void rehash(); //Adjust the capacity as needed
81 void cfg_rehash(int min_perc, int max_perc, int max_tree);
82 //When will the rehash occur?
83 private:
84 void rehash_tree(hsearchtree<key_t, data_t> *t); //internal, called by rehash()
85 void fortree(hsearchtree<key_t, data_t> *tree,
86 void usefn(key_t *key, data_t *value, void *parm), void *parm);
87 void dissolvetree(hsearchtree<key_t, data_t> *tree);
88 };
89
90 //void _hdump(key_t *s1, data_t *s2); //See hash::debug
91 //void _hfree(key_t *s1, data_t *s2); //See the destructor
92
93 class hash: public hash_table<char, char>
94 {
95 public:
hash(int i)96 hash (int i): hash_table<char, char> (i) {};
hash(hash * h)97 hash (hash *h): hash_table<char, char> (h) {};
98 hash(const char *filename, const char *dirname, const char *treename,
99 const char *description,
100 int perc_full, // initial and optimal ratio items/capacity
101 int perc_downsize, // if under that percentage full, call rehash()
102 int perc_upsize, // if over that percentage full, rehash()
103 int max_tree_depth, // if an AVL tree reaches this height, rehash()
104 bool allow_id, // true, if we accept single word input
105 // as both key and data
106 // false, if we reject single word input
107 // anything else - default data for this case
108 bool multi_data); // true, if the data may consist of multiple words
109 void add_int(const char *key,
110 int value); //The integer is stored human-readable (decimal) :-)
111 int translate_int(const char *key);//returns -1 if not found, garbage if not int
112
113 void debug(); //currently "dump thyself"
114 void listtree(hsearchtree<char, char> *tree, int indent);//See hash::debug
115 };
116
117 #undef key_t // a stupid name collision with linux kernel 2.1.?? include/types.h
118 #undef data_t // requires this creeping attitude
119
120 #ifdef WANT_DEFAULT_HASH_FN
121
122 inline unsigned int
contiguous_hash_fn(void * ptr,int size)123 contiguous_hash_fn(void *ptr, int size) // a hash function for contiguous keys
124 {
125 const char *p;
126 unsigned int j = 0;
127
128 if (size % sizeof(int)) for (p=(char *)ptr;p<(char *)ptr + size;p++) j=173*j+*p;
129 else for (p=(char *)ptr; p<(char *)ptr + size; p += sizeof(int))
130 j = 233*j + *(unsigned int *)p;
131 return j;
132 }
133
134 #endif
135
136 void shutdown_hashing();
137
138 #endif //__hash_tables
139