1 /*
2  *	epos/src/hash.h
3  *	(c) geo@cuni.cz
4  *
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9 
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License in doc/COPYING for more details.
14  *
15  *      The hash tables are no longer independent from Epos, esp.
16  *	because they now use the standard text file preprocessor.
17  */
18 
19 #ifndef __hash_tables
20 #define __hash_tables
21 
22 //#define POWER_OF_TWO		// will avoid division in hash::fn()
23 				// by forcing the table size to be 2^n.
24 				// Not recommended for most processors.
25 
26 #define MAX_DIGITS 10            //how many decimal digits the biggest int stored may have
27                                  //  no performance penalty if much bigger
28 #define KEY_NOT_FOUND NULL
29 #define INT_NOT_FOUND      -32768   //guaranted to be negative, but that's all
30 
31 #define _HASH_DEPTH	       64   //allows up to cca ten trillion elements
32 
33 #define key_t hash_key_t
34 #define data_t hash_data_t
35 
36 template <class key_t, class data_t>
37 struct hsearchtree;
38 
39 extern hsearchtree<void, void> ** _hash_stk [_HASH_DEPTH+1];
40 extern int _hash_sp;
41 
42 
43 class hash;
44 
45 template <class key_t, class data_t>
46 class hash_table
47 {
48 
49 	inline int  fn(const key_t *key);
50 	int min_items;
51 	int max_items;
52 	int tree_too_deep;
53 #ifdef POWER_OF_TWO
54 	int hash_fn_mask;
55 #endif
56    protected:
57 	int  maxdep;
58 	hsearchtree<key_t, data_t> **ht;
59 	int  capacity;
60    public:
61 	int items;	// items inside
62 	int perc_optimal;
63 	int perc_too_sparse;
64 	int perc_too_dense;
65 	int dupkey;
66 	int dupdata;
67 	int longest;                     //the longest "key" ever inside (meaningless if key_t is not char)
68 	     hash_table(int size);
69 	     hash_table(hash_table<key_t, data_t> *);	// moderately slow copy constructor
70 	    ~hash_table();
71 	void 	add(const key_t *key, const data_t *value); //Will strcpy the arguments
72 	data_t*	remove(const key_t *key);       //Its "data" will be returned (just free() it)
73 	void 	forall(void usefn(key_t *key, data_t *value, void *parm), void *parm);
forall(void usefn (key_t * key,data_t * value,int parm),int parm)74 	inline void forall(void usefn(key_t *key, data_t *value, int parm), int parm) {
75 		forall((void(*)(key_t*, data_t*, void *))usefn, (void *)parm);
76 	}
77 	data_t*	translate(const key_t *key);    //Will NOT strdup the result
78 	key_t*	get_random();
79 	void 	rehash(int new_capacity);     //Adjust the capacity. Boring.
80 	void 	rehash();		     //Adjust the capacity as needed
81 	void 	cfg_rehash(int min_perc, int max_perc, int max_tree);
82 					     //When will the rehash occur?
83  private:
84 	void 	rehash_tree(hsearchtree<key_t, data_t> *t);    //internal, called by rehash()
85 	void 	fortree(hsearchtree<key_t, data_t> *tree,
86 				void usefn(key_t *key, data_t *value, void *parm), void *parm);
87 	void 	dissolvetree(hsearchtree<key_t, data_t> *tree);
88 };
89 
90 //void _hdump(key_t *s1, data_t *s2);        //See hash::debug
91 //void _hfree(key_t *s1, data_t *s2);      //See the destructor
92 
93 class hash: public hash_table<char, char>
94 {
95    public:
hash(int i)96 	hash (int i): hash_table<char, char> (i) {};
hash(hash * h)97 	hash (hash *h): hash_table<char, char> (h) {};
98 	     hash(const char *filename, const char *dirname, const char *treename,
99 	     	const char *description,
100 		int perc_full,		// initial and optimal ratio items/capacity
101 		int perc_downsize,	// if under that percentage full, call rehash()
102 		int perc_upsize,	// if over that percentage full, rehash()
103 		int max_tree_depth,	// if an AVL tree reaches this height, rehash()
104 		bool allow_id,		// true, if we accept single word input
105 					// 	as both key and data
106 					// false, if we reject single word input
107 					// anything else - default data for this case
108 		bool multi_data);	// true, if the data may consist of multiple words
109 	void 	add_int(const char *key,
110 			int value);     //The integer is stored human-readable (decimal) :-)
111 	int 	translate_int(const char *key);//returns -1 if not found, garbage if not int
112 
113 	void 	debug();                     //currently "dump thyself"
114 	void	listtree(hsearchtree<char, char> *tree, int indent);//See hash::debug
115 };
116 
117 #undef key_t		// a stupid name collision with linux kernel 2.1.?? include/types.h
118 #undef data_t		//   requires this creeping attitude
119 
120 #ifdef WANT_DEFAULT_HASH_FN
121 
122 inline unsigned int
contiguous_hash_fn(void * ptr,int size)123 contiguous_hash_fn(void *ptr, int size)		// a hash function for contiguous keys
124 {
125 	const char *p;
126 	unsigned int j = 0;
127 
128 	if (size % sizeof(int)) for (p=(char *)ptr;p<(char *)ptr + size;p++) j=173*j+*p;
129 	else for (p=(char *)ptr; p<(char *)ptr + size; p += sizeof(int))
130 		j = 233*j + *(unsigned int *)p;
131 	return j;
132 }
133 
134 #endif
135 
136 void shutdown_hashing();
137 
138 #endif                     //__hash_tables
139