1 /*
2 ------------------------------------------------------------------------------
3 perfect.h: code to generate code for a hash for perfect hashing.
4 (c) Bob Jenkins, September 1996
5 You may use this code in any way you wish, and it is free.  No warranty.
6 I hereby place this in the public domain.
7 Source is http://burtleburtle.net/bob/c/perfect.h
8 ------------------------------------------------------------------------------
9 */
10 
11 #ifndef STANDARD
12 #include "standard.h"
13 #endif
14 
15 #ifndef PERFECT
16 #define PERFECT
17 
18 #define MAXKEYLEN 30                              /* maximum length of a key */
19 #define USE_SCRAMBLE  4096           /* use scramble if blen >= USE_SCRAMBLE */
20 #define SCRAMBLE_LEN ((ub4)1<<16)                    /* length of *scramble* */
21 #define RETRY_INITKEY 2048  /* number of times to try to find distinct (a,b) */
22 #define RETRY_PERFECT 1     /* number of times to try to make a perfect hash */
23 #define RETRY_HEX     200               /* RETRY_PERFECT when hex keys given */
24 
25 /* the generated code for the final hash, assumes initial hash is done */
26 struct gencode
27 {
28   char **line;                       /* array of text lines, 80 bytes apiece */
29   /*
30    * The code placed here must declare "ub4 rsl"
31    * and assign it the value of the perfect hash using the function inputs.
32    * Later code will be tacked on which returns rsl or manipulates it according
33    * to the user directives.
34    *
35    * This code is at the top of the routine; it may and must declare any
36    * local variables it needs.
37    *
38    * Each way of filling in **line should be given a comment that is a unique
39    * tag.  A testcase named with that tag should also be found which tests
40    * the generated code.
41    */
42   ub4    len;                    /* number of lines available for final hash */
43   ub4    used;                         /* number of lines used by final hash */
44 
45   ub4    lowbit;                          /* for HEX, lowest interesting bit */
46   ub4    highbit;                        /* for HEX, highest interesting bit */
47   ub4    diffbits;                         /* bits which differ for some key */
48   ub4    i,j,k,l,m,n,o;                      /* state machine used in hexn() */
49 };
50 typedef  struct gencode  gencode;
51 
52 /* user directives: perfect hash? minimal perfect hash? input is an int? */
53 struct hashform
54 {
55   enum {
56     NORMAL_HM,                                            /* key is a string */
57     INLINE_HM,    /* user will do initial hash, we must choose salt for them */
58     HEX_HM,              /* key to be hashed is a hexidecimal 4-byte integer */
59     DECIMAL_HM,              /* key to be hashed is a decimal 4-byte integer */
60     AB_HM,      /* key to be hashed is "A B", where A and B are (A,B) in hex */
61     ABDEC_HM                                   /* like AB_HM, but in decimal */
62   } mode;
63   enum {
64     STRING_HT,                                            /* key is a string */
65     INT_HT,                                             /* key is an integer */
66     AB_HT             /* dunno what key is, but input is distinct (A,B) pair */
67   } hashtype;
68   enum {
69     NORMAL_HP,                                   /* just find a perfect hash */
70     MINIMAL_HP                                /* find a minimal perfect hash */
71   } perfect;
72   enum {
73     FAST_HS,                                                    /* fast mode */
74     SLOW_HS                                                     /* slow mode */
75   } speed;
76 };
77 typedef  struct hashform  hashform;
78 
79 /* representation of a key */
80 struct key
81 {
82   char       *name_k;                                      /* the actual key */
83   ub4         len_k;                         /* the length of the actual key */
84   ub4         hash_k;                 /* the initial hash value for this key */
85   struct key *next_k;                                            /* next key */
86 /* beyond this point is mapping-dependent */
87   ub4         a_k;                            /* a, of the key maps to (a,b) */
88   ub4         b_k;                            /* b, of the key maps to (a,b) */
89   struct key *nextb_k;                               /* next key with this b */
90 };
91 typedef  struct key  key;
92 
93 /* things indexed by b of original (a,b) pair */
94 struct bstuff
95 {
96   ub2  val_b;                                        /* hash=a^tabb[b].val_b */
97   key *list_b;                   /* tabb[i].list_b is list of keys with b==i */
98   ub4  listlen_b;                                        /* length of list_b */
99   ub4  water_b;           /* high watermark of who has visited this map node */
100 };
101 typedef  struct bstuff  bstuff;
102 
103 /* things indexed by final hash value */
104 struct hstuff
105 {
106   key *key_h;                   /* tabh[i].key_h is the key with a hash of i */
107 };
108 typedef  struct hstuff hstuff;
109 
110 /* things indexed by queue position */
111 struct qstuff
112 {
113   bstuff *b_q;                        /* b that currently occupies this hash */
114   ub4     parent_q;     /* queue position of parent that could use this hash */
115   ub2     newval_q;      /* what to change parent tab[b] to to use this hash */
116   ub2     oldval_q;                              /* original value of tab[b] */
117 };
118 typedef  struct qstuff  qstuff;
119 
120 /* return ceiling(log based 2 of x) */
121 ub4 phash_log2(ub4 x);
122 
123 /* Given the keys, scramble[], and hash mode, find the perfect hash */
124 void findhash(bstuff **tabb, hstuff **tabh, ub4 *alen, ub4 *blen, ub4 *salt,
125                 gencode *final, ub4 *scramble, ub4 *smax, key *keys, ub4 nkeys,
126                 hashform *form);
127 
128 /* private, but in a different file because it's excessively verbose */
129 int inithex(key *keys, ub4 nkeys, ub4 alen, ub4 blen, ub4 smax, ub4 salt,
130             gencode *final, hashform *form);
131 
132 #endif /* PERFECT */
133