1 /*-
2  * Copyright (c) 1990, 1993, 1994
3  *  The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Margo Seltzer.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. ***REMOVED*** - see
17  *    ftp://ftp.cs.berkeley.edu/pub/4bsd/README.Impt.License.Change
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *  @(#)hash.h  8.3 (Berkeley) 5/31/94
35  */
36 
37 /* Operations */
38 
39 #include <stdio.h>
40 #include "mcom_db.h"
41 typedef enum {
42     HASH_GET,
43     HASH_PUT,
44     HASH_PUTNEW,
45     HASH_DELETE,
46     HASH_FIRST,
47     HASH_NEXT
48 } ACTION;
49 
50 /* Buffer Management structures */
51 typedef struct _bufhead BUFHEAD;
52 
53 struct _bufhead {
54     BUFHEAD *prev; /* LRU links */
55     BUFHEAD *next; /* LRU links */
56     BUFHEAD *ovfl; /* Overflow page buffer header */
57     uint32 addr;   /* Address of this page */
58     char *page;    /* Actual page data */
59     char is_disk;
60     char flags;
61 #define BUF_MOD 0x0001
62 #define BUF_DISK 0x0002
63 #define BUF_BUCKET 0x0004
64 #define BUF_PIN 0x0008
65 };
66 
67 #define IS_BUCKET(X) ((X)&BUF_BUCKET)
68 
69 typedef BUFHEAD **SEGMENT;
70 
71 typedef int DBFILE_PTR;
72 #define NO_FILE -1
73 #ifdef macintosh
74 #define DBFILE_OPEN(path, flag, mode) open((path), flag)
75 #define EXISTS(path)
76 #else
77 #define DBFILE_OPEN(path, flag, mode) open((path), (flag), (mode))
78 #endif
79 /* Hash Table Information */
80 typedef struct hashhdr {     /* Disk resident portion */
81     int32 magic;             /* Magic NO for hash tables */
82     int32 version;           /* Version ID */
83     uint32 lorder;           /* Byte Order */
84     int32 bsize;             /* Bucket/Page Size */
85     int32 bshift;            /* Bucket shift */
86     int32 dsize;             /* Directory Size */
87     int32 ssize;             /* Segment Size */
88     int32 sshift;            /* Segment shift */
89     int32 ovfl_point;        /* Where overflow pages are being
90                               * allocated */
91     int32 last_freed;        /* Last overflow page freed */
92     int32 max_bucket;        /* ID of Maximum bucket in use */
93     int32 high_mask;         /* Mask to modulo into entire table */
94     int32 low_mask;          /* Mask to modulo into lower half of
95                               * table */
96     int32 ffactor;           /* Fill factor */
97     int32 nkeys;             /* Number of keys in hash table */
98     int32 hdrpages;          /* Size of table header */
99     uint32 h_charkey;        /* value of hash(CHARKEY) */
100 #define NCACHED 32           /* number of bit maps and spare \
101                               * points */
102     int32 spares[NCACHED];   /* spare pages for overflow */
103     uint16 bitmaps[NCACHED]; /* address of overflow page
104                               * bitmaps */
105 } HASHHDR;
106 
107 typedef struct htab { /* Memory resident data structure */
108     HASHHDR hdr;      /* Header */
109     int nsegs;        /* Number of allocated segments */
110     int exsegs;       /* Number of extra allocated
111                        * segments */
112     uint32            /* Hash function */
113         (*hash)(const void *, size_t);
114     int flags;     /* Flag values */
115     DBFILE_PTR fp; /* File pointer */
116     char *filename;
117     char *tmp_buf;         /* Temporary Buffer for BIG data */
118     char *tmp_key;         /* Temporary Buffer for BIG keys */
119     BUFHEAD *cpage;        /* Current page */
120     int cbucket;           /* Current bucket */
121     int cndx;              /* Index of next item on cpage */
122     int dbmerrno;          /* Error Number -- for DBM
123                             * compatability */
124     int new_file;          /* Indicates if fd is backing store
125                             * or no */
126     int save_file;         /* Indicates whether we need to flush
127                             * file at
128                             * exit */
129     uint32 *mapp[NCACHED]; /* Pointers to page maps */
130     int nmaps;             /* Initial number of bitmaps */
131     int nbufs;             /* Number of buffers left to
132                             * allocate */
133     BUFHEAD bufhead;       /* Header of buffer lru list */
134     SEGMENT *dir;          /* Hash Bucket directory */
135     off_t file_size;       /* in bytes */
136     char is_temp;          /* unlink file on close */
137     char updateEOF;        /* force EOF update on flush */
138 } HTAB;
139 
140 /*
141  * Constants
142  */
143 #define DATABASE_CORRUPTED_ERROR -999 /* big ugly abort, delete database */
144 #define OLD_MAX_BSIZE 65536           /* 2^16 */
145 #define MAX_BSIZE 32l * 1024l         /* 2^15 */
146 #define MIN_BUFFERS 6
147 #define MINHDRSIZE 512
148 #define DEF_BUFSIZE 65536l /* 64 K */
149 #define DEF_BUCKET_SIZE 4096
150 #define DEF_BUCKET_SHIFT 12 /* log2(BUCKET) */
151 #define DEF_SEGSIZE 256
152 #define DEF_SEGSIZE_SHIFT 8 /* log2(SEGSIZE)     */
153 #define DEF_DIRSIZE 256
154 #define DEF_FFACTOR 65536l
155 #define MIN_FFACTOR 4
156 #define SPLTMAX 8
157 #define CHARKEY "%$sniglet^&"
158 #define NUMKEY 1038583l
159 #define BYTE_SHIFT 3
160 #define INT_TO_BYTE 2
161 #define INT_BYTE_SHIFT 5
162 #define ALL_SET ((uint32)0xFFFFFFFF)
163 #define ALL_CLEAR 0
164 
165 #define PTROF(X) ((ptrdiff_t)(X) == BUF_DISK ? 0 : (X))
166 #define ISDISK(X) ((X) ? ((ptrdiff_t)(X) == BUF_DISK ? BUF_DISK      \
167                                                      : (X)->is_disk) \
168                        : 0)
169 
170 #define BITS_PER_MAP 32
171 
172 /* Given the address of the beginning of a big map, clear/set the nth bit */
173 #define CLRBIT(A, N) ((A)[(N) / BITS_PER_MAP] &= ~(1 << ((N) % BITS_PER_MAP)))
174 #define SETBIT(A, N) ((A)[(N) / BITS_PER_MAP] |= (1 << ((N) % BITS_PER_MAP)))
175 #define ISSET(A, N) ((A)[(N) / BITS_PER_MAP] & (1 << ((N) % BITS_PER_MAP)))
176 
177 /* Overflow management */
178 /*
179  * Overflow page numbers are allocated per split point.  At each doubling of
180  * the table, we can allocate extra pages.  So, an overflow page number has
181  * the top 5 bits indicate which split point and the lower 11 bits indicate
182  * which page at that split point is indicated (pages within split points are
183  * numberered starting with 1).
184  */
185 
186 #define SPLITSHIFT 11
187 #define SPLITMASK 0x7FF
188 #define SPLITNUM(N) (((uint32)(N)) >> SPLITSHIFT)
189 #define OPAGENUM(N) ((N)&SPLITMASK)
190 #define OADDR_OF(S, O) ((uint32)((uint32)(S) << SPLITSHIFT) + (O))
191 
192 #define BUCKET_TO_PAGE(B) \
193     (B) + hashp->HDRPAGES + ((B) ? hashp->SPARES[dbm_log2((uint32)((B) + 1)) - 1] : 0)
194 #define OADDR_TO_PAGE(B) \
195     BUCKET_TO_PAGE((1 << SPLITNUM((B))) - 1) + OPAGENUM((B));
196 
197 /*
198  * page.h contains a detailed description of the page format.
199  *
200  * Normally, keys and data are accessed from offset tables in the top of
201  * each page which point to the beginning of the key and data.  There are
202  * four flag values which may be stored in these offset tables which indicate
203  * the following:
204  *
205  *
206  * OVFLPAGE Rather than a key data pair, this pair contains
207  *      the address of an overflow page.  The format of
208  *      the pair is:
209  *          OVERFLOW_PAGE_NUMBER OVFLPAGE
210  *
211  * PARTIAL_KEY  This must be the first key/data pair on a page
212  *      and implies that page contains only a partial key.
213  *      That is, the key is too big to fit on a single page
214  *      so it starts on this page and continues on the next.
215  *      The format of the page is:
216  *          KEY_OFF PARTIAL_KEY OVFL_PAGENO OVFLPAGE
217  *
218  *          KEY_OFF -- offset of the beginning of the key
219  *          PARTIAL_KEY -- 1
220  *          OVFL_PAGENO - page number of the next overflow page
221  *          OVFLPAGE -- 0
222  *
223  * FULL_KEY This must be the first key/data pair on the page.  It
224  *      is used in two cases.
225  *
226  *      Case 1:
227  *          There is a complete key on the page but no data
228  *          (because it wouldn't fit).  The next page contains
229  *          the data.
230  *
231  *          Page format it:
232  *          KEY_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
233  *
234  *          KEY_OFF -- offset of the beginning of the key
235  *          FULL_KEY -- 2
236  *          OVFL_PAGENO - page number of the next overflow page
237  *          OVFLPAGE -- 0
238  *
239  *      Case 2:
240  *          This page contains no key, but part of a large
241  *          data field, which is continued on the next page.
242  *
243  *          Page format it:
244  *          DATA_OFF FULL_KEY OVFL_PAGENO OVFL_PAGE
245  *
246  *          KEY_OFF -- offset of the beginning of the data on
247  *              this page
248  *          FULL_KEY -- 2
249  *          OVFL_PAGENO - page number of the next overflow page
250  *          OVFLPAGE -- 0
251  *
252  * FULL_KEY_DATA
253  *      This must be the first key/data pair on the page.
254  *      There are two cases:
255  *
256  *      Case 1:
257  *          This page contains a key and the beginning of the
258  *          data field, but the data field is continued on the
259  *          next page.
260  *
261  *          Page format is:
262  *          KEY_OFF FULL_KEY_DATA OVFL_PAGENO DATA_OFF
263  *
264  *          KEY_OFF -- offset of the beginning of the key
265  *          FULL_KEY_DATA -- 3
266  *          OVFL_PAGENO - page number of the next overflow page
267  *          DATA_OFF -- offset of the beginning of the data
268  *
269  *      Case 2:
270  *          This page contains the last page of a big data pair.
271  *          There is no key, only the  tail end of the data
272  *          on this page.
273  *
274  *          Page format is:
275  *          DATA_OFF FULL_KEY_DATA <OVFL_PAGENO> <OVFLPAGE>
276  *
277  *          DATA_OFF -- offset of the beginning of the data on
278  *              this page
279  *          FULL_KEY_DATA -- 3
280  *          OVFL_PAGENO - page number of the next overflow page
281  *          OVFLPAGE -- 0
282  *
283  *          OVFL_PAGENO and OVFLPAGE are optional (they are
284  *          not present if there is no next page).
285  */
286 
287 #define OVFLPAGE 0
288 #define PARTIAL_KEY 1
289 #define FULL_KEY 2
290 #define FULL_KEY_DATA 3
291 #define REAL_KEY 4
292 
293 /* Short hands for accessing structure */
294 #undef BSIZE
295 #define BSIZE hdr.bsize
296 #undef BSHIFT
297 #define BSHIFT hdr.bshift
298 #define DSIZE hdr.dsize
299 #define SGSIZE hdr.ssize
300 #define SSHIFT hdr.sshift
301 #define LORDER hdr.lorder
302 #define OVFL_POINT hdr.ovfl_point
303 #define LAST_FREED hdr.last_freed
304 #define MAX_BUCKET hdr.max_bucket
305 #define FFACTOR hdr.ffactor
306 #define HIGH_MASK hdr.high_mask
307 #define LOW_MASK hdr.low_mask
308 #define NKEYS hdr.nkeys
309 #define HDRPAGES hdr.hdrpages
310 #define SPARES hdr.spares
311 #define BITMAPS hdr.bitmaps
312 #define VERSION hdr.version
313 #define MAGIC hdr.magic
314 #define NEXT_FREE hdr.next_free
315 #define H_CHARKEY hdr.h_charkey
316 
317 extern uint32 (*dbm_default_hash)(const void *, size_t);
318 void dbm_buf_init(HTAB *hashp, int32 nbytes);
319 int dbm_big_delete(HTAB *hashp, BUFHEAD *bufp);
320 BUFHEAD *dbm_get_buf(HTAB *hashp, uint32 addr, BUFHEAD *prev_bp, int newpage);
321 uint32 dbm_call_hash(HTAB *hashp, char *k, size_t len);
322 #include "page.h"
323 extern int dbm_big_split(HTAB *hashp, BUFHEAD *op, BUFHEAD *np,
324                          BUFHEAD *big_keyp, uint32 addr, uint32 obucket, SPLIT_RETURN *ret);
325 void dbm_free_ovflpage(HTAB *hashp, BUFHEAD *obufp);
326 BUFHEAD *dbm_add_ovflpage(HTAB *hashp, BUFHEAD *bufp);
327 int dbm_big_insert(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val);
328 int dbm_expand_table(HTAB *hashp);
329 uint32 dbm_log2(uint32 num);
330 void dbm_reclaim_buf(HTAB *hashp, BUFHEAD *bp);
331 int dbm_get_page(HTAB *hashp, char *p, uint32 bucket, int is_bucket, int is_disk, int is_bitmap);
332 int dbm_put_page(HTAB *hashp, char *p, uint32 bucket, int is_bucket, int is_bitmap);
333 int dbm_ibitmap(HTAB *hashp, int pnum, int nbits, int ndx);
334 int dbm_buf_free(HTAB *hashp, int do_free, int to_disk);
335 int dbm_find_bigpair(HTAB *hashp, BUFHEAD *bufp, int ndx, char *key, int size);
336 uint16 dbm_find_last_page(HTAB *hashp, BUFHEAD **bpp);
337 int dbm_addel(HTAB *hashp, BUFHEAD *bufp, const DBT *key, const DBT *val);
338 int dbm_big_return(HTAB *hashp, BUFHEAD *bufp, int ndx, DBT *val, int set_current);
339 int dbm_delpair(HTAB *hashp, BUFHEAD *bufp, int ndx);
340 int dbm_big_keydata(HTAB *hashp, BUFHEAD *bufp, DBT *key, DBT *val, int set);
341 int dbm_split_page(HTAB *hashp, uint32 obucket, uint32 nbucket);
342