1 /*-
2 * Copyright (c) 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Margo Seltzer.
7 *
8 * %sccs.include.redist.c%
9 */
10
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)hash_bigkey.c 8.2 (Berkeley) 02/21/94";
13 #endif /* LIBC_SCCS and not lint */
14
15 /*
16 * PACKAGE: hash
17 * DESCRIPTION:
18 * Big key/data handling for the hashing package.
19 *
20 * ROUTINES:
21 * External
22 * __big_keydata
23 * __big_split
24 * __big_insert
25 * __big_return
26 * __big_delete
27 * __find_last_page
28 * Internal
29 * collect_key
30 * collect_data
31 */
32
33 #include <sys/param.h>
34
35 #include <errno.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 #ifdef DEBUG
41 #include <assert.h>
42 #endif
43
44 #include <db.h>
45 #include "hash.h"
46 #include "page.h"
47 #include "extern.h"
48
49 static int collect_key __P((HTAB *, BUFHEAD *, int, DBT *, int));
50 static int collect_data __P((HTAB *, BUFHEAD *, int, int));
51
52 /*
53 * Big_insert
54 *
55 * You need to do an insert and the key/data pair is too big
56 *
57 * Returns:
58 * 0 ==> OK
59 *-1 ==> ERROR
60 */
61 extern int
62 __big_insert(hashp, bufp, key, val)
63 HTAB *hashp;
64 BUFHEAD *bufp;
65 const DBT *key, *val;
66 {
67 register u_short *p;
68 int key_size, n, val_size;
69 u_short space, move_bytes, off;
70 char *cp, *key_data, *val_data;
71
72 cp = bufp->page; /* Character pointer of p. */
73 p = (u_short *)cp;
74
75 key_data = (char *)key->data;
76 key_size = key->size;
77 val_data = (char *)val->data;
78 val_size = val->size;
79
80 /* First move the Key */
81 for (space = FREESPACE(p) - BIGOVERHEAD; key_size;
82 space = FREESPACE(p) - BIGOVERHEAD) {
83 move_bytes = MIN(space, key_size);
84 off = OFFSET(p) - move_bytes;
85 memmove(cp + off, key_data, move_bytes);
86 key_size -= move_bytes;
87 key_data += move_bytes;
88 n = p[0];
89 p[++n] = off;
90 p[0] = ++n;
91 FREESPACE(p) = off - PAGE_META(n);
92 OFFSET(p) = off;
93 p[n] = PARTIAL_KEY;
94 bufp = __add_ovflpage(hashp, bufp);
95 if (!bufp)
96 return (-1);
97 n = p[0];
98 if (!key_size)
99 if (FREESPACE(p)) {
100 move_bytes = MIN(FREESPACE(p), val_size);
101 off = OFFSET(p) - move_bytes;
102 p[n] = off;
103 memmove(cp + off, val_data, move_bytes);
104 val_data += move_bytes;
105 val_size -= move_bytes;
106 p[n - 2] = FULL_KEY_DATA;
107 FREESPACE(p) = FREESPACE(p) - move_bytes;
108 OFFSET(p) = off;
109 } else
110 p[n - 2] = FULL_KEY;
111 p = (u_short *)bufp->page;
112 cp = bufp->page;
113 bufp->flags |= BUF_MOD;
114 }
115
116 /* Now move the data */
117 for (space = FREESPACE(p) - BIGOVERHEAD; val_size;
118 space = FREESPACE(p) - BIGOVERHEAD) {
119 move_bytes = MIN(space, val_size);
120 /*
121 * Here's the hack to make sure that if the data ends on the
122 * same page as the key ends, FREESPACE is at least one.
123 */
124 if (space == val_size && val_size == val->size)
125 move_bytes--;
126 off = OFFSET(p) - move_bytes;
127 memmove(cp + off, val_data, move_bytes);
128 val_size -= move_bytes;
129 val_data += move_bytes;
130 n = p[0];
131 p[++n] = off;
132 p[0] = ++n;
133 FREESPACE(p) = off - PAGE_META(n);
134 OFFSET(p) = off;
135 if (val_size) {
136 p[n] = FULL_KEY;
137 bufp = __add_ovflpage(hashp, bufp);
138 if (!bufp)
139 return (-1);
140 cp = bufp->page;
141 p = (u_short *)cp;
142 } else
143 p[n] = FULL_KEY_DATA;
144 bufp->flags |= BUF_MOD;
145 }
146 return (0);
147 }
148
149 /*
150 * Called when bufp's page contains a partial key (index should be 1)
151 *
152 * All pages in the big key/data pair except bufp are freed. We cannot
153 * free bufp because the page pointing to it is lost and we can't get rid
154 * of its pointer.
155 *
156 * Returns:
157 * 0 => OK
158 *-1 => ERROR
159 */
160 extern int
161 __big_delete(hashp, bufp)
162 HTAB *hashp;
163 BUFHEAD *bufp;
164 {
165 register BUFHEAD *last_bfp, *rbufp;
166 u_short *bp, pageno;
167 int key_done, n;
168
169 rbufp = bufp;
170 last_bfp = NULL;
171 bp = (u_short *)bufp->page;
172 pageno = 0;
173 key_done = 0;
174
175 while (!key_done || (bp[2] != FULL_KEY_DATA)) {
176 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA)
177 key_done = 1;
178
179 /*
180 * If there is freespace left on a FULL_KEY_DATA page, then
181 * the data is short and fits entirely on this page, and this
182 * is the last page.
183 */
184 if (bp[2] == FULL_KEY_DATA && FREESPACE(bp))
185 break;
186 pageno = bp[bp[0] - 1];
187 rbufp->flags |= BUF_MOD;
188 rbufp = __get_buf(hashp, pageno, rbufp, 0);
189 if (last_bfp)
190 __free_ovflpage(hashp, last_bfp);
191 last_bfp = rbufp;
192 if (!rbufp)
193 return (-1); /* Error. */
194 bp = (u_short *)rbufp->page;
195 }
196
197 /*
198 * If we get here then rbufp points to the last page of the big
199 * key/data pair. Bufp points to the first one -- it should now be
200 * empty pointing to the next page after this pair. Can't free it
201 * because we don't have the page pointing to it.
202 */
203
204 /* This is information from the last page of the pair. */
205 n = bp[0];
206 pageno = bp[n - 1];
207
208 /* Now, bp is the first page of the pair. */
209 bp = (u_short *)bufp->page;
210 if (n > 2) {
211 /* There is an overflow page. */
212 bp[1] = pageno;
213 bp[2] = OVFLPAGE;
214 bufp->ovfl = rbufp->ovfl;
215 } else
216 /* This is the last page. */
217 bufp->ovfl = NULL;
218 n -= 2;
219 bp[0] = n;
220 FREESPACE(bp) = hashp->BSIZE - PAGE_META(n);
221 OFFSET(bp) = hashp->BSIZE - 1;
222
223 bufp->flags |= BUF_MOD;
224 if (rbufp)
225 __free_ovflpage(hashp, rbufp);
226 if (last_bfp != rbufp)
227 __free_ovflpage(hashp, last_bfp);
228
229 hashp->NKEYS--;
230 return (0);
231 }
232 /*
233 * Returns:
234 * 0 = key not found
235 * -1 = get next overflow page
236 * -2 means key not found and this is big key/data
237 * -3 error
238 */
239 extern int
240 __find_bigpair(hashp, bufp, ndx, key, size)
241 HTAB *hashp;
242 BUFHEAD *bufp;
243 int ndx;
244 char *key;
245 int size;
246 {
247 register u_short *bp;
248 register char *p;
249 int ksize;
250 u_short bytes;
251 char *kkey;
252
253 bp = (u_short *)bufp->page;
254 p = bufp->page;
255 ksize = size;
256 kkey = key;
257
258 for (bytes = hashp->BSIZE - bp[ndx];
259 bytes <= size && bp[ndx + 1] == PARTIAL_KEY;
260 bytes = hashp->BSIZE - bp[ndx]) {
261 if (memcmp(p + bp[ndx], kkey, bytes))
262 return (-2);
263 kkey += bytes;
264 ksize -= bytes;
265 bufp = __get_buf(hashp, bp[ndx + 2], bufp, 0);
266 if (!bufp)
267 return (-3);
268 p = bufp->page;
269 bp = (u_short *)p;
270 ndx = 1;
271 }
272
273 if (bytes != ksize || memcmp(p + bp[ndx], kkey, bytes)) {
274 #ifdef HASH_STATISTICS
275 ++hash_collisions;
276 #endif
277 return (-2);
278 } else
279 return (ndx);
280 }
281
282 /*
283 * Given the buffer pointer of the first overflow page of a big pair,
284 * find the end of the big pair
285 *
286 * This will set bpp to the buffer header of the last page of the big pair.
287 * It will return the pageno of the overflow page following the last page
288 * of the pair; 0 if there isn't any (i.e. big pair is the last key in the
289 * bucket)
290 */
291 extern u_short
292 __find_last_page(hashp, bpp)
293 HTAB *hashp;
294 BUFHEAD **bpp;
295 {
296 BUFHEAD *bufp;
297 u_short *bp, pageno;
298 int n;
299
300 bufp = *bpp;
301 bp = (u_short *)bufp->page;
302 for (;;) {
303 n = bp[0];
304
305 /*
306 * This is the last page if: the tag is FULL_KEY_DATA and
307 * either only 2 entries OVFLPAGE marker is explicit there
308 * is freespace on the page.
309 */
310 if (bp[2] == FULL_KEY_DATA &&
311 ((n == 2) || (bp[n] == OVFLPAGE) || (FREESPACE(bp))))
312 break;
313
314 pageno = bp[n - 1];
315 bufp = __get_buf(hashp, pageno, bufp, 0);
316 if (!bufp)
317 return (0); /* Need to indicate an error! */
318 bp = (u_short *)bufp->page;
319 }
320
321 *bpp = bufp;
322 if (bp[0] > 2)
323 return (bp[3]);
324 else
325 return (0);
326 }
327
328 /*
329 * Return the data for the key/data pair that begins on this page at this
330 * index (index should always be 1).
331 */
332 extern int
333 __big_return(hashp, bufp, ndx, val, set_current)
334 HTAB *hashp;
335 BUFHEAD *bufp;
336 int ndx;
337 DBT *val;
338 int set_current;
339 {
340 BUFHEAD *save_p;
341 u_short *bp, len, off, save_addr;
342 char *tp;
343
344 bp = (u_short *)bufp->page;
345 while (bp[ndx + 1] == PARTIAL_KEY) {
346 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
347 if (!bufp)
348 return (-1);
349 bp = (u_short *)bufp->page;
350 ndx = 1;
351 }
352
353 if (bp[ndx + 1] == FULL_KEY) {
354 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
355 if (!bufp)
356 return (-1);
357 bp = (u_short *)bufp->page;
358 save_p = bufp;
359 save_addr = save_p->addr;
360 off = bp[1];
361 len = 0;
362 } else
363 if (!FREESPACE(bp)) {
364 /*
365 * This is a hack. We can't distinguish between
366 * FULL_KEY_DATA that contains complete data or
367 * incomplete data, so we require that if the data
368 * is complete, there is at least 1 byte of free
369 * space left.
370 */
371 off = bp[bp[0]];
372 len = bp[1] - off;
373 save_p = bufp;
374 save_addr = bufp->addr;
375 bufp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
376 if (!bufp)
377 return (-1);
378 bp = (u_short *)bufp->page;
379 } else {
380 /* The data is all on one page. */
381 tp = (char *)bp;
382 off = bp[bp[0]];
383 val->data = (u_char *)tp + off;
384 val->size = bp[1] - off;
385 if (set_current) {
386 if (bp[0] == 2) { /* No more buckets in
387 * chain */
388 hashp->cpage = NULL;
389 hashp->cbucket++;
390 hashp->cndx = 1;
391 } else {
392 hashp->cpage = __get_buf(hashp,
393 bp[bp[0] - 1], bufp, 0);
394 if (!hashp->cpage)
395 return (-1);
396 hashp->cndx = 1;
397 if (!((u_short *)
398 hashp->cpage->page)[0]) {
399 hashp->cbucket++;
400 hashp->cpage = NULL;
401 }
402 }
403 }
404 return (0);
405 }
406
407 val->size = collect_data(hashp, bufp, (int)len, set_current);
408 if (val->size == -1)
409 return (-1);
410 if (save_p->addr != save_addr) {
411 /* We are pretty short on buffers. */
412 errno = EINVAL; /* OUT OF BUFFERS */
413 return (-1);
414 }
415 memmove(hashp->tmp_buf, (save_p->page) + off, len);
416 val->data = (u_char *)hashp->tmp_buf;
417 return (0);
418 }
419 /*
420 * Count how big the total datasize is by recursing through the pages. Then
421 * allocate a buffer and copy the data as you recurse up.
422 */
423 static int
collect_data(hashp,bufp,len,set)424 collect_data(hashp, bufp, len, set)
425 HTAB *hashp;
426 BUFHEAD *bufp;
427 int len, set;
428 {
429 register u_short *bp;
430 register char *p;
431 BUFHEAD *xbp;
432 u_short save_addr;
433 int mylen, totlen;
434
435 p = bufp->page;
436 bp = (u_short *)p;
437 mylen = hashp->BSIZE - bp[1];
438 save_addr = bufp->addr;
439
440 if (bp[2] == FULL_KEY_DATA) { /* End of Data */
441 totlen = len + mylen;
442 if (hashp->tmp_buf)
443 free(hashp->tmp_buf);
444 if ((hashp->tmp_buf = (char *)malloc(totlen)) == NULL)
445 return (-1);
446 if (set) {
447 hashp->cndx = 1;
448 if (bp[0] == 2) { /* No more buckets in chain */
449 hashp->cpage = NULL;
450 hashp->cbucket++;
451 } else {
452 hashp->cpage =
453 __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
454 if (!hashp->cpage)
455 return (-1);
456 else if (!((u_short *)hashp->cpage->page)[0]) {
457 hashp->cbucket++;
458 hashp->cpage = NULL;
459 }
460 }
461 }
462 } else {
463 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
464 if (!xbp || ((totlen =
465 collect_data(hashp, xbp, len + mylen, set)) < 1))
466 return (-1);
467 }
468 if (bufp->addr != save_addr) {
469 errno = EINVAL; /* Out of buffers. */
470 return (-1);
471 }
472 memmove(&hashp->tmp_buf[len], (bufp->page) + bp[1], mylen);
473 return (totlen);
474 }
475
476 /*
477 * Fill in the key and data for this big pair.
478 */
479 extern int
480 __big_keydata(hashp, bufp, key, val, set)
481 HTAB *hashp;
482 BUFHEAD *bufp;
483 DBT *key, *val;
484 int set;
485 {
486 key->size = collect_key(hashp, bufp, 0, val, set);
487 if (key->size == -1)
488 return (-1);
489 key->data = (u_char *)hashp->tmp_key;
490 return (0);
491 }
492
493 /*
494 * Count how big the total key size is by recursing through the pages. Then
495 * collect the data, allocate a buffer and copy the key as you recurse up.
496 */
497 static int
collect_key(hashp,bufp,len,val,set)498 collect_key(hashp, bufp, len, val, set)
499 HTAB *hashp;
500 BUFHEAD *bufp;
501 int len;
502 DBT *val;
503 int set;
504 {
505 BUFHEAD *xbp;
506 char *p;
507 int mylen, totlen;
508 u_short *bp, save_addr;
509
510 p = bufp->page;
511 bp = (u_short *)p;
512 mylen = hashp->BSIZE - bp[1];
513
514 save_addr = bufp->addr;
515 totlen = len + mylen;
516 if (bp[2] == FULL_KEY || bp[2] == FULL_KEY_DATA) { /* End of Key. */
517 if (hashp->tmp_key != NULL)
518 free(hashp->tmp_key);
519 if ((hashp->tmp_key = (char *)malloc(totlen)) == NULL)
520 return (-1);
521 if (__big_return(hashp, bufp, 1, val, set))
522 return (-1);
523 } else {
524 xbp = __get_buf(hashp, bp[bp[0] - 1], bufp, 0);
525 if (!xbp || ((totlen =
526 collect_key(hashp, xbp, totlen, val, set)) < 1))
527 return (-1);
528 }
529 if (bufp->addr != save_addr) {
530 errno = EINVAL; /* MIS -- OUT OF BUFFERS */
531 return (-1);
532 }
533 memmove(&hashp->tmp_key[len], (bufp->page) + bp[1], mylen);
534 return (totlen);
535 }
536
537 /*
538 * Returns:
539 * 0 => OK
540 * -1 => error
541 */
542 extern int
543 __big_split(hashp, op, np, big_keyp, addr, obucket, ret)
544 HTAB *hashp;
545 BUFHEAD *op; /* Pointer to where to put keys that go in old bucket */
546 BUFHEAD *np; /* Pointer to new bucket page */
547 /* Pointer to first page containing the big key/data */
548 BUFHEAD *big_keyp;
549 int addr; /* Address of big_keyp */
550 u_int obucket;/* Old Bucket */
551 SPLIT_RETURN *ret;
552 {
553 register BUFHEAD *tmpp;
554 register u_short *tp;
555 BUFHEAD *bp;
556 DBT key, val;
557 u_int change;
558 u_short free_space, n, off;
559
560 bp = big_keyp;
561
562 /* Now figure out where the big key/data goes */
563 if (__big_keydata(hashp, big_keyp, &key, &val, 0))
564 return (-1);
565 change = (__call_hash(hashp, key.data, key.size) != obucket);
566
567 if (ret->next_addr = __find_last_page(hashp, &big_keyp)) {
568 if (!(ret->nextp =
569 __get_buf(hashp, ret->next_addr, big_keyp, 0)))
570 return (-1);;
571 } else
572 ret->nextp = NULL;
573
574 /* Now make one of np/op point to the big key/data pair */
575 #ifdef DEBUG
576 assert(np->ovfl == NULL);
577 #endif
578 if (change)
579 tmpp = np;
580 else
581 tmpp = op;
582
583 tmpp->flags |= BUF_MOD;
584 #ifdef DEBUG1
585 (void)fprintf(stderr,
586 "BIG_SPLIT: %d->ovfl was %d is now %d\n", tmpp->addr,
587 (tmpp->ovfl ? tmpp->ovfl->addr : 0), (bp ? bp->addr : 0));
588 #endif
589 tmpp->ovfl = bp; /* one of op/np point to big_keyp */
590 tp = (u_short *)tmpp->page;
591 #ifdef DEBUG
592 assert(FREESPACE(tp) >= OVFLSIZE);
593 #endif
594 n = tp[0];
595 off = OFFSET(tp);
596 free_space = FREESPACE(tp);
597 tp[++n] = (u_short)addr;
598 tp[++n] = OVFLPAGE;
599 tp[0] = n;
600 OFFSET(tp) = off;
601 FREESPACE(tp) = free_space - OVFLSIZE;
602
603 /*
604 * Finally, set the new and old return values. BIG_KEYP contains a
605 * pointer to the last page of the big key_data pair. Make sure that
606 * big_keyp has no following page (2 elements) or create an empty
607 * following page.
608 */
609
610 ret->newp = np;
611 ret->oldp = op;
612
613 tp = (u_short *)big_keyp->page;
614 big_keyp->flags |= BUF_MOD;
615 if (tp[0] > 2) {
616 /*
617 * There may be either one or two offsets on this page. If
618 * there is one, then the overflow page is linked on normally
619 * and tp[4] is OVFLPAGE. If there are two, tp[4] contains
620 * the second offset and needs to get stuffed in after the
621 * next overflow page is added.
622 */
623 n = tp[4];
624 free_space = FREESPACE(tp);
625 off = OFFSET(tp);
626 tp[0] -= 2;
627 FREESPACE(tp) = free_space + OVFLSIZE;
628 OFFSET(tp) = off;
629 tmpp = __add_ovflpage(hashp, big_keyp);
630 if (!tmpp)
631 return (-1);
632 tp[4] = n;
633 } else
634 tmpp = big_keyp;
635
636 if (change)
637 ret->newp = tmpp;
638 else
639 ret->oldp = tmpp;
640 return (0);
641 }
642