1 /* 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Margo Seltzer. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 /* 38 * PACKAGE: hash 39 * DESCRIPTION: 40 * Big key/data handling for the hashing package. 41 * 42 * ROUTINES: 43 * External 44 * __big_keydata 45 * __big_split 46 * __big_insert 47 * __big_return 48 * __big_delete 49 * __find_last_page 50 * Internal 51 * collect_key 52 * collect_data 53 */ 54 #include <sys/types.h> 55 56 #include <stdlib.h> 57 #include <string.h> 58 59 #ifdef DEBUG 60 #include <assert.h> 61 #endif 62 63 #include "db-int.h" 64 #include "hash.h" 65 #include "page.h" 66 #include "extern.h" 67 68 static int32_t collect_key __P((HTAB *, PAGE16 *, int32_t, db_pgno_t *)); 69 static int32_t collect_data __P((HTAB *, PAGE16 *, int32_t)); 70 71 /* 72 * Big_insert 73 * 74 * You need to do an insert and the key/data pair is greater than 75 * MINFILL * the bucket size 76 * 77 * Returns: 78 * 0 ==> OK 79 * -1 ==> ERROR 80 */ 81 int32_t 82 __big_insert(hashp, pagep, key, val) 83 HTAB *hashp; 84 PAGE16 *pagep; 85 const DBT *key, *val; 86 { 87 size_t key_size, val_size; 88 indx_t key_move_bytes, val_move_bytes; 89 int8_t *key_data, *val_data, base_page; 90 91 key_data = (int8_t *)key->data; 92 key_size = key->size; 93 val_data = (int8_t *)val->data; 94 val_size = val->size; 95 96 NUM_ENT(pagep) = NUM_ENT(pagep) + 1; 97 98 for (base_page = 1; key_size + val_size;) { 99 /* Add a page! */ 100 pagep = 101 __add_bigpage(hashp, pagep, NUM_ENT(pagep) - 1, base_page); 102 if (!pagep) 103 return (-1); 104 105 /* There's just going to be one entry on this page. */ 106 NUM_ENT(pagep) = 1; 107 108 /* Move the key's data. */ 109 key_move_bytes = MIN(FREESPACE(pagep), key_size); 110 /* Mark the page as to how much key & data is on this page. */ 111 BIGKEYLEN(pagep) = key_move_bytes; 112 val_move_bytes = 113 MIN(FREESPACE(pagep) - key_move_bytes, val_size); 114 BIGDATALEN(pagep) = val_move_bytes; 115 116 /* Note big pages build beginning --> end, not vice versa. */ 117 if (key_move_bytes) 118 memmove(BIGKEY(pagep), key_data, key_move_bytes); 119 if (val_move_bytes) 120 memmove(BIGDATA(pagep), val_data, val_move_bytes); 121 122 key_size -= key_move_bytes; 123 key_data += key_move_bytes; 124 val_size -= val_move_bytes; 125 val_data += val_move_bytes; 126 127 base_page = 0; 128 } 129 __put_page(hashp, pagep, A_RAW, 1); 130 return (0); 131 } 132 133 /* 134 * Called when we need to delete a big pair. 135 * 136 * Returns: 137 * 0 => OK 138 * -1 => ERROR 139 */ 140 int32_t 141 #ifdef __STDC__ 142 __big_delete(HTAB *hashp, PAGE16 *pagep, indx_t ndx) 143 #else 144 __big_delete(hashp, pagep, ndx) 145 HTAB *hashp; 146 PAGE16 *pagep; 147 u_int32_t ndx; /* Index of big pair on base page. */ 148 #endif 149 { 150 PAGE16 *last_pagep; 151 152 /* Get first page with big key/data. */ 153 pagep = __get_page(hashp, OADDR_TO_PAGE(DATA_OFF(pagep, ndx)), A_RAW); 154 if (!pagep) 155 return (-1); 156 157 /* 158 * Traverse through the pages, freeing the previous one (except 159 * the first) at each new page. 160 */ 161 while (NEXT_PGNO(pagep) != INVALID_PGNO) { 162 last_pagep = pagep; 163 pagep = __get_page(hashp, NEXT_PGNO(pagep), A_RAW); 164 if (!pagep) 165 return (-1); 166 __delete_page(hashp, last_pagep, A_OVFL); 167 } 168 169 /* Free the last page in the chain. */ 170 __delete_page(hashp, pagep, A_OVFL); 171 return (0); 172 } 173 174 /* 175 * Given a key, indicates whether the big key at cursorp matches the 176 * given key. 177 * 178 * Returns: 179 * 1 = Found! 180 * 0 = Key not found 181 * -1 error 182 */ 183 int32_t 184 __find_bigpair(hashp, cursorp, key, size) 185 HTAB *hashp; 186 CURSOR *cursorp; 187 int8_t *key; 188 int32_t size; 189 { 190 PAGE16 *pagep, *hold_pagep; 191 db_pgno_t next_pgno; 192 int32_t ksize; 193 int8_t *kkey; 194 195 ksize = size; 196 kkey = key; 197 198 hold_pagep = NULL; 199 /* Chances are, hashp->cpage is the base page. */ 200 if (cursorp->pagep) 201 pagep = hold_pagep = cursorp->pagep; 202 else { 203 pagep = __get_page(hashp, cursorp->pgno, A_RAW); 204 if (!pagep) 205 return (-1); 206 } 207 208 /* 209 * Now, get the first page with the big stuff on it. 210 * 211 * XXX 212 * KLUDGE: we know that cursor is looking at the _next_ item, so 213 * we have to look at pgndx - 1. 214 */ 215 next_pgno = OADDR_TO_PAGE(DATA_OFF(pagep, (cursorp->pgndx - 1))); 216 if (!hold_pagep) 217 __put_page(hashp, pagep, A_RAW, 0); 218 pagep = __get_page(hashp, next_pgno, A_RAW); 219 if (!pagep) 220 return (-1); 221 222 /* While there are both keys to compare. */ 223 while ((ksize > 0) && (BIGKEYLEN(pagep))) { 224 if (ksize < KEY_OFF(pagep, 0) || 225 memcmp(BIGKEY(pagep), kkey, BIGKEYLEN(pagep))) { 226 __put_page(hashp, pagep, A_RAW, 0); 227 return (0); 228 } 229 kkey += BIGKEYLEN(pagep); 230 ksize -= BIGKEYLEN(pagep); 231 if (NEXT_PGNO(pagep) != INVALID_PGNO) { 232 next_pgno = NEXT_PGNO(pagep); 233 __put_page(hashp, pagep, A_RAW, 0); 234 pagep = __get_page(hashp, next_pgno, A_RAW); 235 if (!pagep) 236 return (-1); 237 } 238 } 239 __put_page(hashp, pagep, A_RAW, 0); 240 #ifdef DEBUG 241 assert(ksize >= 0); 242 #endif 243 if (ksize != 0) { 244 #ifdef HASH_STATISTICS 245 ++hash_collisions; 246 #endif 247 return (0); 248 } else 249 return (1); 250 } 251 252 /* 253 * Fill in the key and data for this big pair. 254 */ 255 int32_t 256 __big_keydata(hashp, pagep, key, val, ndx) 257 HTAB *hashp; 258 PAGE16 *pagep; 259 DBT *key, *val; 260 int32_t ndx; 261 { 262 ITEM_INFO ii; 263 PAGE16 *key_pagep; 264 db_pgno_t last_page; 265 266 key_pagep = 267 __get_page(hashp, OADDR_TO_PAGE(DATA_OFF(pagep, ndx)), A_RAW); 268 if (!key_pagep) 269 return (-1); 270 key->size = collect_key(hashp, key_pagep, 0, &last_page); 271 key->data = hashp->bigkey_buf; 272 __put_page(hashp, key_pagep, A_RAW, 0); 273 274 if (key->size == -1) 275 return (-1); 276 277 /* Create an item_info to direct __big_return to the beginning pgno. */ 278 ii.pgno = last_page; 279 return (__big_return(hashp, &ii, val, 1)); 280 } 281 282 /* 283 * Return the big key on page, ndx. 284 */ 285 int32_t 286 #ifdef __STDC__ 287 __get_bigkey(HTAB *hashp, PAGE16 *pagep, indx_t ndx, DBT *key) 288 #else 289 __get_bigkey(hashp, pagep, ndx, key) 290 HTAB *hashp; 291 PAGE16 *pagep; 292 u_int32_t ndx; 293 DBT *key; 294 #endif 295 { 296 PAGE16 *key_pagep; 297 298 key_pagep = 299 __get_page(hashp, OADDR_TO_PAGE(DATA_OFF(pagep, ndx)), A_RAW); 300 if (!pagep) 301 return (-1); 302 key->size = collect_key(hashp, key_pagep, 0, NULL); 303 key->data = hashp->bigkey_buf; 304 305 __put_page(hashp, key_pagep, A_RAW, 0); 306 307 return (0); 308 } 309 310 /* 311 * Return the big key and data indicated in item_info. 312 */ 313 int32_t 314 __big_return(hashp, item_info, val, on_bigkey_page) 315 HTAB *hashp; 316 ITEM_INFO *item_info; 317 DBT *val; 318 int32_t on_bigkey_page; 319 { 320 PAGE16 *pagep; 321 db_pgno_t next_pgno; 322 323 if (!on_bigkey_page) { 324 /* Get first page with big pair on it. */ 325 pagep = __get_page(hashp, 326 OADDR_TO_PAGE(item_info->data_off), A_RAW); 327 if (!pagep) 328 return (-1); 329 } else { 330 pagep = __get_page(hashp, item_info->pgno, A_RAW); 331 if (!pagep) 332 return (-1); 333 } 334 335 /* Traverse through the bigkey pages until a page with data is found. */ 336 while (!BIGDATALEN(pagep)) { 337 next_pgno = NEXT_PGNO(pagep); 338 __put_page(hashp, pagep, A_RAW, 0); 339 pagep = __get_page(hashp, next_pgno, A_RAW); 340 if (!pagep) 341 return (-1); 342 } 343 344 val->size = collect_data(hashp, pagep, 0); 345 if (val->size < 1) 346 return (-1); 347 val->data = (void *)hashp->bigdata_buf; 348 349 __put_page(hashp, pagep, A_RAW, 0); 350 return (0); 351 } 352 353 /* 354 * Given a page with a big key on it, traverse through the pages counting data 355 * length, and collect all of the data on the way up. Store the key in 356 * hashp->bigkey_buf. last_page indicates to the calling function what the 357 * last page with key on it is; this will help if you later want to retrieve 358 * the data portion. 359 * 360 * Does the work for __get_bigkey. 361 * 362 * Return total length of data; -1 if error. 363 */ 364 static int32_t 365 collect_key(hashp, pagep, len, last_page) 366 HTAB *hashp; 367 PAGE16 *pagep; 368 int32_t len; 369 db_pgno_t *last_page; 370 { 371 PAGE16 *next_pagep; 372 int32_t totlen, retval; 373 db_pgno_t next_pgno; 374 #ifdef DEBUG 375 db_pgno_t save_addr; 376 #endif 377 378 /* If this is the last page with key. */ 379 if (BIGDATALEN(pagep)) { 380 totlen = len + BIGKEYLEN(pagep); 381 if (hashp->bigkey_buf) 382 free(hashp->bigkey_buf); 383 hashp->bigkey_buf = (u_int8_t *)malloc(totlen); 384 if (!hashp->bigkey_buf) 385 return (-1); 386 memcpy(hashp->bigkey_buf + len, 387 BIGKEY(pagep), BIGKEYLEN(pagep)); 388 if (last_page) 389 *last_page = ADDR(pagep); 390 return (totlen); 391 } 392 393 /* Key filled up all of last key page, so we've gone 1 too far. */ 394 if (BIGKEYLEN(pagep) == 0) { 395 if (hashp->bigkey_buf) 396 free(hashp->bigkey_buf); 397 hashp->bigkey_buf = (u_int8_t *)malloc(len); 398 return (hashp->bigkey_buf ? len : -1); 399 } 400 totlen = len + BIGKEYLEN(pagep); 401 402 /* Set pagep to the next page in the chain. */ 403 if (last_page) 404 *last_page = ADDR(pagep); 405 next_pgno = NEXT_PGNO(pagep); 406 next_pagep = __get_page(hashp, next_pgno, A_RAW); 407 if (!next_pagep) 408 return (-1); 409 #ifdef DEBUG 410 save_addr = ADDR(pagep); 411 #endif 412 retval = collect_key(hashp, next_pagep, totlen, last_page); 413 414 #ifdef DEBUG 415 assert(save_addr == ADDR(pagep)); 416 #endif 417 memcpy(hashp->bigkey_buf + len, BIGKEY(pagep), BIGKEYLEN(pagep)); 418 __put_page(hashp, next_pagep, A_RAW, 0); 419 420 return (retval); 421 } 422 423 /* 424 * Given a page with big data on it, recur through the pages counting data 425 * length, and collect all of the data on the way up. Store the data in 426 * hashp->bigdata_buf. 427 * 428 * Does the work for __big_return. 429 * 430 * Return total length of data; -1 if error. 431 */ 432 static int32_t 433 collect_data(hashp, pagep, len) 434 HTAB *hashp; 435 PAGE16 *pagep; 436 int32_t len; 437 { 438 PAGE16 *next_pagep; 439 int32_t totlen, retval; 440 db_pgno_t next_pgno; 441 #ifdef DEBUG 442 db_pgno_t save_addr; 443 #endif 444 445 /* If there is no next page. */ 446 if (NEXT_PGNO(pagep) == INVALID_PGNO) { 447 if (hashp->bigdata_buf) 448 free(hashp->bigdata_buf); 449 totlen = len + BIGDATALEN(pagep); 450 hashp->bigdata_buf = (u_int8_t *)malloc(totlen); 451 if (!hashp->bigdata_buf) 452 return (-1); 453 memcpy(hashp->bigdata_buf + totlen - BIGDATALEN(pagep), 454 BIGDATA(pagep), BIGDATALEN(pagep)); 455 return (totlen); 456 } 457 totlen = len + BIGDATALEN(pagep); 458 459 /* Set pagep to the next page in the chain. */ 460 next_pgno = NEXT_PGNO(pagep); 461 next_pagep = __get_page(hashp, next_pgno, A_RAW); 462 if (!next_pagep) 463 return (-1); 464 465 #ifdef DEBUG 466 save_addr = ADDR(pagep); 467 #endif 468 retval = collect_data(hashp, next_pagep, totlen); 469 #ifdef DEBUG 470 assert(save_addr == ADDR(pagep)); 471 #endif 472 memcpy(hashp->bigdata_buf + totlen - BIGDATALEN(pagep), 473 BIGDATA(pagep), BIGDATALEN(pagep)); 474 __put_page(hashp, next_pagep, A_RAW, 0); 475 476 return (retval); 477 } 478