1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)bt_overflow.c 8.5 (Berkeley) 7/16/94 33 * $DragonFly: src/lib/libc/db/btree/bt_overflow.c,v 1.4 2005/11/12 23:01:54 swildner Exp $ 34 */ 35 36 #include <sys/param.h> 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 42 #include <db.h> 43 #include "btree.h" 44 45 /* 46 * Big key/data code. 47 * 48 * Big key and data entries are stored on linked lists of pages. The initial 49 * reference is byte string stored with the key or data and is the page number 50 * and size. The actual record is stored in a chain of pages linked by the 51 * nextpg field of the PAGE header. 52 * 53 * The first page of the chain has a special property. If the record is used 54 * by an internal page, it cannot be deleted and the P_PRESERVE bit will be set 55 * in the header. 56 * 57 * XXX 58 * A single DBT is written to each chain, so a lot of space on the last page 59 * is wasted. This is a fairly major bug for some data sets. 60 */ 61 62 /* 63 * __OVFL_GET -- Get an overflow key/data item. 64 * 65 * Parameters: 66 * t: tree 67 * p: pointer to { pgno_t, u_int32_t } 68 * buf: storage address 69 * bufsz: storage size 70 * 71 * Returns: 72 * RET_ERROR, RET_SUCCESS 73 */ 74 int 75 __ovfl_get(BTREE *t, void *p, size_t *ssz, void **buf, size_t *bufsz) 76 { 77 PAGE *h; 78 pgno_t pg; 79 size_t nb, plen; 80 u_int32_t sz; 81 82 memmove(&pg, p, sizeof(pgno_t)); 83 memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); 84 *ssz = sz; 85 86 #ifdef DEBUG 87 if (pg == P_INVALID || sz == 0) 88 abort(); 89 #endif 90 /* Make the buffer bigger as necessary. */ 91 if (*bufsz < sz) { 92 *buf = (char *)(*buf == NULL ? malloc(sz) : reallocf(*buf, sz)); 93 if (*buf == NULL) 94 return (RET_ERROR); 95 *bufsz = sz; 96 } 97 98 /* 99 * Step through the linked list of pages, copying the data on each one 100 * into the buffer. Never copy more than the data's length. 101 */ 102 plen = t->bt_psize - BTDATAOFF; 103 for (p = *buf;; p = (char *)p + nb, pg = h->nextpg) { 104 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 105 return (RET_ERROR); 106 107 nb = MIN(sz, plen); 108 memmove(p, (char *)h + BTDATAOFF, nb); 109 mpool_put(t->bt_mp, h, 0); 110 111 if ((sz -= nb) == 0) 112 break; 113 } 114 return (RET_SUCCESS); 115 } 116 117 /* 118 * __OVFL_PUT -- Store an overflow key/data item. 119 * 120 * Parameters: 121 * t: tree 122 * data: DBT to store 123 * pgno: storage page number 124 * 125 * Returns: 126 * RET_ERROR, RET_SUCCESS 127 */ 128 int 129 __ovfl_put(BTREE *t, const DBT *dbt, pgno_t *pg) 130 { 131 PAGE *h, *last; 132 void *p; 133 pgno_t npg; 134 size_t nb, plen; 135 u_int32_t sz; 136 137 /* 138 * Allocate pages and copy the key/data record into them. Store the 139 * number of the first page in the chain. 140 */ 141 plen = t->bt_psize - BTDATAOFF; 142 for (last = NULL, p = dbt->data, sz = dbt->size;; 143 p = (char *)p + plen, last = h) { 144 if ((h = __bt_new(t, &npg)) == NULL) 145 return (RET_ERROR); 146 147 h->pgno = npg; 148 h->nextpg = h->prevpg = P_INVALID; 149 h->flags = P_OVERFLOW; 150 h->lower = h->upper = 0; 151 152 nb = MIN(sz, plen); 153 memmove((char *)h + BTDATAOFF, p, nb); 154 155 if (last) { 156 last->nextpg = h->pgno; 157 mpool_put(t->bt_mp, last, MPOOL_DIRTY); 158 } else 159 *pg = h->pgno; 160 161 if ((sz -= nb) == 0) { 162 mpool_put(t->bt_mp, h, MPOOL_DIRTY); 163 break; 164 } 165 } 166 return (RET_SUCCESS); 167 } 168 169 /* 170 * __OVFL_DELETE -- Delete an overflow chain. 171 * 172 * Parameters: 173 * t: tree 174 * p: pointer to { pgno_t, u_int32_t } 175 * 176 * Returns: 177 * RET_ERROR, RET_SUCCESS 178 */ 179 int 180 __ovfl_delete(BTREE *t, void *p) 181 { 182 PAGE *h; 183 pgno_t pg; 184 size_t plen; 185 u_int32_t sz; 186 187 memmove(&pg, p, sizeof(pgno_t)); 188 memmove(&sz, (char *)p + sizeof(pgno_t), sizeof(u_int32_t)); 189 190 #ifdef DEBUG 191 if (pg == P_INVALID || sz == 0) 192 abort(); 193 #endif 194 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 195 return (RET_ERROR); 196 197 /* Don't delete chains used by internal pages. */ 198 if (h->flags & P_PRESERVE) { 199 mpool_put(t->bt_mp, h, 0); 200 return (RET_SUCCESS); 201 } 202 203 /* Step through the chain, calling the free routine for each page. */ 204 for (plen = t->bt_psize - BTDATAOFF;; sz -= plen) { 205 pg = h->nextpg; 206 __bt_free(t, h); 207 if (sz <= plen) 208 break; 209 if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL) 210 return (RET_ERROR); 211 } 212 return (RET_SUCCESS); 213 } 214