1 /*- 2 * Copyright (c) 1990 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Mike Olson. 7 * 8 * %sccs.include.redist.c% 9 */ 10 11 #if defined(LIBC_SCCS) && !defined(lint) 12 static char sccsid[] = "@(#)bt_overflow.c 5.2 (Berkeley) 02/22/91"; 13 #endif /* LIBC_SCCS and not lint */ 14 15 #include <sys/types.h> 16 #include <db.h> 17 #include <stdlib.h> 18 #include <string.h> 19 #include "btree.h" 20 21 /* 22 * _BT_GETBIG -- Get big data from indirect pages. 23 * 24 * This routine chases indirect blocks for the big object at the 25 * specified page to a buffer, and return the address of the buffer. 26 * 27 * Parameters: 28 * t -- btree with the indirect blocks 29 * pgno -- page number that starts the chain 30 * p -- (char **) to get the address of the buffer containing 31 * the key or datum. 32 * sz -- pointer to an int to get the size of the instantiated 33 * object. 34 * 35 * Returns: 36 * RET_SUCCESS, RET_ERROR. 37 * 38 * Side Effects: 39 * None. 40 */ 41 42 int 43 _bt_getbig(t, pgno, p, sz) 44 BTREE_P t; 45 pgno_t pgno; 46 char **p; 47 int *sz; 48 { 49 pgno_t save; 50 size_t nbytes; 51 size_t nhere; 52 BTHEADER *h; 53 char *top, *from, *where; 54 55 save = t->bt_curpage->h_pgno; 56 if (_bt_getpage(t, pgno) == RET_ERROR) 57 return (RET_ERROR); 58 59 h = t->bt_curpage; 60 61 bcopy((char *) &(h->h_linp[0]), 62 (char *) &nbytes, 63 (size_t) sizeof(nbytes)); 64 65 if ((*p = (char *) malloc(nbytes)) == (char *) NULL) 66 return (RET_ERROR); 67 68 *sz = nbytes; 69 from = ((char *) (&h->h_linp[0])) + sizeof(nbytes); 70 top = ((char *) h) + t->bt_psize; 71 72 /* need more space for data? */ 73 74 where = *p; 75 76 while (nbytes > 0) { 77 nhere = (int) (top - from); 78 if (nhere > nbytes) { 79 (void) bcopy(from, where, (size_t) nbytes); 80 nbytes = 0; 81 } else { 82 (void) bcopy(from, where, nhere); 83 where += nhere; 84 nbytes -= nhere; 85 if (_bt_getpage(t, h->h_nextpg) == RET_ERROR) 86 return (RET_ERROR); 87 h = t->bt_curpage; 88 top = ((char *) h) + t->bt_psize; 89 from = (char *) &(h->h_linp[0]); 90 } 91 } 92 93 if (_bt_getpage(t, save) == RET_ERROR) 94 return (RET_ERROR); 95 96 return (RET_SUCCESS); 97 } 98 99 /* 100 * _BT_DELINDIR -- Delete a chain of indirect blocks from the btree. 101 * 102 * When a large item is deleted from the tree, this routine puts the 103 * space that it occupied onto the free list for later reuse. The 104 * bt_free entry in the btree structure points at the head of this list. 105 * This value is also stored on disk in the btree's metadata. 106 * 107 * Parameters: 108 * t -- btree from which to delete pages 109 * chain -- page number that starts the chain. 110 * 111 * Returns: 112 * RET_SUCCESS, RET_ERROR. 113 * 114 * Side Effects: 115 * Invalidates the current on-disk version of the btree's 116 * metadata (if any), and updates the free list appropriately. 117 */ 118 119 int 120 _bt_delindir(t, chain) 121 BTREE_P t; 122 pgno_t chain; 123 { 124 BTHEADER *h; 125 pgno_t save; 126 pgno_t oldfree; 127 128 h = t->bt_curpage; 129 save = h->h_pgno; 130 if (_bt_getpage(t, chain) == RET_ERROR) 131 return (RET_ERROR); 132 133 /* 134 * If some internal node is pointing at this chain, don't 135 * delete it. 136 */ 137 138 if (t->bt_curpage->h_flags & F_PRESERVE) { 139 if (_bt_getpage(t, save) == RET_ERROR) 140 return (RET_ERROR); 141 return (RET_SUCCESS); 142 } 143 144 /* if there's nothing on the free list, this is easy... */ 145 if (t->bt_free == P_NONE) { 146 t->bt_free = chain; 147 } else { 148 oldfree = t->bt_free; 149 150 /* find the end of the data chain for the deleted datum */ 151 t->bt_free = chain; 152 do { 153 if (_bt_getpage(t, chain) == RET_ERROR) 154 return (RET_ERROR); 155 h = t->bt_curpage; 156 if (h->h_nextpg != P_NONE) 157 chain = h->h_nextpg; 158 } while (h->h_nextpg != P_NONE); 159 160 /* link freed pages into free list */ 161 h->h_nextpg = oldfree; 162 if (_bt_write(t, h, RELEASE) == RET_ERROR) 163 return (RET_ERROR); 164 if (_bt_getpage(t, oldfree) == RET_ERROR) 165 return (RET_ERROR); 166 h = t->bt_curpage; 167 h->h_prevpg = chain; 168 if (_bt_write(t, h, RELEASE) == RET_ERROR) 169 return (RET_ERROR); 170 } 171 172 /* restore the tree's current page pointer */ 173 if (_bt_getpage(t, save) == RET_ERROR) 174 return (RET_ERROR); 175 176 /* we have trashed the tree metadata; rewrite it later */ 177 t->bt_flags &= ~BTF_METAOK; 178 179 return (RET_SUCCESS); 180 } 181 182 /* 183 * _BT_INDIRECT -- Write a series of indirect pages for big objects. 184 * 185 * A chain of indirect pages looks like 186 * 187 * +-------------------+ +---------------------+ 188 * |hdr|size| | |hdr| | 189 * +---+----+ | +---+ | 190 * | ... data ... | | ... data ... | ... 191 * | | | | 192 * +-------------------+ +---------------------+ 193 * 194 * where hdr is a standard btree page header (with the indirect bit 195 * set), size on the first page is the real size of the datum, and 196 * data are bytes of the datum, split across as many pages as necessary. 197 * Indirect pages are chained together with the h_prevpg and h_nextpg 198 * entries of the page header struct. 199 * 200 * A single DBT is written per chain, so space on the last page is 201 * wasted. 202 * 203 * We return the page number of the start of the chain. 204 * 205 * When a big object is deleted from a tree, the space that it occupied 206 * is placed on a free list for later reuse. This routine checks that 207 * free list before allocating new pages to the big datum being inserted. 208 * 209 * Parameters: 210 * t -- btree in which to store indirect blocks 211 * data -- DBT with the big datum in it 212 * pgno -- place to put the starting page number of the chain 213 * 214 * Returns: 215 * RET_SUCCESS, RET_ERROR. 216 * 217 * Side Effects: 218 * Current page is changed on return. 219 */ 220 221 int 222 _bt_indirect(t, data, pgno) 223 BTREE_P t; 224 DBT *data; 225 pgno_t *pgno; 226 { 227 pgno_t prev; 228 char *top; 229 char *where; 230 char *from; 231 size_t dsize; 232 pgno_t nextchn; 233 int ischain; 234 BTHEADER *cur; 235 236 /* get set for first page in chain */ 237 prev = P_NONE; 238 dsize = data->size; 239 from = (char *) data->data; 240 241 /* if there are blocks on the free list, use them first */ 242 if ((*pgno = t->bt_free) == P_NONE) { 243 if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) 244 return (RET_ERROR); 245 246 ischain = 0; 247 *pgno = cur->h_pgno = ++(t->bt_npages); 248 } else { 249 if (_bt_getpage(t, *pgno) == RET_ERROR) 250 return (RET_ERROR); 251 ischain = 1; 252 cur = t->bt_curpage; 253 } 254 255 cur->h_flags = F_CONT|F_LEAF; 256 (void) bcopy((char *) &dsize, (char *) &cur->h_linp[0], sizeof(size_t)); 257 where = ((char *) (&cur->h_linp[0])) + sizeof(size_t); 258 259 /* fill and write pages in the chain */ 260 for (;;) { 261 int nhere; 262 263 top = ((char *) cur) + t->bt_psize; 264 cur->h_prevpg = prev; 265 nextchn = cur->h_nextpg; 266 nhere = (int) (top - where); 267 268 if (nhere >= dsize) { 269 (void) bcopy(from, where, (int) dsize); 270 cur->h_nextpg = P_NONE; 271 dsize = 0; 272 } else { 273 (void) bcopy(from, where, (int) nhere); 274 dsize -= nhere; 275 from += nhere; 276 if (nextchn == P_NONE) 277 cur->h_nextpg = t->bt_npages + 1; 278 prev = cur->h_pgno; 279 } 280 281 /* current page is ready to go; write it out */ 282 if (_bt_write(t, cur, RELEASE) == RET_ERROR) 283 return (RET_ERROR); 284 285 /* free the current page, if appropriate */ 286 if (ISDISK(t) && !ISCACHE(t) && !ischain) { 287 (void) free ((char *) cur); 288 } 289 290 /* done? */ 291 if (dsize == 0) 292 break; 293 294 /* allocate another page */ 295 if (nextchn == P_NONE) { 296 if ((cur = _bt_allocpg(t)) == (BTHEADER *) NULL) 297 return (RET_ERROR); 298 ischain = 0; 299 cur->h_pgno = ++(t->bt_npages); 300 } else { 301 if (_bt_getpage(t, nextchn) == RET_ERROR) 302 return (RET_ERROR); 303 ischain = 1; 304 cur = t->bt_curpage; 305 } 306 cur->h_flags = F_LEAF | F_CONT; 307 308 where = (char *) (&cur->h_linp[0]); 309 } 310 311 /* if we used pages from the free list, record changes to it */ 312 if (*pgno == t->bt_free) { 313 t->bt_free = nextchn; 314 t->bt_flags &= ~BTF_METAOK; 315 } 316 317 return (RET_SUCCESS); 318 } 319 320 /* 321 * _BT_MARKCHAIN -- Mark a chain of pages as used by an internal node. 322 * 323 * Chains of indirect blocks pointed to by leaf nodes get reclaimed 324 * when the item that points to them gets deleted. Chains pointed 325 * to by internal nodes never get deleted. This routine marks a 326 * chain as pointed to by an internal node. 327 * 328 * Parameters: 329 * t -- tree in which to mark 330 * chain -- number of first page in chain 331 * 332 * Returns: 333 * RET_SUCCESS, RET_ERROR. 334 * 335 * Side Effects: 336 * None. 337 */ 338 339 int 340 _bt_markchain(t, chain) 341 BTREE_P t; 342 pgno_t chain; 343 { 344 pgno_t save; 345 346 save = t->bt_curpage->h_pgno; 347 348 if (_bt_getpage(t, chain) == RET_ERROR) 349 return (RET_ERROR); 350 351 t->bt_curpage->h_flags |= (F_DIRTY|F_PRESERVE); 352 353 if (_bt_getpage(t, save) == RET_ERROR) 354 return (RET_ERROR); 355 356 return (RET_SUCCESS); 357 } 358