xref: /original-bsd/lib/libc/db/btree/bt_get.c (revision e0c0d005)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Mike Olson.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)bt_get.c	5.2 (Berkeley) 02/22/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 #include <sys/param.h>
16 #include <db.h>
17 #include <errno.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <unistd.h>
21 #include "btree.h"
22 
23 /*
24  *  BT_GETPAGE -- Make pgno the current page of the btree.
25  *
26  *	This routine is just a wrapper that decides whether to call the
27  *	memory or disk-based routine to do the work.
28  *
29  *	Parameters:
30  *		t -- btree in which to get page
31  *		pgno -- page number to get
32  *
33  *	Returns:
34  *		RET_SUCCESS or RET_ERROR.
35  */
36 
37 int
38 _bt_getpage(t, pgno)
39 	BTREE_P t;
40 	pgno_t pgno;
41 {
42 #ifdef DEBUG
43 	if (pgno == P_NONE)
44 		_punt();
45 #endif /* DEBUG */
46 
47 	/* see if we can get away without doing any work */
48 	if (t->bt_curpage != (BTHEADER *) NULL) {
49 		if (t->bt_curpage->h_pgno == pgno)
50 			return (RET_SUCCESS);
51 	}
52 
53 	if (t->bt_fname == (char *) NULL)
54 		return (_bt_getmpage(t, pgno));
55 	else
56 		return (_bt_getdpage(t, pgno));
57 }
58 
59 /*
60  *  _BT_GETMPAGE -- Make pgno the current page of the btree.
61  *
62  *	This routine gets pages for in-memory btrees.
63  *
64  *	Parameters:
65  *		t -- btree in which to get page
66  *		pgno -- page number to get
67  *
68  *	Returns:
69  *		RET_SUCCESS or RET_ERROR.
70  */
71 
72 int
73 _bt_getmpage(t, pgno)
74 	register BTREE_P t;
75 	pgno_t pgno;
76 {
77 	int htindex;
78 	BTHEADER *h;
79 	HTBUCKET *b;
80 
81 	if (t->bt_curpage == (BTHEADER *) NULL) {
82 		if (pgno != P_ROOT) {
83 			errno = EBADF;
84 			return (RET_ERROR);
85 		}
86 
87 		t->bt_npages++;
88 		h = (BTHEADER *) malloc((unsigned) t->bt_psize);
89 		if (h == (BTHEADER *) NULL)
90 			return (RET_ERROR);
91 
92 		h->h_pgno = P_ROOT;
93 		h->h_flags = F_LEAF;
94 		h->h_lower = (index_t)
95 				(((char *) &(h->h_linp[0])) - ((char *) h));
96 		h->h_upper = t->bt_psize;
97 		h->h_prevpg = h->h_nextpg = P_NONE;
98 
99 		t->bt_curpage = h;
100 
101 		/* get the root page into the hash table */
102 		if (_bt_write(t, h, RELEASE) == RET_ERROR)
103 			return (RET_ERROR);
104 	}
105 
106 	htindex = HASHKEY(pgno);
107 
108 	for (b = t->bt_s.bt_ht[htindex];
109 	     b != (HTBUCKET *) NULL;
110 	     b = b->ht_next) {
111 		if (b->ht_pgno == pgno) {
112 			t->bt_curpage = b->ht_page;
113 			return (RET_SUCCESS);
114 		}
115 	}
116 	return (RET_ERROR);
117 }
118 
119 /*
120  *  _BT_GETDPAGE -- Make pgno the current page of the btree.
121  *
122  *	This routine gets pages for disk btrees.
123  *
124  *	Because disk btree pages must be readable across machine architectures,
125  *	the btree code writes integers out in network format.  This routine
126  *	converts them back to host format before returning the page.
127  *
128  *	Parameters:
129  *		t -- btree in which to get page
130  *		pgno -- page number to get
131  *
132  *	Returns:
133  *		RET_SUCCESS, RET_ERROR.
134  */
135 
136 int
137 _bt_getdpage(t, pgno)
138 	register BTREE_P t;
139 	pgno_t pgno;
140 {
141 	BTHEADER *h;
142 	char *cache;
143 	long pos;
144 	int n, nbytes;
145 
146 	/* if we have an lru cache, let the cache code do the work */
147 	if (ISCACHE(t)) {
148 		cache = t->bt_s.bt_d.d_cache;
149 
150 		/* release the old page */
151 		if (t->bt_curpage != (BTHEADER *) NULL) {
152 			pgno_t opgno = t->bt_curpage->h_pgno;
153 			t->bt_curpage->h_flags &= ~F_DIRTY;
154 
155 			if (lruwrite(cache, (int) opgno) < 0)
156 				return (RET_ERROR);
157 
158 			if (lrurelease(cache, (int) opgno) < 0)
159 				return (RET_ERROR);
160 		}
161 
162 		if (pgno > t->bt_npages) {
163 			if ((h = (BTHEADER *) lrugetnew(cache, (int)pgno, &nbytes))
164 			    == (BTHEADER *) NULL)
165 				return (RET_ERROR);
166 			t->bt_npages = pgno;
167 		} else {
168 			if ((h = (BTHEADER *) lruget(cache, (int)pgno, &nbytes))
169 			    == (BTHEADER *) NULL)
170 				return (RET_ERROR);
171 		}
172 
173 		/* init this page, if necessary */
174 		if (nbytes == 0) {
175 			h->h_pgno = pgno;
176 			h->h_flags = F_LEAF;
177 			h->h_lower = (index_t)
178 				(((char *) &(h->h_linp[0])) - ((char *) h));
179 			h->h_upper = t->bt_psize;
180 			h->h_prevpg = h->h_nextpg = P_NONE;
181 		}
182 
183 		t->bt_curpage = h;
184 
185 		return (RET_SUCCESS);
186 	}
187 
188 	/* sync the current page, if necessary */
189 	if (t->bt_curpage != (BTHEADER *) NULL) {
190 		if (t->bt_curpage->h_flags & F_DIRTY)
191 			if (_bt_write(t, t->bt_curpage, RELEASE) == RET_ERROR)
192 				return (RET_ERROR);
193 	} else {
194 		if (t->bt_npages == 0)
195 			t->bt_npages = 1;
196 
197 		/* if no current page, get space for one */
198 		if ((t->bt_curpage = (BTHEADER *) malloc((unsigned) t->bt_psize))
199 		    == (BTHEADER *) NULL) {
200 			return (RET_ERROR);
201 		}
202 	}
203 
204 	n = t->bt_psize;
205 	pos = (long) (pgno * n);
206 
207 	/* seek to correct location in file */
208 	if (lseek(t->bt_s.bt_d.d_fd, pos, L_SET) != pos) {
209 		return (RET_ERROR);
210 	}
211 
212 	/* read the page */
213 	if ((nbytes = read(t->bt_s.bt_d.d_fd, t->bt_curpage, n)) < n) {
214 
215 		/*
216 		 *  If we didn't get a full page, we must have gotten no
217 		 *  data at all -- in which case we're trying to read a
218 		 *  root page that doesn't exist yet.  This is the only
219 		 *  case in which this is okay.  If this happens, construct
220 		 *  an empty root page by hand.
221 		 */
222 		if (nbytes != 0 || pgno != P_ROOT) {
223 			errno = EBADF;
224 			return (RET_ERROR);
225 		}
226 
227 		h = (BTHEADER *) t->bt_curpage;
228 		h->h_pgno = pgno;
229 		h->h_flags = F_LEAF;
230 		h->h_lower = (index_t)
231 				(((char *) &(h->h_linp[0])) - ((char *) h));
232 		h->h_upper = t->bt_psize;
233 		h->h_prevpg = h->h_nextpg = P_NONE;
234 	} else
235 		(void) _bt_pgin(t->bt_curpage, (char *) t->bt_lorder);
236 
237 	return (RET_SUCCESS);
238 }
239 
240 /*
241  *  _BT_PGOUT, _BT_PGIN -- Convert host-specific number layout to/from
242  *			   the host-independent format stored on disk.
243  *
244  *	Parameters:
245  *		h -- page to convert
246  *		_lorder -- byte order for pages (stored as a char * in the
247  *			   cache, and passed around as a magic cookie).
248  *
249  *	Returns:
250  *		RET_SUCCESS (lru code requires a return value).
251  *
252  *	Side Effects:
253  *		Layout of tree metadata on the page is changed in place.
254  *
255  *	Warnings:
256  *		Everywhere else in the code, the types pgno_t and index_t
257  *		are opaque.  These two routines know what they really
258  *		are.
259  */
260 
261 int
262 _bt_pgout(h, _lorder)
263 	BTHEADER *h;
264 	char *_lorder;
265 {
266 	int i;
267 	int top;
268 	int lorder;
269 	DATUM *d;
270 	IDATUM *id;
271 	size_t chain;
272 
273 	lorder = (int) _lorder;
274 	if (lorder == BYTE_ORDER)
275 		return (RET_SUCCESS);
276 
277 	if (h->h_flags & F_LEAF) {
278 		if (h->h_flags & F_CONT) {
279 			if (h->h_prevpg == P_NONE) {
280 				size_t longsz;
281 
282 				(void) bcopy((char *) &(h->h_linp[0]),
283 					      (char *) &longsz,
284 					      sizeof(longsz));
285 				BLSWAP(longsz);
286 				(void) bcopy((char *) &longsz,
287 					      (char *) &(h->h_linp[0]),
288 					      sizeof(longsz));
289 			}
290 		} else {
291 			top = NEXTINDEX(h);
292 			for (i = 0; i < top; i++) {
293 				d = (DATUM *) GETDATUM(h, i);
294 				if (d->d_flags & D_BIGKEY) {
295 					(void) bcopy((char *) &(d->d_bytes[0]),
296 						      (char *) &chain,
297 						      sizeof(chain));
298 					BLSWAP(chain);
299 					(void) bcopy((char *) &chain,
300 						      (char *) &(d->d_bytes[0]),
301 						      sizeof(chain));
302 				}
303 				if (d->d_flags & D_BIGDATA) {
304 					(void) bcopy((char *) &(d->d_bytes[d->d_ksize]),
305 						      (char *) &chain,
306 						      sizeof(chain));
307 					BLSWAP(chain);
308 					(void) bcopy((char *) &chain,
309 						      (char *) &(d->d_bytes[d->d_ksize]),
310 						      sizeof(chain));
311 				}
312 				BLSWAP(d->d_dsize);
313 				BLSWAP(d->d_ksize);
314 				BLSWAP(d->d_flags);
315 				BLSWAP(h->h_linp[i]);
316 			}
317 		}
318 	} else {
319 		top = NEXTINDEX(h);
320 		for (i = 0; i < top; i++) {
321 			id = (IDATUM *) GETDATUM(h, i);
322 			BLSWAP(id->i_size);
323 			BLSWAP(id->i_pgno);
324 			BLSWAP(id->i_flags);
325 			if (id->i_flags & D_BIGKEY) {
326 				(void) bcopy((char *) &(id->i_bytes[0]),
327 					      (char *) &chain,
328 					      sizeof(chain));
329 				BLSWAP(chain);
330 				(void) bcopy((char *) &chain,
331 					      (char *) &(id->i_bytes[0]),
332 					      sizeof(chain));
333 			}
334 			BLSWAP(h->h_linp[i]);
335 		}
336 	}
337 	BLSWAP(h->h_flags);
338 	BLSWAP(h->h_pgno);
339 	BLSWAP(h->h_prevpg);
340 	BLSWAP(h->h_nextpg);
341 	BLSWAP(h->h_lower);
342 	BLSWAP(h->h_upper);
343 
344 	return (RET_SUCCESS);
345 }
346 
347 int
348 _bt_pgin(h, _lorder)
349 	BTHEADER *h;
350 	char *_lorder;
351 {
352 	int i;
353 	int top;
354 	int lorder;
355 	DATUM *d;
356 	IDATUM *id;
357 	size_t chain;
358 
359 	/*
360 	 *  If btree pages are to be stored in the host byte order, don't
361 	 *  bother swapping.
362 	 */
363 	lorder = (int) _lorder;
364 	if (lorder == BYTE_ORDER)
365 		return (RET_SUCCESS);
366 
367 	BLSWAP(h->h_upper);
368 	BLSWAP(h->h_lower);
369 	BLSWAP(h->h_nextpg);
370 	BLSWAP(h->h_prevpg);
371 	BLSWAP(h->h_pgno);
372 	BLSWAP(h->h_flags);
373 
374 	if (h->h_flags & F_LEAF) {
375 		if (h->h_flags & F_CONT) {
376 			if (h->h_prevpg == P_NONE) {
377 				size_t longsz;
378 
379 				(void) bcopy((char *) &(h->h_linp[0]),
380 					      (char *) &longsz,
381 					      sizeof(longsz));
382 				BLSWAP(longsz);
383 				(void) bcopy((char *) &longsz,
384 					      (char *) &(h->h_linp[0]),
385 					      sizeof(longsz));
386 			}
387 		} else {
388 			top = NEXTINDEX(h);
389 			for (i = 0; i < top; i++) {
390 				BLSWAP(h->h_linp[i]);
391 				d = (DATUM *) GETDATUM(h, i);
392 				BLSWAP(d->d_dsize);
393 				BLSWAP(d->d_ksize);
394 				BLSWAP(d->d_flags);
395 				if (d->d_flags & D_BIGKEY) {
396 					(void) bcopy((char *) &(d->d_bytes[0]),
397 						      (char *) &chain,
398 						      sizeof(chain));
399 					BLSWAP(chain);
400 					(void) bcopy((char *) &chain,
401 						      (char *) &(d->d_bytes[0]),
402 						      sizeof(chain));
403 				}
404 				if (d->d_flags & D_BIGDATA) {
405 					(void) bcopy((char *) &(d->d_bytes[d->d_ksize]),
406 						      (char *) &chain,
407 						      sizeof(chain));
408 					BLSWAP(chain);
409 					(void) bcopy((char *) &chain,
410 						      (char *) &(d->d_bytes[d->d_ksize]),
411 						      sizeof(chain));
412 				}
413 			}
414 		}
415 	} else {
416 		top = NEXTINDEX(h);
417 		for (i = 0; i < top; i++) {
418 			BLSWAP(h->h_linp[i]);
419 			id = (IDATUM *) GETDATUM(h, i);
420 			BLSWAP(id->i_size);
421 			BLSWAP(id->i_pgno);
422 			BLSWAP(id->i_flags);
423 			if (id->i_flags & D_BIGKEY) {
424 				(void) bcopy((char *) &(id->i_bytes[0]),
425 					      (char *) &chain,
426 					      sizeof(chain));
427 				BLSWAP(chain);
428 				(void) bcopy((char *) &chain,
429 					      (char *) &(id->i_bytes[0]),
430 					      sizeof(chain));
431 			}
432 		}
433 	}
434 	return (RET_SUCCESS);
435 }
436 
437 /*
438  *  _BT_ALLOCPG -- allocate a new page in the btree.
439  *
440  *	This is called when we split a page, to get space to do the split.
441  *	For disk btrees, these pages are released when the split is done.
442  *	For memory btrees, they are not.
443  *
444  *	Parameters:
445  *		t -- tree in which to do split
446  *
447  *	Returns:
448  *		Pointer to the newly-allocated page
449  */
450 
451 BTHEADER *
452 _bt_allocpg(t)
453 	BTREE_P t;
454 {
455 	BTHEADER *h = t->bt_curpage;
456 	BTHEADER *nh;
457 	int nbytes;
458 
459 	/* if we have a cache, let the cache code do the work */
460 	if (ISDISK(t) && ISCACHE(t)) {
461 		nh = (BTHEADER *) lrugetnew(t->bt_s.bt_d.d_cache,
462 					    (int) (t->bt_npages + 1),
463 					    &nbytes);
464 	} else {
465 		nh = (BTHEADER *) malloc((unsigned) t->bt_psize);
466 	}
467 
468 	if (nh != (BTHEADER *) NULL) {
469 		nh->h_pgno = nh->h_prevpg = nh->h_nextpg = P_NONE;
470 		nh->h_flags = h->h_flags;
471 		nh->h_lower = (index_t)
472 				(((char *) &(nh->h_linp[0])) - ((char *) nh));
473 		nh->h_upper = t->bt_psize;
474 	}
475 
476 	return (nh);
477 }
478 
479 /*
480  *  _BT_WRITE -- Write a specific page to a btree file.
481  *
482  *	Parameters:
483  *		t -- btree to get the page
484  *		h -- page to write
485  *		relflag -- (int) this page may/may not be released
486  *
487  *	Returns:
488  *		RET_SUCCESS, RET_ERROR.
489  *
490  *	Side Effects:
491  *		Writes a metadata page if none has been written yet.
492  */
493 
494 int
495 _bt_write(t, h, relflag)
496 	BTREE_P t;
497 	BTHEADER *h;
498 	int relflag;
499 {
500 	long pos;
501 	int htindex;
502 	HTBUCKET *b;
503 	char *cache;
504 	pgno_t pgno;
505 
506 	h->h_flags &= ~F_DIRTY;
507 	if (ISDISK(t)) {
508 
509 		/* if we haven't done so yet, write the metadata */
510 		if (!(t->bt_flags & BTF_METAOK)) {
511 			if (_bt_wrtmeta(t) == RET_ERROR)
512 				return (RET_ERROR);
513 		}
514 
515 		pgno = h->h_pgno;
516 
517 
518 		/* if we have a cache, let the cache code do the work */
519 		if ((cache = t->bt_s.bt_d.d_cache) != (char *) NULL) {
520 			if (lruwrite(cache, (int) pgno) < 0)
521 				return (RET_ERROR);
522 			if (relflag && lrurelease(cache, (int) pgno) < 0)
523 				return (RET_ERROR);
524 
525 		} else {
526 			(void) _bt_pgout(h, (char *) t->bt_lorder);
527 			/* now write the current page */
528 			pos = (long) (pgno * t->bt_psize);
529 			if (lseek(t->bt_s.bt_d.d_fd, pos, L_SET) != pos)
530 				return (RET_ERROR);
531 			if (write(t->bt_s.bt_d.d_fd, (char *) h, (int) t->bt_psize)
532 			    < t->bt_psize)
533 				return (RET_ERROR);
534 			if (!relflag)
535 				(void) _bt_pgin(h, (char *) t->bt_lorder);
536 		}
537 	} else {
538 		/* in-memory btree */
539 		htindex = HASHKEY(h->h_pgno);
540 
541 		/* see if we need to overwrite existing entry */
542 		for (b = t->bt_s.bt_ht[htindex];
543 		     b != (HTBUCKET *) NULL;
544 		     b = b->ht_next) {
545 			if (b->ht_pgno == h->h_pgno) {
546 				b->ht_page = h;
547 				return (RET_SUCCESS);
548 			}
549 		}
550 
551 		/* new entry, write it */
552 		b = (HTBUCKET *) malloc((unsigned) sizeof (HTBUCKET));
553 		if (b == (HTBUCKET *) NULL)
554 			return (RET_ERROR);
555 
556 		b->ht_pgno = h->h_pgno;
557 		b->ht_page = h;
558 		b->ht_next = t->bt_s.bt_ht[htindex];
559 		t->bt_s.bt_ht[htindex] = b;
560 	}
561 	return (RET_SUCCESS);
562 }
563 
564 /*
565  *  _BT_RELEASE -- Release a locked-down cache page
566  *
567  *	During page splits, we want to force pages out to the cache
568  *	before we actually release them, in some cases.  This routine
569  *	releases such a page when it is no longer needed.
570  *
571  *	Parameters:
572  *		t -- btree in which to release page
573  *		h -- page to release
574  *
575  *	Returns:
576  *		RET_SUCCESS, RET_ERROR.
577  *
578  *	Side Effects:
579  *		None.
580  */
581 
582 int
583 _bt_release(t, h)
584 	BTREE_P t;
585 	BTHEADER *h;
586 {
587 	if (ISDISK(t) && ISCACHE(t)) {
588 		if (lrurelease(t->bt_s.bt_d.d_cache, (int) (h->h_pgno)) < 0)
589 			return (RET_ERROR);
590 	}
591 	return (RET_SUCCESS);
592 }
593 
594 /*
595  *  _BT_WRTMETA -- Write metadata to the btree.
596  *
597  *	Parameters:
598  *		t -- tree to which to write
599  *
600  *	Returns:
601  *		RET_SUCCESS, RET_ERROR.
602  */
603 
604 int
605 _bt_wrtmeta(t)
606 	BTREE_P t;
607 {
608 	BTMETA m;
609 
610 	if (lseek(t->bt_s.bt_d.d_fd, 0l, L_SET) != 0l)
611 		return (RET_ERROR);
612 
613 	/* lorder has to be in host-independent format */
614 	m.m_lorder = (u_long) htonl((long) t->bt_lorder);
615 
616 	m.m_magic = BTREEMAGIC;
617 	m.m_version = BTREEVERSION;
618 	m.m_psize = t->bt_psize;
619 	m.m_free = t->bt_free;
620 	m.m_flags = t->bt_flags & BTF_NODUPS;
621 
622 	if (t->bt_lorder != BYTE_ORDER) {
623 		BLSWAP(m.m_magic);
624 		BLSWAP(m.m_version);
625 		BLSWAP(m.m_psize);
626 		BLSWAP(m.m_free);
627 		BLSWAP(m.m_flags);
628 	}
629 
630 	if (write(t->bt_s.bt_d.d_fd, (char *) &m, sizeof(m))
631 	    != sizeof(m)) {
632 		return (RET_ERROR);
633 	}
634 
635 	t->bt_flags |= BTF_METAOK;
636 
637 	return (RET_SUCCESS);
638 }
639