1 /*-
2  * Copyright (c) 1996, 2020 Oracle and/or its affiliates.  All rights reserved.
3  *
4  * See the file LICENSE for license information.
5  *
6  * $Id$
7  */
8 
9 #include "db_config.h"
10 
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/btree.h"
14 
15 static int __db_build_bi __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *));
16 static int __db_build_ri __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *));
17 static int __db_up_ovref __P((DB *, DB_FH *, db_pgno_t));
18 
19 #define	GET_PAGE(dbp, fhp, pgno, page) {				\
20 	if ((ret = __os_seek(						\
21 	    dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0)		\
22 		goto err;						\
23 	if ((ret = __os_read(dbp->env,				\
24 	    fhp, page, (dbp)->pgsize, &n)) != 0)			\
25 		goto err;						\
26 }
27 #define	PUT_PAGE(dbp, fhp, pgno, page) {				\
28 	if ((ret = __os_seek(						\
29 	    dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0)		\
30 		goto err;						\
31 	if ((ret = __os_write(dbp->env,				\
32 	    fhp, page, (dbp)->pgsize, &n)) != 0)			\
33 		goto err;						\
34 }
35 
36 /*
37  * __db_31_offdup --
38  *	Convert 3.0 off-page duplicates to 3.1 off-page duplicates.
39  *
40  *	This code and its descendants should be removed when support for
41  *	upgrading from a 3.0 database format is removed.
42  *
43  * PUBLIC: int __db_31_offdup __P((DB *, char *, DB_FH *, int, db_pgno_t *));
44  */
45 int
__db_31_offdup(dbp,real_name,fhp,sorted,pgnop)46 __db_31_offdup(dbp, real_name, fhp, sorted, pgnop)
47 	DB *dbp;
48 	char *real_name;
49 	DB_FH *fhp;
50 	int sorted;
51 	db_pgno_t *pgnop;
52 {
53 	PAGE *ipage, *page;
54 	db_indx_t indx;
55 	db_pgno_t cur_cnt, i, next_cnt, pgno, *pgno_cur, pgno_last;
56 	db_pgno_t *pgno_next, pgno_max, *tmp;
57 	db_recno_t nrecs;
58 	size_t n;
59 	int level, nomem, ret;
60 
61 	ipage = page = NULL;
62 	pgno_cur = pgno_next = NULL;
63 
64 	/* Allocate room to hold a page. */
65 	if ((ret = __os_malloc(dbp->env, dbp->pgsize, &page)) != 0)
66 		goto err;
67 
68 	/*
69 	 * Walk the chain of 3.0 off-page duplicates.  Each one is converted
70 	 * in place to a 3.1 off-page duplicate page.  If the duplicates are
71 	 * sorted, they are converted to a Btree leaf page, otherwise to a
72 	 * Recno leaf page.
73 	 */
74 	for (nrecs = 0, cur_cnt = pgno_max = 0,
75 	    pgno = *pgnop; pgno != PGNO_INVALID;) {
76 		if (pgno_max == cur_cnt) {
77 			pgno_max += 20;
78 			if ((ret = __os_realloc(dbp->env, pgno_max *
79 			    sizeof(db_pgno_t), &pgno_cur)) != 0)
80 				goto err;
81 		}
82 		pgno_cur[cur_cnt++] = pgno;
83 
84 		GET_PAGE(dbp, fhp, pgno, page);
85 		nrecs += NUM_ENT(page);
86 		LEVEL(page) = LEAFLEVEL;
87 		TYPE(page) = sorted ? P_LDUP : P_LRECNO;
88 		/*
89 		 * !!!
90 		 * DB didn't zero the LSNs on off-page duplicates pages.
91 		 */
92 		ZERO_LSN(LSN(page));
93 		PUT_PAGE(dbp, fhp, pgno, page);
94 
95 		pgno = NEXT_PGNO(page);
96 	}
97 
98 	/* If we only have a single page, it's easy. */
99 	if (cur_cnt <= 1)
100 		goto done;
101 
102 	/*
103 	 * pgno_cur is the list of pages we just converted.  We're
104 	 * going to walk that list, but we'll need to create a new
105 	 * list while we do so.
106 	 */
107 	if ((ret = __os_malloc(dbp->env,
108 	    cur_cnt * sizeof(db_pgno_t), &pgno_next)) != 0)
109 		goto err;
110 
111 	/* Figure out where we can start allocating new pages. */
112 	if ((ret = __db_lastpgno(dbp, real_name, fhp, &pgno_last)) != 0)
113 		goto err;
114 
115 	/* Allocate room for an internal page. */
116 	if ((ret = __os_malloc(dbp->env, dbp->pgsize, &ipage)) != 0)
117 		goto err;
118 	PGNO(ipage) = PGNO_INVALID;
119 
120 	/*
121 	 * Repeatedly walk the list of pages, building internal pages, until
122 	 * there's only one page at a level.
123 	 */
124 	for (level = LEAFLEVEL + 1; cur_cnt > 1; ++level) {
125 		for (indx = 0, i = next_cnt = 0; i < cur_cnt;) {
126 			if (indx == 0) {
127 				P_INIT(ipage, dbp->pgsize, pgno_last,
128 				    PGNO_INVALID, PGNO_INVALID,
129 				    level, sorted ? P_IBTREE : P_IRECNO);
130 				ZERO_LSN(LSN(ipage));
131 
132 				pgno_next[next_cnt++] = pgno_last++;
133 			}
134 
135 			GET_PAGE(dbp, fhp, pgno_cur[i], page);
136 
137 			/*
138 			 * If the duplicates are sorted, put the first item on
139 			 * the lower-level page onto a Btree internal page. If
140 			 * the duplicates are not sorted, create an internal
141 			 * Recno structure on the page.  If either case doesn't
142 			 * fit, push out the current page and start a new one.
143 			 */
144 			nomem = 0;
145 			if (sorted) {
146 				if ((ret = __db_build_bi(
147 				    dbp, fhp, ipage, page, indx, &nomem)) != 0)
148 					goto err;
149 			} else
150 				if ((ret = __db_build_ri(
151 				    dbp, fhp, ipage, page, indx, &nomem)) != 0)
152 					goto err;
153 			if (nomem) {
154 				indx = 0;
155 				PUT_PAGE(dbp, fhp, PGNO(ipage), ipage);
156 			} else {
157 				++indx;
158 				++NUM_ENT(ipage);
159 				++i;
160 			}
161 		}
162 
163 		/*
164 		 * Push out the last internal page.  Set the top-level record
165 		 * count if we've reached the top.
166 		 */
167 		if (next_cnt == 1)
168 			RE_NREC_SET(ipage, nrecs);
169 		PUT_PAGE(dbp, fhp, PGNO(ipage), ipage);
170 
171 		/* Swap the current and next page number arrays. */
172 		cur_cnt = next_cnt;
173 		tmp = pgno_cur;
174 		pgno_cur = pgno_next;
175 		pgno_next = tmp;
176 	}
177 
178 done:	*pgnop = pgno_cur[0];
179 
180 err:	if (pgno_cur != NULL)
181 		__os_free(dbp->env, pgno_cur);
182 	if (pgno_next != NULL)
183 		__os_free(dbp->env, pgno_next);
184 	if (ipage != NULL)
185 		__os_free(dbp->env, ipage);
186 	if (page != NULL)
187 		__os_free(dbp->env, page);
188 
189 	return (ret);
190 }
191 
192 /*
193  * __db_build_bi --
194  *	Build a BINTERNAL entry for a parent page.
195  */
196 static int
__db_build_bi(dbp,fhp,ipage,page,indx,nomemp)197 __db_build_bi(dbp, fhp, ipage, page, indx, nomemp)
198 	DB *dbp;
199 	DB_FH *fhp;
200 	PAGE *ipage, *page;
201 	u_int32_t indx;
202 	int *nomemp;
203 {
204 	BINTERNAL bi, *child_bi;
205 	BKEYDATA *child_bk;
206 	u_int8_t *p;
207 	int ret;
208 	db_indx_t *inp;
209 
210 	inp = P_INP(dbp, ipage);
211 	switch (TYPE(page)) {
212 	case P_IBTREE:
213 		child_bi = GET_BINTERNAL(dbp, page, 0);
214 		if (P_FREESPACE(dbp, ipage) < BINTERNAL_PSIZE(child_bi->len)) {
215 			*nomemp = 1;
216 			return (0);
217 		}
218 		inp[indx] =
219 		    HOFFSET(ipage) -= BINTERNAL_SIZE(child_bi->len);
220 		p = P_ENTRY(dbp, ipage, indx);
221 
222 		bi.len = child_bi->len;
223 		B_TSET(bi.type, child_bi->type);
224 		bi.pgno = PGNO(page);
225 		bi.nrecs = __bam_total(dbp, page);
226 		memcpy(p, &bi, SSZA(BINTERNAL, data));
227 		p += SSZA(BINTERNAL, data);
228 		memcpy(p, child_bi->data, child_bi->len);
229 
230 		/* Increment the overflow ref count. */
231 		if (B_TYPE(child_bi->type) == B_OVERFLOW)
232 			if ((ret = __db_up_ovref(dbp, fhp,
233 			    ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
234 				return (ret);
235 		break;
236 	case P_LDUP:
237 		child_bk = GET_BKEYDATA(dbp, page, 0);
238 		switch (B_TYPE(child_bk->type)) {
239 		case B_KEYDATA:
240 			if (P_FREESPACE(dbp, ipage) <
241 			    BINTERNAL_PSIZE(child_bk->len)) {
242 				*nomemp = 1;
243 				return (0);
244 			}
245 			inp[indx] =
246 			    HOFFSET(ipage) -= BINTERNAL_SIZE(child_bk->len);
247 			p = P_ENTRY(dbp, ipage, indx);
248 
249 			bi.len = child_bk->len;
250 			B_TSET(bi.type, child_bk->type);
251 			bi.pgno = PGNO(page);
252 			bi.nrecs = __bam_total(dbp, page);
253 			memcpy(p, &bi, SSZA(BINTERNAL, data));
254 			p += SSZA(BINTERNAL, data);
255 			memcpy(p, child_bk->data, child_bk->len);
256 			break;
257 		case B_OVERFLOW:
258 			if (P_FREESPACE(dbp, ipage) <
259 			    BINTERNAL_PSIZE(BOVERFLOW_SIZE)) {
260 				*nomemp = 1;
261 				return (0);
262 			}
263 			inp[indx] =
264 			    HOFFSET(ipage) -= BINTERNAL_SIZE(BOVERFLOW_SIZE);
265 			p = P_ENTRY(dbp, ipage, indx);
266 
267 			bi.len = BOVERFLOW_SIZE;
268 			B_TSET(bi.type, child_bk->type);
269 			bi.pgno = PGNO(page);
270 			bi.nrecs = __bam_total(dbp, page);
271 			memcpy(p, &bi, SSZA(BINTERNAL, data));
272 			p += SSZA(BINTERNAL, data);
273 			memcpy(p, child_bk, BOVERFLOW_SIZE);
274 
275 			/* Increment the overflow ref count. */
276 			if ((ret = __db_up_ovref(dbp, fhp,
277 			    ((BOVERFLOW *)child_bk)->pgno)) != 0)
278 				return (ret);
279 			break;
280 		default:
281 			return (__db_pgfmt(dbp->env, PGNO(page)));
282 		}
283 		break;
284 	default:
285 		return (__db_pgfmt(dbp->env, PGNO(page)));
286 	}
287 
288 	return (0);
289 }
290 
291 /*
292  * __db_build_ri --
293  *	Build a RINTERNAL entry for an internal parent page.
294  */
295 static int
__db_build_ri(dbp,fhp,ipage,page,indx,nomemp)296 __db_build_ri(dbp, fhp, ipage, page, indx, nomemp)
297 	DB *dbp;
298 	DB_FH *fhp;
299 	PAGE *ipage, *page;
300 	u_int32_t indx;
301 	int *nomemp;
302 {
303 	RINTERNAL ri;
304 	db_indx_t *inp;
305 
306 	COMPQUIET(fhp, NULL);
307 	inp = P_INP(dbp, ipage);
308 	if (P_FREESPACE(dbp, ipage) < RINTERNAL_PSIZE) {
309 		*nomemp = 1;
310 		return (0);
311 	}
312 
313 	ri.pgno = PGNO(page);
314 	ri.nrecs = __bam_total(dbp, page);
315 	inp[indx] = HOFFSET(ipage) -= RINTERNAL_SIZE;
316 	memcpy(P_ENTRY(dbp, ipage, indx), &ri, RINTERNAL_SIZE);
317 
318 	return (0);
319 }
320 
321 /*
322  * __db_up_ovref --
323  *	Increment the reference count on an overflow page.
324  */
325 static int
__db_up_ovref(dbp,fhp,pgno)326 __db_up_ovref(dbp, fhp, pgno)
327 	DB *dbp;
328 	DB_FH *fhp;
329 	db_pgno_t pgno;
330 {
331 	PAGE *page;
332 	size_t n;
333 	int ret;
334 
335 	/* Allocate room to hold a page. */
336 	if ((ret = __os_malloc(dbp->env, dbp->pgsize, &page)) != 0)
337 		return (ret);
338 
339 	GET_PAGE(dbp, fhp, pgno, page);
340 	++OV_REF(page);
341 	PUT_PAGE(dbp, fhp, pgno, page);
342 
343 err:	__os_free(dbp->env, page);
344 
345 	return (ret);
346 }
347