1 /*-
2 * Copyright (c) 1996, 2020 Oracle and/or its affiliates. All rights reserved.
3 *
4 * See the file LICENSE for license information.
5 *
6 * $Id$
7 */
8
9 #include "db_config.h"
10
11 #include "db_int.h"
12 #include "dbinc/db_page.h"
13 #include "dbinc/btree.h"
14
15 static int __db_build_bi __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *));
16 static int __db_build_ri __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *));
17 static int __db_up_ovref __P((DB *, DB_FH *, db_pgno_t));
18
19 #define GET_PAGE(dbp, fhp, pgno, page) { \
20 if ((ret = __os_seek( \
21 dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0) \
22 goto err; \
23 if ((ret = __os_read(dbp->env, \
24 fhp, page, (dbp)->pgsize, &n)) != 0) \
25 goto err; \
26 }
27 #define PUT_PAGE(dbp, fhp, pgno, page) { \
28 if ((ret = __os_seek( \
29 dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0) \
30 goto err; \
31 if ((ret = __os_write(dbp->env, \
32 fhp, page, (dbp)->pgsize, &n)) != 0) \
33 goto err; \
34 }
35
36 /*
37 * __db_31_offdup --
38 * Convert 3.0 off-page duplicates to 3.1 off-page duplicates.
39 *
40 * This code and its descendants should be removed when support for
41 * upgrading from a 3.0 database format is removed.
42 *
43 * PUBLIC: int __db_31_offdup __P((DB *, char *, DB_FH *, int, db_pgno_t *));
44 */
45 int
__db_31_offdup(dbp,real_name,fhp,sorted,pgnop)46 __db_31_offdup(dbp, real_name, fhp, sorted, pgnop)
47 DB *dbp;
48 char *real_name;
49 DB_FH *fhp;
50 int sorted;
51 db_pgno_t *pgnop;
52 {
53 PAGE *ipage, *page;
54 db_indx_t indx;
55 db_pgno_t cur_cnt, i, next_cnt, pgno, *pgno_cur, pgno_last;
56 db_pgno_t *pgno_next, pgno_max, *tmp;
57 db_recno_t nrecs;
58 size_t n;
59 int level, nomem, ret;
60
61 ipage = page = NULL;
62 pgno_cur = pgno_next = NULL;
63
64 /* Allocate room to hold a page. */
65 if ((ret = __os_malloc(dbp->env, dbp->pgsize, &page)) != 0)
66 goto err;
67
68 /*
69 * Walk the chain of 3.0 off-page duplicates. Each one is converted
70 * in place to a 3.1 off-page duplicate page. If the duplicates are
71 * sorted, they are converted to a Btree leaf page, otherwise to a
72 * Recno leaf page.
73 */
74 for (nrecs = 0, cur_cnt = pgno_max = 0,
75 pgno = *pgnop; pgno != PGNO_INVALID;) {
76 if (pgno_max == cur_cnt) {
77 pgno_max += 20;
78 if ((ret = __os_realloc(dbp->env, pgno_max *
79 sizeof(db_pgno_t), &pgno_cur)) != 0)
80 goto err;
81 }
82 pgno_cur[cur_cnt++] = pgno;
83
84 GET_PAGE(dbp, fhp, pgno, page);
85 nrecs += NUM_ENT(page);
86 LEVEL(page) = LEAFLEVEL;
87 TYPE(page) = sorted ? P_LDUP : P_LRECNO;
88 /*
89 * !!!
90 * DB didn't zero the LSNs on off-page duplicates pages.
91 */
92 ZERO_LSN(LSN(page));
93 PUT_PAGE(dbp, fhp, pgno, page);
94
95 pgno = NEXT_PGNO(page);
96 }
97
98 /* If we only have a single page, it's easy. */
99 if (cur_cnt <= 1)
100 goto done;
101
102 /*
103 * pgno_cur is the list of pages we just converted. We're
104 * going to walk that list, but we'll need to create a new
105 * list while we do so.
106 */
107 if ((ret = __os_malloc(dbp->env,
108 cur_cnt * sizeof(db_pgno_t), &pgno_next)) != 0)
109 goto err;
110
111 /* Figure out where we can start allocating new pages. */
112 if ((ret = __db_lastpgno(dbp, real_name, fhp, &pgno_last)) != 0)
113 goto err;
114
115 /* Allocate room for an internal page. */
116 if ((ret = __os_malloc(dbp->env, dbp->pgsize, &ipage)) != 0)
117 goto err;
118 PGNO(ipage) = PGNO_INVALID;
119
120 /*
121 * Repeatedly walk the list of pages, building internal pages, until
122 * there's only one page at a level.
123 */
124 for (level = LEAFLEVEL + 1; cur_cnt > 1; ++level) {
125 for (indx = 0, i = next_cnt = 0; i < cur_cnt;) {
126 if (indx == 0) {
127 P_INIT(ipage, dbp->pgsize, pgno_last,
128 PGNO_INVALID, PGNO_INVALID,
129 level, sorted ? P_IBTREE : P_IRECNO);
130 ZERO_LSN(LSN(ipage));
131
132 pgno_next[next_cnt++] = pgno_last++;
133 }
134
135 GET_PAGE(dbp, fhp, pgno_cur[i], page);
136
137 /*
138 * If the duplicates are sorted, put the first item on
139 * the lower-level page onto a Btree internal page. If
140 * the duplicates are not sorted, create an internal
141 * Recno structure on the page. If either case doesn't
142 * fit, push out the current page and start a new one.
143 */
144 nomem = 0;
145 if (sorted) {
146 if ((ret = __db_build_bi(
147 dbp, fhp, ipage, page, indx, &nomem)) != 0)
148 goto err;
149 } else
150 if ((ret = __db_build_ri(
151 dbp, fhp, ipage, page, indx, &nomem)) != 0)
152 goto err;
153 if (nomem) {
154 indx = 0;
155 PUT_PAGE(dbp, fhp, PGNO(ipage), ipage);
156 } else {
157 ++indx;
158 ++NUM_ENT(ipage);
159 ++i;
160 }
161 }
162
163 /*
164 * Push out the last internal page. Set the top-level record
165 * count if we've reached the top.
166 */
167 if (next_cnt == 1)
168 RE_NREC_SET(ipage, nrecs);
169 PUT_PAGE(dbp, fhp, PGNO(ipage), ipage);
170
171 /* Swap the current and next page number arrays. */
172 cur_cnt = next_cnt;
173 tmp = pgno_cur;
174 pgno_cur = pgno_next;
175 pgno_next = tmp;
176 }
177
178 done: *pgnop = pgno_cur[0];
179
180 err: if (pgno_cur != NULL)
181 __os_free(dbp->env, pgno_cur);
182 if (pgno_next != NULL)
183 __os_free(dbp->env, pgno_next);
184 if (ipage != NULL)
185 __os_free(dbp->env, ipage);
186 if (page != NULL)
187 __os_free(dbp->env, page);
188
189 return (ret);
190 }
191
192 /*
193 * __db_build_bi --
194 * Build a BINTERNAL entry for a parent page.
195 */
196 static int
__db_build_bi(dbp,fhp,ipage,page,indx,nomemp)197 __db_build_bi(dbp, fhp, ipage, page, indx, nomemp)
198 DB *dbp;
199 DB_FH *fhp;
200 PAGE *ipage, *page;
201 u_int32_t indx;
202 int *nomemp;
203 {
204 BINTERNAL bi, *child_bi;
205 BKEYDATA *child_bk;
206 u_int8_t *p;
207 int ret;
208 db_indx_t *inp;
209
210 inp = P_INP(dbp, ipage);
211 switch (TYPE(page)) {
212 case P_IBTREE:
213 child_bi = GET_BINTERNAL(dbp, page, 0);
214 if (P_FREESPACE(dbp, ipage) < BINTERNAL_PSIZE(child_bi->len)) {
215 *nomemp = 1;
216 return (0);
217 }
218 inp[indx] =
219 HOFFSET(ipage) -= BINTERNAL_SIZE(child_bi->len);
220 p = P_ENTRY(dbp, ipage, indx);
221
222 bi.len = child_bi->len;
223 B_TSET(bi.type, child_bi->type);
224 bi.pgno = PGNO(page);
225 bi.nrecs = __bam_total(dbp, page);
226 memcpy(p, &bi, SSZA(BINTERNAL, data));
227 p += SSZA(BINTERNAL, data);
228 memcpy(p, child_bi->data, child_bi->len);
229
230 /* Increment the overflow ref count. */
231 if (B_TYPE(child_bi->type) == B_OVERFLOW)
232 if ((ret = __db_up_ovref(dbp, fhp,
233 ((BOVERFLOW *)(child_bi->data))->pgno)) != 0)
234 return (ret);
235 break;
236 case P_LDUP:
237 child_bk = GET_BKEYDATA(dbp, page, 0);
238 switch (B_TYPE(child_bk->type)) {
239 case B_KEYDATA:
240 if (P_FREESPACE(dbp, ipage) <
241 BINTERNAL_PSIZE(child_bk->len)) {
242 *nomemp = 1;
243 return (0);
244 }
245 inp[indx] =
246 HOFFSET(ipage) -= BINTERNAL_SIZE(child_bk->len);
247 p = P_ENTRY(dbp, ipage, indx);
248
249 bi.len = child_bk->len;
250 B_TSET(bi.type, child_bk->type);
251 bi.pgno = PGNO(page);
252 bi.nrecs = __bam_total(dbp, page);
253 memcpy(p, &bi, SSZA(BINTERNAL, data));
254 p += SSZA(BINTERNAL, data);
255 memcpy(p, child_bk->data, child_bk->len);
256 break;
257 case B_OVERFLOW:
258 if (P_FREESPACE(dbp, ipage) <
259 BINTERNAL_PSIZE(BOVERFLOW_SIZE)) {
260 *nomemp = 1;
261 return (0);
262 }
263 inp[indx] =
264 HOFFSET(ipage) -= BINTERNAL_SIZE(BOVERFLOW_SIZE);
265 p = P_ENTRY(dbp, ipage, indx);
266
267 bi.len = BOVERFLOW_SIZE;
268 B_TSET(bi.type, child_bk->type);
269 bi.pgno = PGNO(page);
270 bi.nrecs = __bam_total(dbp, page);
271 memcpy(p, &bi, SSZA(BINTERNAL, data));
272 p += SSZA(BINTERNAL, data);
273 memcpy(p, child_bk, BOVERFLOW_SIZE);
274
275 /* Increment the overflow ref count. */
276 if ((ret = __db_up_ovref(dbp, fhp,
277 ((BOVERFLOW *)child_bk)->pgno)) != 0)
278 return (ret);
279 break;
280 default:
281 return (__db_pgfmt(dbp->env, PGNO(page)));
282 }
283 break;
284 default:
285 return (__db_pgfmt(dbp->env, PGNO(page)));
286 }
287
288 return (0);
289 }
290
291 /*
292 * __db_build_ri --
293 * Build a RINTERNAL entry for an internal parent page.
294 */
295 static int
__db_build_ri(dbp,fhp,ipage,page,indx,nomemp)296 __db_build_ri(dbp, fhp, ipage, page, indx, nomemp)
297 DB *dbp;
298 DB_FH *fhp;
299 PAGE *ipage, *page;
300 u_int32_t indx;
301 int *nomemp;
302 {
303 RINTERNAL ri;
304 db_indx_t *inp;
305
306 COMPQUIET(fhp, NULL);
307 inp = P_INP(dbp, ipage);
308 if (P_FREESPACE(dbp, ipage) < RINTERNAL_PSIZE) {
309 *nomemp = 1;
310 return (0);
311 }
312
313 ri.pgno = PGNO(page);
314 ri.nrecs = __bam_total(dbp, page);
315 inp[indx] = HOFFSET(ipage) -= RINTERNAL_SIZE;
316 memcpy(P_ENTRY(dbp, ipage, indx), &ri, RINTERNAL_SIZE);
317
318 return (0);
319 }
320
321 /*
322 * __db_up_ovref --
323 * Increment the reference count on an overflow page.
324 */
325 static int
__db_up_ovref(dbp,fhp,pgno)326 __db_up_ovref(dbp, fhp, pgno)
327 DB *dbp;
328 DB_FH *fhp;
329 db_pgno_t pgno;
330 {
331 PAGE *page;
332 size_t n;
333 int ret;
334
335 /* Allocate room to hold a page. */
336 if ((ret = __os_malloc(dbp->env, dbp->pgsize, &page)) != 0)
337 return (ret);
338
339 GET_PAGE(dbp, fhp, pgno, page);
340 ++OV_REF(page);
341 PUT_PAGE(dbp, fhp, pgno, page);
342
343 err: __os_free(dbp->env, page);
344
345 return (ret);
346 }
347